gcc/config/sparc/sparc.c
/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2013 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "gimple.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;
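
  /* Worked example (illustrative commentary, not in the original
     source): on a CPU with int_mul = COSTS_N_INSNS (4) and
     int_mul_bit_factor = 2 (the ultrasparc entry below), a multiply
     whose operand has its highest set bit at position 11 is costed as

	COSTS_N_INSNS (4) + (11 - 3) / 2

     by the formula above, i.e. the base multiply cost plus 4 extra
     cost units.  */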

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
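
/* Illustrative reading of the table above (added commentary, not in
   the original source): in a leaf function the incoming registers are
   renumbered to the corresponding output registers, so entry 24 (%i0)
   maps to hard register 8 (%o0), while entries holding -1, such as
   the locals %l0-%l7, are registers a leaf function may not use.  */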

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
static enum machine_mode sparc_cstore_mode (enum insn_code icode);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static bool
sparc_gate_work_around_errata (void)
{
  /* The only errata we handle are those of the AT697F and UT699.  */
  return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
}

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  /* If the insn has a delay slot, then it cannot be problematic.  */
	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			 ld [address], %fx+1
			 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			 ld [address], %fx+1
			 FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		     ld [address], %fx+1
		     fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  /* If the insn has a delay slot, then it cannot be problematic.  */
	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if ((mem = mem_ref (dest)) != NULL_RTX
		       && GET_MODE_SIZE (GET_MODE (mem)) <= 4
		       && (src == const0_rtx
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
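
/* A hedged illustration of the AT697F transformation above (register
   numbers and the address are made up for the example): given

	ld	[%i0], %f3	! single-word load into odd FP reg %f3
	fmuld	%f2, %f4, %f2	! reads and writes the enclosing pair %f2

   the pass inserts a nop between the two instructions, since the load
   into %f3 together with the double-precision use of %f2 (the
   enclosing register, x above) matches Case [1-4].  */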

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_rtl_sharing, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return sparc_gate_work_around_errata (); }
  unsigned int execute () { return sparc_do_work_around_errata (); }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }
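
  /* Illustrative example (added commentary, not in the original
     source): a debug string of "all,!options" first sets every
     MASK_DEBUG_* bit via MASK_DEBUG_ALL and then clears
     MASK_DEBUG_OPTIONS again, since a leading '!' inverts the named
     mask in the loop above.  */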

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass, /* pass */
      "dbr", /* reference_pass_name */
      1, /* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER /* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
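
/* Illustrative examples for the three predicates above (added
   commentary, not in the original source): the SFmode constant 1.0f
   has bit pattern 0x3f800000, which does not fit a 13-bit signed
   immediate but has its low 10 bits clear, so fp_sethi_p accepts it.
   A pattern such as 0x00000123 (a denormal) fits SIMM13 and is
   accepted by fp_mov_p, while something like 0x3f800001 satisfies
   neither test and is left to fp_high_losum_p.  */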

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

1757 /* Load OP1, a 32-bit constant, into OP0, a register.
1758 We know it can't be done in one insn when we get
1759 here; the move expander guarantees this. */
1760
1761 static void
1762 sparc_emit_set_const32 (rtx op0, rtx op1)
1763 {
1764 enum machine_mode mode = GET_MODE (op0);
1765 rtx temp = op0;
1766
1767 if (can_create_pseudo_p ())
1768 temp = gen_reg_rtx (mode);
1769
1770 if (GET_CODE (op1) == CONST_INT)
1771 {
1772 gcc_assert (!small_int_operand (op1, mode)
1773 && !const_high_operand (op1, mode));
1774
1775 /* Emit them as real moves instead of a HIGH/LO_SUM,
1776 this way CSE can see everything and reuse intermediate
1777 values if it wants. */
1778 emit_insn (gen_rtx_SET (VOIDmode, temp,
1779 GEN_INT (INTVAL (op1)
1780 & ~(HOST_WIDE_INT)0x3ff)));
1781
1782 emit_insn (gen_rtx_SET (VOIDmode,
1783 op0,
1784 gen_rtx_IOR (mode, temp,
1785 GEN_INT (INTVAL (op1) & 0x3ff))));
1786 }
1787 else
1788 {
1789 /* A symbol, emit in the traditional way. */
1790 emit_insn (gen_rtx_SET (VOIDmode, temp,
1791 gen_rtx_HIGH (mode, op1)));
1792 emit_insn (gen_rtx_SET (VOIDmode,
1793 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1794 }
1795 }
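/* As a worked illustration (the constant and register names here are
   examples only): sparc_emit_set_const32 lowers 0x12345678 into

	sethi	%hi(0x12345678), %temp	! %temp = 0x12345400
	or	%temp, 0x278, %reg	! %reg  = 0x12345678

   since the first insn materializes INTVAL & ~0x3ff and the second
   ors in INTVAL & 0x3ff (0x278 here).  */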
1796
1797 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1798 If TEMP is nonzero, we are forbidden to use any other scratch
1799 registers. Otherwise, we are allowed to generate them as needed.
1800
1801 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1802 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1803
1804 void
1805 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1806 {
1807 rtx temp1, temp2, temp3, temp4, temp5;
1808 rtx ti_temp = 0;
1809
1810 if (temp && GET_MODE (temp) == TImode)
1811 {
1812 ti_temp = temp;
1813 temp = gen_rtx_REG (DImode, REGNO (temp));
1814 }
1815
1816 /* SPARC-V9 code-model support. */
1817 switch (sparc_cmodel)
1818 {
1819 case CM_MEDLOW:
1820 /* The range spanned by all instructions in the object is less
1821 than 2^31 bytes (2GB) and the distance from any instruction
1822 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1823 than 2^31 bytes (2GB).
1824
1825 The executable must be in the low 4TB of the virtual address
1826 space.
1827
1828 sethi %hi(symbol), %temp1
1829 or %temp1, %lo(symbol), %reg */
1830 if (temp)
1831 temp1 = temp; /* op0 is allowed. */
1832 else
1833 temp1 = gen_reg_rtx (DImode);
1834
1835 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1836 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1837 break;
1838
1839 case CM_MEDMID:
1840 /* The range spanned by all instructions in the object is less
1841 than 2^31 bytes (2GB) and the distance from any instruction
1842 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1843 than 2^31 bytes (2GB).
1844
1845 The executable must be in the low 16TB of the virtual address
1846 space.
1847
1848 sethi %h44(symbol), %temp1
1849 or %temp1, %m44(symbol), %temp2
1850 sllx %temp2, 12, %temp3
1851 or %temp3, %l44(symbol), %reg */
1852 if (temp)
1853 {
1854 temp1 = op0;
1855 temp2 = op0;
1856 temp3 = temp; /* op0 is allowed. */
1857 }
1858 else
1859 {
1860 temp1 = gen_reg_rtx (DImode);
1861 temp2 = gen_reg_rtx (DImode);
1862 temp3 = gen_reg_rtx (DImode);
1863 }
1864
1865 emit_insn (gen_seth44 (temp1, op1));
1866 emit_insn (gen_setm44 (temp2, temp1, op1));
1867 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1868 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1869 emit_insn (gen_setl44 (op0, temp3, op1));
1870 break;
1871
1872 case CM_MEDANY:
1873 /* The range spanned by all instructions in the object is less
1874 than 2^31 bytes (2GB) and the distance from any instruction
1875 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1876 than 2^31 bytes (2GB).
1877
1878 The executable can be placed anywhere in the virtual address
1879 space.
1880
1881 sethi %hh(symbol), %temp1
1882 sethi %lm(symbol), %temp2
1883 or %temp1, %hm(symbol), %temp3
1884 sllx %temp3, 32, %temp4
1885 or %temp4, %temp2, %temp5
1886 or %temp5, %lo(symbol), %reg */
1887 if (temp)
1888 {
1889 /* It is possible that one of the registers we got for operands[2]
1890 might coincide with that of operands[0] (which is why we made
1891 it TImode). Pick the other one to use as our scratch. */
1892 if (rtx_equal_p (temp, op0))
1893 {
1894 gcc_assert (ti_temp);
1895 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1896 }
1897 temp1 = op0;
1898 temp2 = temp; /* op0 is _not_ allowed, see above. */
1899 temp3 = op0;
1900 temp4 = op0;
1901 temp5 = op0;
1902 }
1903 else
1904 {
1905 temp1 = gen_reg_rtx (DImode);
1906 temp2 = gen_reg_rtx (DImode);
1907 temp3 = gen_reg_rtx (DImode);
1908 temp4 = gen_reg_rtx (DImode);
1909 temp5 = gen_reg_rtx (DImode);
1910 }
1911
1912 emit_insn (gen_sethh (temp1, op1));
1913 emit_insn (gen_setlm (temp2, op1));
1914 emit_insn (gen_sethm (temp3, temp1, op1));
1915 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1916 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1917 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1918 gen_rtx_PLUS (DImode, temp4, temp2)));
1919 emit_insn (gen_setlo (op0, temp5, op1));
1920 break;
1921
1922 case CM_EMBMEDANY:
1923 /* Old old old backwards-compatibility cruft here.
1924 Essentially it is MEDLOW with a fixed 64-bit
1925 virtual base added to all data segment addresses.
1926 Text-segment stuff is computed like MEDANY, we can't
1927 reuse the code above because the relocation knobs
1928 look different.
1929
1930 Data segment: sethi %hi(symbol), %temp1
1931 add %temp1, EMBMEDANY_BASE_REG, %temp2
1932 or %temp2, %lo(symbol), %reg */
1933 if (data_segment_operand (op1, GET_MODE (op1)))
1934 {
1935 if (temp)
1936 {
1937 temp1 = temp; /* op0 is allowed. */
1938 temp2 = op0;
1939 }
1940 else
1941 {
1942 temp1 = gen_reg_rtx (DImode);
1943 temp2 = gen_reg_rtx (DImode);
1944 }
1945
1946 emit_insn (gen_embmedany_sethi (temp1, op1));
1947 emit_insn (gen_embmedany_brsum (temp2, temp1));
1948 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1949 }
1950
1951 /* Text segment: sethi %uhi(symbol), %temp1
1952 sethi %hi(symbol), %temp2
1953 or %temp1, %ulo(symbol), %temp3
1954 sllx %temp3, 32, %temp4
1955 or %temp4, %temp2, %temp5
1956 or %temp5, %lo(symbol), %reg */
1957 else
1958 {
1959 if (temp)
1960 {
1961 /* It is possible that one of the registers we got for operands[2]
1962 might coincide with that of operands[0] (which is why we made
1963 it TImode). Pick the other one to use as our scratch. */
1964 if (rtx_equal_p (temp, op0))
1965 {
1966 gcc_assert (ti_temp);
1967 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1968 }
1969 temp1 = op0;
1970 temp2 = temp; /* op0 is _not_ allowed, see above. */
1971 temp3 = op0;
1972 temp4 = op0;
1973 temp5 = op0;
1974 }
1975 else
1976 {
1977 temp1 = gen_reg_rtx (DImode);
1978 temp2 = gen_reg_rtx (DImode);
1979 temp3 = gen_reg_rtx (DImode);
1980 temp4 = gen_reg_rtx (DImode);
1981 temp5 = gen_reg_rtx (DImode);
1982 }
1983
1984 emit_insn (gen_embmedany_textuhi (temp1, op1));
1985 emit_insn (gen_embmedany_texthi (temp2, op1));
1986 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1987 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1988 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1989 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1990 gen_rtx_PLUS (DImode, temp4, temp2)));
1991 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1992 }
1993 break;
1994
1995 default:
1996 gcc_unreachable ();
1997 }
1998 }
1999
2000 #if HOST_BITS_PER_WIDE_INT == 32
2001 static void
2002 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2003 {
2004 gcc_unreachable ();
2005 }
2006 #else
2007 /* These avoid problems when cross compiling. If we do not
2008 go through all this hair then the optimizer will see
2009 invalid REG_EQUAL notes or in some cases none at all. */
2010 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2011 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2012 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2013 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2014
2015 /* The optimizer is not allowed to assume anything about exactly
2016 which bits are set for a HIGH; they are unspecified.
2017 Unfortunately this leads to many missed optimizations
2018 during CSE. We mask out the non-HIGH bits so that the result
2019 matches a plain movdi, to alleviate this problem. */
2020 static rtx
2021 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2022 {
2023 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2024 }
2025
2026 static rtx
2027 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2028 {
2029 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2030 }
2031
2032 static rtx
2033 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2034 {
2035 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2036 }
2037
2038 static rtx
2039 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2040 {
2041 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2042 }
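/* A sketch of how the helpers above get used: gen_safe_HIGH64 (reg,
   0x12345678) yields (set reg (const_int 0x12345400)), i.e. exactly
   the bits a sethi would set, so CSE can reason about the value even
   though the insn still matches a plain movdi.  */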
2043
2044 /* Worker routines for 64-bit constant formation on arch64.
2045 One of the key things to do in these emissions is
2046 to create as many temp REGs as possible. This makes it
2047 possible for half-built constants to be reused later
2048 when similar values are required again.
2049 Without doing this, the optimizer cannot see such
2050 opportunities. */
2051
2052 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2053 unsigned HOST_WIDE_INT, int);
2054
2055 static void
2056 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2057 unsigned HOST_WIDE_INT low_bits, int is_neg)
2058 {
2059 unsigned HOST_WIDE_INT high_bits;
2060
2061 if (is_neg)
2062 high_bits = (~low_bits) & 0xffffffff;
2063 else
2064 high_bits = low_bits;
2065
2066 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2067 if (!is_neg)
2068 {
2069 emit_insn (gen_rtx_SET (VOIDmode, op0,
2070 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2071 }
2072 else
2073 {
2074 /* If we are XOR'ing with -1, then we should emit a one's complement
2075 instead. This way the combiner will notice logical operations
2076 such as ANDN later on and substitute. */
2077 if ((low_bits & 0x3ff) == 0x3ff)
2078 {
2079 emit_insn (gen_rtx_SET (VOIDmode, op0,
2080 gen_rtx_NOT (DImode, temp)));
2081 }
2082 else
2083 {
2084 emit_insn (gen_rtx_SET (VOIDmode, op0,
2085 gen_safe_XOR64 (temp,
2086 (-(HOST_WIDE_INT)0x400
2087 | (low_bits & 0x3ff)))));
2088 }
2089 }
2090 }
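/* Worked example for the is_neg path above (the constant is
   illustrative): to load 0xfffffffffffff123, low_bits is 0xfffff123,
   so high_bits becomes ~low_bits & 0xffffffff = 0xedc and we emit

	sethi	%hi(0xedc), %temp		! %temp = 0xc00
	xor	%temp, -0x400 | 0x123, %reg	! %reg  = 0xfffffffffffff123

   The xor flips every bit from bit 10 upward, recreating the
   sign-extended high part for free.  */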
2091
2092 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2093 unsigned HOST_WIDE_INT, int);
2094
2095 static void
2096 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2097 unsigned HOST_WIDE_INT high_bits,
2098 unsigned HOST_WIDE_INT low_immediate,
2099 int shift_count)
2100 {
2101 rtx temp2 = op0;
2102
2103 if ((high_bits & 0xfffffc00) != 0)
2104 {
2105 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2106 if ((high_bits & ~0xfffffc00) != 0)
2107 emit_insn (gen_rtx_SET (VOIDmode, op0,
2108 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2109 else
2110 temp2 = temp;
2111 }
2112 else
2113 {
2114 emit_insn (gen_safe_SET64 (temp, high_bits));
2115 temp2 = temp;
2116 }
2117
2118 /* Now shift it up into place. */
2119 emit_insn (gen_rtx_SET (VOIDmode, op0,
2120 gen_rtx_ASHIFT (DImode, temp2,
2121 GEN_INT (shift_count))));
2122
2123 /* If there is a low immediate part piece, finish up by
2124 putting that in as well. */
2125 if (low_immediate != 0)
2126 emit_insn (gen_rtx_SET (VOIDmode, op0,
2127 gen_safe_OR64 (op0, low_immediate)));
2128 }
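/* For instance (an illustrative constant), quick2 builds
   0x1234567800000000 from high_bits = 0x12345678 and shift_count = 32:

	sethi	%hi(0x12345678), %temp	! %temp = 0x12345400
	or	%temp, 0x278, %reg	! %reg  = 0x12345678
	sllx	%reg, 32, %reg		! %reg  = 0x1234567800000000

   with a trailing or emitted only when low_immediate is nonzero.  */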
2129
2130 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2131 unsigned HOST_WIDE_INT);
2132
2133 /* Full 64-bit constant decomposition. Even though this is the
2134 'worst' case, we still optimize a few things away. */
2135 static void
2136 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2137 unsigned HOST_WIDE_INT high_bits,
2138 unsigned HOST_WIDE_INT low_bits)
2139 {
2140 rtx sub_temp = op0;
2141
2142 if (can_create_pseudo_p ())
2143 sub_temp = gen_reg_rtx (DImode);
2144
2145 if ((high_bits & 0xfffffc00) != 0)
2146 {
2147 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2148 if ((high_bits & ~0xfffffc00) != 0)
2149 emit_insn (gen_rtx_SET (VOIDmode,
2150 sub_temp,
2151 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2152 else
2153 sub_temp = temp;
2154 }
2155 else
2156 {
2157 emit_insn (gen_safe_SET64 (temp, high_bits));
2158 sub_temp = temp;
2159 }
2160
2161 if (can_create_pseudo_p ())
2162 {
2163 rtx temp2 = gen_reg_rtx (DImode);
2164 rtx temp3 = gen_reg_rtx (DImode);
2165 rtx temp4 = gen_reg_rtx (DImode);
2166
2167 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2168 gen_rtx_ASHIFT (DImode, sub_temp,
2169 GEN_INT (32))));
2170
2171 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2172 if ((low_bits & ~0xfffffc00) != 0)
2173 {
2174 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2175 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2176 emit_insn (gen_rtx_SET (VOIDmode, op0,
2177 gen_rtx_PLUS (DImode, temp4, temp3)));
2178 }
2179 else
2180 {
2181 emit_insn (gen_rtx_SET (VOIDmode, op0,
2182 gen_rtx_PLUS (DImode, temp4, temp2)));
2183 }
2184 }
2185 else
2186 {
2187 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2188 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2189 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2190 int to_shift = 12;
2191
2192 /* We are in the middle of reload, so this is really
2193 painful. However, we do still make an attempt to
2194 avoid emitting truly stupid code. */
2195 if (low1 != const0_rtx)
2196 {
2197 emit_insn (gen_rtx_SET (VOIDmode, op0,
2198 gen_rtx_ASHIFT (DImode, sub_temp,
2199 GEN_INT (to_shift))));
2200 emit_insn (gen_rtx_SET (VOIDmode, op0,
2201 gen_rtx_IOR (DImode, op0, low1)));
2202 sub_temp = op0;
2203 to_shift = 12;
2204 }
2205 else
2206 {
2207 to_shift += 12;
2208 }
2209 if (low2 != const0_rtx)
2210 {
2211 emit_insn (gen_rtx_SET (VOIDmode, op0,
2212 gen_rtx_ASHIFT (DImode, sub_temp,
2213 GEN_INT (to_shift))));
2214 emit_insn (gen_rtx_SET (VOIDmode, op0,
2215 gen_rtx_IOR (DImode, op0, low2)));
2216 sub_temp = op0;
2217 to_shift = 8;
2218 }
2219 else
2220 {
2221 to_shift += 8;
2222 }
2223 emit_insn (gen_rtx_SET (VOIDmode, op0,
2224 gen_rtx_ASHIFT (DImode, sub_temp,
2225 GEN_INT (to_shift))));
2226 if (low3 != const0_rtx)
2227 emit_insn (gen_rtx_SET (VOIDmode, op0,
2228 gen_rtx_IOR (DImode, op0, low3)));
2229 /* phew... */
2230 }
2231 }
2232
2233 /* Analyze a 64-bit constant for certain properties. */
2234 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2235 unsigned HOST_WIDE_INT,
2236 int *, int *, int *);
2237
2238 static void
2239 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2240 unsigned HOST_WIDE_INT low_bits,
2241 int *hbsp, int *lbsp, int *abbasp)
2242 {
2243 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2244 int i;
2245
2246 lowest_bit_set = highest_bit_set = -1;
2247 i = 0;
2248 do
2249 {
2250 if ((lowest_bit_set == -1)
2251 && ((low_bits >> i) & 1))
2252 lowest_bit_set = i;
2253 if ((highest_bit_set == -1)
2254 && ((high_bits >> (32 - i - 1)) & 1))
2255 highest_bit_set = (64 - i - 1);
2256 }
2257 while (++i < 32
2258 && ((highest_bit_set == -1)
2259 || (lowest_bit_set == -1)));
2260 if (i == 32)
2261 {
2262 i = 0;
2263 do
2264 {
2265 if ((lowest_bit_set == -1)
2266 && ((high_bits >> i) & 1))
2267 lowest_bit_set = i + 32;
2268 if ((highest_bit_set == -1)
2269 && ((low_bits >> (32 - i - 1)) & 1))
2270 highest_bit_set = 32 - i - 1;
2271 }
2272 while (++i < 32
2273 && ((highest_bit_set == -1)
2274 || (lowest_bit_set == -1)));
2275 }
2276 /* If there are no bits set this should have gone out
2277 as one instruction! */
2278 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2279 all_bits_between_are_set = 1;
2280 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2281 {
2282 if (i < 32)
2283 {
2284 if ((low_bits & (1 << i)) != 0)
2285 continue;
2286 }
2287 else
2288 {
2289 if ((high_bits & (1 << (i - 32))) != 0)
2290 continue;
2291 }
2292 all_bits_between_are_set = 0;
2293 break;
2294 }
2295 *hbsp = highest_bit_set;
2296 *lbsp = lowest_bit_set;
2297 *abbasp = all_bits_between_are_set;
2298 }
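/* Example of the analysis above (an illustrative input): for the
   constant 0x0000003fc0000000 we have low_bits = 0xc0000000 and
   high_bits = 0x0000003f, so lowest_bit_set = 30,
   highest_bit_set = 37 and all_bits_between_are_set = 1, making it
   a perfect candidate for a mov/sllx pair.  */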
2299
2300 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2301
2302 static int
2303 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2304 unsigned HOST_WIDE_INT low_bits)
2305 {
2306 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2307
2308 if (high_bits == 0
2309 || high_bits == 0xffffffff)
2310 return 1;
2311
2312 analyze_64bit_constant (high_bits, low_bits,
2313 &highest_bit_set, &lowest_bit_set,
2314 &all_bits_between_are_set);
2315
2316 if ((highest_bit_set == 63
2317 || lowest_bit_set == 0)
2318 && all_bits_between_are_set != 0)
2319 return 1;
2320
2321 if ((highest_bit_set - lowest_bit_set) < 21)
2322 return 1;
2323
2324 return 0;
2325 }
2326
2327 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2328 unsigned HOST_WIDE_INT,
2329 int, int);
2330
2331 static unsigned HOST_WIDE_INT
2332 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2333 unsigned HOST_WIDE_INT low_bits,
2334 int lowest_bit_set, int shift)
2335 {
2336 HOST_WIDE_INT hi, lo;
2337
2338 if (lowest_bit_set < 32)
2339 {
2340 lo = (low_bits >> lowest_bit_set) << shift;
2341 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2342 }
2343 else
2344 {
2345 lo = 0;
2346 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2347 }
2348 gcc_assert (! (hi & lo));
2349 return (hi | lo);
2350 }
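/* Continuing the example used for analyze_64bit_constant: for
   0x0000003fc0000000 with lowest_bit_set = 30 and shift = 0, the low
   word contributes 0xc0000000 >> 30 = 0x3 and the high word
   0x3f << 2 = 0xfc, giving focus bits 0xff.  SPARC_SIMM13_P accepts
   that, so the 2-insn path below emits

	mov	0xff, %reg
	sllx	%reg, 30, %reg  */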
2351
2352 /* Here we are sure to be arch64 and this is an integer constant
2353 being loaded into a register. Emit the most efficient
2354 insn sequence possible. Detection of all the 1-insn cases
2355 has been done already. */
2356 static void
2357 sparc_emit_set_const64 (rtx op0, rtx op1)
2358 {
2359 unsigned HOST_WIDE_INT high_bits, low_bits;
2360 int lowest_bit_set, highest_bit_set;
2361 int all_bits_between_are_set;
2362 rtx temp = 0;
2363
2364 /* Sanity check that we know what we are working with. */
2365 gcc_assert (TARGET_ARCH64
2366 && (GET_CODE (op0) == SUBREG
2367 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2368
2369 if (! can_create_pseudo_p ())
2370 temp = op0;
2371
2372 if (GET_CODE (op1) != CONST_INT)
2373 {
2374 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2375 return;
2376 }
2377
2378 if (! temp)
2379 temp = gen_reg_rtx (DImode);
2380
2381 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2382 low_bits = (INTVAL (op1) & 0xffffffff);
2383
2384 /* low_bits bits 0 --> 31
2385 high_bits bits 32 --> 63 */
2386
2387 analyze_64bit_constant (high_bits, low_bits,
2388 &highest_bit_set, &lowest_bit_set,
2389 &all_bits_between_are_set);
2390
2391 /* First try for a 2-insn sequence. */
2392
2393 /* These situations are preferred because the optimizer can
2394 * do more things with them:
2395 * 1) mov -1, %reg
2396 * sllx %reg, shift, %reg
2397 * 2) mov -1, %reg
2398 * srlx %reg, shift, %reg
2399 * 3) mov some_small_const, %reg
2400 * sllx %reg, shift, %reg
2401 */
2402 if (((highest_bit_set == 63
2403 || lowest_bit_set == 0)
2404 && all_bits_between_are_set != 0)
2405 || ((highest_bit_set - lowest_bit_set) < 12))
2406 {
2407 HOST_WIDE_INT the_const = -1;
2408 int shift = lowest_bit_set;
2409
2410 if ((highest_bit_set != 63
2411 && lowest_bit_set != 0)
2412 || all_bits_between_are_set == 0)
2413 {
2414 the_const =
2415 create_simple_focus_bits (high_bits, low_bits,
2416 lowest_bit_set, 0);
2417 }
2418 else if (lowest_bit_set == 0)
2419 shift = -(63 - highest_bit_set);
2420
2421 gcc_assert (SPARC_SIMM13_P (the_const));
2422 gcc_assert (shift != 0);
2423
2424 emit_insn (gen_safe_SET64 (temp, the_const));
2425 if (shift > 0)
2426 emit_insn (gen_rtx_SET (VOIDmode,
2427 op0,
2428 gen_rtx_ASHIFT (DImode,
2429 temp,
2430 GEN_INT (shift))));
2431 else if (shift < 0)
2432 emit_insn (gen_rtx_SET (VOIDmode,
2433 op0,
2434 gen_rtx_LSHIFTRT (DImode,
2435 temp,
2436 GEN_INT (-shift))));
2437 return;
2438 }
2439
2440 /* Now a range of 22 or fewer bits set somewhere.
2441 * 1) sethi %hi(focus_bits), %reg
2442 * sllx %reg, shift, %reg
2443 * 2) sethi %hi(focus_bits), %reg
2444 * srlx %reg, shift, %reg
2445 */
2446 if ((highest_bit_set - lowest_bit_set) < 21)
2447 {
2448 unsigned HOST_WIDE_INT focus_bits =
2449 create_simple_focus_bits (high_bits, low_bits,
2450 lowest_bit_set, 10);
2451
2452 gcc_assert (SPARC_SETHI_P (focus_bits));
2453 gcc_assert (lowest_bit_set != 10);
2454
2455 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2456
2457 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2458 if (lowest_bit_set < 10)
2459 emit_insn (gen_rtx_SET (VOIDmode,
2460 op0,
2461 gen_rtx_LSHIFTRT (DImode, temp,
2462 GEN_INT (10 - lowest_bit_set))));
2463 else if (lowest_bit_set > 10)
2464 emit_insn (gen_rtx_SET (VOIDmode,
2465 op0,
2466 gen_rtx_ASHIFT (DImode, temp,
2467 GEN_INT (lowest_bit_set - 10))));
2468 return;
2469 }
2470
2471 /* 1) sethi %hi(low_bits), %reg
2472 * or %reg, %lo(low_bits), %reg
2473 * 2) sethi %hi(~low_bits), %reg
2474 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2475 */
2476 if (high_bits == 0
2477 || high_bits == 0xffffffff)
2478 {
2479 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2480 (high_bits == 0xffffffff));
2481 return;
2482 }
2483
2484 /* Now, try 3-insn sequences. */
2485
2486 /* 1) sethi %hi(high_bits), %reg
2487 * or %reg, %lo(high_bits), %reg
2488 * sllx %reg, 32, %reg
2489 */
2490 if (low_bits == 0)
2491 {
2492 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2493 return;
2494 }
2495
2496 /* We may be able to do something quick
2497 when the constant is negated, so try that. */
2498 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2499 (~low_bits) & 0xfffffc00))
2500 {
2501 /* NOTE: The trailing bits get XOR'd so we need the
2502 non-negated bits, not the negated ones. */
2503 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2504
2505 if ((((~high_bits) & 0xffffffff) == 0
2506 && ((~low_bits) & 0x80000000) == 0)
2507 || (((~high_bits) & 0xffffffff) == 0xffffffff
2508 && ((~low_bits) & 0x80000000) != 0))
2509 {
2510 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2511
2512 if ((SPARC_SETHI_P (fast_int)
2513 && (~high_bits & 0xffffffff) == 0)
2514 || SPARC_SIMM13_P (fast_int))
2515 emit_insn (gen_safe_SET64 (temp, fast_int));
2516 else
2517 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2518 }
2519 else
2520 {
2521 rtx negated_const;
2522 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2523 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2524 sparc_emit_set_const64 (temp, negated_const);
2525 }
2526
2527 /* If we are XOR'ing with -1, then we should emit a one's complement
2528 instead. This way the combiner will notice logical operations
2529 such as ANDN later on and substitute. */
2530 if (trailing_bits == 0x3ff)
2531 {
2532 emit_insn (gen_rtx_SET (VOIDmode, op0,
2533 gen_rtx_NOT (DImode, temp)));
2534 }
2535 else
2536 {
2537 emit_insn (gen_rtx_SET (VOIDmode,
2538 op0,
2539 gen_safe_XOR64 (temp,
2540 (-0x400 | trailing_bits))));
2541 }
2542 return;
2543 }
2544
2545 /* 1) sethi %hi(xxx), %reg
2546 * or %reg, %lo(xxx), %reg
2547 * sllx %reg, yyy, %reg
2548 *
2549 * ??? This is just a generalized version of the low_bits==0
2550 * thing above, FIXME...
2551 */
2552 if ((highest_bit_set - lowest_bit_set) < 32)
2553 {
2554 unsigned HOST_WIDE_INT focus_bits =
2555 create_simple_focus_bits (high_bits, low_bits,
2556 lowest_bit_set, 0);
2557
2558 /* We can't get here in this state. */
2559 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2560
2561 /* So what we know is that the set bits straddle the
2562 middle of the 64-bit word. */
2563 sparc_emit_set_const64_quick2 (op0, temp,
2564 focus_bits, 0,
2565 lowest_bit_set);
2566 return;
2567 }
2568
2569 /* 1) sethi %hi(high_bits), %reg
2570 * or %reg, %lo(high_bits), %reg
2571 * sllx %reg, 32, %reg
2572 * or %reg, low_bits, %reg
2573 */
2574 if (SPARC_SIMM13_P (low_bits)
2575 && ((int)low_bits > 0))
2576 {
2577 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2578 return;
2579 }
2580
2581 /* The easiest way when all else fails is full decomposition. */
2582 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2583 }
2584 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2585
2586 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2587 return the mode to be used for the comparison. For floating-point,
2588 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2589 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2590 processing is needed. */
2591
2592 enum machine_mode
2593 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2594 {
2595 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2596 {
2597 switch (op)
2598 {
2599 case EQ:
2600 case NE:
2601 case UNORDERED:
2602 case ORDERED:
2603 case UNLT:
2604 case UNLE:
2605 case UNGT:
2606 case UNGE:
2607 case UNEQ:
2608 case LTGT:
2609 return CCFPmode;
2610
2611 case LT:
2612 case LE:
2613 case GT:
2614 case GE:
2615 return CCFPEmode;
2616
2617 default:
2618 gcc_unreachable ();
2619 }
2620 }
2621 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2622 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2623 {
2624 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2625 return CCX_NOOVmode;
2626 else
2627 return CC_NOOVmode;
2628 }
2629 else
2630 {
2631 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2632 return CCXmode;
2633 else
2634 return CCmode;
2635 }
2636 }
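/* Two examples of the selection above: comparing (plus:SI a b)
   against zero yields CC_NOOVmode, the "overflow bit not valid"
   variant used for addcc-style insns, while a DFmode LT comparison
   yields CCFPEmode, the variant that may signal on unordered
   operands; the NaN-tolerant codes such as UNLT get plain CCFPmode.  */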
2637
2638 /* Emit the compare insn and return the CC reg for a CODE comparison
2639 with operands X and Y. */
2640
2641 static rtx
2642 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2643 {
2644 enum machine_mode mode;
2645 rtx cc_reg;
2646
2647 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2648 return x;
2649
2650 mode = SELECT_CC_MODE (code, x, y);
2651
2652 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2653 fcc regs (cse can't tell they're really call clobbered regs and will
2654 remove a duplicate comparison even if there is an intervening function
2655 call - it will then try to reload the cc reg via an int reg which is why
2656 we need the movcc patterns). It is possible to provide the movcc
2657 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2658 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2659 to tell cse that CCFPE mode registers (even pseudos) are call
2660 clobbered. */
2661
2662 /* ??? This is an experiment. Rather than making changes to cse which may
2663 or may not be easy/clean, we do our own cse. This is possible because
2664 we will generate hard registers. Cse knows they're call clobbered (it
2665 doesn't know the same thing about pseudos). If we guess wrong, no big
2666 deal, but if we win, great! */
2667
2668 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2669 #if 1 /* experiment */
2670 {
2671 int reg;
2672 /* We cycle through the registers to ensure they're all exercised. */
2673 static int next_fcc_reg = 0;
2674 /* Previous x,y for each fcc reg. */
2675 static rtx prev_args[4][2];
2676
2677 /* Scan prev_args for x,y. */
2678 for (reg = 0; reg < 4; reg++)
2679 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2680 break;
2681 if (reg == 4)
2682 {
2683 reg = next_fcc_reg;
2684 prev_args[reg][0] = x;
2685 prev_args[reg][1] = y;
2686 next_fcc_reg = (next_fcc_reg + 1) & 3;
2687 }
2688 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2689 }
2690 #else
2691 cc_reg = gen_reg_rtx (mode);
2692 #endif /* ! experiment */
2693 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2694 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2695 else
2696 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2697
2698 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2699 will only result in an unrecognizable insn, so there is no point in asserting. */
2700 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2701
2702 return cc_reg;
2703 }
2704
2705
2706 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2707
2708 rtx
2709 gen_compare_reg (rtx cmp)
2710 {
2711 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2712 }
2713
2714 /* This function is used for v9 only.
2715 DEST is the target of the Scc insn.
2716 CODE is the code for an Scc's comparison.
2717 X and Y are the values we compare.
2718
2719 This function is needed to turn
2720
2721 (set (reg:SI 110)
2722 (gt (reg:CCX 100 %icc)
2723 (const_int 0)))
2724 into
2725 (set (reg:SI 110)
2726 (gt:DI (reg:CCX 100 %icc)
2727 (const_int 0)))
2728
2729 I.e., the instruction recognizer needs to see the mode of the comparison to
2730 find the right instruction. We could use "gt:DI" right in the
2731 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2732
2733 static int
2734 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2735 {
2736 if (! TARGET_ARCH64
2737 && (GET_MODE (x) == DImode
2738 || GET_MODE (dest) == DImode))
2739 return 0;
2740
2741 /* Try to use the movrCC insns. */
2742 if (TARGET_ARCH64
2743 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2744 && y == const0_rtx
2745 && v9_regcmp_p (compare_code))
2746 {
2747 rtx op0 = x;
2748 rtx temp;
2749
2750 /* Special case for op0 != 0. This can be done with one instruction if
2751 dest == x. */
2752
2753 if (compare_code == NE
2754 && GET_MODE (dest) == DImode
2755 && rtx_equal_p (op0, dest))
2756 {
2757 emit_insn (gen_rtx_SET (VOIDmode, dest,
2758 gen_rtx_IF_THEN_ELSE (DImode,
2759 gen_rtx_fmt_ee (compare_code, DImode,
2760 op0, const0_rtx),
2761 const1_rtx,
2762 dest)));
2763 return 1;
2764 }
2765
2766 if (reg_overlap_mentioned_p (dest, op0))
2767 {
2768 /* Handle the case where dest == x.
2769 We "early clobber" the result. */
2770 op0 = gen_reg_rtx (GET_MODE (x));
2771 emit_move_insn (op0, x);
2772 }
2773
2774 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2775 if (GET_MODE (op0) != DImode)
2776 {
2777 temp = gen_reg_rtx (DImode);
2778 convert_move (temp, op0, 0);
2779 }
2780 else
2781 temp = op0;
2782 emit_insn (gen_rtx_SET (VOIDmode, dest,
2783 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2784 gen_rtx_fmt_ee (compare_code, DImode,
2785 temp, const0_rtx),
2786 const1_rtx,
2787 dest)));
2788 return 1;
2789 }
2790 else
2791 {
2792 x = gen_compare_reg_1 (compare_code, x, y);
2793 y = const0_rtx;
2794
2795 gcc_assert (GET_MODE (x) != CC_NOOVmode
2796 && GET_MODE (x) != CCX_NOOVmode);
2797
2798 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2799 emit_insn (gen_rtx_SET (VOIDmode, dest,
2800 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2801 gen_rtx_fmt_ee (compare_code,
2802 GET_MODE (x), x, y),
2803 const1_rtx, dest)));
2804 return 1;
2805 }
2806 }
2807
2808
2809 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2810 without jumps using the addx/subx instructions. */
2811
2812 bool
2813 emit_scc_insn (rtx operands[])
2814 {
2815 rtx tem;
2816 rtx x;
2817 rtx y;
2818 enum rtx_code code;
2819
2820 /* The quad-word fp compare library routines all return nonzero to indicate
2821 true, which is different from the equivalent libgcc routines, so we must
2822 handle them specially here. */
2823 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2824 {
2825 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2826 GET_CODE (operands[1]));
2827 operands[2] = XEXP (operands[1], 0);
2828 operands[3] = XEXP (operands[1], 1);
2829 }
2830
2831 code = GET_CODE (operands[1]);
2832 x = operands[2];
2833 y = operands[3];
2834
2835 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2836 more applications). The exception to this is "reg != 0" which can
2837 be done in one instruction on v9 (so we do it). */
2838 if (code == EQ)
2839 {
2840 if (GET_MODE (x) == SImode)
2841 {
2842 rtx pat;
2843 if (TARGET_ARCH64)
2844 pat = gen_seqsidi_special (operands[0], x, y);
2845 else
2846 pat = gen_seqsisi_special (operands[0], x, y);
2847 emit_insn (pat);
2848 return true;
2849 }
2850 else if (GET_MODE (x) == DImode)
2851 {
2852 rtx pat = gen_seqdi_special (operands[0], x, y);
2853 emit_insn (pat);
2854 return true;
2855 }
2856 }
2857
2858 if (code == NE)
2859 {
2860 if (GET_MODE (x) == SImode)
2861 {
2862 rtx pat;
2863 if (TARGET_ARCH64)
2864 pat = gen_snesidi_special (operands[0], x, y);
2865 else
2866 pat = gen_snesisi_special (operands[0], x, y);
2867 emit_insn (pat);
2868 return true;
2869 }
2870 else if (GET_MODE (x) == DImode)
2871 {
2872 rtx pat;
2873 if (TARGET_VIS3)
2874 pat = gen_snedi_special_vis3 (operands[0], x, y);
2875 else
2876 pat = gen_snedi_special (operands[0], x, y);
2877 emit_insn (pat);
2878 return true;
2879 }
2880 }
2881
2882 if (TARGET_V9
2883 && TARGET_ARCH64
2884 && GET_MODE (x) == DImode
2885 && !(TARGET_VIS3
2886 && (code == GTU || code == LTU))
2887 && gen_v9_scc (operands[0], code, x, y))
2888 return true;
2889
2890 /* We can do LTU and GEU using the addx/subx instructions too. And
2891 for GTU/LEU, if both operands are registers, swap them and fall
2892 back to the easy case. */
2893 if (code == GTU || code == LEU)
2894 {
2895 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2896 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2897 {
2898 tem = x;
2899 x = y;
2900 y = tem;
2901 code = swap_condition (code);
2902 }
2903 }
2904
2905 if (code == LTU
2906 || (!TARGET_VIS3 && code == GEU))
2907 {
2908 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2909 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2910 gen_compare_reg_1 (code, x, y),
2911 const0_rtx)));
2912 return true;
2913 }
2914
2915 /* All the possibilities to use addx/subx-based sequences have been
2916 exhausted, so try for a 3-instruction sequence using v9 conditional
2917 moves. */
2918 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2919 return true;
2920
2921 /* Nope, do branches. */
2922 return false;
2923 }
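/* The addx-based sequence referred to above looks like this (the
   register names are illustrative):

	cmp	%o0, %o1	! set carry iff %o0 < %o1 unsigned
	addx	%g0, 0, %o2	! %o2 = carry, i.e. %o0 LTU %o1

   and GEU is the complement, computed with subx as 1 - carry.  */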
2924
2925 /* Emit a conditional jump insn for the v9 architecture using comparison code
2926 CODE and jump target LABEL.
2927 This function exists to take advantage of the v9 brxx insns. */
2928
2929 static void
2930 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2931 {
2932 emit_jump_insn (gen_rtx_SET (VOIDmode,
2933 pc_rtx,
2934 gen_rtx_IF_THEN_ELSE (VOIDmode,
2935 gen_rtx_fmt_ee (code, GET_MODE (op0),
2936 op0, const0_rtx),
2937 gen_rtx_LABEL_REF (VOIDmode, label),
2938 pc_rtx)));
2939 }
2940
2941 /* Emit a conditional jump insn for the UA2011 architecture using
2942 comparison code CODE and jump target LABEL. This function exists
2943 to take advantage of the UA2011 Compare and Branch insns. */
2944
2945 static void
2946 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2947 {
2948 rtx if_then_else;
2949
2950 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2951 gen_rtx_fmt_ee (code, GET_MODE (op0),
2952 op0, op1),
2953 gen_rtx_LABEL_REF (VOIDmode, label),
2954 pc_rtx);
2955
2956 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2957 }
2958
2959 void
2960 emit_conditional_branch_insn (rtx operands[])
2961 {
2962 /* The quad-word fp compare library routines all return nonzero to indicate
2963 true, which is different from the equivalent libgcc routines, so we must
2964 handle them specially here. */
2965 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2966 {
2967 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2968 GET_CODE (operands[0]));
2969 operands[1] = XEXP (operands[0], 0);
2970 operands[2] = XEXP (operands[0], 1);
2971 }
2972
2973 /* If we can tell early on that the comparison is against a constant
2974 that won't fit in the 5-bit signed immediate field of a cbcond,
2975 use one of the other v9 conditional branch sequences. */
2976 if (TARGET_CBCOND
2977 && GET_CODE (operands[1]) == REG
2978 && (GET_MODE (operands[1]) == SImode
2979 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2980 && (GET_CODE (operands[2]) != CONST_INT
2981 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2982 {
2983 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2984 return;
2985 }
2986
2987 if (TARGET_ARCH64 && operands[2] == const0_rtx
2988 && GET_CODE (operands[1]) == REG
2989 && GET_MODE (operands[1]) == DImode)
2990 {
2991 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2992 return;
2993 }
2994
2995 operands[1] = gen_compare_reg (operands[0]);
2996 operands[2] = const0_rtx;
2997 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2998 operands[1], operands[2]);
2999 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3000 operands[3]));
3001 }
3002
3003
3004 /* Generate a DFmode part of a hard TFmode register.
3005 REG is the TFmode hard register, LOW is 1 for the
3006 low 64 bits of the register and 0 otherwise. */
3007
3008 rtx
3009 gen_df_reg (rtx reg, int low)
3010 {
3011 int regno = REGNO (reg);
3012
3013 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3014 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3015 return gen_rtx_REG (DFmode, regno);
3016 }
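/* For example, with a TFmode value in the quad register starting at
   %f0, gen_df_reg (reg, 0) returns %f0 (the high 64 bits, SPARC
   being big-endian) and gen_df_reg (reg, 1) returns %f2, since FP
   registers advance by 2 per DFmode value.  */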
3017 \f
3018 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3019 Unlike normal calls, TFmode operands are passed by reference. It is
3020 assumed that no more than 3 operands are required. */
3021
3022 static void
3023 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3024 {
3025 rtx ret_slot = NULL, arg[3], func_sym;
3026 int i;
3027
3028 /* We only expect to be called for conversions, unary, and binary ops. */
3029 gcc_assert (nargs == 2 || nargs == 3);
3030
3031 for (i = 0; i < nargs; ++i)
3032 {
3033 rtx this_arg = operands[i];
3034 rtx this_slot;
3035
3036 /* TFmode arguments and return values are passed by reference. */
3037 if (GET_MODE (this_arg) == TFmode)
3038 {
3039 int force_stack_temp;
3040
3041 force_stack_temp = 0;
3042 if (TARGET_BUGGY_QP_LIB && i == 0)
3043 force_stack_temp = 1;
3044
3045 if (GET_CODE (this_arg) == MEM
3046 && ! force_stack_temp)
3047 {
3048 tree expr = MEM_EXPR (this_arg);
3049 if (expr)
3050 mark_addressable (expr);
3051 this_arg = XEXP (this_arg, 0);
3052 }
3053 else if (CONSTANT_P (this_arg)
3054 && ! force_stack_temp)
3055 {
3056 this_slot = force_const_mem (TFmode, this_arg);
3057 this_arg = XEXP (this_slot, 0);
3058 }
3059 else
3060 {
3061 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3062
3063 /* Operand 0 is the return value. We'll copy it out later. */
3064 if (i > 0)
3065 emit_move_insn (this_slot, this_arg);
3066 else
3067 ret_slot = this_slot;
3068
3069 this_arg = XEXP (this_slot, 0);
3070 }
3071 }
3072
3073 arg[i] = this_arg;
3074 }
3075
3076 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3077
3078 if (GET_MODE (operands[0]) == TFmode)
3079 {
3080 if (nargs == 2)
3081 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3082 arg[0], GET_MODE (arg[0]),
3083 arg[1], GET_MODE (arg[1]));
3084 else
3085 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3086 arg[0], GET_MODE (arg[0]),
3087 arg[1], GET_MODE (arg[1]),
3088 arg[2], GET_MODE (arg[2]));
3089
3090 if (ret_slot)
3091 emit_move_insn (operands[0], ret_slot);
3092 }
3093 else
3094 {
3095 rtx ret;
3096
3097 gcc_assert (nargs == 2);
3098
3099 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3100 GET_MODE (operands[0]), 1,
3101 arg[1], GET_MODE (arg[1]));
3102
3103 if (ret != operands[0])
3104 emit_move_insn (operands[0], ret);
3105 }
3106 }
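/* The net effect for a binary operation is a call of the form

	_Qp_add (&result, &op1, &op2);

   i.e. every TFmode value travels by reference, with operand 0
   additionally forced into a fresh stack slot when
   TARGET_BUGGY_QP_LIB requires a temporary.  */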
3107
3108 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3109
3110 static void
3111 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3112 {
3113 const char *func;
3114
3115 switch (code)
3116 {
3117 case PLUS:
3118 func = "_Qp_add";
3119 break;
3120 case MINUS:
3121 func = "_Qp_sub";
3122 break;
3123 case MULT:
3124 func = "_Qp_mul";
3125 break;
3126 case DIV:
3127 func = "_Qp_div";
3128 break;
3129 default:
3130 gcc_unreachable ();
3131 }
3132
3133 emit_soft_tfmode_libcall (func, 3, operands);
3134 }
3135
3136 static void
3137 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3138 {
3139 const char *func;
3140
3141 gcc_assert (code == SQRT);
3142 func = "_Qp_sqrt";
3143
3144 emit_soft_tfmode_libcall (func, 2, operands);
3145 }
3146
3147 static void
3148 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3149 {
3150 const char *func;
3151
3152 switch (code)
3153 {
3154 case FLOAT_EXTEND:
3155 switch (GET_MODE (operands[1]))
3156 {
3157 case SFmode:
3158 func = "_Qp_stoq";
3159 break;
3160 case DFmode:
3161 func = "_Qp_dtoq";
3162 break;
3163 default:
3164 gcc_unreachable ();
3165 }
3166 break;
3167
3168 case FLOAT_TRUNCATE:
3169 switch (GET_MODE (operands[0]))
3170 {
3171 case SFmode:
3172 func = "_Qp_qtos";
3173 break;
3174 case DFmode:
3175 func = "_Qp_qtod";
3176 break;
3177 default:
3178 gcc_unreachable ();
3179 }
3180 break;
3181
3182 case FLOAT:
3183 switch (GET_MODE (operands[1]))
3184 {
3185 case SImode:
3186 func = "_Qp_itoq";
3187 if (TARGET_ARCH64)
3188 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3189 break;
3190 case DImode:
3191 func = "_Qp_xtoq";
3192 break;
3193 default:
3194 gcc_unreachable ();
3195 }
3196 break;
3197
3198 case UNSIGNED_FLOAT:
3199 switch (GET_MODE (operands[1]))
3200 {
3201 case SImode:
3202 func = "_Qp_uitoq";
3203 if (TARGET_ARCH64)
3204 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3205 break;
3206 case DImode:
3207 func = "_Qp_uxtoq";
3208 break;
3209 default:
3210 gcc_unreachable ();
3211 }
3212 break;
3213
3214 case FIX:
3215 switch (GET_MODE (operands[0]))
3216 {
3217 case SImode:
3218 func = "_Qp_qtoi";
3219 break;
3220 case DImode:
3221 func = "_Qp_qtox";
3222 break;
3223 default:
3224 gcc_unreachable ();
3225 }
3226 break;
3227
3228 case UNSIGNED_FIX:
3229 switch (GET_MODE (operands[0]))
3230 {
3231 case SImode:
3232 func = "_Qp_qtoui";
3233 break;
3234 case DImode:
3235 func = "_Qp_qtoux";
3236 break;
3237 default:
3238 gcc_unreachable ();
3239 }
3240 break;
3241
3242 default:
3243 gcc_unreachable ();
3244 }
3245
3246 emit_soft_tfmode_libcall (func, 2, operands);
3247 }
3248
3249 /* Expand a hard-float tfmode operation. All arguments must be in
3250 registers. */
3251
3252 static void
3253 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3254 {
3255 rtx op, dest;
3256
3257 if (GET_RTX_CLASS (code) == RTX_UNARY)
3258 {
3259 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3260 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3261 }
3262 else
3263 {
3264 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3265 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3266 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3267 operands[1], operands[2]);
3268 }
3269
3270 if (register_operand (operands[0], VOIDmode))
3271 dest = operands[0];
3272 else
3273 dest = gen_reg_rtx (GET_MODE (operands[0]));
3274
3275 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3276
3277 if (dest != operands[0])
3278 emit_move_insn (operands[0], dest);
3279 }
3280
3281 void
3282 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3283 {
3284 if (TARGET_HARD_QUAD)
3285 emit_hard_tfmode_operation (code, operands);
3286 else
3287 emit_soft_tfmode_binop (code, operands);
3288 }
3289
3290 void
3291 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3292 {
3293 if (TARGET_HARD_QUAD)
3294 emit_hard_tfmode_operation (code, operands);
3295 else
3296 emit_soft_tfmode_unop (code, operands);
3297 }
3298
3299 void
3300 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3301 {
3302 if (TARGET_HARD_QUAD)
3303 emit_hard_tfmode_operation (code, operands);
3304 else
3305 emit_soft_tfmode_cvt (code, operands);
3306 }
3307 \f
3308 /* Return nonzero if a branch/jump/call instruction will be emitting
3309 a nop into its delay slot. */
3310
3311 int
3312 empty_delay_slot (rtx insn)
3313 {
3314 rtx seq;
3315
3316 /* If there is no previous instruction (this should not happen), return true. */
3317 if (PREV_INSN (insn) == NULL)
3318 return 1;
3319
3320 seq = NEXT_INSN (PREV_INSN (insn));
3321 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3322 return 0;
3323
3324 return 1;
3325 }
3326
3327 /* Return nonzero if we should emit a nop after a cbcond instruction.
3328 The cbcond instruction does not have a delay slot; however, there is
3329 a severe performance penalty if a control transfer appears right
3330 after a cbcond. Therefore we emit a nop when we detect this
3331 situation. */
3332
3333 int
3334 emit_cbcond_nop (rtx insn)
3335 {
3336 rtx next = next_active_insn (insn);
3337
3338 if (!next)
3339 return 1;
3340
3341 if (NONJUMP_INSN_P (next)
3342 && GET_CODE (PATTERN (next)) == SEQUENCE)
3343 next = XVECEXP (PATTERN (next), 0, 0);
3344 else if (CALL_P (next)
3345 && GET_CODE (PATTERN (next)) == PARALLEL)
3346 {
3347 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3348
3349 if (GET_CODE (delay) == RETURN)
3350 {
3351 /* It's a sibling call. Do not emit the nop if we're going
3352 to emit something other than the jump itself as the first
3353 instruction of the sibcall sequence. */
3354 if (sparc_leaf_function_p || TARGET_FLAT)
3355 return 0;
3356 }
3357 }
3358
3359 if (NONJUMP_INSN_P (next))
3360 return 0;
3361
3362 return 1;
3363 }
3364
3365 /* Return nonzero if TRIAL can go into the call delay slot. */
3366
3367 int
3368 tls_call_delay (rtx trial)
3369 {
3370 rtx pat;
3371
3372 /* Binutils allows
3373 call __tls_get_addr, %tgd_call (foo)
3374 add %l7, %o0, %o0, %tgd_add (foo)
3375 while Sun as/ld does not. */
3376 if (TARGET_GNU_TLS || !TARGET_TLS)
3377 return 1;
3378
3379 pat = PATTERN (trial);
3380
3381 /* We must reject tgd_add{32|64}, i.e.
3382 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3383 and tldm_add{32|64}, i.e.
3384 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3385 for Sun as/ld. */
3386 if (GET_CODE (pat) == SET
3387 && GET_CODE (SET_SRC (pat)) == PLUS)
3388 {
3389 rtx unspec = XEXP (SET_SRC (pat), 1);
3390
3391 if (GET_CODE (unspec) == UNSPEC
3392 && (XINT (unspec, 1) == UNSPEC_TLSGD
3393 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3394 return 0;
3395 }
3396
3397 return 1;
3398 }
3399
3400 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3401 instruction. RETURN_P is true if the v9 variant 'return' is to be
3402 considered in the test too.
3403
3404 TRIAL must be a SET whose destination is a REG appropriate for the
3405 'restore' instruction or, if RETURN_P is true, for the 'return'
3406 instruction. */
3407
3408 static int
3409 eligible_for_restore_insn (rtx trial, bool return_p)
3410 {
3411 rtx pat = PATTERN (trial);
3412 rtx src = SET_SRC (pat);
3413 bool src_is_freg = false;
3414 rtx src_reg;
3415
3416 /* Since we now can do moves between float and integer registers when
3417 VIS3 is enabled, we have to catch this case. We can allow such
3418 moves when doing a 'return', however. */
3419 src_reg = src;
3420 if (GET_CODE (src_reg) == SUBREG)
3421 src_reg = SUBREG_REG (src_reg);
3422 if (GET_CODE (src_reg) == REG
3423 && SPARC_FP_REG_P (REGNO (src_reg)))
3424 src_is_freg = true;
3425
3426 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3427 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3428 && arith_operand (src, GET_MODE (src))
3429 && ! src_is_freg)
3430 {
3431 if (TARGET_ARCH64)
3432 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3433 else
3434 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3435 }
3436
3437 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3438 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3439 && arith_double_operand (src, GET_MODE (src))
3440 && ! src_is_freg)
3441 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3442
3443 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3444 else if (! TARGET_FPU && register_operand (src, SFmode))
3445 return 1;
3446
3447 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3448 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3449 return 1;
3450
3451 /* If we have the 'return' instruction, anything that does not use
3452 local or output registers and can go into a delay slot wins. */
3453 else if (return_p
3454 && TARGET_V9
3455 && !epilogue_renumber (&pat, 1)
3456 && get_attr_in_uncond_branch_delay (trial)
3457 == IN_UNCOND_BRANCH_DELAY_TRUE)
3458 return 1;
3459
3460 /* The 'restore src1,src2,dest' pattern for SImode. */
3461 else if (GET_CODE (src) == PLUS
3462 && register_operand (XEXP (src, 0), SImode)
3463 && arith_operand (XEXP (src, 1), SImode))
3464 return 1;
3465
3466 /* The 'restore src1,src2,dest' pattern for DImode. */
3467 else if (GET_CODE (src) == PLUS
3468 && register_operand (XEXP (src, 0), DImode)
3469 && arith_double_operand (XEXP (src, 1), DImode))
3470 return 1;
3471
3472 /* The 'restore src1,%lo(src2),dest' pattern. */
3473 else if (GET_CODE (src) == LO_SUM
3474 && ! TARGET_CM_MEDMID
3475 && ((register_operand (XEXP (src, 0), SImode)
3476 && immediate_operand (XEXP (src, 1), SImode))
3477 || (TARGET_ARCH64
3478 && register_operand (XEXP (src, 0), DImode)
3479 && immediate_operand (XEXP (src, 1), DImode))))
3480 return 1;
3481
3482 /* The 'restore src,src,dest' pattern. */
3483 else if (GET_CODE (src) == ASHIFT
3484 && (register_operand (XEXP (src, 0), SImode)
3485 || register_operand (XEXP (src, 0), DImode))
3486 && XEXP (src, 1) == const1_rtx)
3487 return 1;
3488
3489 return 0;
3490 }
3491
3492 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3493
3494 int
3495 eligible_for_return_delay (rtx trial)
3496 {
3497 int regno;
3498 rtx pat;
3499
3500 if (! NONJUMP_INSN_P (trial))
3501 return 0;
3502
3503 if (get_attr_length (trial) != 1)
3504 return 0;
3505
3506 /* If the function uses __builtin_eh_return, the eh_return machinery
3507 occupies the delay slot. */
3508 if (crtl->calls_eh_return)
3509 return 0;
3510
3511 /* In the case of a leaf or flat function, anything can go into the slot. */
3512 if (sparc_leaf_function_p || TARGET_FLAT)
3513 return
3514 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3515
3516 pat = PATTERN (trial);
3517 if (GET_CODE (pat) == PARALLEL)
3518 {
3519 int i;
3520
3521 if (! TARGET_V9)
3522 return 0;
3523 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3524 {
3525 rtx expr = XVECEXP (pat, 0, i);
3526 if (GET_CODE (expr) != SET)
3527 return 0;
3528 if (GET_CODE (SET_DEST (expr)) != REG)
3529 return 0;
3530 regno = REGNO (SET_DEST (expr));
3531 if (regno >= 8 && regno < 24)
3532 return 0;
3533 }
3534 return !epilogue_renumber (&pat, 1)
3535 && (get_attr_in_uncond_branch_delay (trial)
3536 == IN_UNCOND_BRANCH_DELAY_TRUE);
3537 }
3538
3539 if (GET_CODE (pat) != SET)
3540 return 0;
3541
3542 if (GET_CODE (SET_DEST (pat)) != REG)
3543 return 0;
3544
3545 regno = REGNO (SET_DEST (pat));
3546
3547 /* Otherwise, only operations which can be done in tandem with
3548 a `restore' or `return' insn can go into the delay slot. */
3549 if (regno >= 8 && regno < 24)
3550 return 0;
3551
3552 /* If this instruction sets up a floating-point register and we have a return
3553 instruction, it can probably go in. But restore will not work
3554 with FP_REGS. */
3555 if (! SPARC_INT_REG_P (regno))
3556 return (TARGET_V9
3557 && !epilogue_renumber (&pat, 1)
3558 && get_attr_in_uncond_branch_delay (trial)
3559 == IN_UNCOND_BRANCH_DELAY_TRUE);
3560
3561 return eligible_for_restore_insn (trial, true);
3562 }
3563
3564 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3565
3566 int
3567 eligible_for_sibcall_delay (rtx trial)
3568 {
3569 rtx pat;
3570
3571 if (! NONJUMP_INSN_P (trial) || GET_CODE (PATTERN (trial)) != SET)
3572 return 0;
3573
3574 if (get_attr_length (trial) != 1)
3575 return 0;
3576
3577 pat = PATTERN (trial);
3578
3579 if (sparc_leaf_function_p || TARGET_FLAT)
3580 {
3581 /* If the tail call is done using the call instruction,
3582 we have to restore %o7 in the delay slot. */
3583 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3584 return 0;
3585
3586 /* %g1 is used to build the function address. */
3587 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3588 return 0;
3589
3590 return 1;
3591 }
3592
3593 /* Otherwise, only operations which can be done in tandem with
3594 a `restore' insn can go into the delay slot. */
3595 if (GET_CODE (SET_DEST (pat)) != REG
3596 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3597 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3598 return 0;
3599
3600 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3601 in most cases. */
3602 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3603 return 0;
3604
3605 return eligible_for_restore_insn (trial, false);
3606 }
3607 \f
3608 /* Determine if it's legal to put X into the constant pool. This
3609 is not possible if X contains the address of a symbol that is
3610 not constant (TLS) or not known at final link time (PIC). */
3611
3612 static bool
3613 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3614 {
3615 switch (GET_CODE (x))
3616 {
3617 case CONST_INT:
3618 case CONST_DOUBLE:
3619 case CONST_VECTOR:
3620 /* Accept all non-symbolic constants. */
3621 return false;
3622
3623 case LABEL_REF:
3624 /* Labels are OK iff we are non-PIC. */
3625 return flag_pic != 0;
3626
3627 case SYMBOL_REF:
3628 /* 'Naked' TLS symbol references are never OK,
3629 non-TLS symbols are OK iff we are non-PIC. */
3630 if (SYMBOL_REF_TLS_MODEL (x))
3631 return true;
3632 else
3633 return flag_pic != 0;
3634
3635 case CONST:
3636 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3637 case PLUS:
3638 case MINUS:
3639 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3640 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3641 case UNSPEC:
3642 return true;
3643 default:
3644 gcc_unreachable ();
3645 }
3646 }
3647 \f
3648 /* Global Offset Table support. */
3649 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3650 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3651
3652 /* Return the SYMBOL_REF for the Global Offset Table. */
3653
3654 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3655
3656 static rtx
3657 sparc_got (void)
3658 {
3659 if (!sparc_got_symbol)
3660 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3661
3662 return sparc_got_symbol;
3663 }
3664
3665 /* Ensure that we are not using patterns that are not OK with PIC. */
3666
3667 int
3668 check_pic (int i)
3669 {
3670 rtx op;
3671
3672 switch (flag_pic)
3673 {
3674 case 1:
3675 op = recog_data.operand[i];
3676 gcc_assert (GET_CODE (op) != SYMBOL_REF
3677 && (GET_CODE (op) != CONST
3678 || (GET_CODE (XEXP (op, 0)) == MINUS
3679 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3680 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3681 case 2:
3682 default:
3683 return 1;
3684 }
3685 }
3686
3687 /* Return true if X is an address which needs a temporary register when
3688 reloaded while generating PIC code. */
3689
3690 int
3691 pic_address_needs_scratch (rtx x)
3692 {
3693 /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
3694 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3695 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3696 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3697 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3698 return 1;
3699
3700 return 0;
3701 }
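/* E.g. (const (plus (symbol_ref "x") (const_int 0x2000))) needs a
   scratch, because 0x2000 does not fit in the 13-bit signed
   immediate field, whereas a small offset such as 0x800 does.  */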
3702
3703 /* Determine if a given RTX is a valid constant. We already know this
3704 satisfies CONSTANT_P. */
3705
3706 static bool
3707 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3708 {
3709 switch (GET_CODE (x))
3710 {
3711 case CONST:
3712 case SYMBOL_REF:
3713 if (sparc_tls_referenced_p (x))
3714 return false;
3715 break;
3716
3717 case CONST_DOUBLE:
3718 if (GET_MODE (x) == VOIDmode)
3719 return true;
3720
3721 /* Floating point constants are generally not ok.
3722 The only exception is 0.0 and all-ones in VIS. */
3723 if (TARGET_VIS
3724 && SCALAR_FLOAT_MODE_P (mode)
3725 && (const_zero_operand (x, mode)
3726 || const_all_ones_operand (x, mode)))
3727 return true;
3728
3729 return false;
3730
3731 case CONST_VECTOR:
3732 /* Vector constants are generally not ok.
3733 The only exception is 0 or -1 in VIS. */
3734 if (TARGET_VIS
3735 && (const_zero_operand (x, mode)
3736 || const_all_ones_operand (x, mode)))
3737 return true;
3738
3739 return false;
3740
3741 default:
3742 break;
3743 }
3744
3745 return true;
3746 }
3747
3748 /* Determine if a given RTX is a valid constant address. */
3749
3750 bool
3751 constant_address_p (rtx x)
3752 {
3753 switch (GET_CODE (x))
3754 {
3755 case LABEL_REF:
3756 case CONST_INT:
3757 case HIGH:
3758 return true;
3759
3760 case CONST:
3761 if (flag_pic && pic_address_needs_scratch (x))
3762 return false;
3763 return sparc_legitimate_constant_p (Pmode, x);
3764
3765 case SYMBOL_REF:
3766 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3767
3768 default:
3769 return false;
3770 }
3771 }
3772
3773 /* Nonzero if the constant value X is a legitimate general operand
3774 when generating PIC code. It is given that flag_pic is on and
3775 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3776
3777 bool
3778 legitimate_pic_operand_p (rtx x)
3779 {
3780 if (pic_address_needs_scratch (x))
3781 return false;
3782 if (sparc_tls_referenced_p (x))
3783 return false;
3784 return true;
3785 }
3786
3787 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3788 (CONST_INT_P (X) \
3789 && INTVAL (X) >= -0x1000 \
3790 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3791
3792 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3793 (CONST_INT_P (X) \
3794 && INTVAL (X) >= -0x1000 \
3795 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
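
/* A worked example of the two bounds above, assuming the usual 13-bit
   signed immediate field: for MODE == DImode (size 8),
   RTX_OK_FOR_OFFSET_P accepts offsets in [-4096, 4087], keeping the
   whole access within reach of a simm13, while RTX_OK_FOR_OLO10_P stops
   at [-4096, 3063] because the %lo() part of the address can contribute
   up to 0x3ff on top of the offset. */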
3796
3797 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3798
3799 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3800 ordinarily. This changes a bit when generating PIC. */
3801
3802 static bool
3803 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3804 {
3805 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3806
3807 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3808 rs1 = addr;
3809 else if (GET_CODE (addr) == PLUS)
3810 {
3811 rs1 = XEXP (addr, 0);
3812 rs2 = XEXP (addr, 1);
3813
3814 /* Canonicalize. REG comes first; if there are no regs,
3815 LO_SUM comes first. */
3816 if (!REG_P (rs1)
3817 && GET_CODE (rs1) != SUBREG
3818 && (REG_P (rs2)
3819 || GET_CODE (rs2) == SUBREG
3820 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3821 {
3822 rs1 = XEXP (addr, 1);
3823 rs2 = XEXP (addr, 0);
3824 }
3825
3826 if ((flag_pic == 1
3827 && rs1 == pic_offset_table_rtx
3828 && !REG_P (rs2)
3829 && GET_CODE (rs2) != SUBREG
3830 && GET_CODE (rs2) != LO_SUM
3831 && GET_CODE (rs2) != MEM
3832 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3833 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3834 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3835 || ((REG_P (rs1)
3836 || GET_CODE (rs1) == SUBREG)
3837 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3838 {
3839 imm1 = rs2;
3840 rs2 = NULL;
3841 }
3842 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3843 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3844 {
3845 /* We prohibit REG + REG for TFmode when there are no quad move insns
3846 and we consequently need to split. We do this because REG+REG
3847 is not an offsettable address. If we get the situation in reload
3848 where source and destination of a movtf pattern are both MEMs with
3849 REG+REG address, then only one of them gets converted to an
3850 offsettable address. */
3851 if (mode == TFmode
3852 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3853 return 0;
3854
3855 /* Likewise for TImode, but in all cases. */
3856 if (mode == TImode)
3857 return 0;
3858
3859 /* We prohibit REG + REG on ARCH32 if not optimizing for
3860 DFmode/DImode because then mem_min_alignment is likely to be zero
3861 after reload and the forced split would lack a matching splitter
3862 pattern. */
3863 if (TARGET_ARCH32 && !optimize
3864 && (mode == DFmode || mode == DImode))
3865 return 0;
3866 }
3867 else if (USE_AS_OFFSETABLE_LO10
3868 && GET_CODE (rs1) == LO_SUM
3869 && TARGET_ARCH64
3870 && ! TARGET_CM_MEDMID
3871 && RTX_OK_FOR_OLO10_P (rs2, mode))
3872 {
3873 rs2 = NULL;
3874 imm1 = XEXP (rs1, 1);
3875 rs1 = XEXP (rs1, 0);
3876 if (!CONSTANT_P (imm1)
3877 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3878 return 0;
3879 }
3880 }
3881 else if (GET_CODE (addr) == LO_SUM)
3882 {
3883 rs1 = XEXP (addr, 0);
3884 imm1 = XEXP (addr, 1);
3885
3886 if (!CONSTANT_P (imm1)
3887 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3888 return 0;
3889
3890 /* We can't allow TFmode in 32-bit mode, because an offset greater
3891 than the alignment (8) may cause the LO_SUM to overflow. */
3892 if (mode == TFmode && TARGET_ARCH32)
3893 return 0;
3894 }
3895 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3896 return 1;
3897 else
3898 return 0;
3899
3900 if (GET_CODE (rs1) == SUBREG)
3901 rs1 = SUBREG_REG (rs1);
3902 if (!REG_P (rs1))
3903 return 0;
3904
3905 if (rs2)
3906 {
3907 if (GET_CODE (rs2) == SUBREG)
3908 rs2 = SUBREG_REG (rs2);
3909 if (!REG_P (rs2))
3910 return 0;
3911 }
3912
3913 if (strict)
3914 {
3915 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3916 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3917 return 0;
3918 }
3919 else
3920 {
3921 if ((! SPARC_INT_REG_P (REGNO (rs1))
3922 && REGNO (rs1) != FRAME_POINTER_REGNUM
3923 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3924 || (rs2
3925 && (! SPARC_INT_REG_P (REGNO (rs2))
3926 && REGNO (rs2) != FRAME_POINTER_REGNUM
3927 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3928 return 0;
3929 }
3930 return 1;
3931 }
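
/* By way of illustration, the routine above accepts addresses like

     [%l1]           REG
     [%l1+%l2]       REG+REG (but not for TFmode/TImode, see above)
     [%fp-8]         REG+SMALLINT
     [%l1+%lo(sym)]  LO_SUM, the low half of a HIGH/LO_SUM pair

   and rejects e.g. [%l1+4096] for SImode, since 4096 does not fit in
   the 13-bit signed immediate and must first be loaded into a register. */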
3932
3933 /* Return the SYMBOL_REF for the tls_get_addr function. */
3934
3935 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3936
3937 static rtx
3938 sparc_tls_get_addr (void)
3939 {
3940 if (!sparc_tls_symbol)
3941 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3942
3943 return sparc_tls_symbol;
3944 }
3945
3946 /* Return the Global Offset Table to be used in TLS mode. */
3947
3948 static rtx
3949 sparc_tls_got (void)
3950 {
3951 /* In PIC mode, this is just the PIC offset table. */
3952 if (flag_pic)
3953 {
3954 crtl->uses_pic_offset_table = 1;
3955 return pic_offset_table_rtx;
3956 }
3957
3958 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3959 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3960 if (TARGET_SUN_TLS && TARGET_ARCH32)
3961 {
3962 load_got_register ();
3963 return global_offset_table_rtx;
3964 }
3965
3966 /* In all other cases, we load a new pseudo with the GOT symbol. */
3967 return copy_to_reg (sparc_got ());
3968 }
3969
3970 /* Return true if X contains a thread-local symbol. */
3971
3972 static bool
3973 sparc_tls_referenced_p (rtx x)
3974 {
3975 if (!TARGET_HAVE_TLS)
3976 return false;
3977
3978 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3979 x = XEXP (XEXP (x, 0), 0);
3980
3981 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3982 return true;
3983
3984 /* That's all we handle in sparc_legitimize_tls_address for now. */
3985 return false;
3986 }
3987
3988 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3989 this (thread-local) address. */
3990
3991 static rtx
3992 sparc_legitimize_tls_address (rtx addr)
3993 {
3994 rtx temp1, temp2, temp3, ret, o0, got, insn;
3995
3996 gcc_assert (can_create_pseudo_p ());
3997
3998 if (GET_CODE (addr) == SYMBOL_REF)
3999 switch (SYMBOL_REF_TLS_MODEL (addr))
4000 {
4001 case TLS_MODEL_GLOBAL_DYNAMIC:
4002 start_sequence ();
4003 temp1 = gen_reg_rtx (SImode);
4004 temp2 = gen_reg_rtx (SImode);
4005 ret = gen_reg_rtx (Pmode);
4006 o0 = gen_rtx_REG (Pmode, 8);
4007 got = sparc_tls_got ();
4008 emit_insn (gen_tgd_hi22 (temp1, addr));
4009 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4010 if (TARGET_ARCH32)
4011 {
4012 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4013 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4014 addr, const1_rtx));
4015 }
4016 else
4017 {
4018 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4019 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4020 addr, const1_rtx));
4021 }
4022 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4023 insn = get_insns ();
4024 end_sequence ();
4025 emit_libcall_block (insn, ret, o0, addr);
4026 break;
4027
4028 case TLS_MODEL_LOCAL_DYNAMIC:
4029 start_sequence ();
4030 temp1 = gen_reg_rtx (SImode);
4031 temp2 = gen_reg_rtx (SImode);
4032 temp3 = gen_reg_rtx (Pmode);
4033 ret = gen_reg_rtx (Pmode);
4034 o0 = gen_rtx_REG (Pmode, 8);
4035 got = sparc_tls_got ();
4036 emit_insn (gen_tldm_hi22 (temp1));
4037 emit_insn (gen_tldm_lo10 (temp2, temp1));
4038 if (TARGET_ARCH32)
4039 {
4040 emit_insn (gen_tldm_add32 (o0, got, temp2));
4041 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4042 const1_rtx));
4043 }
4044 else
4045 {
4046 emit_insn (gen_tldm_add64 (o0, got, temp2));
4047 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4048 const1_rtx));
4049 }
4050 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4051 insn = get_insns ();
4052 end_sequence ();
4053 emit_libcall_block (insn, temp3, o0,
4054 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4055 UNSPEC_TLSLD_BASE));
4056 temp1 = gen_reg_rtx (SImode);
4057 temp2 = gen_reg_rtx (SImode);
4058 emit_insn (gen_tldo_hix22 (temp1, addr));
4059 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4060 if (TARGET_ARCH32)
4061 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4062 else
4063 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4064 break;
4065
4066 case TLS_MODEL_INITIAL_EXEC:
4067 temp1 = gen_reg_rtx (SImode);
4068 temp2 = gen_reg_rtx (SImode);
4069 temp3 = gen_reg_rtx (Pmode);
4070 got = sparc_tls_got ();
4071 emit_insn (gen_tie_hi22 (temp1, addr));
4072 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4073 if (TARGET_ARCH32)
4074 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4075 else
4076 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4077 if (TARGET_SUN_TLS)
4078 {
4079 ret = gen_reg_rtx (Pmode);
4080 if (TARGET_ARCH32)
4081 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4082 temp3, addr));
4083 else
4084 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4085 temp3, addr));
4086 }
4087 else
4088 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4089 break;
4090
4091 case TLS_MODEL_LOCAL_EXEC:
4092 temp1 = gen_reg_rtx (Pmode);
4093 temp2 = gen_reg_rtx (Pmode);
4094 if (TARGET_ARCH32)
4095 {
4096 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4097 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4098 }
4099 else
4100 {
4101 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4102 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4103 }
4104 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4105 break;
4106
4107 default:
4108 gcc_unreachable ();
4109 }
4110
4111 else if (GET_CODE (addr) == CONST)
4112 {
4113 rtx base, offset;
4114
4115 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4116
4117 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4118 offset = XEXP (XEXP (addr, 0), 1);
4119
4120 base = force_operand (base, NULL_RTX);
4121 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4122 offset = force_reg (Pmode, offset);
4123 ret = gen_rtx_PLUS (Pmode, base, offset);
4124 }
4125
4126 else
4127 gcc_unreachable (); /* for now ... */
4128
4129 return ret;
4130 }
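
/* As a concrete sketch of the simplest model above, a local-exec
   reference on 32-bit essentially becomes

        sethi   %tle_hix22(sym), %tmp1
        xor     %tmp1, %tle_lox10(sym), %tmp2
        add     %g7, %tmp2, %result

   with %g7 (register 7 above) being the thread pointer; the two dynamic
   models instead build the argument in %o0 (register 8) and call
   __tls_get_addr. */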
4131
4132 /* Legitimize PIC addresses. If the address is already position-independent,
4133 we return ORIG. Newly generated position-independent addresses go into a
4134 reg. This is REG if nonzero, otherwise we allocate register(s) as
4135 necessary. */
4136
4137 static rtx
4138 sparc_legitimize_pic_address (rtx orig, rtx reg)
4139 {
4140 bool gotdata_op = false;
4141
4142 if (GET_CODE (orig) == SYMBOL_REF
4143 /* See the comment in sparc_expand_move. */
4144 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4145 {
4146 rtx pic_ref, address;
4147 rtx insn;
4148
4149 if (reg == 0)
4150 {
4151 gcc_assert (can_create_pseudo_p ());
4152 reg = gen_reg_rtx (Pmode);
4153 }
4154
4155 if (flag_pic == 2)
4156 {
4157 /* If not during reload, allocate another temp reg here for loading
4158 in the address, so that these instructions can be optimized
4159 properly. */
4160 rtx temp_reg = (! can_create_pseudo_p ()
4161 ? reg : gen_reg_rtx (Pmode));
4162
4163 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4164 won't get confused into thinking that these two instructions
4165 are loading in the true address of the symbol. If in the
4166 future a PIC rtx exists, that should be used instead. */
4167 if (TARGET_ARCH64)
4168 {
4169 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4170 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4171 }
4172 else
4173 {
4174 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4175 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4176 }
4177 address = temp_reg;
4178 gotdata_op = true;
4179 }
4180 else
4181 address = orig;
4182
4183 crtl->uses_pic_offset_table = 1;
4184 if (gotdata_op)
4185 {
4186 if (TARGET_ARCH64)
4187 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4188 pic_offset_table_rtx,
4189 address, orig));
4190 else
4191 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4192 pic_offset_table_rtx,
4193 address, orig));
4194 }
4195 else
4196 {
4197 pic_ref
4198 = gen_const_mem (Pmode,
4199 gen_rtx_PLUS (Pmode,
4200 pic_offset_table_rtx, address));
4201 insn = emit_move_insn (reg, pic_ref);
4202 }
4203
4204 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4205 by the loop optimizer. */
4206 set_unique_reg_note (insn, REG_EQUAL, orig);
4207 return reg;
4208 }
4209 else if (GET_CODE (orig) == CONST)
4210 {
4211 rtx base, offset;
4212
4213 if (GET_CODE (XEXP (orig, 0)) == PLUS
4214 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4215 return orig;
4216
4217 if (reg == 0)
4218 {
4219 gcc_assert (can_create_pseudo_p ());
4220 reg = gen_reg_rtx (Pmode);
4221 }
4222
4223 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4224 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4225 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4226 base == reg ? NULL_RTX : reg);
4227
4228 if (GET_CODE (offset) == CONST_INT)
4229 {
4230 if (SMALL_INT (offset))
4231 return plus_constant (Pmode, base, INTVAL (offset));
4232 else if (can_create_pseudo_p ())
4233 offset = force_reg (Pmode, offset);
4234 else
4235 /* If we reach here, then something is seriously wrong. */
4236 gcc_unreachable ();
4237 }
4238 return gen_rtx_PLUS (Pmode, base, offset);
4239 }
4240 else if (GET_CODE (orig) == LABEL_REF)
4241 /* ??? We ought to be checking that the register is live instead, in case
4242 it is eliminated. */
4243 crtl->uses_pic_offset_table = 1;
4244
4245 return orig;
4246 }
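
/* For illustration, with flag_pic == 2 the SYMBOL_REF case above emits
   roughly

        sethi   %gdop_hix22(sym), %tmp
        xor     %tmp, %gdop_lox10(sym), %tmp
        ld      [%l7 + %tmp], %reg, %gdop(sym)

   i.e. a full 22+10-bit GOT offset plus the GOT-data marker that lets
   the linker relax the sequence, whereas flag_pic == 1 assumes the GOT
   offset fits in 13 bits so that a single ld [%l7+sym] suffices. */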
4247
4248 /* Try machine-dependent ways of modifying an illegitimate address X
4249 to be legitimate. If we find one, return the new, valid address.
4250
4251 OLDX is the address as it was before break_out_memory_refs was called.
4252 In some cases it is useful to look at this to decide what needs to be done.
4253
4254 MODE is the mode of the operand pointed to by X.
4255
4256 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4257
4258 static rtx
4259 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4260 enum machine_mode mode)
4261 {
4262 rtx orig_x = x;
4263
4264 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4265 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4266 force_operand (XEXP (x, 0), NULL_RTX));
4267 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4268 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4269 force_operand (XEXP (x, 1), NULL_RTX));
4270 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4271 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4272 XEXP (x, 1));
4273 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4274 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4275 force_operand (XEXP (x, 1), NULL_RTX));
4276
4277 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4278 return x;
4279
4280 if (sparc_tls_referenced_p (x))
4281 x = sparc_legitimize_tls_address (x);
4282 else if (flag_pic)
4283 x = sparc_legitimize_pic_address (x, NULL_RTX);
4284 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4285 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4286 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4287 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4288 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4289 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4290 else if (GET_CODE (x) == SYMBOL_REF
4291 || GET_CODE (x) == CONST
4292 || GET_CODE (x) == LABEL_REF)
4293 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4294
4295 return x;
4296 }
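
/* For example, given (plus:SI (reg %l1) (const_int 8192)) in non-PIC
   code, the constant is out of simm13 range, so the hook above returns
   (plus:SI (reg %l1) (reg tmp)) with 8192 first copied into the new
   pseudo tmp. */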
4297
4298 /* Delegitimize an address that was legitimized by the above function. */
4299
4300 static rtx
4301 sparc_delegitimize_address (rtx x)
4302 {
4303 x = delegitimize_mem_from_attrs (x);
4304
4305 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4306 switch (XINT (XEXP (x, 1), 1))
4307 {
4308 case UNSPEC_MOVE_PIC:
4309 case UNSPEC_TLSLE:
4310 x = XVECEXP (XEXP (x, 1), 0, 0);
4311 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4312 break;
4313 default:
4314 break;
4315 }
4316
4317 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4318 if (GET_CODE (x) == MINUS
4319 && REG_P (XEXP (x, 0))
4320 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4321 && GET_CODE (XEXP (x, 1)) == LO_SUM
4322 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4323 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4324 {
4325 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4326 gcc_assert (GET_CODE (x) == LABEL_REF);
4327 }
4328
4329 return x;
4330 }
4331
4332 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4333 replace the input X, or the original X if no replacement is called for.
4334 The output parameter *WIN is 1 if the calling macro should goto WIN,
4335 0 if it should not.
4336
4337 For SPARC, we wish to handle addresses by splitting them into
4338 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4339 This cuts the number of extra insns by one.
4340
4341 Do nothing when generating PIC code and the address is a symbolic
4342 operand or requires a scratch register. */
4343
4344 rtx
4345 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4346 int opnum, int type,
4347 int ind_levels ATTRIBUTE_UNUSED, int *win)
4348 {
4349 /* Decompose SImode constants into HIGH+LO_SUM. */
4350 if (CONSTANT_P (x)
4351 && (mode != TFmode || TARGET_ARCH64)
4352 && GET_MODE (x) == SImode
4353 && GET_CODE (x) != LO_SUM
4354 && GET_CODE (x) != HIGH
4355 && sparc_cmodel <= CM_MEDLOW
4356 && !(flag_pic
4357 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4358 {
4359 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4360 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4361 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4362 opnum, (enum reload_type)type);
4363 *win = 1;
4364 return x;
4365 }
4366
4367 /* We have to recognize what we have already generated above. */
4368 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4369 {
4370 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4371 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4372 opnum, (enum reload_type)type);
4373 *win = 1;
4374 return x;
4375 }
4376
4377 *win = 0;
4378 return x;
4379 }
4380
4381 /* Return true if ADDR (a legitimate address expression)
4382 has an effect that depends on the machine mode it is used for.
4383
4384 In PIC mode,
4385
4386 (mem:HI [%l7+a])
4387
4388 is not equivalent to
4389
4390 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4391
4392 because [%l7+a+1] is interpreted as the address of (a+1). */
4393
4394
4395 static bool
4396 sparc_mode_dependent_address_p (const_rtx addr,
4397 addr_space_t as ATTRIBUTE_UNUSED)
4398 {
4399 if (flag_pic && GET_CODE (addr) == PLUS)
4400 {
4401 rtx op0 = XEXP (addr, 0);
4402 rtx op1 = XEXP (addr, 1);
4403 if (op0 == pic_offset_table_rtx
4404 && symbolic_operand (op1, VOIDmode))
4405 return true;
4406 }
4407
4408 return false;
4409 }
4410
4411 #ifdef HAVE_GAS_HIDDEN
4412 # define USE_HIDDEN_LINKONCE 1
4413 #else
4414 # define USE_HIDDEN_LINKONCE 0
4415 #endif
4416
4417 static void
4418 get_pc_thunk_name (char name[32], unsigned int regno)
4419 {
4420 const char *reg_name = reg_names[regno];
4421
4422 /* Skip the leading '%' as that cannot be used in a
4423 symbol name. */
4424 reg_name += 1;
4425
4426 if (USE_HIDDEN_LINKONCE)
4427 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4428 else
4429 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4430 }
4431
4432 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4433
4434 static rtx
4435 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4436 {
4437 int orig_flag_pic = flag_pic;
4438 rtx insn;
4439
4440 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4441 flag_pic = 0;
4442 if (TARGET_ARCH64)
4443 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4444 else
4445 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4446 flag_pic = orig_flag_pic;
4447
4448 return insn;
4449 }
4450
4451 /* Emit code to load the GOT register. */
4452
4453 void
4454 load_got_register (void)
4455 {
4456 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4457 if (!global_offset_table_rtx)
4458 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4459
4460 if (TARGET_VXWORKS_RTP)
4461 emit_insn (gen_vxworks_load_got ());
4462 else
4463 {
4464 /* The GOT symbol is subject to a PC-relative relocation so we need a
4465 helper function to add the PC value and thus get the final value. */
4466 if (!got_helper_rtx)
4467 {
4468 char name[32];
4469 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4470 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4471 }
4472
4473 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4474 got_helper_rtx,
4475 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4476 }
4477
4478 /* Need to emit this whether or not we obey regdecls,
4479 since setjmp/longjmp can cause the life info to become wrong.
4480 ??? In the case where we don't obey regdecls, this is not sufficient
4481 since we may not fall out the bottom. */
4482 emit_use (global_offset_table_rtx);
4483 }
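
/* A sketch of the resulting 32-bit code, using the standard PC-relative
   idiom:

        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
        call    __sparc_get_pc_thunk.l7
         add    %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk is just "retl; add %o7, %l7, %l7", so %l7 ends up
   holding the absolute address of the GOT; the -4/+4 compensate for the
   distance between the sethi and the call. */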
4484
4485 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4486 address of the call target. */
4487
4488 void
4489 sparc_emit_call_insn (rtx pat, rtx addr)
4490 {
4491 rtx insn;
4492
4493 insn = emit_call_insn (pat);
4494
4495 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4496 if (TARGET_VXWORKS_RTP
4497 && flag_pic
4498 && GET_CODE (addr) == SYMBOL_REF
4499 && (SYMBOL_REF_DECL (addr)
4500 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4501 : !SYMBOL_REF_LOCAL_P (addr)))
4502 {
4503 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4504 crtl->uses_pic_offset_table = 1;
4505 }
4506 }
4507 \f
4508 /* Return 1 if RTX is a MEM which is known to be aligned to at
4509 least a DESIRED byte boundary. */
4510
4511 int
4512 mem_min_alignment (rtx mem, int desired)
4513 {
4514 rtx addr, base, offset;
4515
4516 /* If it's not a MEM we can't accept it. */
4517 if (GET_CODE (mem) != MEM)
4518 return 0;
4519
4520 /* Obviously... */
4521 if (!TARGET_UNALIGNED_DOUBLES
4522 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4523 return 1;
4524
4525 /* ??? The rest of the function predates MEM_ALIGN so
4526 there is probably a bit of redundancy. */
4527 addr = XEXP (mem, 0);
4528 base = offset = NULL_RTX;
4529 if (GET_CODE (addr) == PLUS)
4530 {
4531 if (GET_CODE (XEXP (addr, 0)) == REG)
4532 {
4533 base = XEXP (addr, 0);
4534
4535 /* What we are saying here is that if the base
4536 REG is aligned properly, the compiler will make
4537 sure any REG based index upon it will be so
4538 as well. */
4539 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4540 offset = XEXP (addr, 1);
4541 else
4542 offset = const0_rtx;
4543 }
4544 }
4545 else if (GET_CODE (addr) == REG)
4546 {
4547 base = addr;
4548 offset = const0_rtx;
4549 }
4550
4551 if (base != NULL_RTX)
4552 {
4553 int regno = REGNO (base);
4554
4555 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4556 {
4557 /* Check if the compiler has recorded some information
4558 about the alignment of the base REG. If reload has
4559 completed, we already matched with proper alignments.
4560 If not running global_alloc, reload might give us
4561 an unaligned pointer to the local stack, though. */
4562 if (((cfun != 0
4563 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4564 || (optimize && reload_completed))
4565 && (INTVAL (offset) & (desired - 1)) == 0)
4566 return 1;
4567 }
4568 else
4569 {
4570 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4571 return 1;
4572 }
4573 }
4574 else if (! TARGET_UNALIGNED_DOUBLES
4575 || CONSTANT_P (addr)
4576 || GET_CODE (addr) == LO_SUM)
4577 {
4578 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4579 is true, in which case we can only assume that an access is aligned if
4580 it is to a constant address, or the address involves a LO_SUM. */
4581 return 1;
4582 }
4583
4584 /* An obviously unaligned address. */
4585 return 0;
4586 }
4587
4588 \f
4589 /* Vectors to keep interesting information about registers where it can easily
4590 be got. We used to use the actual mode value as the bit number, but there
4591 are more than 32 modes now. Instead we use two tables: one indexed by
4592 hard register number, and one indexed by mode. */
4593
4594 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4595 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4596 mapped into one sparc_mode_class mode. */
4597
4598 enum sparc_mode_class {
4599 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4600 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4601 CC_MODE, CCFP_MODE
4602 };
4603
4604 /* Modes for single-word and smaller quantities. */
4605 #define S_MODES \
4606 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4607
4608 /* Modes for double-word and smaller quantities. */
4609 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4610
4611 /* Modes for quad-word and smaller quantities. */
4612 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4613
4614 /* Modes for 8-word and smaller quantities. */
4615 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4616
4617 /* Modes for single-float quantities. */
4618 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4619
4620 /* Modes for double-float and smaller quantities. */
4621 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4622
4623 /* Modes for quad-float and smaller quantities. */
4624 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4625
4626 /* Modes for quad-float pairs and smaller quantities. */
4627 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4628
4629 /* Modes for double-float only quantities. */
4630 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4631
4632 /* Modes for quad-float and double-float only quantities. */
4633 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4634
4635 /* Modes for quad-float pairs and double-float only quantities. */
4636 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4637
4638 /* Modes for condition codes. */
4639 #define CC_MODES (1 << (int) CC_MODE)
4640 #define CCFP_MODES (1 << (int) CCFP_MODE)
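
/* For example, D_MODES above expands to
   (1 << H_MODE) | (1 << S_MODE) | (1 << SF_MODE)
   | (1 << D_MODE) | (1 << DF_MODE) == 0x67,
   so a register whose entry in hard_regno_mode_classes includes D_MODES
   can hold anything up to a double word, integer or float alike. */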
4641
4642 /* Value is 1 if register/mode pair is acceptable on sparc.
4643 The funny mixture of D and T modes is because integer operations
4644 do not specially operate on tetra quantities, so non-quad-aligned
4645 registers can hold quadword quantities (except %o4 and %i4 because
4646 they cross fixed registers). */
4647
4648 /* This points to either the 32 bit or the 64 bit version. */
4649 const int *hard_regno_mode_classes;
4650
4651 static const int hard_32bit_mode_classes[] = {
4652 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4653 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4654 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4655 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4656
4657 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4658 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4659 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4660 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4661
4662 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4663 and none can hold SFmode/SImode values. */
4664 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4665 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4666 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4667 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4668
4669 /* %fcc[0123] */
4670 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4671
4672 /* %icc, %sfp, %gsr */
4673 CC_MODES, 0, D_MODES
4674 };
4675
4676 static const int hard_64bit_mode_classes[] = {
4677 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4678 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4679 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4680 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4681
4682 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4683 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4684 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4685 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4686
4687 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4688 and none can hold SFmode/SImode values. */
4689 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4690 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4691 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4692 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4693
4694 /* %fcc[0123] */
4695 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4696
4697 /* %icc, %sfp, %gsr */
4698 CC_MODES, 0, D_MODES
4699 };
4700
4701 int sparc_mode_class [NUM_MACHINE_MODES];
4702
4703 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4704
4705 static void
4706 sparc_init_modes (void)
4707 {
4708 int i;
4709
4710 for (i = 0; i < NUM_MACHINE_MODES; i++)
4711 {
4712 switch (GET_MODE_CLASS (i))
4713 {
4714 case MODE_INT:
4715 case MODE_PARTIAL_INT:
4716 case MODE_COMPLEX_INT:
4717 if (GET_MODE_SIZE (i) < 4)
4718 sparc_mode_class[i] = 1 << (int) H_MODE;
4719 else if (GET_MODE_SIZE (i) == 4)
4720 sparc_mode_class[i] = 1 << (int) S_MODE;
4721 else if (GET_MODE_SIZE (i) == 8)
4722 sparc_mode_class[i] = 1 << (int) D_MODE;
4723 else if (GET_MODE_SIZE (i) == 16)
4724 sparc_mode_class[i] = 1 << (int) T_MODE;
4725 else if (GET_MODE_SIZE (i) == 32)
4726 sparc_mode_class[i] = 1 << (int) O_MODE;
4727 else
4728 sparc_mode_class[i] = 0;
4729 break;
4730 case MODE_VECTOR_INT:
4731 if (GET_MODE_SIZE (i) == 4)
4732 sparc_mode_class[i] = 1 << (int) SF_MODE;
4733 else if (GET_MODE_SIZE (i) == 8)
4734 sparc_mode_class[i] = 1 << (int) DF_MODE;
4735 else
4736 sparc_mode_class[i] = 0;
4737 break;
4738 case MODE_FLOAT:
4739 case MODE_COMPLEX_FLOAT:
4740 if (GET_MODE_SIZE (i) == 4)
4741 sparc_mode_class[i] = 1 << (int) SF_MODE;
4742 else if (GET_MODE_SIZE (i) == 8)
4743 sparc_mode_class[i] = 1 << (int) DF_MODE;
4744 else if (GET_MODE_SIZE (i) == 16)
4745 sparc_mode_class[i] = 1 << (int) TF_MODE;
4746 else if (GET_MODE_SIZE (i) == 32)
4747 sparc_mode_class[i] = 1 << (int) OF_MODE;
4748 else
4749 sparc_mode_class[i] = 0;
4750 break;
4751 case MODE_CC:
4752 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4753 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4754 else
4755 sparc_mode_class[i] = 1 << (int) CC_MODE;
4756 break;
4757 default:
4758 sparc_mode_class[i] = 0;
4759 break;
4760 }
4761 }
4762
4763 if (TARGET_ARCH64)
4764 hard_regno_mode_classes = hard_64bit_mode_classes;
4765 else
4766 hard_regno_mode_classes = hard_32bit_mode_classes;
4767
4768 /* Initialize the array used by REGNO_REG_CLASS. */
4769 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4770 {
4771 if (i < 16 && TARGET_V8PLUS)
4772 sparc_regno_reg_class[i] = I64_REGS;
4773 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4774 sparc_regno_reg_class[i] = GENERAL_REGS;
4775 else if (i < 64)
4776 sparc_regno_reg_class[i] = FP_REGS;
4777 else if (i < 96)
4778 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4779 else if (i < 100)
4780 sparc_regno_reg_class[i] = FPCC_REGS;
4781 else
4782 sparc_regno_reg_class[i] = NO_REGS;
4783 }
4784 }
4785 \f
4786 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4787
4788 static inline bool
4789 save_global_or_fp_reg_p (unsigned int regno,
4790 int leaf_function ATTRIBUTE_UNUSED)
4791 {
4792 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4793 }
4794
4795 /* Return whether the return address register (%i7) is needed. */
4796
4797 static inline bool
4798 return_addr_reg_needed_p (int leaf_function)
4799 {
4800 /* If it is live, for example because of __builtin_return_address (0). */
4801 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4802 return true;
4803
4804 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4805 if (!leaf_function
4806 /* Loading the GOT register clobbers %o7. */
4807 || crtl->uses_pic_offset_table
4808 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4809 return true;
4810
4811 return false;
4812 }
4813
4814 /* Return whether REGNO, a local or in register, must be saved/restored. */
4815
4816 static bool
4817 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4818 {
4819 /* General case: call-saved registers live at some point. */
4820 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4821 return true;
4822
4823 /* Frame pointer register (%fp) if needed. */
4824 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4825 return true;
4826
4827 /* Return address register (%i7) if needed. */
4828 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4829 return true;
4830
4831 /* GOT register (%l7) if needed. */
4832 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4833 return true;
4834
4835 /* If the function accesses prior frames, the frame pointer and the return
4836 address of the previous frame must be saved on the stack. */
4837 if (crtl->accesses_prior_frames
4838 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4839 return true;
4840
4841 return false;
4842 }
4843
4844 /* Compute the frame size required by the function. This function is called
4845 during the reload pass and also by sparc_expand_prologue. */
4846
4847 HOST_WIDE_INT
4848 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4849 {
4850 HOST_WIDE_INT frame_size, apparent_frame_size;
4851 int args_size, n_global_fp_regs = 0;
4852 bool save_local_in_regs_p = false;
4853 unsigned int i;
4854
4855 /* If the function allocates dynamic stack space, the dynamic offset is
4856 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4857 if (leaf_function && !cfun->calls_alloca)
4858 args_size = 0;
4859 else
4860 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4861
4862 /* Calculate space needed for global registers. */
4863 if (TARGET_ARCH64)
4864 for (i = 0; i < 8; i++)
4865 if (save_global_or_fp_reg_p (i, 0))
4866 n_global_fp_regs += 2;
4867 else
4868 for (i = 0; i < 8; i += 2)
4869 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4870 n_global_fp_regs += 2;
4871
4872 /* In the flat window model, find out which local and in registers need to
4873 be saved. We don't reserve space in the current frame for them as they
4874 will be spilled into the register window save area of the caller's frame.
4875 However, as soon as we use this register window save area, we must create
4876 that of the current frame to make it the live one. */
4877 if (TARGET_FLAT)
4878 for (i = 16; i < 32; i++)
4879 if (save_local_or_in_reg_p (i, leaf_function))
4880 {
4881 save_local_in_regs_p = true;
4882 break;
4883 }
4884
4885 /* Calculate space needed for FP registers. */
4886 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4887 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4888 n_global_fp_regs += 2;
4889
4890 if (size == 0
4891 && n_global_fp_regs == 0
4892 && args_size == 0
4893 && !save_local_in_regs_p)
4894 frame_size = apparent_frame_size = 0;
4895 else
4896 {
4897 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4898 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4899 apparent_frame_size += n_global_fp_regs * 4;
4900
4901 /* We need to add the size of the outgoing argument area. */
4902 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4903
4904 /* And that of the register window save area. */
4905 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4906
4907 /* Finally, bump to the appropriate alignment. */
4908 frame_size = SPARC_STACK_ALIGN (frame_size);
4909 }
4910
4911 /* Set up values for use in prologue and epilogue. */
4912 sparc_frame_size = frame_size;
4913 sparc_apparent_frame_size = apparent_frame_size;
4914 sparc_n_global_fp_regs = n_global_fp_regs;
4915 sparc_save_local_in_regs_p = save_local_in_regs_p;
4916
4917 return frame_size;
4918 }
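
/* A worked example for 32-bit code, taking STARTING_FRAME_OFFSET as 0
   for simplicity: with 20 bytes of locals, 8 bytes of outgoing
   arguments and nothing else to save, apparent_frame_size is
   (20 + 7) & -8 = 24; adding the rounded argument area (8) and
   FIRST_PARM_OFFSET (68 here: the 16*4-byte register window save area
   plus the aggregate-return slot) gives 100, which SPARC_STACK_ALIGN
   rounds up to 104. */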
4919
4920 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4921
4922 int
4923 sparc_initial_elimination_offset (int to)
4924 {
4925 int offset;
4926
4927 if (to == STACK_POINTER_REGNUM)
4928 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4929 else
4930 offset = 0;
4931
4932 offset += SPARC_STACK_BIAS;
4933 return offset;
4934 }
4935
4936 /* Output any necessary .register pseudo-ops. */
4937
4938 void
4939 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4940 {
4941 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4942 int i;
4943
4944 if (TARGET_ARCH32)
4945 return;
4946
4947 /* Check if %g[2367] were used without
4948 .register being printed for them already. */
4949 for (i = 2; i < 8; i++)
4950 {
4951 if (df_regs_ever_live_p (i)
4952 && ! sparc_hard_reg_printed [i])
4953 {
4954 sparc_hard_reg_printed [i] = 1;
4955 /* %g7 is used as TLS base register, use #ignore
4956 for it instead of #scratch. */
4957 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4958 i == 7 ? "ignore" : "scratch");
4959 }
4960 if (i == 3) i = 5;
4961 }
4962 #endif
4963 }
4964
4965 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4966
4967 #if PROBE_INTERVAL > 4096
4968 #error Cannot use indexed addressing mode for stack probing
4969 #endif
4970
4971 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4972 inclusive. These are offsets from the current stack pointer.
4973
4974 Note that we don't use the REG+REG addressing mode for the probes because
4975 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
4976 so the advantages of having a single code path win here. */
4977
4978 static void
4979 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4980 {
4981 rtx g1 = gen_rtx_REG (Pmode, 1);
4982
4983 /* See if we have a constant small number of probes to generate. If so,
4984 that's the easy case. */
4985 if (size <= PROBE_INTERVAL)
4986 {
4987 emit_move_insn (g1, GEN_INT (first));
4988 emit_insn (gen_rtx_SET (VOIDmode, g1,
4989 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4990 emit_stack_probe (plus_constant (Pmode, g1, -size));
4991 }
4992
4993 /* The run-time loop is made up of 10 insns in the generic case while the
4994 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
4995 else if (size <= 5 * PROBE_INTERVAL)
4996 {
4997 HOST_WIDE_INT i;
4998
4999 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5000 emit_insn (gen_rtx_SET (VOIDmode, g1,
5001 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5002 emit_stack_probe (g1);
5003
5004 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5005 it exceeds SIZE. If only two probes are needed, this will not
5006 generate any code. Then probe at FIRST + SIZE. */
5007 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5008 {
5009 emit_insn (gen_rtx_SET (VOIDmode, g1,
5010 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5011 emit_stack_probe (g1);
5012 }
5013
5014 emit_stack_probe (plus_constant (Pmode, g1,
5015 (i - PROBE_INTERVAL) - size));
5016 }
5017
5018 /* Otherwise, do the same as above, but in a loop. Note that we must be
5019 extra careful with variables wrapping around because we might be at
5020 the very top (or the very bottom) of the address space and we have
5021 to be able to handle this case properly; in particular, we use an
5022 equality test for the loop condition. */
5023 else
5024 {
5025 HOST_WIDE_INT rounded_size;
5026 rtx g4 = gen_rtx_REG (Pmode, 4);
5027
5028 emit_move_insn (g1, GEN_INT (first));
5029
5030
5031 /* Step 1: round SIZE to the previous multiple of the interval. */
5032
5033 rounded_size = size & -PROBE_INTERVAL;
5034 emit_move_insn (g4, GEN_INT (rounded_size));
5035
5036
5037 /* Step 2: compute initial and final value of the loop counter. */
5038
5039 /* TEST_ADDR = SP + FIRST. */
5040 emit_insn (gen_rtx_SET (VOIDmode, g1,
5041 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5042
5043 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5044 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5045
5046
5047 /* Step 3: the loop
5048
5049 while (TEST_ADDR != LAST_ADDR)
5050 {
5051 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5052 probe at TEST_ADDR
5053 }
5054
5055 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5056 until it is equal to ROUNDED_SIZE. */
5057
5058 if (TARGET_ARCH64)
5059 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5060 else
5061 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5062
5063
5064 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5065 that SIZE is equal to ROUNDED_SIZE. */
5066
5067 if (size != rounded_size)
5068 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5069 }
5070
5071 /* Make sure nothing is scheduled before we are done. */
5072 emit_insn (gen_blockage ());
5073 }
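
/* A worked example of the middle (unrolled) case above, with
   PROBE_INTERVAL = 4096, FIRST = 8192 and SIZE = 10000: %g1 is first
   set to SP - 12288 and probed; the loop then probes SP - 16384;
   finally the last probe lands at %g1 + (12288 - 4096) - 10000
   = SP - 18192, which is exactly SP - FIRST - SIZE as required. */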
5074
5075 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5076 absolute addresses. */
5077
5078 const char *
5079 output_probe_stack_range (rtx reg1, rtx reg2)
5080 {
5081 static int labelno = 0;
5082 char loop_lab[32], end_lab[32];
5083 rtx xops[2];
5084
5085 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5086 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5087
5088 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5089
5090 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5091 xops[0] = reg1;
5092 xops[1] = reg2;
5093 output_asm_insn ("cmp\t%0, %1", xops);
5094 if (TARGET_ARCH64)
5095 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5096 else
5097 fputs ("\tbe\t", asm_out_file);
5098 assemble_name_raw (asm_out_file, end_lab);
5099 fputc ('\n', asm_out_file);
5100
5101 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5102 xops[1] = GEN_INT (-PROBE_INTERVAL);
5103 output_asm_insn (" add\t%0, %1, %0", xops);
5104
5105 /* Probe at TEST_ADDR and branch. */
5106 if (TARGET_ARCH64)
5107 fputs ("\tba,pt\t%xcc,", asm_out_file);
5108 else
5109 fputs ("\tba\t", asm_out_file);
5110 assemble_name_raw (asm_out_file, loop_lab);
5111 fputc ('\n', asm_out_file);
5112 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5113 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5114
5115 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5116
5117 return "";
5118 }
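
/* With the default 4096-byte interval, the 32-bit output therefore
   looks roughly like

   .LPSRL0:
        cmp     %g1, %g4
        be      .LPSRE0
         add    %g1, -4096, %g1
        ba      .LPSRL0
         st     %g0, [%g1+0]
   .LPSRE0:

   with the add and the store sitting in the delay slots of the two
   branches (hence the leading space in the templates above). */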
5119
5120 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5121 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5122 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5123 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5124 the action to be performed if it returns false. Return the new offset. */
5125
5126 typedef bool (*sorr_pred_t) (unsigned int, int);
5127 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5128
5129 static int
5130 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5131 int offset, int leaf_function, sorr_pred_t save_p,
5132 sorr_act_t action_true, sorr_act_t action_false)
5133 {
5134 unsigned int i;
5135 rtx mem, insn;
5136
5137 if (TARGET_ARCH64 && high <= 32)
5138 {
5139 int fp_offset = -1;
5140
5141 for (i = low; i < high; i++)
5142 {
5143 if (save_p (i, leaf_function))
5144 {
5145 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5146 base, offset));
5147 if (action_true == SORR_SAVE)
5148 {
5149 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5150 RTX_FRAME_RELATED_P (insn) = 1;
5151 }
5152 else /* action_true == SORR_RESTORE */
5153 {
5154 /* The frame pointer must be restored last since its old
5155 value may be used as the base address for the frame. This
5156 is problematic only in 64-bit mode because of the lack
5157 of a double-word load instruction. */
5158 if (i == HARD_FRAME_POINTER_REGNUM)
5159 fp_offset = offset;
5160 else
5161 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5162 }
5163 offset += 8;
5164 }
5165 else if (action_false == SORR_ADVANCE)
5166 offset += 8;
5167 }
5168
5169 if (fp_offset >= 0)
5170 {
5171 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5172 emit_move_insn (hard_frame_pointer_rtx, mem);
5173 }
5174 }
5175 else
5176 {
5177 for (i = low; i < high; i += 2)
5178 {
5179 bool reg0 = save_p (i, leaf_function);
5180 bool reg1 = save_p (i + 1, leaf_function);
5181 enum machine_mode mode;
5182 int regno;
5183
5184 if (reg0 && reg1)
5185 {
5186 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5187 regno = i;
5188 }
5189 else if (reg0)
5190 {
5191 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5192 regno = i;
5193 }
5194 else if (reg1)
5195 {
5196 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5197 regno = i + 1;
5198 offset += 4;
5199 }
5200 else
5201 {
5202 if (action_false == SORR_ADVANCE)
5203 offset += 8;
5204 continue;
5205 }
5206
5207 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5208 if (action_true == SORR_SAVE)
5209 {
5210 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5211 RTX_FRAME_RELATED_P (insn) = 1;
5212 if (mode == DImode)
5213 {
5214 rtx set1, set2;
5215 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5216 offset));
5217 set1 = gen_rtx_SET (VOIDmode, mem,
5218 gen_rtx_REG (SImode, regno));
5219 RTX_FRAME_RELATED_P (set1) = 1;
5220 mem
5221 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5222 offset + 4));
5223 set2 = gen_rtx_SET (VOIDmode, mem,
5224 gen_rtx_REG (SImode, regno + 1));
5225 RTX_FRAME_RELATED_P (set2) = 1;
5226 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5227 gen_rtx_PARALLEL (VOIDmode,
5228 gen_rtvec (2, set1, set2)));
5229 }
5230 }
5231 else /* action_true == SORR_RESTORE */
5232 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5233
5234 /* Always preserve double-word alignment. */
5235 offset = (offset + 8) & -8;
5236 }
5237 }
5238
5239 return offset;
5240 }
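
/* As an illustration of the pairing logic above on 32-bit: when %l0 and
   %l1 both need saving they are stored together as one DImode (std)
   access at OFFSET, while if only %l1 does, a single SImode store goes
   to OFFSET + 4, so the slot assignment never depends on which half of
   the pair happens to be live. */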
5241
5242 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5243
5244 static rtx
5245 emit_adjust_base_to_offset (rtx base, int offset)
5246 {
5247 /* ??? This might be optimized a little as %g1 might already have a
5248 value close enough that a single add insn will do. */
5249 /* ??? Although, all of this is probably only a temporary fix because
5250 if %g1 can hold a function result, then sparc_expand_epilogue will
5251 lose (the result will be clobbered). */
5252 rtx new_base = gen_rtx_REG (Pmode, 1);
5253 emit_move_insn (new_base, GEN_INT (offset));
5254 emit_insn (gen_rtx_SET (VOIDmode,
5255 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5256 return new_base;
5257 }
5258
5259 /* Emit code to save/restore call-saved global and FP registers. */
5260
5261 static void
5262 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5263 {
5264 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5265 {
5266 base = emit_adjust_base_to_offset (base, offset);
5267 offset = 0;
5268 }
5269
5270 offset
5271 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5272 save_global_or_fp_reg_p, action, SORR_NONE);
5273 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5274 save_global_or_fp_reg_p, action, SORR_NONE);
5275 }
5276
5277 /* Emit code to save/restore call-saved local and in registers. */
5278
5279 static void
5280 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5281 {
5282 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5283 {
5284 base = emit_adjust_base_to_offset (base, offset);
5285 offset = 0;
5286 }
5287
5288 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5289 save_local_or_in_reg_p, action, SORR_ADVANCE);
5290 }
5291
5292 /* Emit a window_save insn. */
5293
5294 static rtx
5295 emit_window_save (rtx increment)
5296 {
5297 rtx insn = emit_insn (gen_window_save (increment));
5298 RTX_FRAME_RELATED_P (insn) = 1;
5299
5300 /* The incoming return address (%o7) is saved in %i7. */
5301 add_reg_note (insn, REG_CFA_REGISTER,
5302 gen_rtx_SET (VOIDmode,
5303 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5304 gen_rtx_REG (Pmode,
5305 INCOMING_RETURN_ADDR_REGNUM)));
5306
5307 /* The window save event. */
5308 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5309
5310 /* The CFA is %fp, the hard frame pointer. */
5311 add_reg_note (insn, REG_CFA_DEF_CFA,
5312 plus_constant (Pmode, hard_frame_pointer_rtx,
5313 INCOMING_FRAME_SP_OFFSET));
5314
5315 return insn;
5316 }
5317
5318 /* Generate an increment for the stack pointer. */
5319
5320 static rtx
5321 gen_stack_pointer_inc (rtx increment)
5322 {
5323 return gen_rtx_SET (VOIDmode,
5324 stack_pointer_rtx,
5325 gen_rtx_PLUS (Pmode,
5326 stack_pointer_rtx,
5327 increment));
5328 }
5329
5330 /* Expand the function prologue. The prologue is responsible for reserving
5331 storage for the frame, saving the call-saved registers and loading the
5332 GOT register if needed. */
5333
5334 void
5335 sparc_expand_prologue (void)
5336 {
5337 HOST_WIDE_INT size;
5338 rtx insn;
5339
5340 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5341 on the final value of the flag means deferring the prologue/epilogue
5342 expansion until just before the second scheduling pass, which is too
5343 late to emit multiple epilogues or return insns.
5344
5345 Of course we are making the assumption that the value of the flag
5346 will not change between now and its final value. Of the three parts
5347 of the formula, only the last one can reasonably vary. Let's take a
5348 closer look, assuming that the first two are true (otherwise the
5349 last value is effectively silenced).
5350
5351 If only_leaf_regs_used returns false, the global predicate will also
5352 be false so the actual frame size calculated below will be positive.
5353 As a consequence, the save_register_window insn will be emitted in
5354 the instruction stream; now this insn explicitly references %fp
5355 which is not a leaf register so only_leaf_regs_used will always
5356 return false subsequently.
5357
5358 If only_leaf_regs_used returns true, we hope that the subsequent
5359 optimization passes won't cause non-leaf registers to pop up. For
5360 example, the regrename pass has special provisions to not rename to
5361 non-leaf registers in a leaf function. */
5362 sparc_leaf_function_p
5363 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5364
5365 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5366
5367 if (flag_stack_usage_info)
5368 current_function_static_stack_size = size;
5369
5370 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5371 {
5372 if (crtl->is_leaf && !cfun->calls_alloca)
5373 {
5374 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5375 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5376 size - STACK_CHECK_PROTECT);
5377 }
5378 else if (size > 0)
5379 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5380 }
5381
5382 if (size == 0)
5383 ; /* do nothing. */
5384 else if (sparc_leaf_function_p)
5385 {
5386 rtx size_int_rtx = GEN_INT (-size);
5387
5388 if (size <= 4096)
5389 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5390 else if (size <= 8192)
5391 {
5392 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5393 RTX_FRAME_RELATED_P (insn) = 1;
5394
5395 /* %sp is still the CFA register. */
5396 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5397 }
5398 else
5399 {
5400 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5401 emit_move_insn (size_rtx, size_int_rtx);
5402 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5403 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5404 gen_stack_pointer_inc (size_int_rtx));
5405 }
5406
5407 RTX_FRAME_RELATED_P (insn) = 1;
5408 }
5409 else
5410 {
5411 rtx size_int_rtx = GEN_INT (-size);
5412
5413 if (size <= 4096)
5414 emit_window_save (size_int_rtx);
5415 else if (size <= 8192)
5416 {
5417 emit_window_save (GEN_INT (-4096));
5418
5419 /* %sp is not the CFA register anymore. */
5420 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5421
5422 /* Make sure no %fp-based store is issued until after the frame is
5423 established. The offset between the frame pointer and the stack
5424 pointer is calculated relative to the value of the stack pointer
5425 at the end of the function prologue, and moving instructions that
5426 access the stack via the frame pointer between the instructions
5427 that decrement the stack pointer could result in accessing the
5428 register window save area, which is volatile. */
5429 emit_insn (gen_frame_blockage ());
5430 }
5431 else
5432 {
5433 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5434 emit_move_insn (size_rtx, size_int_rtx);
5435 emit_window_save (size_rtx);
5436 }
5437 }
5438
5439 if (sparc_leaf_function_p)
5440 {
5441 sparc_frame_base_reg = stack_pointer_rtx;
5442 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5443 }
5444 else
5445 {
5446 sparc_frame_base_reg = hard_frame_pointer_rtx;
5447 sparc_frame_base_offset = SPARC_STACK_BIAS;
5448 }
5449
5450 if (sparc_n_global_fp_regs > 0)
5451 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5452 sparc_frame_base_offset
5453 - sparc_apparent_frame_size,
5454 SORR_SAVE);
5455
5456 /* Load the GOT register if needed. */
5457 if (crtl->uses_pic_offset_table)
5458 load_got_register ();
5459
5460 /* Advertise that the data calculated just above are now valid. */
5461 sparc_prologue_data_valid_p = true;
5462 }
5463
5464 /* Expand the function prologue for the flat window model. The prologue
5465 is responsible for reserving storage for the frame, saving the
5466 call-saved registers and loading the GOT register if needed. */
5467
5468 void
5469 sparc_flat_expand_prologue (void)
5470 {
5471 HOST_WIDE_INT size;
5472 rtx insn;
5473
5474 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5475
5476 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5477
5478 if (flag_stack_usage_info)
5479 current_function_static_stack_size = size;
5480
5481 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5482 {
5483 if (crtl->is_leaf && !cfun->calls_alloca)
5484 {
5485 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5486 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5487 size - STACK_CHECK_PROTECT);
5488 }
5489 else if (size > 0)
5490 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5491 }
5492
5493 if (sparc_save_local_in_regs_p)
5494 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5495 SORR_SAVE);
5496
5497 if (size == 0)
5498 ; /* do nothing. */
5499 else
5500 {
5501 rtx size_int_rtx, size_rtx;
5502
5503 size_rtx = size_int_rtx = GEN_INT (-size);
5504
5505 /* We establish the frame (i.e. decrement the stack pointer) first, even
5506 if we use a frame pointer, because we cannot clobber any call-saved
5507 registers, including the frame pointer, if we haven't created a new
5508 register save area, for the sake of compatibility with the ABI. */
5509 if (size <= 4096)
5510 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5511 else if (size <= 8192 && !frame_pointer_needed)
5512 {
5513 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5514 RTX_FRAME_RELATED_P (insn) = 1;
5515 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5516 }
5517 else
5518 {
5519 size_rtx = gen_rtx_REG (Pmode, 1);
5520 emit_move_insn (size_rtx, size_int_rtx);
5521 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5522 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5523 gen_stack_pointer_inc (size_int_rtx));
5524 }
5525 RTX_FRAME_RELATED_P (insn) = 1;
5526
5527 /* Ensure nothing is scheduled until after the frame is established. */
5528 emit_insn (gen_blockage ());
5529
5530 if (frame_pointer_needed)
5531 {
5532 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5533 gen_rtx_MINUS (Pmode,
5534 stack_pointer_rtx,
5535 size_rtx)));
5536 RTX_FRAME_RELATED_P (insn) = 1;
5537
5538 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5539 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5540 plus_constant (Pmode, stack_pointer_rtx,
5541 size)));
5542 }
5543
5544 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5545 {
5546 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5547 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5548
5549 insn = emit_move_insn (i7, o7);
5550 RTX_FRAME_RELATED_P (insn) = 1;
5551
5552 add_reg_note (insn, REG_CFA_REGISTER,
5553 gen_rtx_SET (VOIDmode, i7, o7));
5554
5555 /* Prevent this instruction from ever being considered dead,
5556 even if this function has no epilogue. */
5557 emit_use (i7);
5558 }
5559 }
5560
5561 if (frame_pointer_needed)
5562 {
5563 sparc_frame_base_reg = hard_frame_pointer_rtx;
5564 sparc_frame_base_offset = SPARC_STACK_BIAS;
5565 }
5566 else
5567 {
5568 sparc_frame_base_reg = stack_pointer_rtx;
5569 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5570 }
5571
5572 if (sparc_n_global_fp_regs > 0)
5573 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5574 sparc_frame_base_offset
5575 - sparc_apparent_frame_size,
5576 SORR_SAVE);
5577
5578 /* Load the GOT register if needed. */
5579 if (crtl->uses_pic_offset_table)
5580 load_got_register ();
5581
5582 /* Advertise that the data calculated just above are now valid. */
5583 sparc_prologue_data_valid_p = true;
5584 }
5585
5586 /* This function generates the assembly code for function entry, which boils
5587 down to emitting the necessary .register directives. */
5588
5589 static void
5590 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5591 {
5592 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5593 if (!TARGET_FLAT)
5594 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5595
5596 sparc_output_scratch_registers (file);
5597 }
5598
5599 /* Expand the function epilogue, either normal or part of a sibcall.
5600 We emit all the instructions except the return or the call. */
5601
5602 void
5603 sparc_expand_epilogue (bool for_eh)
5604 {
5605 HOST_WIDE_INT size = sparc_frame_size;
5606
5607 if (sparc_n_global_fp_regs > 0)
5608 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5609 sparc_frame_base_offset
5610 - sparc_apparent_frame_size,
5611 SORR_RESTORE);
5612
5613 if (size == 0 || for_eh)
5614 ; /* do nothing. */
5615 else if (sparc_leaf_function_p)
5616 {
5617 if (size <= 4096)
5618 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5619 else if (size <= 8192)
5620 {
5621 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5622 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5623 }
5624 else
5625 {
5626 rtx reg = gen_rtx_REG (Pmode, 1);
5627 emit_move_insn (reg, GEN_INT (size));
5628 emit_insn (gen_stack_pointer_inc (reg));
5629 }
5630 }
5631 }
5632
5633 /* Expand the function epilogue, either normal or part of a sibcall.
5634 We emit all the instructions except the return or the call. */
5635
5636 void
5637 sparc_flat_expand_epilogue (bool for_eh)
5638 {
5639 HOST_WIDE_INT size = sparc_frame_size;
5640
5641 if (sparc_n_global_fp_regs > 0)
5642 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5643 sparc_frame_base_offset
5644 - sparc_apparent_frame_size,
5645 SORR_RESTORE);
5646
5647 /* If we have a frame pointer, we'll need both to restore it before the
5648 frame is destroyed and to use its current value in destroying the frame.
5649 Since we don't have an atomic way to do that in the flat window model,
5650 we save the current value into a temporary register (%g1). */
5651 if (frame_pointer_needed && !for_eh)
5652 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5653
5654 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5655 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5656 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5657
5658 if (sparc_save_local_in_regs_p)
5659 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5660 sparc_frame_base_offset,
5661 SORR_RESTORE);
5662
5663 if (size == 0 || for_eh)
5664 ; /* do nothing. */
5665 else if (frame_pointer_needed)
5666 {
5667 /* Make sure the frame is destroyed after everything else is done. */
5668 emit_insn (gen_blockage ());
5669
5670 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5671 }
5672 else
5673 {
5674 /* Likewise. */
5675 emit_insn (gen_blockage ());
5676
5677 if (size <= 4096)
5678 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5679 else if (size <= 8192)
5680 {
5681 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5682 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5683 }
5684 else
5685 {
5686 rtx reg = gen_rtx_REG (Pmode, 1);
5687 emit_move_insn (reg, GEN_INT (size));
5688 emit_insn (gen_stack_pointer_inc (reg));
5689 }
5690 }
5691 }
5692
5693 /* Return true if it is appropriate to emit `return' instructions in the
5694 body of a function. */
5695
5696 bool
5697 sparc_can_use_return_insn_p (void)
5698 {
5699 return sparc_prologue_data_valid_p
5700 && sparc_n_global_fp_regs == 0
5701 && (TARGET_FLAT
5702 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5703 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5704 }
5705
5706 /* This function generates the assembly code for function exit. */
5707
5708 static void
5709 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5710 {
5711 /* If the last two instructions of a function are "call foo; dslot;"
5712 the return address might point to the first instruction in the next
5713 function and we have to output a dummy nop for the sake of sane
5714 backtraces in such cases. This is pointless for sibling calls since
5715 the return address is explicitly adjusted. */
5716
5717 rtx insn, last_real_insn;
5718
5719 insn = get_last_insn ();
5720
5721 last_real_insn = prev_real_insn (insn);
5722 if (last_real_insn
5723 && NONJUMP_INSN_P (last_real_insn)
5724 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5725 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5726
5727 if (last_real_insn
5728 && CALL_P (last_real_insn)
5729 && !SIBLING_CALL_P (last_real_insn))
5730 fputs("\tnop\n", file);
5731
5732 sparc_output_deferred_case_vectors ();
5733 }
5734
5735 /* Output a 'restore' instruction. */
5736
5737 static void
5738 output_restore (rtx pat)
5739 {
5740 rtx operands[3];
5741
5742 if (! pat)
5743 {
5744 fputs ("\t restore\n", asm_out_file);
5745 return;
5746 }
5747
5748 gcc_assert (GET_CODE (pat) == SET);
5749
5750 operands[0] = SET_DEST (pat);
5751 pat = SET_SRC (pat);
5752
5753 switch (GET_CODE (pat))
5754 {
5755 case PLUS:
5756 operands[1] = XEXP (pat, 0);
5757 operands[2] = XEXP (pat, 1);
5758 output_asm_insn (" restore %r1, %2, %Y0", operands);
5759 break;
5760 case LO_SUM:
5761 operands[1] = XEXP (pat, 0);
5762 operands[2] = XEXP (pat, 1);
5763 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5764 break;
5765 case ASHIFT:
5766 operands[1] = XEXP (pat, 0);
5767 gcc_assert (XEXP (pat, 1) == const1_rtx);
5768 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5769 break;
5770 default:
5771 operands[1] = pat;
5772 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5773 break;
5774 }
5775 }
5776
5777 /* Output a return. */
5778
5779 const char *
5780 output_return (rtx insn)
5781 {
5782 if (crtl->calls_eh_return)
5783 {
5784 /* If the function uses __builtin_eh_return, the eh_return
5785 machinery occupies the delay slot. */
5786 gcc_assert (!final_sequence);
5787
5788 if (flag_delayed_branch)
5789 {
5790 if (!TARGET_FLAT && TARGET_V9)
5791 fputs ("\treturn\t%i7+8\n", asm_out_file);
5792 else
5793 {
5794 if (!TARGET_FLAT)
5795 fputs ("\trestore\n", asm_out_file);
5796
5797 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5798 }
5799
5800 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5801 }
5802 else
5803 {
5804 if (!TARGET_FLAT)
5805 fputs ("\trestore\n", asm_out_file);
5806
5807 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5808 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5809 }
5810 }
5811 else if (sparc_leaf_function_p || TARGET_FLAT)
5812 {
5813 /* This is a leaf or flat function so we don't have to bother restoring
5814 the register window, which frees us from dealing with the convoluted
5815 semantics of restore/return. We simply output the jump to the
5816 return address and the insn in the delay slot (if any). */
5817
5818 return "jmp\t%%o7+%)%#";
5819 }
5820 else
5821 {
5822 /* This is a regular function so we have to restore the register window.
5823 We may have a pending insn for the delay slot, which will be either
5824 combined with the 'restore' instruction or put in the delay slot of
5825 the 'return' instruction. */
5826
5827 if (final_sequence)
5828 {
5829 rtx delay, pat;
5830
5831 delay = NEXT_INSN (insn);
5832 gcc_assert (delay);
5833
5834 pat = PATTERN (delay);
5835
5836 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5837 {
5838 epilogue_renumber (&pat, 0);
5839 return "return\t%%i7+%)%#";
5840 }
5841 else
5842 {
5843 output_asm_insn ("jmp\t%%i7+%)", NULL);
5844 output_restore (pat);
5845 PATTERN (delay) = gen_blockage ();
5846 INSN_CODE (delay) = -1;
5847 }
5848 }
5849 else
5850 {
5851 /* The delay slot is empty. */
5852 if (TARGET_V9)
5853 return "return\t%%i7+%)\n\t nop";
5854 else if (flag_delayed_branch)
5855 return "jmp\t%%i7+%)\n\t restore";
5856 else
5857 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5858 }
5859 }
5860
5861 return "";
5862 }
5863
5864 /* Output a sibling call. */
5865
5866 const char *
5867 output_sibcall (rtx insn, rtx call_operand)
5868 {
5869 rtx operands[1];
5870
5871 gcc_assert (flag_delayed_branch);
5872
5873 operands[0] = call_operand;
5874
5875 if (sparc_leaf_function_p || TARGET_FLAT)
5876 {
5877 /* This is a leaf or flat function so we don't have to bother restoring
5878 the register window. We simply output the jump to the function and
5879 the insn in the delay slot (if any). */
5880
5881 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5882
5883 if (final_sequence)
5884 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5885 operands);
5886 else
5887 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5888 it into a branch if possible. */
5889 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5890 operands);
5891 }
5892 else
5893 {
5894 /* This is a regular function so we have to restore the register window.
5895 We may have a pending insn for the delay slot, which will be combined
5896 with the 'restore' instruction. */
5897
5898 output_asm_insn ("call\t%a0, 0", operands);
5899
5900 if (final_sequence)
5901 {
5902 rtx delay = NEXT_INSN (insn);
5903 gcc_assert (delay);
5904
5905 output_restore (PATTERN (delay));
5906
5907 PATTERN (delay) = gen_blockage ();
5908 INSN_CODE (delay) = -1;
5909 }
5910 else
5911 output_restore (NULL_RTX);
5912 }
5913
5914 return "";
5915 }
5916 \f
5917 /* Functions for handling argument passing.
5918
5919 For 32-bit, the first 6 args are normally in registers and the rest are
5920 pushed. Any arg that starts within the first 6 words is at least
5921 partially passed in a register unless its data type forbids it.
5922
5923 For 64-bit, the argument registers are laid out as an array of 16 elements
5924 and arguments are added sequentially. The first 6 int args and up to the
5925 first 16 fp args (depending on size) are passed in regs.
5926
5927 Slot Stack Integral Float Float in structure Double Long Double
5928 ---- ----- -------- ----- ------------------ ------ -----------
5929 15 [SP+248] %f31 %f30,%f31 %d30
5930 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5931 13 [SP+232] %f27 %f26,%f27 %d26
5932 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5933 11 [SP+216] %f23 %f22,%f23 %d22
5934 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5935 9 [SP+200] %f19 %f18,%f19 %d18
5936 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5937 7 [SP+184] %f15 %f14,%f15 %d14
5938 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5939 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5940 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5941 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5942 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5943 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5944 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5945
5946 Here SP = %sp with -mno-stack-bias, or %sp+stack_bias otherwise.
5947
5948 Integral arguments are always passed as 64-bit quantities appropriately
5949 extended.
5950
5951 Passing of floating point values is handled as follows.
5952 If a prototype is in scope:
5953 If the value is a named argument (i.e. not a value matching the `...'
5954 of a stdarg function) then the value is passed in the appropriate
5955 fp reg.
5956 If the value is part of the `...' and is passed in one of the first 6
5957 slots then the value is passed in the appropriate int reg.
5958 If the value is part of the `...' and is not passed in one of the first 6
5959 slots then the value is passed in memory.
5960 If a prototype is not in scope:
5961 If the value is one of the first 6 arguments the value is passed in the
5962 appropriate integer reg and the appropriate fp reg.
5963 If the value is not one of the first 6 arguments the value is passed in
5964 the appropriate fp reg and in memory.
5965
5966
5967 Summary of the calling conventions implemented by GCC on the SPARC:
5968
5969 32-bit ABI:
5970 size argument return value
5971
5972 small integer <4 int. reg. int. reg.
5973 word 4 int. reg. int. reg.
5974 double word 8 int. reg. int. reg.
5975
5976 _Complex small integer <8 int. reg. int. reg.
5977 _Complex word 8 int. reg. int. reg.
5978 _Complex double word 16 memory int. reg.
5979
5980 vector integer <=8 int. reg. FP reg.
5981 vector integer >8 memory memory
5982
5983 float 4 int. reg. FP reg.
5984 double 8 int. reg. FP reg.
5985 long double 16 memory memory
5986
5987 _Complex float 8 memory FP reg.
5988 _Complex double 16 memory FP reg.
5989 _Complex long double 32 memory FP reg.
5990
5991 vector float any memory memory
5992
5993 aggregate any memory memory
5994
5995
5996
5997 64-bit ABI:
5998 size argument return value
5999
6000 small integer <8 int. reg. int. reg.
6001 word 8 int. reg. int. reg.
6002 double word 16 int. reg. int. reg.
6003
6004 _Complex small integer <16 int. reg. int. reg.
6005 _Complex word 16 int. reg. int. reg.
6006 _Complex double word 32 memory int. reg.
6007
6008 vector integer <=16 FP reg. FP reg.
6009 vector integer 16<s<=32 memory FP reg.
6010 vector integer >32 memory memory
6011
6012 float 4 FP reg. FP reg.
6013 double 8 FP reg. FP reg.
6014 long double 16 FP reg. FP reg.
6015
6016 _Complex float 8 FP reg. FP reg.
6017 _Complex double 16 FP reg. FP reg.
6018 _Complex long double 32 memory FP reg.
6019
6020 vector float <=16 FP reg. FP reg.
6021 vector float 16<s<=32 memory FP reg.
6022 vector float >32 memory memory
6023
6024 aggregate <=16 reg. reg.
6025 aggregate 16<s<=32 memory reg.
6026 aggregate >32 memory memory
6027
6028
6029
6030 Note #1: complex floating-point types follow the extended SPARC ABIs as
6031 implemented by the Sun compiler.
6032
6033 Note #2: integral vector types follow the scalar floating-point types
6034 conventions to match what is implemented by the Sun VIS SDK.
6035
6036 Note #3: floating-point vector types follow the aggregate types
6037 conventions. */
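/* As a worked illustration of the 64-bit conventions tabulated above (an
   editor's example, not part of the original sources): for a prototyped
   call

     void f (int a, double b, float c);

   A is passed in %o0 (slot 0), B in %d2 (slot 1) and C in %f5 (slot 2;
   a single-precision value is right-justified in the odd half of its
   double FP register, as computed by function_arg_slotno below).  */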
6038
6039
6040 /* Maximum number of int regs for args. */
6041 #define SPARC_INT_ARG_MAX 6
6042 /* Maximum number of fp regs for args. */
6043 #define SPARC_FP_ARG_MAX 16
6044
6045 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
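/* For example (editor's note), with UNITS_PER_WORD == 8 on 64-bit,
   ROUND_ADVANCE (1) through ROUND_ADVANCE (8) all yield one slot, while
   ROUND_ADVANCE (9) yields two.  */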
6046
6047 /* Handle the INIT_CUMULATIVE_ARGS macro.
6048 Initialize a variable CUM of type CUMULATIVE_ARGS
6049 for a call to a function whose data type is FNTYPE.
6050 For a library call, FNTYPE is 0. */
6051
6052 void
6053 init_cumulative_args (struct sparc_args *cum, tree fntype,
6054 rtx libname ATTRIBUTE_UNUSED,
6055 tree fndecl ATTRIBUTE_UNUSED)
6056 {
6057 cum->words = 0;
6058 cum->prototype_p = fntype && prototype_p (fntype);
6059 cum->libcall_p = fntype == 0;
6060 }
6061
6062 /* Handle promotion of pointer and integer arguments. */
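/* For example (editor's note), a char argument in QImode is widened to
   word_mode (SImode for the 32-bit ABI, DImode for the 64-bit ABI), and
   a pointer is extended to Pmode with the signedness dictated by
   POINTERS_EXTEND_UNSIGNED.  */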
6063
6064 static enum machine_mode
6065 sparc_promote_function_mode (const_tree type,
6066 enum machine_mode mode,
6067 int *punsignedp,
6068 const_tree fntype ATTRIBUTE_UNUSED,
6069 int for_return ATTRIBUTE_UNUSED)
6070 {
6071 if (type != NULL_TREE && POINTER_TYPE_P (type))
6072 {
6073 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6074 return Pmode;
6075 }
6076
6077 /* Integral arguments are passed as full words, as per the ABI. */
6078 if (GET_MODE_CLASS (mode) == MODE_INT
6079 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6080 return word_mode;
6081
6082 return mode;
6083 }
6084
6085 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6086
6087 static bool
6088 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6089 {
6090 return TARGET_ARCH64;
6091 }
6092
6093 /* Scan the record type TYPE and return the following predicates:
6094 - INTREGS_P: the record contains at least one field or sub-field
6095 that is eligible for promotion in integer registers.
6096 - FP_REGS_P: the record contains at least one field or sub-field
6097 that is eligible for promotion in floating-point registers.
6098 - PACKED_P: the record contains at least one field that is packed.
6099
6100 Sub-fields are not taken into account for the PACKED_P predicate. */
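/* For example (editor's note), scanning

     struct s { int i; float f; };

   sets *INTREGS_P and, when TARGET_FPU is enabled, *FPREGS_P; *PACKED_P
   is only set if some field carries __attribute__ ((packed)).  */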
6101
6102 static void
6103 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6104 int *packed_p)
6105 {
6106 tree field;
6107
6108 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6109 {
6110 if (TREE_CODE (field) == FIELD_DECL)
6111 {
6112 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6113 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6114 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6115 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6116 && TARGET_FPU)
6117 *fpregs_p = 1;
6118 else
6119 *intregs_p = 1;
6120
6121 if (packed_p && DECL_PACKED (field))
6122 *packed_p = 1;
6123 }
6124 }
6125 }
6126
6127 /* Compute the slot number to pass an argument in.
6128 Return the slot number or -1 if passing on the stack.
6129
6130 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6131 the preceding args and about the function being called.
6132 MODE is the argument's machine mode.
6133 TYPE is the data type of the argument (as a tree).
6134 This is null for libcalls where that information may
6135 not be available.
6136 NAMED is nonzero if this argument is a named parameter
6137 (otherwise it is an extra parameter matching an ellipsis).
6138 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6139 *PREGNO records the register number to use if scalar type.
6140 *PPADDING records the amount of padding needed in words. */
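/* For example (editor's note), on 64-bit a named DFmode argument falling
   in slot 1 stores SPARC_FP_ARG_FIRST + 2 (i.e. %d2) in *PREGNO and
   returns 1, while an argument with TYPE_ALIGN == 128 arriving at an odd
   slot is moved to the next even slot with *PPADDING set to 1.  */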
6141
6142 static int
6143 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6144 const_tree type, bool named, bool incoming_p,
6145 int *pregno, int *ppadding)
6146 {
6147 int regbase = (incoming_p
6148 ? SPARC_INCOMING_INT_ARG_FIRST
6149 : SPARC_OUTGOING_INT_ARG_FIRST);
6150 int slotno = cum->words;
6151 enum mode_class mclass;
6152 int regno;
6153
6154 *ppadding = 0;
6155
6156 if (type && TREE_ADDRESSABLE (type))
6157 return -1;
6158
6159 if (TARGET_ARCH32
6160 && mode == BLKmode
6161 && type
6162 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6163 return -1;
6164
6165 /* For SPARC64, objects requiring 16-byte alignment get it. */
6166 if (TARGET_ARCH64
6167 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6168 && (slotno & 1) != 0)
6169 slotno++, *ppadding = 1;
6170
6171 mclass = GET_MODE_CLASS (mode);
6172 if (type && TREE_CODE (type) == VECTOR_TYPE)
6173 {
6174 /* Vector types deserve special treatment because they are
6175 polymorphic wrt their mode, depending upon whether VIS
6176 instructions are enabled. */
6177 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6178 {
6179 /* The SPARC port defines no floating-point vector modes. */
6180 gcc_assert (mode == BLKmode);
6181 }
6182 else
6183 {
6184 /* Integral vector types should either have a vector
6185 mode or an integral mode, because we are guaranteed
6186 by pass_by_reference that their size is not greater
6187 than 16 bytes and TImode is 16 bytes wide. */
6188 gcc_assert (mode != BLKmode);
6189
6190 /* Vector integers are handled like floats according to
6191 the Sun VIS SDK. */
6192 mclass = MODE_FLOAT;
6193 }
6194 }
6195
6196 switch (mclass)
6197 {
6198 case MODE_FLOAT:
6199 case MODE_COMPLEX_FLOAT:
6200 case MODE_VECTOR_INT:
6201 if (TARGET_ARCH64 && TARGET_FPU && named)
6202 {
6203 if (slotno >= SPARC_FP_ARG_MAX)
6204 return -1;
6205 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6206 /* Arguments filling only a single FP register are
6207 right-justified in the enclosing double FP register. */
6208 if (GET_MODE_SIZE (mode) <= 4)
6209 regno++;
6210 break;
6211 }
6212 /* fallthrough */
6213
6214 case MODE_INT:
6215 case MODE_COMPLEX_INT:
6216 if (slotno >= SPARC_INT_ARG_MAX)
6217 return -1;
6218 regno = regbase + slotno;
6219 break;
6220
6221 case MODE_RANDOM:
6222 if (mode == VOIDmode)
6223 /* MODE is VOIDmode when generating the actual call. */
6224 return -1;
6225
6226 gcc_assert (mode == BLKmode);
6227
6228 if (TARGET_ARCH32
6229 || !type
6230 || (TREE_CODE (type) != VECTOR_TYPE
6231 && TREE_CODE (type) != RECORD_TYPE))
6232 {
6233 if (slotno >= SPARC_INT_ARG_MAX)
6234 return -1;
6235 regno = regbase + slotno;
6236 }
6237 else /* TARGET_ARCH64 && type */
6238 {
6239 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6240
6241 /* First see what kinds of registers we would need. */
6242 if (TREE_CODE (type) == VECTOR_TYPE)
6243 fpregs_p = 1;
6244 else
6245 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6246
6247 /* The ABI obviously doesn't specify how packed structures
6248 are passed. These are defined to be passed in int regs
6249 if possible, otherwise memory. */
6250 if (packed_p || !named)
6251 fpregs_p = 0, intregs_p = 1;
6252
6253 /* If all arg slots are filled, then must pass on stack. */
6254 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6255 return -1;
6256
6257 /* If there are only int args and all int arg slots are filled,
6258 then must pass on stack. */
6259 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6260 return -1;
6261
6262 /* Note that even if all int arg slots are filled, fp members may
6263 still be passed in regs if such regs are available.
6264 *PREGNO isn't set because there may be more than one, it's up
6265 to the caller to compute them. */
6266 return slotno;
6267 }
6268 break;
6269
6270 default:
6271 gcc_unreachable ();
6272 }
6273
6274 *pregno = regno;
6275 return slotno;
6276 }
6277
6278 /* Handle recursive register counting for structure field layout. */
6279
6280 struct function_arg_record_value_parms
6281 {
6282 rtx ret; /* return expression being built. */
6283 int slotno; /* slot number of the argument. */
6284 int named; /* whether the argument is named. */
6285 int regbase; /* regno of the base register. */
6286 int stack; /* 1 if part of the argument is on the stack. */
6287 int intoffset; /* offset of the first pending integer field. */
6288 unsigned int nregs; /* number of words passed in registers. */
6289 };
6290
6291 static void function_arg_record_value_3
6292 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6293 static void function_arg_record_value_2
6294 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6295 static void function_arg_record_value_1
6296 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6297 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6298 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6299
6300 /* A subroutine of function_arg_record_value. Traverse the structure
6301 recursively and determine how many registers will be required. */
6302
6303 static void
6304 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6305 struct function_arg_record_value_parms *parms,
6306 bool packed_p)
6307 {
6308 tree field;
6309
6310 /* We need to compute how many registers are needed so we can
6311 allocate the PARALLEL but before we can do that we need to know
6312 whether there are any packed fields. The ABI obviously doesn't
6313 specify how structures are passed in this case, so they are
6314 defined to be passed in int regs if possible, otherwise memory,
6315 regardless of whether there are fp values present. */
6316
6317 if (! packed_p)
6318 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6319 {
6320 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6321 {
6322 packed_p = true;
6323 break;
6324 }
6325 }
6326
6327 /* Compute how many registers we need. */
6328 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6329 {
6330 if (TREE_CODE (field) == FIELD_DECL)
6331 {
6332 HOST_WIDE_INT bitpos = startbitpos;
6333
6334 if (DECL_SIZE (field) != 0)
6335 {
6336 if (integer_zerop (DECL_SIZE (field)))
6337 continue;
6338
6339 if (tree_fits_uhwi_p (bit_position (field)))
6340 bitpos += int_bit_position (field);
6341 }
6342
6343 /* ??? FIXME: else assume zero offset. */
6344
6345 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6346 function_arg_record_value_1 (TREE_TYPE (field),
6347 bitpos,
6348 parms,
6349 packed_p);
6350 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6351 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6352 && TARGET_FPU
6353 && parms->named
6354 && ! packed_p)
6355 {
6356 if (parms->intoffset != -1)
6357 {
6358 unsigned int startbit, endbit;
6359 int intslots, this_slotno;
6360
6361 startbit = parms->intoffset & -BITS_PER_WORD;
6362 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6363
6364 intslots = (endbit - startbit) / BITS_PER_WORD;
6365 this_slotno = parms->slotno + parms->intoffset
6366 / BITS_PER_WORD;
6367
6368 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6369 {
6370 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6371 /* We need to pass this field on the stack. */
6372 parms->stack = 1;
6373 }
6374
6375 parms->nregs += intslots;
6376 parms->intoffset = -1;
6377 }
6378
6379 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6380 If it weren't true we wouldn't be here. */
6381 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6382 && DECL_MODE (field) == BLKmode)
6383 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6384 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6385 parms->nregs += 2;
6386 else
6387 parms->nregs += 1;
6388 }
6389 else
6390 {
6391 if (parms->intoffset == -1)
6392 parms->intoffset = bitpos;
6393 }
6394 }
6395 }
6396 }
6397
6398 /* A subroutine of function_arg_record_value. Assign the bits of the
6399 structure between parms->intoffset and bitpos to integer registers. */
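/* For example (editor's note), with BITS_PER_WORD == 64 an integer region
   running from intoffset 32 up to bitpos 128 gives startbit 0, endbit 128
   and thus two integer slots; the first register is loaded in SImode (the
   trailing part of its word), the second in word_mode.  */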
6400
6401 static void
6402 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6403 struct function_arg_record_value_parms *parms)
6404 {
6405 enum machine_mode mode;
6406 unsigned int regno;
6407 unsigned int startbit, endbit;
6408 int this_slotno, intslots, intoffset;
6409 rtx reg;
6410
6411 if (parms->intoffset == -1)
6412 return;
6413
6414 intoffset = parms->intoffset;
6415 parms->intoffset = -1;
6416
6417 startbit = intoffset & -BITS_PER_WORD;
6418 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6419 intslots = (endbit - startbit) / BITS_PER_WORD;
6420 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6421
6422 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6423 if (intslots <= 0)
6424 return;
6425
6426 /* If this is the trailing part of a word, only load that much into
6427 the register. Otherwise load the whole register. Note that in
6428 the latter case we may pick up unwanted bits. It's not a problem
6429 at the moment, but we may wish to revisit this. */
6430
6431 if (intoffset % BITS_PER_WORD != 0)
6432 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6433 MODE_INT);
6434 else
6435 mode = word_mode;
6436
6437 intoffset /= BITS_PER_UNIT;
6438 do
6439 {
6440 regno = parms->regbase + this_slotno;
6441 reg = gen_rtx_REG (mode, regno);
6442 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6443 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6444
6445 this_slotno += 1;
6446 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6447 mode = word_mode;
6448 parms->nregs += 1;
6449 intslots -= 1;
6450 }
6451 while (intslots > 0);
6452 }
6453
6454 /* A subroutine of function_arg_record_value. Traverse the structure
6455 recursively and assign bits to floating point registers. Track which
6456 bits in between need integer registers; invoke function_arg_record_value_3
6457 to make that happen. */
6458
6459 static void
6460 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6461 struct function_arg_record_value_parms *parms,
6462 bool packed_p)
6463 {
6464 tree field;
6465
6466 if (! packed_p)
6467 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6468 {
6469 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6470 {
6471 packed_p = true;
6472 break;
6473 }
6474 }
6475
6476 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6477 {
6478 if (TREE_CODE (field) == FIELD_DECL)
6479 {
6480 HOST_WIDE_INT bitpos = startbitpos;
6481
6482 if (DECL_SIZE (field) != 0)
6483 {
6484 if (integer_zerop (DECL_SIZE (field)))
6485 continue;
6486
6487 if (tree_fits_uhwi_p (bit_position (field)))
6488 bitpos += int_bit_position (field);
6489 }
6490
6491 /* ??? FIXME: else assume zero offset. */
6492
6493 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6494 function_arg_record_value_2 (TREE_TYPE (field),
6495 bitpos,
6496 parms,
6497 packed_p);
6498 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6499 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6500 && TARGET_FPU
6501 && parms->named
6502 && ! packed_p)
6503 {
6504 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6505 int regno, nregs, pos;
6506 enum machine_mode mode = DECL_MODE (field);
6507 rtx reg;
6508
6509 function_arg_record_value_3 (bitpos, parms);
6510
6511 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6512 && mode == BLKmode)
6513 {
6514 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6515 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6516 }
6517 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6518 {
6519 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6520 nregs = 2;
6521 }
6522 else
6523 nregs = 1;
6524
6525 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6526 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6527 regno++;
6528 reg = gen_rtx_REG (mode, regno);
6529 pos = bitpos / BITS_PER_UNIT;
6530 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6531 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6532 parms->nregs += 1;
6533 while (--nregs > 0)
6534 {
6535 regno += GET_MODE_SIZE (mode) / 4;
6536 reg = gen_rtx_REG (mode, regno);
6537 pos += GET_MODE_SIZE (mode);
6538 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6539 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6540 parms->nregs += 1;
6541 }
6542 }
6543 else
6544 {
6545 if (parms->intoffset == -1)
6546 parms->intoffset = bitpos;
6547 }
6548 }
6549 }
6550 }
6551
6552 /* Used by function_arg and sparc_function_value_1 to implement the complex
6553 conventions of the 64-bit ABI for passing and returning structures.
6554 Return an expression valid as a return value for the FUNCTION_ARG
6555 and TARGET_FUNCTION_VALUE.
6556
6557 TYPE is the data type of the argument (as a tree).
6558 This is null for libcalls where that information may
6559 not be available.
6560 MODE is the argument's machine mode.
6561 SLOTNO is the index number of the argument's slot in the parameter array.
6562 NAMED is nonzero if this argument is a named parameter
6563 (otherwise it is an extra parameter matching an ellipsis).
6564 REGBASE is the regno of the base register for the parameter array. */
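/* For example (editor's note), a 16-byte

     struct { double d; long l; }

   passed in slot 0 yields a PARALLEL pairing %d0 at byte offset 0 with
   the integer register of slot 1 (%o1 on the caller's side) at byte
   offset 8.  */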
6565
6566 static rtx
6567 function_arg_record_value (const_tree type, enum machine_mode mode,
6568 int slotno, int named, int regbase)
6569 {
6570 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6571 struct function_arg_record_value_parms parms;
6572 unsigned int nregs;
6573
6574 parms.ret = NULL_RTX;
6575 parms.slotno = slotno;
6576 parms.named = named;
6577 parms.regbase = regbase;
6578 parms.stack = 0;
6579
6580 /* Compute how many registers we need. */
6581 parms.nregs = 0;
6582 parms.intoffset = 0;
6583 function_arg_record_value_1 (type, 0, &parms, false);
6584
6585 /* Take into account pending integer fields. */
6586 if (parms.intoffset != -1)
6587 {
6588 unsigned int startbit, endbit;
6589 int intslots, this_slotno;
6590
6591 startbit = parms.intoffset & -BITS_PER_WORD;
6592 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6593 intslots = (endbit - startbit) / BITS_PER_WORD;
6594 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6595
6596 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6597 {
6598 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6599 /* We need to pass this field on the stack. */
6600 parms.stack = 1;
6601 }
6602
6603 parms.nregs += intslots;
6604 }
6605 nregs = parms.nregs;
6606
6607 /* Allocate the vector and handle some annoying special cases. */
6608 if (nregs == 0)
6609 {
6610 /* ??? Empty structure has no value? Duh? */
6611 if (typesize <= 0)
6612 {
6613 /* Though there's nothing really to store, return a word register
6614 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6615 leads to breakage due to the fact that there are zero bytes to
6616 load. */
6617 return gen_rtx_REG (mode, regbase);
6618 }
6619 else
6620 {
6621 /* ??? C++ has structures with no fields, and yet a size. Give up
6622 for now and pass everything back in integer registers. */
6623 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6624 }
6625 if (nregs + slotno > SPARC_INT_ARG_MAX)
6626 nregs = SPARC_INT_ARG_MAX - slotno;
6627 }
6628 gcc_assert (nregs != 0);
6629
6630 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6631
6632 /* If at least one field must be passed on the stack, generate
6633 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6634 also be passed on the stack. We can't do much better because the
6635 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6636 of structures for which the fields passed exclusively in registers
6637 are not at the beginning of the structure. */
6638 if (parms.stack)
6639 XVECEXP (parms.ret, 0, 0)
6640 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6641
6642 /* Fill in the entries. */
6643 parms.nregs = 0;
6644 parms.intoffset = 0;
6645 function_arg_record_value_2 (type, 0, &parms, false);
6646 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6647
6648 gcc_assert (parms.nregs == nregs);
6649
6650 return parms.ret;
6651 }
6652
6653 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6654 of the 64-bit ABI for passing and returning unions.
6655 Return an expression valid as a return value for the FUNCTION_ARG
6656 and TARGET_FUNCTION_VALUE.
6657
6658 SIZE is the size in bytes of the union.
6659 MODE is the argument's machine mode.
6660 REGNO is the hard register the union will be passed in. */
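/* For example (editor's note), a 16-byte union starting in slot 0 becomes
   a PARALLEL of two word registers at byte offsets 0 and 8, left-justified;
   if it starts in the last slot (SPARC_INT_ARG_MAX - 1), only its first
   word is passed in a register.  */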
6661
6662 static rtx
6663 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6664 int regno)
6665 {
6666 int nwords = ROUND_ADVANCE (size), i;
6667 rtx regs;
6668
6669 /* See comment in previous function for empty structures. */
6670 if (nwords == 0)
6671 return gen_rtx_REG (mode, regno);
6672
6673 if (slotno == SPARC_INT_ARG_MAX - 1)
6674 nwords = 1;
6675
6676 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6677
6678 for (i = 0; i < nwords; i++)
6679 {
6680 /* Unions are passed left-justified. */
6681 XVECEXP (regs, 0, i)
6682 = gen_rtx_EXPR_LIST (VOIDmode,
6683 gen_rtx_REG (word_mode, regno),
6684 GEN_INT (UNITS_PER_WORD * i));
6685 regno++;
6686 }
6687
6688 return regs;
6689 }
6690
6691 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6692 for passing and returning large (BLKmode) vectors.
6693 Return an expression valid as a return value for the FUNCTION_ARG
6694 and TARGET_FUNCTION_VALUE.
6695
6696 SIZE is the size in bytes of the vector (at least 8 bytes).
6697 REGNO is the FP hard register the vector will be passed in. */
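/* For example (editor's note), SIZE == 16 produces a PARALLEL of two
   DImode registers, REGNO at byte offset 0 and REGNO + 2 at byte offset 8,
   matching the two 8-byte slots the vector occupies.  */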
6698
6699 static rtx
6700 function_arg_vector_value (int size, int regno)
6701 {
6702 int i, nregs = size / 8;
6703 rtx regs;
6704
6705 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6706
6707 for (i = 0; i < nregs; i++)
6708 {
6709 XVECEXP (regs, 0, i)
6710 = gen_rtx_EXPR_LIST (VOIDmode,
6711 gen_rtx_REG (DImode, regno + 2*i),
6712 GEN_INT (i*8));
6713 }
6714
6715 return regs;
6716 }
6717
6718 /* Determine where to put an argument to a function.
6719 Value is zero to push the argument on the stack,
6720 or a hard register in which to store the argument.
6721
6722 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6723 the preceding args and about the function being called.
6724 MODE is the argument's machine mode.
6725 TYPE is the data type of the argument (as a tree).
6726 This is null for libcalls where that information may
6727 not be available.
6728 NAMED is true if this argument is a named parameter
6729 (otherwise it is an extra parameter matching an ellipsis).
6730 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6731 TARGET_FUNCTION_INCOMING_ARG. */
6732
6733 static rtx
6734 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6735 const_tree type, bool named, bool incoming_p)
6736 {
6737 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6738
6739 int regbase = (incoming_p
6740 ? SPARC_INCOMING_INT_ARG_FIRST
6741 : SPARC_OUTGOING_INT_ARG_FIRST);
6742 int slotno, regno, padding;
6743 enum mode_class mclass = GET_MODE_CLASS (mode);
6744
6745 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6746 &regno, &padding);
6747 if (slotno == -1)
6748 return 0;
6749
6750 /* Vector types deserve special treatment because they are polymorphic wrt
6751 their mode, depending upon whether VIS instructions are enabled. */
6752 if (type && TREE_CODE (type) == VECTOR_TYPE)
6753 {
6754 HOST_WIDE_INT size = int_size_in_bytes (type);
6755 gcc_assert ((TARGET_ARCH32 && size <= 8)
6756 || (TARGET_ARCH64 && size <= 16));
6757
6758 if (mode == BLKmode)
6759 return function_arg_vector_value (size,
6760 SPARC_FP_ARG_FIRST + 2*slotno);
6761 else
6762 mclass = MODE_FLOAT;
6763 }
6764
6765 if (TARGET_ARCH32)
6766 return gen_rtx_REG (mode, regno);
6767
6768 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6769 and are promoted to registers if possible. */
6770 if (type && TREE_CODE (type) == RECORD_TYPE)
6771 {
6772 HOST_WIDE_INT size = int_size_in_bytes (type);
6773 gcc_assert (size <= 16);
6774
6775 return function_arg_record_value (type, mode, slotno, named, regbase);
6776 }
6777
6778 /* Unions up to 16 bytes in size are passed in integer registers. */
6779 else if (type && TREE_CODE (type) == UNION_TYPE)
6780 {
6781 HOST_WIDE_INT size = int_size_in_bytes (type);
6782 gcc_assert (size <= 16);
6783
6784 return function_arg_union_value (size, mode, slotno, regno);
6785 }
6786
6787 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6788 but also have the slot allocated for them.
6789 If no prototype is in scope fp values in register slots get passed
6790 in two places, either fp regs and int regs or fp regs and memory. */
6791 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6792 && SPARC_FP_REG_P (regno))
6793 {
6794 rtx reg = gen_rtx_REG (mode, regno);
6795 if (cum->prototype_p || cum->libcall_p)
6796 {
6797 /* "* 2" because fp reg numbers are recorded in 4 byte
6798 quantities. */
6799 #if 0
6800 /* ??? This will cause the value to be passed in the fp reg and
6801 in the stack. When a prototype exists we want to pass the
6802 value in the reg but reserve space on the stack. That's an
6803 optimization, and is deferred [for a bit]. */
6804 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6805 return gen_rtx_PARALLEL (mode,
6806 gen_rtvec (2,
6807 gen_rtx_EXPR_LIST (VOIDmode,
6808 NULL_RTX, const0_rtx),
6809 gen_rtx_EXPR_LIST (VOIDmode,
6810 reg, const0_rtx)));
6811 else
6812 #else
6813 /* ??? It seems that passing back a register even when past
6814 the area declared by REG_PARM_STACK_SPACE will allocate
6815 space appropriately, and will not copy the data onto the
6816 stack, exactly as we desire.
6817
6818 This is due to locate_and_pad_parm being called in
6819 expand_call whenever reg_parm_stack_space > 0, which
6820 while beneficial to our example here, would seem to be
6821 in error from what had been intended. Ho hum... -- r~ */
6822 #endif
6823 return reg;
6824 }
6825 else
6826 {
6827 rtx v0, v1;
6828
6829 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6830 {
6831 int intreg;
6832
6833 /* On incoming, we don't need to know that the value
6834 is passed in both %f0 and %i0, and doing so confuses other parts
6835 of the compiler, causing needless spillage even in the simplest cases. */
6836 if (incoming_p)
6837 return reg;
6838
6839 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6840 + (regno - SPARC_FP_ARG_FIRST) / 2);
6841
6842 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6843 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6844 const0_rtx);
6845 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6846 }
6847 else
6848 {
6849 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6850 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6851 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6852 }
6853 }
6854 }
6855
6856 /* All other aggregate types are passed in an integer register in a mode
6857 corresponding to the size of the type. */
6858 else if (type && AGGREGATE_TYPE_P (type))
6859 {
6860 HOST_WIDE_INT size = int_size_in_bytes (type);
6861 gcc_assert (size <= 16);
6862
6863 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6864 }
6865
6866 return gen_rtx_REG (mode, regno);
6867 }
6868
6869 /* Handle the TARGET_FUNCTION_ARG target hook. */
6870
6871 static rtx
6872 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6873 const_tree type, bool named)
6874 {
6875 return sparc_function_arg_1 (cum, mode, type, named, false);
6876 }
6877
6878 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6879
6880 static rtx
6881 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6882 const_tree type, bool named)
6883 {
6884 return sparc_function_arg_1 (cum, mode, type, named, true);
6885 }
6886
6887 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
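/* For example (editor's note), long double (TFmode) or a type declared
   with __attribute__ ((aligned (16))) gets a 128-bit boundary on 64-bit;
   everything else falls back to PARM_BOUNDARY.  */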
6888
6889 static unsigned int
6890 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6891 {
6892 return ((TARGET_ARCH64
6893 && (GET_MODE_ALIGNMENT (mode) == 128
6894 || (type && TYPE_ALIGN (type) == 128)))
6895 ? 128
6896 : PARM_BOUNDARY);
6897 }
6898
6899 /* For an arg passed partly in registers and partly in memory,
6900 this is the number of bytes of registers used.
6901 For args passed entirely in registers or entirely in memory, zero.
6902
6903 Any arg that starts in the first 6 regs but won't entirely fit in them
6904 needs partial registers on v8. On v9, structures with integer
6905 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6906 values that begin in the last fp reg [where "last fp reg" varies with the
6907 mode] will be split between that reg and memory. */
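/* For example (editor's note), on 32-bit a DImode argument starting in
   slot 5 returns UNITS_PER_WORD here: its first word travels in %o5 and
   its second on the stack.  */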
6908
6909 static int
6910 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6911 tree type, bool named)
6912 {
6913 int slotno, regno, padding;
6914
6915 /* We pass false for incoming_p here; it doesn't matter. */
6916 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6917 false, &regno, &padding);
6918
6919 if (slotno == -1)
6920 return 0;
6921
6922 if (TARGET_ARCH32)
6923 {
6924 if ((slotno + (mode == BLKmode
6925 ? ROUND_ADVANCE (int_size_in_bytes (type))
6926 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6927 > SPARC_INT_ARG_MAX)
6928 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6929 }
6930 else
6931 {
6932 /* We are guaranteed by pass_by_reference that the size of the
6933 argument is not greater than 16 bytes, so we only need to return
6934 one word if the argument is partially passed in registers. */
6935
6936 if (type && AGGREGATE_TYPE_P (type))
6937 {
6938 int size = int_size_in_bytes (type);
6939
6940 if (size > UNITS_PER_WORD
6941 && slotno == SPARC_INT_ARG_MAX - 1)
6942 return UNITS_PER_WORD;
6943 }
6944 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6945 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6946 && ! (TARGET_FPU && named)))
6947 {
6948 /* The complex types are passed as packed types. */
6949 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6950 && slotno == SPARC_INT_ARG_MAX - 1)
6951 return UNITS_PER_WORD;
6952 }
6953 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6954 {
6955 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6956 > SPARC_FP_ARG_MAX)
6957 return UNITS_PER_WORD;
6958 }
6959 }
6960
6961 return 0;
6962 }
6963
6964 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6965 Specify whether to pass the argument by reference. */
6966
6967 static bool
6968 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6969 enum machine_mode mode, const_tree type,
6970 bool named ATTRIBUTE_UNUSED)
6971 {
6972 if (TARGET_ARCH32)
6973 /* Original SPARC 32-bit ABI says that structures and unions,
6974 and quad-precision floats are passed by reference. For Pascal,
6975 also pass arrays by reference. All other base types are passed
6976 in registers.
6977
6978 Extended ABI (as implemented by the Sun compiler) says that all
6979 complex floats are passed by reference. Pass complex integers
6980 in registers up to 8 bytes. More generally, enforce the 2-word
6981 cap for passing arguments in registers.
6982
6983 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6984 integers are passed like floats of the same size, that is in
6985 registers up to 8 bytes. Pass all vector floats by reference
6986 like structure and unions. */
6987 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6988 || mode == SCmode
6989 /* Catch CDImode, TFmode, DCmode and TCmode. */
6990 || GET_MODE_SIZE (mode) > 8
6991 || (type
6992 && TREE_CODE (type) == VECTOR_TYPE
6993 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6994 else
6995 /* Original SPARC 64-bit ABI says that structures and unions
6996 smaller than 16 bytes are passed in registers, as well as
6997 all other base types.
6998
6999 Extended ABI (as implemented by the Sun compiler) says that
7000 complex floats are passed in registers up to 16 bytes. Pass
7001 all complex integers in registers up to 16 bytes. More generally,
7002 enforce the 2-word cap for passing arguments in registers.
7003
7004 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7005 integers are passed like floats of the same size, that is in
7006 registers (up to 16 bytes). Pass all vector floats like structure
7007 and unions. */
7008 return ((type
7009 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7010 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7011 /* Catch CTImode and TCmode. */
7012 || GET_MODE_SIZE (mode) > 16);
7013 }
7014
7015 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7016 Update the data in CUM to advance over an argument
7017 of mode MODE and data type TYPE.
7018 TYPE is null for libcalls where that information may not be available. */
7019
7020 static void
7021 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7022 const_tree type, bool named)
7023 {
7024 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7025 int regno, padding;
7026
7027 /* We pass false for incoming_p here; it doesn't matter. */
7028 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7029
7030 /* If argument requires leading padding, add it. */
7031 cum->words += padding;
7032
7033 if (TARGET_ARCH32)
7034 {
7035 cum->words += (mode != BLKmode
7036 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7037 : ROUND_ADVANCE (int_size_in_bytes (type)));
7038 }
7039 else
7040 {
7041 if (type && AGGREGATE_TYPE_P (type))
7042 {
7043 int size = int_size_in_bytes (type);
7044
7045 if (size <= 8)
7046 ++cum->words;
7047 else if (size <= 16)
7048 cum->words += 2;
7049 else /* passed by reference */
7050 ++cum->words;
7051 }
7052 else
7053 {
7054 cum->words += (mode != BLKmode
7055 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7056 : ROUND_ADVANCE (int_size_in_bytes (type)));
7057 }
7058 }
7059 }
7060
7061 /* Handle the FUNCTION_ARG_PADDING macro.
7062 For the 64-bit ABI, structs are always stored left-shifted in their
7063 argument slot. */
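/* For example (editor's note), a 3-byte struct occupies the first three
   bytes of its 8-byte slot on 64-bit, i.e. the most significant bytes on
   this big-endian target, with the pad following (upward padding).  */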
7064
7065 enum direction
7066 function_arg_padding (enum machine_mode mode, const_tree type)
7067 {
7068 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7069 return upward;
7070
7071 /* Fall back to the default. */
7072 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7073 }
7074
7075 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7076 Specify whether to return the return value in memory. */
7077
7078 static bool
7079 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7080 {
7081 if (TARGET_ARCH32)
7082 /* Original SPARC 32-bit ABI says that structures and unions,
7083 and quad-precision floats are returned in memory. All other
7084 base types are returned in registers.
7085
7086 Extended ABI (as implemented by the Sun compiler) says that
7087 all complex floats are returned in registers (8 FP registers
7088 at most for '_Complex long double'). Return all complex integers
7089 in registers (4 at most for '_Complex long long').
7090
7091 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7092 integers are returned like floats of the same size, that is in
7093 registers up to 8 bytes and in memory otherwise. Return all
7094 vector floats in memory like structure and unions; note that
7095 they always have BLKmode like the latter. */
7096 return (TYPE_MODE (type) == BLKmode
7097 || TYPE_MODE (type) == TFmode
7098 || (TREE_CODE (type) == VECTOR_TYPE
7099 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7100 else
7101 /* Original SPARC 64-bit ABI says that structures and unions
7102 smaller than 32 bytes are returned in registers, as well as
7103 all other base types.
7104
7105 Extended ABI (as implemented by the Sun compiler) says that all
7106 complex floats are returned in registers (8 FP registers at most
7107 for '_Complex long double'). Return all complex integers in
7108 registers (4 at most for '_Complex TItype').
7109
7110 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7111 integers are returned like floats of the same size, that is in
7112 registers. Return all vector floats like structure and unions;
7113 note that they always have BLKmode like the latter. */
7114 return (TYPE_MODE (type) == BLKmode
7115 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7116 }
7117
7118 /* Handle the TARGET_STRUCT_VALUE target hook.
7119 Return where to find the structure return value address. */
7120
7121 static rtx
7122 sparc_struct_value_rtx (tree fndecl, int incoming)
7123 {
7124 if (TARGET_ARCH64)
7125 return 0;
7126 else
7127 {
7128 rtx mem;
7129
7130 if (incoming)
7131 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7132 STRUCT_VALUE_OFFSET));
7133 else
7134 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7135 STRUCT_VALUE_OFFSET));
7136
7137 /* Only follow the SPARC ABI for fixed-size structure returns.
7138 Variable size structure returns are handled per the normal
7139 procedures in GCC. This is enabled by -mstd-struct-return. */
7140 if (incoming == 2
7141 && sparc_std_struct_return
7142 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7143 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7144 {
7145 /* We must check and adjust the return address, since it is
7146 optional whether the return object is actually
7147 provided. */
7148 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7149 rtx scratch = gen_reg_rtx (SImode);
7150 rtx endlab = gen_label_rtx ();
7151
7152 /* Calculate the return object size. */
7153 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7154 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7155 /* Construct a temporary return value. */
7156 rtx temp_val
7157 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7158
7159 /* Implement SPARC 32-bit psABI callee return struct checking:
7160
7161 Fetch the instruction where we will return to and see if
7162 it's an unimp instruction (the most significant 10 bits
7163 will be zero). */
7164 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7165 plus_constant (Pmode,
7166 ret_reg, 8)));
7167 /* Assume the size is valid and pre-adjust. */
7168 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7169 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7170 0, endlab);
7171 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7172 /* Write the address of the memory pointed to by temp_val into
7173 the memory pointed to by mem. */
7174 emit_move_insn (mem, XEXP (temp_val, 0));
7175 emit_label (endlab);
7176 }
7177
7178 return mem;
7179 }
7180 }
7181
7182 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7183 For v9, function return values are subject to the same rules as arguments,
7184 except that up to 32 bytes may be returned in registers. */
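/* For example (editor's note), a 24-byte structure is returned in
   registers on 64-bit, fitting the 32-byte return cap, even though as an
   argument it would be passed by reference under the 16-byte cap enforced
   by sparc_pass_by_reference.  */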
7185
7186 static rtx
7187 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7188 bool outgoing)
7189 {
7190 /* Beware that the two values are swapped here wrt function_arg. */
7191 int regbase = (outgoing
7192 ? SPARC_INCOMING_INT_ARG_FIRST
7193 : SPARC_OUTGOING_INT_ARG_FIRST);
7194 enum mode_class mclass = GET_MODE_CLASS (mode);
7195 int regno;
7196
7197 /* Vector types deserve special treatment because they are polymorphic wrt
7198 their mode, depending upon whether VIS instructions are enabled. */
7199 if (type && TREE_CODE (type) == VECTOR_TYPE)
7200 {
7201 HOST_WIDE_INT size = int_size_in_bytes (type);
7202 gcc_assert ((TARGET_ARCH32 && size <= 8)
7203 || (TARGET_ARCH64 && size <= 32));
7204
7205 if (mode == BLKmode)
7206 return function_arg_vector_value (size,
7207 SPARC_FP_ARG_FIRST);
7208 else
7209 mclass = MODE_FLOAT;
7210 }
7211
7212 if (TARGET_ARCH64 && type)
7213 {
7214 /* Structures up to 32 bytes in size are returned in registers. */
7215 if (TREE_CODE (type) == RECORD_TYPE)
7216 {
7217 HOST_WIDE_INT size = int_size_in_bytes (type);
7218 gcc_assert (size <= 32);
7219
7220 return function_arg_record_value (type, mode, 0, 1, regbase);
7221 }
7222
7223 /* Unions up to 32 bytes in size are returned in integer registers. */
7224 else if (TREE_CODE (type) == UNION_TYPE)
7225 {
7226 HOST_WIDE_INT size = int_size_in_bytes (type);
7227 gcc_assert (size <= 32);
7228
7229 return function_arg_union_value (size, mode, 0, regbase);
7230 }
7231
7232 /* Objects that require it are returned in FP registers. */
7233 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7234 ;
7235
7236 /* All other aggregate types are returned in an integer register in a
7237 mode corresponding to the size of the type. */
7238 else if (AGGREGATE_TYPE_P (type))
7239 {
7242 HOST_WIDE_INT size = int_size_in_bytes (type);
7243 gcc_assert (size <= 32);
7244
7245 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7246
7247 /* ??? We probably should have made the same ABI change in
7248 3.4.0 as the one we made for unions. The latter was
7249 required by the SCD though, while the former is not
7250 specified, so we favored compatibility and efficiency.
7251
7252 Now we're stuck for aggregates larger than 16 bytes,
7253 because OImode vanished in the meantime. Let's not
7254 try to be unduly clever, and simply follow the ABI
7255 for unions in that case. */
7256 if (mode == BLKmode)
7257 return function_arg_union_value (size, mode, 0, regbase);
7258 else
7259 mclass = MODE_INT;
7260 }
7261
7262 /* We should only have pointer and integer types at this point. This
7263 must match sparc_promote_function_mode. */
7264 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7265 mode = word_mode;
7266 }
7267
7268 /* We should only have pointer and integer types at this point. This must
7269 match sparc_promote_function_mode. */
7270 else if (TARGET_ARCH32
7271 && mclass == MODE_INT
7272 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7273 mode = word_mode;
7274
7275 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7276 regno = SPARC_FP_ARG_FIRST;
7277 else
7278 regno = regbase;
7279
7280 return gen_rtx_REG (mode, regno);
7281 }
7282
7283 /* Handle TARGET_FUNCTION_VALUE.
7284 On the SPARC, the value is found in the first "output" register, but the
7285 called function leaves it in the first "input" register. */
7286
7287 static rtx
7288 sparc_function_value (const_tree valtype,
7289 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7290 bool outgoing)
7291 {
7292 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7293 }
7294
7295 /* Handle TARGET_LIBCALL_VALUE. */
7296
7297 static rtx
7298 sparc_libcall_value (enum machine_mode mode,
7299 const_rtx fun ATTRIBUTE_UNUSED)
7300 {
7301 return sparc_function_value_1 (NULL_TREE, mode, false);
7302 }
7303
7304 /* Handle FUNCTION_VALUE_REGNO_P.
7305 On the SPARC, the first "output" reg is used for integer values, and the
7306 first floating point register is used for floating point values. */
7307
7308 static bool
7309 sparc_function_value_regno_p (const unsigned int regno)
7310 {
7311 return (regno == 8 || regno == 32);
7312 }
7313
7314 /* Do what is necessary for `va_start'. We look at the current function
7315 to determine if stdarg or varargs is used and return the address of
7316 the first unnamed parameter. */
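/* Illustrative example (the exact offsets are assumptions; they come
   from FIRST_PARM_OFFSET): for "int f (int a, int b, ...)" on 32-bit
   SPARC, crtl->args.info.words is 2, so the loop below spills
   %i2..%i5 into their reserved parameter slots above the frame
   pointer and returns the address of the slot of the first unnamed
   argument.  */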
7317
7318 static rtx
7319 sparc_builtin_saveregs (void)
7320 {
7321 int first_reg = crtl->args.info.words;
7322 rtx address;
7323 int regno;
7324
7325 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7326 emit_move_insn (gen_rtx_MEM (word_mode,
7327 gen_rtx_PLUS (Pmode,
7328 frame_pointer_rtx,
7329 GEN_INT (FIRST_PARM_OFFSET (0)
7330 + (UNITS_PER_WORD
7331 * regno)))),
7332 gen_rtx_REG (word_mode,
7333 SPARC_INCOMING_INT_ARG_FIRST + regno));
7334
7335 address = gen_rtx_PLUS (Pmode,
7336 frame_pointer_rtx,
7337 GEN_INT (FIRST_PARM_OFFSET (0)
7338 + UNITS_PER_WORD * first_reg));
7339
7340 return address;
7341 }
7342
7343 /* Implement `va_start' for stdarg. */
7344
7345 static void
7346 sparc_va_start (tree valist, rtx nextarg)
7347 {
7348 nextarg = expand_builtin_saveregs ();
7349 std_expand_builtin_va_start (valist, nextarg);
7350 }
7351
7352 /* Implement `va_arg' for stdarg. */
7353
7354 static tree
7355 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7356 gimple_seq *post_p)
7357 {
7358 HOST_WIDE_INT size, rsize, align;
7359 tree addr, incr;
7360 bool indirect;
7361 tree ptrtype = build_pointer_type (type);
7362
7363 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7364 {
7365 indirect = true;
7366 size = rsize = UNITS_PER_WORD;
7367 align = 0;
7368 }
7369 else
7370 {
7371 indirect = false;
7372 size = int_size_in_bytes (type);
7373 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7374 align = 0;
7375
7376 if (TARGET_ARCH64)
7377 {
7378 /* For SPARC64, objects requiring 16-byte alignment get it. */
7379 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7380 align = 2 * UNITS_PER_WORD;
7381
7382 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7383 are left-justified in their slots. */
7384 if (AGGREGATE_TYPE_P (type))
7385 {
7386 if (size == 0)
7387 size = rsize = UNITS_PER_WORD;
7388 else
7389 size = rsize;
7390 }
7391 }
7392 }
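/* Worked example (illustrative): on TARGET_ARCH64 a 12-byte struct
   yields size = 12, rsize = 16, align = 0, and the aggregate rule
   above bumps size to 16 since such structs are left-justified in
   their slot.  A 16-byte-aligned scalar such as long double instead
   sets align = 16, which the rounding below honors.  */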
7393
7394 incr = valist;
7395 if (align)
7396 {
7397 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7398 incr = fold_convert (sizetype, incr);
7399 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7400 size_int (-align));
7401 incr = fold_convert (ptr_type_node, incr);
7402 }
7403
7404 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7405 addr = incr;
7406
7407 if (BYTES_BIG_ENDIAN && size < rsize)
7408 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7409
7410 if (indirect)
7411 {
7412 addr = fold_convert (build_pointer_type (ptrtype), addr);
7413 addr = build_va_arg_indirect_ref (addr);
7414 }
7415
7416 /* If the address isn't aligned properly for the type, we need a temporary.
7417 FIXME: This is inefficient, usually we can do this in registers. */
7418 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7419 {
7420 tree tmp = create_tmp_var (type, "va_arg_tmp");
7421 tree dest_addr = build_fold_addr_expr (tmp);
7422 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7423 3, dest_addr, addr, size_int (rsize));
7424 TREE_ADDRESSABLE (tmp) = 1;
7425 gimplify_and_add (copy, pre_p);
7426 addr = dest_addr;
7427 }
7428
7429 else
7430 addr = fold_convert (ptrtype, addr);
7431
7432 incr = fold_build_pointer_plus_hwi (incr, rsize);
7433 gimplify_assign (valist, incr, post_p);
7434
7435 return build_va_arg_indirect_ref (addr);
7436 }
7437 \f
7438 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7439 Specify whether the vector mode is supported by the hardware. */
7440
7441 static bool
7442 sparc_vector_mode_supported_p (enum machine_mode mode)
7443 {
7444 return TARGET_VIS && VECTOR_MODE_P (mode);
7445 }
7446 \f
7447 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7448
7449 static enum machine_mode
7450 sparc_preferred_simd_mode (enum machine_mode mode)
7451 {
7452 if (TARGET_VIS)
7453 switch (mode)
7454 {
7455 case SImode:
7456 return V2SImode;
7457 case HImode:
7458 return V4HImode;
7459 case QImode:
7460 return V8QImode;
7461
7462 default:;
7463 }
7464
7465 return word_mode;
7466 }
7467 \f
7468 /* Return the string to output an unconditional branch to LABEL, which is
7469 the operand number of the label.
7470
7471 DEST is the destination insn (i.e. the label), INSN is the source. */
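/* Sketch of the possible outputs (illustrative):

	cwbe	%g0, %g0, .LLn		! cbcond form, always taken
	ba,a,pt	%xcc, .LLn		! V9 form
	b,a	.LLn			! V8 fallback

   The +-260000 byte window used below leaves slop within the +-1MB
   reach of V9 branches.  */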
7472
7473 const char *
7474 output_ubranch (rtx dest, rtx insn)
7475 {
7476 static char string[64];
7477 bool v9_form = false;
7478 int delta;
7479 char *p;
7480
7481 /* Even if we are trying to use cbcond for this, evaluate
7482 whether we can use V9 branches as our backup plan. */
7483
7484 delta = 5000000;
7485 if (INSN_ADDRESSES_SET_P ())
7486 delta = (INSN_ADDRESSES (INSN_UID (dest))
7487 - INSN_ADDRESSES (INSN_UID (insn)));
7488
7489 /* Leave some instructions for "slop". */
7490 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7491 v9_form = true;
7492
7493 if (TARGET_CBCOND)
7494 {
7495 bool emit_nop = emit_cbcond_nop (insn);
7496 bool far = false;
7497 const char *rval;
7498
7499 if (delta < -500 || delta > 500)
7500 far = true;
7501
7502 if (far)
7503 {
7504 if (v9_form)
7505 rval = "ba,a,pt\t%%xcc, %l0";
7506 else
7507 rval = "b,a\t%l0";
7508 }
7509 else
7510 {
7511 if (emit_nop)
7512 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7513 else
7514 rval = "cwbe\t%%g0, %%g0, %l0";
7515 }
7516 return rval;
7517 }
7518
7519 if (v9_form)
7520 strcpy (string, "ba%*,pt\t%%xcc, ");
7521 else
7522 strcpy (string, "b%*\t");
7523
7524 p = strchr (string, '\0');
7525 *p++ = '%';
7526 *p++ = 'l';
7527 *p++ = '0';
7528 *p++ = '%';
7529 *p++ = '(';
7530 *p = '\0';
7531
7532 return string;
7533 }
7534
7535 /* Return the string to output a conditional branch to LABEL, which is
7536 the operand number of the label. OP is the conditional expression.
7537 XEXP (OP, 0) is assumed to be a condition code register (integer or
7538 floating point) and its mode specifies what kind of comparison we made.
7539
7540 DEST is the destination insn (i.e. the label), INSN is the source.
7541
7542 REVERSED is nonzero if we should reverse the sense of the comparison.
7543
7544 ANNUL is nonzero if we should generate an annulling branch. */
7545
7546 const char *
7547 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7548 rtx insn)
7549 {
7550 static char string[64];
7551 enum rtx_code code = GET_CODE (op);
7552 rtx cc_reg = XEXP (op, 0);
7553 enum machine_mode mode = GET_MODE (cc_reg);
7554 const char *labelno, *branch;
7555 int spaces = 8, far;
7556 char *p;
7557
7558 /* v9 branches are limited to +-1MB. If it is too far away,
7559 change
7560
7561 bne,pt %xcc, .LC30
7562
7563 to
7564
7565 be,pn %xcc, .+12
7566 nop
7567 ba .LC30
7568
7569 and
7570
7571 fbne,a,pn %fcc2, .LC29
7572
7573 to
7574
7575 fbe,pt %fcc2, .+16
7576 nop
7577 ba .LC29 */
7578
7579 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7580 if (reversed ^ far)
7581 {
7582 /* Reversal of FP compares takes care -- an ordered compare
7583 becomes an unordered compare and vice versa. */
7584 if (mode == CCFPmode || mode == CCFPEmode)
7585 code = reverse_condition_maybe_unordered (code);
7586 else
7587 code = reverse_condition (code);
7588 }
7589
7590 /* Start by writing the branch condition. */
7591 if (mode == CCFPmode || mode == CCFPEmode)
7592 {
7593 switch (code)
7594 {
7595 case NE:
7596 branch = "fbne";
7597 break;
7598 case EQ:
7599 branch = "fbe";
7600 break;
7601 case GE:
7602 branch = "fbge";
7603 break;
7604 case GT:
7605 branch = "fbg";
7606 break;
7607 case LE:
7608 branch = "fble";
7609 break;
7610 case LT:
7611 branch = "fbl";
7612 break;
7613 case UNORDERED:
7614 branch = "fbu";
7615 break;
7616 case ORDERED:
7617 branch = "fbo";
7618 break;
7619 case UNGT:
7620 branch = "fbug";
7621 break;
7622 case UNLT:
7623 branch = "fbul";
7624 break;
7625 case UNEQ:
7626 branch = "fbue";
7627 break;
7628 case UNGE:
7629 branch = "fbuge";
7630 break;
7631 case UNLE:
7632 branch = "fbule";
7633 break;
7634 case LTGT:
7635 branch = "fblg";
7636 break;
7637
7638 default:
7639 gcc_unreachable ();
7640 }
7641
7642 /* ??? !v9: FP branches cannot be preceded by another floating point
7643 insn. Because there is currently no concept of pre-delay slots,
7644 we can fix this only by always emitting a nop before a floating
7645 point branch. */
7646
7647 string[0] = '\0';
7648 if (! TARGET_V9)
7649 strcpy (string, "nop\n\t");
7650 strcat (string, branch);
7651 }
7652 else
7653 {
7654 switch (code)
7655 {
7656 case NE:
7657 branch = "bne";
7658 break;
7659 case EQ:
7660 branch = "be";
7661 break;
7662 case GE:
7663 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7664 branch = "bpos";
7665 else
7666 branch = "bge";
7667 break;
7668 case GT:
7669 branch = "bg";
7670 break;
7671 case LE:
7672 branch = "ble";
7673 break;
7674 case LT:
7675 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7676 branch = "bneg";
7677 else
7678 branch = "bl";
7679 break;
7680 case GEU:
7681 branch = "bgeu";
7682 break;
7683 case GTU:
7684 branch = "bgu";
7685 break;
7686 case LEU:
7687 branch = "bleu";
7688 break;
7689 case LTU:
7690 branch = "blu";
7691 break;
7692
7693 default:
7694 gcc_unreachable ();
7695 }
7696 strcpy (string, branch);
7697 }
7698 spaces -= strlen (branch);
7699 p = strchr (string, '\0');
7700
7701 /* Now add the annulling, the label, and a possible noop. */
7702 if (annul && ! far)
7703 {
7704 strcpy (p, ",a");
7705 p += 2;
7706 spaces -= 2;
7707 }
7708
7709 if (TARGET_V9)
7710 {
7711 rtx note;
7712 int v8 = 0;
7713
7714 if (! far && insn && INSN_ADDRESSES_SET_P ())
7715 {
7716 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7717 - INSN_ADDRESSES (INSN_UID (insn)));
7718 /* Leave some instructions for "slop". */
7719 if (delta < -260000 || delta >= 260000)
7720 v8 = 1;
7721 }
7722
7723 if (mode == CCFPmode || mode == CCFPEmode)
7724 {
7725 static char v9_fcc_labelno[] = "%%fccX, ";
7726 /* Set the char indicating the number of the fcc reg to use. */
7727 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7728 labelno = v9_fcc_labelno;
7729 if (v8)
7730 {
7731 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7732 labelno = "";
7733 }
7734 }
7735 else if (mode == CCXmode || mode == CCX_NOOVmode)
7736 {
7737 labelno = "%%xcc, ";
7738 gcc_assert (! v8);
7739 }
7740 else
7741 {
7742 labelno = "%%icc, ";
7743 if (v8)
7744 labelno = "";
7745 }
7746
7747 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7748 {
7749 strcpy (p,
7750 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7751 ? ",pt" : ",pn");
7752 p += 3;
7753 spaces -= 3;
7754 }
7755 }
7756 else
7757 labelno = "";
7758
7759 if (spaces > 0)
7760 *p++ = '\t';
7761 else
7762 *p++ = ' ';
7763 strcpy (p, labelno);
7764 p = strchr (p, '\0');
7765 if (far)
7766 {
7767 strcpy (p, ".+12\n\t nop\n\tb\t");
7768 /* Skip the next insn if requested or
7769 if we know that it will be a nop. */
7770 if (annul || ! final_sequence)
7771 p[3] = '6';
7772 p += 14;
7773 }
7774 *p++ = '%';
7775 *p++ = 'l';
7776 *p++ = label + '0';
7777 *p++ = '%';
7778 *p++ = '#';
7779 *p = '\0';
7780
7781 return string;
7782 }
7783
7784 /* Emit a library call comparison between floating point X and Y.
7785 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7786 Return the new operator to be used in the comparison sequence.
7787
7788 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7789 values as arguments instead of the TFmode registers themselves;
7790 that is why we cannot call emit_float_lib_cmp.  */
7791
7792 rtx
7793 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7794 {
7795 const char *qpfunc;
7796 rtx slot0, slot1, result, tem, tem2, libfunc;
7797 enum machine_mode mode;
7798 enum rtx_code new_comparison;
7799
7800 switch (comparison)
7801 {
7802 case EQ:
7803 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7804 break;
7805
7806 case NE:
7807 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7808 break;
7809
7810 case GT:
7811 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7812 break;
7813
7814 case GE:
7815 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7816 break;
7817
7818 case LT:
7819 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7820 break;
7821
7822 case LE:
7823 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7824 break;
7825
7826 case ORDERED:
7827 case UNORDERED:
7828 case UNGT:
7829 case UNLT:
7830 case UNEQ:
7831 case UNGE:
7832 case UNLE:
7833 case LTGT:
7834 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7835 break;
7836
7837 default:
7838 gcc_unreachable ();
7839 }
7840
7841 if (TARGET_ARCH64)
7842 {
7843 if (MEM_P (x))
7844 {
7845 tree expr = MEM_EXPR (x);
7846 if (expr)
7847 mark_addressable (expr);
7848 slot0 = x;
7849 }
7850 else
7851 {
7852 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7853 emit_move_insn (slot0, x);
7854 }
7855
7856 if (MEM_P (y))
7857 {
7858 tree expr = MEM_EXPR (y);
7859 if (expr)
7860 mark_addressable (expr);
7861 slot1 = y;
7862 }
7863 else
7864 {
7865 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7866 emit_move_insn (slot1, y);
7867 }
7868
7869 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7870 emit_library_call (libfunc, LCT_NORMAL,
7871 DImode, 2,
7872 XEXP (slot0, 0), Pmode,
7873 XEXP (slot1, 0), Pmode);
7874 mode = DImode;
7875 }
7876 else
7877 {
7878 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7879 emit_library_call (libfunc, LCT_NORMAL,
7880 SImode, 2,
7881 x, TFmode, y, TFmode);
7882 mode = SImode;
7883 }
7884
7885
7886 /* Immediately move the result of the libcall into a pseudo
7887 register so reload doesn't clobber the value if it needs
7888 the return register for a spill reg. */
7889 result = gen_reg_rtx (mode);
7890 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7891
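/* Decoding sketch: judging from the tests below, _Q_cmp/_Qp_cmp
   return an encoded relation rather than a boolean: 0 = equal,
   1 = less, 2 = greater, 3 = unordered.  E.g. UNLT tests
   (result & 1) != 0, i.e. the result is 1 (less) or 3 (unordered).  */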
7892 switch (comparison)
7893 {
7894 default:
7895 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7896 case ORDERED:
7897 case UNORDERED:
7898 new_comparison = (comparison == UNORDERED ? EQ : NE);
7899 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7900 case UNGT:
7901 case UNGE:
7902 new_comparison = (comparison == UNGT ? GT : NE);
7903 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7904 case UNLE:
7905 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7906 case UNLT:
7907 tem = gen_reg_rtx (mode);
7908 if (TARGET_ARCH32)
7909 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7910 else
7911 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7912 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7913 case UNEQ:
7914 case LTGT:
7915 tem = gen_reg_rtx (mode);
7916 if (TARGET_ARCH32)
7917 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7918 else
7919 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7920 tem2 = gen_reg_rtx (mode);
7921 if (TARGET_ARCH32)
7922 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7923 else
7924 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7925 new_comparison = (comparison == UNEQ ? EQ : NE);
7926 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7927 }
7928
7929 gcc_unreachable ();
7930 }
7931
7932 /* Generate an unsigned DImode to FP conversion. This is the same code
7933 optabs would emit if we didn't have TFmode patterns. */
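/* Roughly, in C (illustrative sketch, using double for mode):

	if ((long long) in >= 0)
	  out = (double) in;
	else
	  out = 2.0 * (double) (long long) ((in >> 1) | (in & 1));

   ORing the shifted-out low bit back in keeps the rounding of the
   halved value, and hence of the final doubling, correct.  */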
7934
7935 void
7936 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7937 {
7938 rtx neglab, donelab, i0, i1, f0, in, out;
7939
7940 out = operands[0];
7941 in = force_reg (DImode, operands[1]);
7942 neglab = gen_label_rtx ();
7943 donelab = gen_label_rtx ();
7944 i0 = gen_reg_rtx (DImode);
7945 i1 = gen_reg_rtx (DImode);
7946 f0 = gen_reg_rtx (mode);
7947
7948 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7949
7950 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7951 emit_jump_insn (gen_jump (donelab));
7952 emit_barrier ();
7953
7954 emit_label (neglab);
7955
7956 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7957 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7958 emit_insn (gen_iordi3 (i0, i0, i1));
7959 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7960 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7961
7962 emit_label (donelab);
7963 }
7964
7965 /* Generate an FP to unsigned DImode conversion. This is the same code
7966 optabs would emit if we didn't have TFmode patterns. */
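/* Roughly, in C (illustrative sketch, using double for mode):

	if (in < 9223372036854775808.0)
	  out = (long long) in;
	else
	  out = (long long) (in - 9223372036854775808.0)
		^ 0x8000000000000000;

   Values beyond the signed range are biased down by 2^63 before the
   conversion and the sign bit is patched back in with the xor.  */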
7967
7968 void
7969 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7970 {
7971 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7972
7973 out = operands[0];
7974 in = force_reg (mode, operands[1]);
7975 neglab = gen_label_rtx ();
7976 donelab = gen_label_rtx ();
7977 i0 = gen_reg_rtx (DImode);
7978 i1 = gen_reg_rtx (DImode);
7979 limit = gen_reg_rtx (mode);
7980 f0 = gen_reg_rtx (mode);
7981
7982 emit_move_insn (limit,
7983 CONST_DOUBLE_FROM_REAL_VALUE (
7984 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7985 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7986
7987 emit_insn (gen_rtx_SET (VOIDmode,
7988 out,
7989 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7990 emit_jump_insn (gen_jump (donelab));
7991 emit_barrier ();
7992
7993 emit_label (neglab);
7994
7995 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7996 emit_insn (gen_rtx_SET (VOIDmode,
7997 i0,
7998 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7999 emit_insn (gen_movdi (i1, const1_rtx));
8000 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8001 emit_insn (gen_xordi3 (out, i0, i1));
8002
8003 emit_label (donelab);
8004 }
8005
8006 /* Return the string to output a compare and branch instruction to DEST.
8007 DEST is the destination insn (i.e. the label), INSN is the source,
8008 and OP is the conditional expression. */
8009
8010 const char *
8011 output_cbcond (rtx op, rtx dest, rtx insn)
8012 {
8013 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8014 enum rtx_code code = GET_CODE (op);
8015 const char *cond_str, *tmpl;
8016 int far, emit_nop, len;
8017 static char string[64];
8018 char size_char;
8019
8020 /* Compare and Branch is limited to +-2KB. If it is too far away,
8021 change
8022
8023 cxbne X, Y, .LC30
8024
8025 to
8026
8027 cxbe X, Y, .+16
8028 nop
8029 ba,pt xcc, .LC30
8030 nop */
8031
8032 len = get_attr_length (insn);
8033
8034 far = len == 4;
8035 emit_nop = len == 2;
8036
8037 if (far)
8038 code = reverse_condition (code);
8039
8040 size_char = ((mode == SImode) ? 'w' : 'x');
8041
8042 switch (code)
8043 {
8044 case NE:
8045 cond_str = "ne";
8046 break;
8047
8048 case EQ:
8049 cond_str = "e";
8050 break;
8051
8052 case GE:
8053 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8054 cond_str = "pos";
8055 else
8056 cond_str = "ge";
8057 break;
8058
8059 case GT:
8060 cond_str = "g";
8061 break;
8062
8063 case LE:
8064 cond_str = "le";
8065 break;
8066
8067 case LT:
8068 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8069 cond_str = "neg";
8070 else
8071 cond_str = "l";
8072 break;
8073
8074 case GEU:
8075 cond_str = "cc";
8076 break;
8077
8078 case GTU:
8079 cond_str = "gu";
8080 break;
8081
8082 case LEU:
8083 cond_str = "leu";
8084 break;
8085
8086 case LTU:
8087 cond_str = "cs";
8088 break;
8089
8090 default:
8091 gcc_unreachable ();
8092 }
8093
8094 if (far)
8095 {
8096 int veryfar = 1, delta;
8097
8098 if (INSN_ADDRESSES_SET_P ())
8099 {
8100 delta = (INSN_ADDRESSES (INSN_UID (dest))
8101 - INSN_ADDRESSES (INSN_UID (insn)));
8102 /* Leave some instructions for "slop". */
8103 if (delta >= -260000 && delta < 260000)
8104 veryfar = 0;
8105 }
8106
8107 if (veryfar)
8108 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8109 else
8110 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8111 }
8112 else
8113 {
8114 if (emit_nop)
8115 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8116 else
8117 tmpl = "c%cb%s\t%%1, %%2, %%3";
8118 }
8119
8120 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8121
8122 return string;
8123 }
8124
8125 /* Return the string to output a conditional branch to LABEL, testing
8126 register REG. LABEL is the operand number of the label; REG is the
8127 operand number of the reg. OP is the conditional expression. The mode
8128 of REG says what kind of comparison we made.
8129
8130 DEST is the destination insn (i.e. the label), INSN is the source.
8131
8132 REVERSED is nonzero if we should reverse the sense of the comparison.
8133
8134 ANNUL is nonzero if we should generate an annulling branch. */
8135
8136 const char *
8137 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8138 int annul, rtx insn)
8139 {
8140 static char string[64];
8141 enum rtx_code code = GET_CODE (op);
8142 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8143 rtx note;
8144 int far;
8145 char *p;
8146
8147 /* Branch-on-register instructions are limited to +-128KB.  If it is too far away,
8148 change
8149
8150 brnz,pt %g1, .LC30
8151
8152 to
8153
8154 brz,pn %g1, .+12
8155 nop
8156 ba,pt %xcc, .LC30
8157
8158 and
8159
8160 brgez,a,pn %o1, .LC29
8161
8162 to
8163
8164 brlz,pt %o1, .+16
8165 nop
8166 ba,pt %xcc, .LC29 */
8167
8168 far = get_attr_length (insn) >= 3;
8169
8170 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8171 if (reversed ^ far)
8172 code = reverse_condition (code);
8173
8174 /* Only 64 bit versions of these instructions exist. */
8175 gcc_assert (mode == DImode);
8176
8177 /* Start by writing the branch condition. */
8178
8179 switch (code)
8180 {
8181 case NE:
8182 strcpy (string, "brnz");
8183 break;
8184
8185 case EQ:
8186 strcpy (string, "brz");
8187 break;
8188
8189 case GE:
8190 strcpy (string, "brgez");
8191 break;
8192
8193 case LT:
8194 strcpy (string, "brlz");
8195 break;
8196
8197 case LE:
8198 strcpy (string, "brlez");
8199 break;
8200
8201 case GT:
8202 strcpy (string, "brgz");
8203 break;
8204
8205 default:
8206 gcc_unreachable ();
8207 }
8208
8209 p = strchr (string, '\0');
8210
8211 /* Now add the annulling, reg, label, and nop. */
8212 if (annul && ! far)
8213 {
8214 strcpy (p, ",a");
8215 p += 2;
8216 }
8217
8218 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8219 {
8220 strcpy (p,
8221 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8222 ? ",pt" : ",pn");
8223 p += 3;
8224 }
8225
8226 *p = p < string + 8 ? '\t' : ' ';
8227 p++;
8228 *p++ = '%';
8229 *p++ = '0' + reg;
8230 *p++ = ',';
8231 *p++ = ' ';
8232 if (far)
8233 {
8234 int veryfar = 1, delta;
8235
8236 if (INSN_ADDRESSES_SET_P ())
8237 {
8238 delta = (INSN_ADDRESSES (INSN_UID (dest))
8239 - INSN_ADDRESSES (INSN_UID (insn)));
8240 /* Leave some instructions for "slop". */
8241 if (delta >= -260000 && delta < 260000)
8242 veryfar = 0;
8243 }
8244
8245 strcpy (p, ".+12\n\t nop\n\t");
8246 /* Skip the next insn if requested or
8247 if we know that it will be a nop. */
8248 if (annul || ! final_sequence)
8249 p[3] = '6';
8250 p += 12;
8251 if (veryfar)
8252 {
8253 strcpy (p, "b\t");
8254 p += 2;
8255 }
8256 else
8257 {
8258 strcpy (p, "ba,pt\t%%xcc, ");
8259 p += 13;
8260 }
8261 }
8262 *p++ = '%';
8263 *p++ = 'l';
8264 *p++ = '0' + label;
8265 *p++ = '%';
8266 *p++ = '#';
8267 *p = '\0';
8268
8269 return string;
8270 }
8271
8272 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8273 Such instructions cannot be used in the delay slot of return insn on v9.
8274 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8275 */
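/* For instance (illustrative), moving

	(set (reg:SI %i0) (reg:SI %i1))

   into the delay slot of a v9 "return" requires rewriting it as

	(set (reg:SI %o0) (reg:SI %o1))

   because the register window has already been restored when the
   delay instruction executes.  */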
8276
8277 static int
8278 epilogue_renumber (register rtx *where, int test)
8279 {
8280 register const char *fmt;
8281 register int i;
8282 register enum rtx_code code;
8283
8284 if (*where == 0)
8285 return 0;
8286
8287 code = GET_CODE (*where);
8288
8289 switch (code)
8290 {
8291 case REG:
8292 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8293 return 1;
8294 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8295 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8296 case SCRATCH:
8297 case CC0:
8298 case PC:
8299 case CONST_INT:
8300 case CONST_DOUBLE:
8301 return 0;
8302
8303 /* Do not replace the frame pointer with the stack pointer because
8304 it can cause the delayed instruction to load below the stack.
8305 This occurs when instructions like:
8306
8307 (set (reg/i:SI 24 %i0)
8308 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8309 (const_int -20 [0xffffffec])) 0))
8310
8311 are in the return delayed slot. */
8312 case PLUS:
8313 if (GET_CODE (XEXP (*where, 0)) == REG
8314 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8315 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8316 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8317 return 1;
8318 break;
8319
8320 case MEM:
8321 if (SPARC_STACK_BIAS
8322 && GET_CODE (XEXP (*where, 0)) == REG
8323 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8324 return 1;
8325 break;
8326
8327 default:
8328 break;
8329 }
8330
8331 fmt = GET_RTX_FORMAT (code);
8332
8333 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8334 {
8335 if (fmt[i] == 'E')
8336 {
8337 register int j;
8338 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8339 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8340 return 1;
8341 }
8342 else if (fmt[i] == 'e'
8343 && epilogue_renumber (&(XEXP (*where, i)), test))
8344 return 1;
8345 }
8346 return 0;
8347 }
8348 \f
8349 /* Leaf functions and non-leaf functions have different needs. */
8350
8351 static const int
8352 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8353
8354 static const int
8355 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8356
8357 static const int *const reg_alloc_orders[] = {
8358 reg_leaf_alloc_order,
8359 reg_nonleaf_alloc_order};
8360
8361 void
8362 order_regs_for_local_alloc (void)
8363 {
8364 static int last_order_nonleaf = 1;
8365
8366 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8367 {
8368 last_order_nonleaf = !last_order_nonleaf;
8369 memcpy ((char *) reg_alloc_order,
8370 (const char *) reg_alloc_orders[last_order_nonleaf],
8371 FIRST_PSEUDO_REGISTER * sizeof (int));
8372 }
8373 }
8374 \f
8375 /* Return 1 if REG and MEM are legitimate enough to allow the various
8376 mem<-->reg splits to be run. */
8377
8378 int
8379 sparc_splitdi_legitimate (rtx reg, rtx mem)
8380 {
8381 /* Punt if we are here by mistake. */
8382 gcc_assert (reload_completed);
8383
8384 /* We must have an offsettable memory reference. */
8385 if (! offsettable_memref_p (mem))
8386 return 0;
8387
8388 /* If we have legitimate args for ldd/std, we do not want
8389 the split to happen. */
8390 if ((REGNO (reg) % 2) == 0
8391 && mem_min_alignment (mem, 8))
8392 return 0;
8393
8394 /* Success. */
8395 return 1;
8396 }
8397
8398 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8399
8400 int
8401 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8402 {
8403 int regno1, regno2;
8404
8405 if (GET_CODE (reg1) == SUBREG)
8406 reg1 = SUBREG_REG (reg1);
8407 if (GET_CODE (reg1) != REG)
8408 return 0;
8409 regno1 = REGNO (reg1);
8410
8411 if (GET_CODE (reg2) == SUBREG)
8412 reg2 = SUBREG_REG (reg2);
8413 if (GET_CODE (reg2) != REG)
8414 return 0;
8415 regno2 = REGNO (reg2);
8416
8417 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8418 return 1;
8419
8420 if (TARGET_VIS3)
8421 {
8422 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8423 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8424 return 1;
8425 }
8426
8427 return 0;
8428 }
8429
8430 /* Return 1 if x and y are some kind of REG and they refer to
8431 different hard registers. This test is guaranteed to be
8432 run after reload. */
8433
8434 int
8435 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8436 {
8437 if (GET_CODE (x) != REG)
8438 return 0;
8439 if (GET_CODE (y) != REG)
8440 return 0;
8441 if (REGNO (x) == REGNO (y))
8442 return 0;
8443 return 1;
8444 }
8445
8446 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8447 This makes them candidates for using ldd and std insns.
8448
8449 Note reg1 and reg2 *must* be hard registers. */
8450
8451 int
8452 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8453 {
8454 /* We might have been passed a SUBREG. */
8455 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8456 return 0;
8457
8458 if (REGNO (reg1) % 2 != 0)
8459 return 0;
8460
8461 /* Integer ldd is deprecated in SPARC V9.  */
8462 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8463 return 0;
8464
8465 return (REGNO (reg1) == REGNO (reg2) - 1);
8466 }
8467
8468 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8469 an ldd or std insn.
8470
8471 This can only happen when addr1 and addr2, the addresses in mem1
8472 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8473 addr1 must also be aligned on a 64-bit boundary.
8474
8475 Also, if dependent_reg_rtx is not null, it should not be used to
8476 compute the address for mem1, i.e. we cannot optimize a sequence
8477 like:
8478 ld [%o0], %o0
8479 ld [%o0 + 4], %o1
8480 to
8481 ldd [%o0], %o0
8482 nor:
8483 ld [%g3 + 4], %g3
8484 ld [%g3], %g2
8485 to
8486 ldd [%g3], %g2
8487
8488 But, note that the transformation from:
8489 ld [%g2 + 4], %g3
8490 ld [%g2], %g2
8491 to
8492 ldd [%g2], %g2
8493 is perfectly fine. Thus, the peephole2 patterns always pass us
8494 the destination register of the first load, never the second one.
8495
8496 For stores we don't have a similar problem, so dependent_reg_rtx is
8497 NULL_RTX. */
8498
8499 int
8500 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8501 {
8502 rtx addr1, addr2;
8503 unsigned int reg1;
8504 HOST_WIDE_INT offset1;
8505
8506 /* The mems cannot be volatile. */
8507 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8508 return 0;
8509
8510 /* MEM1 should be aligned on a 64-bit boundary. */
8511 if (MEM_ALIGN (mem1) < 64)
8512 return 0;
8513
8514 addr1 = XEXP (mem1, 0);
8515 addr2 = XEXP (mem2, 0);
8516
8517 /* Extract a register number and offset (if used) from the first addr. */
8518 if (GET_CODE (addr1) == PLUS)
8519 {
8520 /* If not a REG, return zero. */
8521 if (GET_CODE (XEXP (addr1, 0)) != REG)
8522 return 0;
8523 else
8524 {
8525 reg1 = REGNO (XEXP (addr1, 0));
8526 /* The offset must be constant! */
8527 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8528 return 0;
8529 offset1 = INTVAL (XEXP (addr1, 1));
8530 }
8531 }
8532 else if (GET_CODE (addr1) != REG)
8533 return 0;
8534 else
8535 {
8536 reg1 = REGNO (addr1);
8537 /* This was a simple (mem (reg)) expression. Offset is 0. */
8538 offset1 = 0;
8539 }
8540
8541 /* Make sure the second address is of the form (mem (plus (reg) (const_int))).  */
8542 if (GET_CODE (addr2) != PLUS)
8543 return 0;
8544
8545 if (GET_CODE (XEXP (addr2, 0)) != REG
8546 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8547 return 0;
8548
8549 if (reg1 != REGNO (XEXP (addr2, 0)))
8550 return 0;
8551
8552 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8553 return 0;
8554
8555 /* The first offset must be evenly divisible by 8 to ensure the
8556 address is 64 bit aligned. */
8557 if (offset1 % 8 != 0)
8558 return 0;
8559
8560 /* The offset for the second addr must be 4 more than the first addr. */
8561 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8562 return 0;
8563
8564 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8565 instructions. */
8566 return 1;
8567 }
8568
8569 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8570
8571 rtx
8572 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, enum machine_mode mode)
8573 {
8574 rtx x = widen_memory_access (mem1, mode, 0);
8575 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8576 return x;
8577 }
8578
8579 /* Return 1 if reg is a pseudo, or is the first register in
8580 a hard register pair. This makes it suitable for use in
8581 ldd and std insns. */
8582
8583 int
8584 register_ok_for_ldd (rtx reg)
8585 {
8586 /* We might have been passed a SUBREG. */
8587 if (!REG_P (reg))
8588 return 0;
8589
8590 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8591 return (REGNO (reg) % 2 == 0);
8592
8593 return 1;
8594 }
8595
8596 /* Return 1 if OP, a MEM, has an address which is known to be
8597 aligned to an 8-byte boundary. */
8598
8599 int
8600 memory_ok_for_ldd (rtx op)
8601 {
8602 /* In 64-bit mode, we assume that the address is word-aligned. */
8603 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8604 return 0;
8605
8606 if (! can_create_pseudo_p ()
8607 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8608 return 0;
8609
8610 return 1;
8611 }
8612 \f
8613 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8614
8615 static bool
8616 sparc_print_operand_punct_valid_p (unsigned char code)
8617 {
8618 if (code == '#'
8619 || code == '*'
8620 || code == '('
8621 || code == ')'
8622 || code == '_'
8623 || code == '&')
8624 return true;
8625
8626 return false;
8627 }
8628
8629 /* Implement TARGET_PRINT_OPERAND.
8630 Print operand X (an rtx) in assembler syntax to file FILE.
8631 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8632 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8633
8634 static void
8635 sparc_print_operand (FILE *file, rtx x, int code)
8636 {
8637 switch (code)
8638 {
8639 case '#':
8640 /* Output an insn in a delay slot. */
8641 if (final_sequence)
8642 sparc_indent_opcode = 1;
8643 else
8644 fputs ("\n\t nop", file);
8645 return;
8646 case '*':
8647 /* Output an annul flag if there's nothing for the delay slot and we
8648 are optimizing. This is always used with '(' below.
8649 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8650 this is a dbx bug. So, we only do this when optimizing.
8651 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8652 Always emit a nop in case the next instruction is a branch. */
8653 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8654 fputs (",a", file);
8655 return;
8656 case '(':
8657 /* Output a 'nop' if there's nothing for the delay slot and we are
8658 not optimizing. This is always used with '*' above. */
8659 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8660 fputs ("\n\t nop", file);
8661 else if (final_sequence)
8662 sparc_indent_opcode = 1;
8663 return;
8664 case ')':
8665 /* Output the right displacement from the saved PC on function return.
8666 The caller may have placed an "unimp" insn immediately after the call
8667 so we have to account for it. This insn is used in the 32-bit ABI
8668 when calling a function that returns a non zero-sized structure. The
8669 64-bit ABI doesn't have it. Be careful to have this test be the same
8670 as that for the call. The exception is when sparc_std_struct_return
8671 is enabled, the psABI is followed exactly and the adjustment is made
8672 by the code in sparc_struct_value_rtx. The call emitted is the same
8673 when sparc_std_struct_return is enabled. */
8674 if (!TARGET_ARCH64
8675 && cfun->returns_struct
8676 && !sparc_std_struct_return
8677 && DECL_SIZE (DECL_RESULT (current_function_decl))
8678 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8679 == INTEGER_CST
8680 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8681 fputs ("12", file);
8682 else
8683 fputc ('8', file);
8684 return;
8685 case '_':
8686 /* Output the Embedded Medium/Anywhere code model base register. */
8687 fputs (EMBMEDANY_BASE_REG, file);
8688 return;
8689 case '&':
8690 /* Print some local dynamic TLS name. */
8691 assemble_name (file, get_some_local_dynamic_name ());
8692 return;
8693
8694 case 'Y':
8695 /* Adjust the operand to take into account a RESTORE operation. */
8696 if (GET_CODE (x) == CONST_INT)
8697 break;
8698 else if (GET_CODE (x) != REG)
8699 output_operand_lossage ("invalid %%Y operand");
8700 else if (REGNO (x) < 8)
8701 fputs (reg_names[REGNO (x)], file);
8702 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8703 fputs (reg_names[REGNO (x)-16], file);
8704 else
8705 output_operand_lossage ("invalid %%Y operand");
8706 return;
8707 case 'L':
8708 /* Print out the low order register name of a register pair. */
8709 if (WORDS_BIG_ENDIAN)
8710 fputs (reg_names[REGNO (x)+1], file);
8711 else
8712 fputs (reg_names[REGNO (x)], file);
8713 return;
8714 case 'H':
8715 /* Print out the high order register name of a register pair. */
8716 if (WORDS_BIG_ENDIAN)
8717 fputs (reg_names[REGNO (x)], file);
8718 else
8719 fputs (reg_names[REGNO (x)+1], file);
8720 return;
8721 case 'R':
8722 /* Print out the second register name of a register pair or quad.
8723 I.e., R (%o0) => %o1. */
8724 fputs (reg_names[REGNO (x)+1], file);
8725 return;
8726 case 'S':
8727 /* Print out the third register name of a register quad.
8728 I.e., S (%o0) => %o2. */
8729 fputs (reg_names[REGNO (x)+2], file);
8730 return;
8731 case 'T':
8732 /* Print out the fourth register name of a register quad.
8733 I.e., T (%o0) => %o3. */
8734 fputs (reg_names[REGNO (x)+3], file);
8735 return;
8736 case 'x':
8737 /* Print a condition code register. */
8738 if (REGNO (x) == SPARC_ICC_REG)
8739 {
8740 /* We don't handle CC[X]_NOOVmode because they're not supposed
8741 to occur here. */
8742 if (GET_MODE (x) == CCmode)
8743 fputs ("%icc", file);
8744 else if (GET_MODE (x) == CCXmode)
8745 fputs ("%xcc", file);
8746 else
8747 gcc_unreachable ();
8748 }
8749 else
8750 /* %fccN register */
8751 fputs (reg_names[REGNO (x)], file);
8752 return;
8753 case 'm':
8754 /* Print the operand's address only. */
8755 output_address (XEXP (x, 0));
8756 return;
8757 case 'r':
8758 /* In this case we need a register. Use %g0 if the
8759 operand is const0_rtx. */
8760 if (x == const0_rtx
8761 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8762 {
8763 fputs ("%g0", file);
8764 return;
8765 }
8766 else
8767 break;
8768
8769 case 'A':
8770 switch (GET_CODE (x))
8771 {
8772 case IOR: fputs ("or", file); break;
8773 case AND: fputs ("and", file); break;
8774 case XOR: fputs ("xor", file); break;
8775 default: output_operand_lossage ("invalid %%A operand");
8776 }
8777 return;
8778
8779 case 'B':
8780 switch (GET_CODE (x))
8781 {
8782 case IOR: fputs ("orn", file); break;
8783 case AND: fputs ("andn", file); break;
8784 case XOR: fputs ("xnor", file); break;
8785 default: output_operand_lossage ("invalid %%B operand");
8786 }
8787 return;
8788
8789 /* This is used by the conditional move instructions. */
8790 case 'C':
8791 {
8792 enum rtx_code rc = GET_CODE (x);
8793
8794 switch (rc)
8795 {
8796 case NE: fputs ("ne", file); break;
8797 case EQ: fputs ("e", file); break;
8798 case GE: fputs ("ge", file); break;
8799 case GT: fputs ("g", file); break;
8800 case LE: fputs ("le", file); break;
8801 case LT: fputs ("l", file); break;
8802 case GEU: fputs ("geu", file); break;
8803 case GTU: fputs ("gu", file); break;
8804 case LEU: fputs ("leu", file); break;
8805 case LTU: fputs ("lu", file); break;
8806 case LTGT: fputs ("lg", file); break;
8807 case UNORDERED: fputs ("u", file); break;
8808 case ORDERED: fputs ("o", file); break;
8809 case UNLT: fputs ("ul", file); break;
8810 case UNLE: fputs ("ule", file); break;
8811 case UNGT: fputs ("ug", file); break;
8812 case UNGE: fputs ("uge", file); break;
8813 case UNEQ: fputs ("ue", file); break;
8814 default: output_operand_lossage ("invalid %%C operand");
8815 }
8816 return;
8817 }
8818
8819 /* These are used by the movr instruction pattern.  */
8820 case 'D':
8821 {
8822 enum rtx_code rc = GET_CODE (x);
8823 switch (rc)
8824 {
8825 case NE: fputs ("ne", file); break;
8826 case EQ: fputs ("e", file); break;
8827 case GE: fputs ("gez", file); break;
8828 case LT: fputs ("lz", file); break;
8829 case LE: fputs ("lez", file); break;
8830 case GT: fputs ("gz", file); break;
8831 default: output_operand_lossage ("invalid %%D operand");
8832 }
8833 return;
8834 }
8835
8836 case 'b':
8837 {
8838 /* Print a sign-extended character. */
8839 int i = trunc_int_for_mode (INTVAL (x), QImode);
8840 fprintf (file, "%d", i);
8841 return;
8842 }
8843
8844 case 'f':
8845 /* Operand must be a MEM; write its address. */
8846 if (GET_CODE (x) != MEM)
8847 output_operand_lossage ("invalid %%f operand");
8848 output_address (XEXP (x, 0));
8849 return;
8850
8851 case 's':
8852 {
8853 /* Print a sign-extended 32-bit value. */
8854 HOST_WIDE_INT i;
8855 if (GET_CODE(x) == CONST_INT)
8856 i = INTVAL (x);
8857 else if (GET_CODE(x) == CONST_DOUBLE)
8858 i = CONST_DOUBLE_LOW (x);
8859 else
8860 {
8861 output_operand_lossage ("invalid %%s operand");
8862 return;
8863 }
8864 i = trunc_int_for_mode (i, SImode);
8865 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8866 return;
8867 }
8868
8869 case 0:
8870 /* Do nothing special. */
8871 break;
8872
8873 default:
8874 /* Undocumented flag. */
8875 output_operand_lossage ("invalid operand output code");
8876 }
8877
8878 if (GET_CODE (x) == REG)
8879 fputs (reg_names[REGNO (x)], file);
8880 else if (GET_CODE (x) == MEM)
8881 {
8882 fputc ('[', file);
8883 /* Poor Sun assembler doesn't understand absolute addressing. */
8884 if (CONSTANT_P (XEXP (x, 0)))
8885 fputs ("%g0+", file);
8886 output_address (XEXP (x, 0));
8887 fputc (']', file);
8888 }
8889 else if (GET_CODE (x) == HIGH)
8890 {
8891 fputs ("%hi(", file);
8892 output_addr_const (file, XEXP (x, 0));
8893 fputc (')', file);
8894 }
8895 else if (GET_CODE (x) == LO_SUM)
8896 {
8897 sparc_print_operand (file, XEXP (x, 0), 0);
8898 if (TARGET_CM_MEDMID)
8899 fputs ("+%l44(", file);
8900 else
8901 fputs ("+%lo(", file);
8902 output_addr_const (file, XEXP (x, 1));
8903 fputc (')', file);
8904 }
8905 else if (GET_CODE (x) == CONST_DOUBLE
8906 && (GET_MODE (x) == VOIDmode
8907 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8908 {
8909 if (CONST_DOUBLE_HIGH (x) == 0)
8910 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8911 else if (CONST_DOUBLE_HIGH (x) == -1
8912 && CONST_DOUBLE_LOW (x) < 0)
8913 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8914 else
8915 output_operand_lossage ("long long constant not a valid immediate operand");
8916 }
8917 else if (GET_CODE (x) == CONST_DOUBLE)
8918 output_operand_lossage ("floating point constant not a valid immediate operand");
8919 else { output_addr_const (file, x); }
8920 }
8921
8922 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8923
8924 static void
8925 sparc_print_operand_address (FILE *file, rtx x)
8926 {
8927 register rtx base, index = 0;
8928 int offset = 0;
8929 register rtx addr = x;
8930
8931 if (REG_P (addr))
8932 fputs (reg_names[REGNO (addr)], file);
8933 else if (GET_CODE (addr) == PLUS)
8934 {
8935 if (CONST_INT_P (XEXP (addr, 0)))
8936 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8937 else if (CONST_INT_P (XEXP (addr, 1)))
8938 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8939 else
8940 base = XEXP (addr, 0), index = XEXP (addr, 1);
8941 if (GET_CODE (base) == LO_SUM)
8942 {
8943 gcc_assert (USE_AS_OFFSETABLE_LO10
8944 && TARGET_ARCH64
8945 && ! TARGET_CM_MEDMID);
8946 output_operand (XEXP (base, 0), 0);
8947 fputs ("+%lo(", file);
8948 output_address (XEXP (base, 1));
8949 fprintf (file, ")+%d", offset);
8950 }
8951 else
8952 {
8953 fputs (reg_names[REGNO (base)], file);
8954 if (index == 0)
8955 fprintf (file, "%+d", offset);
8956 else if (REG_P (index))
8957 fprintf (file, "+%s", reg_names[REGNO (index)]);
8958 else if (GET_CODE (index) == SYMBOL_REF
8959 || GET_CODE (index) == LABEL_REF
8960 || GET_CODE (index) == CONST)
8961 fputc ('+', file), output_addr_const (file, index);
8962 else gcc_unreachable ();
8963 }
8964 }
8965 else if (GET_CODE (addr) == MINUS
8966 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8967 {
8968 output_addr_const (file, XEXP (addr, 0));
8969 fputs ("-(", file);
8970 output_addr_const (file, XEXP (addr, 1));
8971 fputs ("-.)", file);
8972 }
8973 else if (GET_CODE (addr) == LO_SUM)
8974 {
8975 output_operand (XEXP (addr, 0), 0);
8976 if (TARGET_CM_MEDMID)
8977 fputs ("+%l44(", file);
8978 else
8979 fputs ("+%lo(", file);
8980 output_address (XEXP (addr, 1));
8981 fputc (')', file);
8982 }
8983 else if (flag_pic
8984 && GET_CODE (addr) == CONST
8985 && GET_CODE (XEXP (addr, 0)) == MINUS
8986 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8987 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8988 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8989 {
8990 addr = XEXP (addr, 0);
8991 output_addr_const (file, XEXP (addr, 0));
8992 /* Group the args of the second CONST in parenthesis. */
8993 fputs ("-(", file);
8994 /* Skip past the second CONST--it does nothing for us. */
8995 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8996 /* Close the parenthesis. */
8997 fputc (')', file);
8998 }
8999 else
9000 {
9001 output_addr_const (file, addr);
9002 }
9003 }
9004 \f
9005 /* Target hook for assembling integer objects. The sparc version has
9006 special handling for aligned DI-mode objects. */
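/* For an aligned 8-byte symbolic value this emits (sketch)

	.xword	sym

   on V9 assemblers, and otherwise the pair

	.word	0
	.word	sym

   where the leading zero word supplies the high half.  */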
9007
9008 static bool
9009 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9010 {
9011 /* ??? We only output .xword's for symbols and only then in environments
9012 where the assembler can handle them. */
9013 if (aligned_p && size == 8
9014 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9015 {
9016 if (TARGET_V9)
9017 {
9018 assemble_integer_with_op ("\t.xword\t", x);
9019 return true;
9020 }
9021 else
9022 {
9023 assemble_aligned_integer (4, const0_rtx);
9024 assemble_aligned_integer (4, x);
9025 return true;
9026 }
9027 }
9028 return default_assemble_integer (x, size, aligned_p);
9029 }
9030 \f
9031 /* Return the value of a code used in the .proc pseudo-op that says
9032 what kind of result this function returns. For non-C types, we pick
9033 the closest C type. */
9034
9035 #ifndef SHORT_TYPE_SIZE
9036 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9037 #endif
9038
9039 #ifndef INT_TYPE_SIZE
9040 #define INT_TYPE_SIZE BITS_PER_WORD
9041 #endif
9042
9043 #ifndef LONG_TYPE_SIZE
9044 #define LONG_TYPE_SIZE BITS_PER_WORD
9045 #endif
9046
9047 #ifndef LONG_LONG_TYPE_SIZE
9048 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9049 #endif
9050
9051 #ifndef FLOAT_TYPE_SIZE
9052 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9053 #endif
9054
9055 #ifndef DOUBLE_TYPE_SIZE
9056 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9057 #endif
9058
9059 #ifndef LONG_DOUBLE_TYPE_SIZE
9060 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9061 #endif
9062
9063 unsigned long
9064 sparc_type_code (register tree type)
9065 {
9066 register unsigned long qualifiers = 0;
9067 register unsigned shift;
9068
9069 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9070 setting more, since some assemblers will give an error for this. Also,
9071 we must be careful to avoid shifts of 32 bits or more to avoid getting
9072 unpredictable results. */
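/* Worked example (illustrative): for "unsigned char **" the loop
   records two pointer levels, (1 << 6) and (1 << 8), and the
   INTEGER_TYPE case then returns

	(1 << 6) | (1 << 8) | 12  ==  0x14c

   since unsigned char maps to code 12.  */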
9073
9074 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9075 {
9076 switch (TREE_CODE (type))
9077 {
9078 case ERROR_MARK:
9079 return qualifiers;
9080
9081 case ARRAY_TYPE:
9082 qualifiers |= (3 << shift);
9083 break;
9084
9085 case FUNCTION_TYPE:
9086 case METHOD_TYPE:
9087 qualifiers |= (2 << shift);
9088 break;
9089
9090 case POINTER_TYPE:
9091 case REFERENCE_TYPE:
9092 case OFFSET_TYPE:
9093 qualifiers |= (1 << shift);
9094 break;
9095
9096 case RECORD_TYPE:
9097 return (qualifiers | 8);
9098
9099 case UNION_TYPE:
9100 case QUAL_UNION_TYPE:
9101 return (qualifiers | 9);
9102
9103 case ENUMERAL_TYPE:
9104 return (qualifiers | 10);
9105
9106 case VOID_TYPE:
9107 return (qualifiers | 16);
9108
9109 case INTEGER_TYPE:
9110 /* If this is a range type, consider it to be the underlying
9111 type. */
9112 if (TREE_TYPE (type) != 0)
9113 break;
9114
9115 /* Carefully distinguish all the standard types of C,
9116 without messing up if the language is not C. We do this by
9117 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9118 look at both the names and the above fields, but that's redundant.
9119 Any type whose size is between two C types will be considered
9120 to be the wider of the two types. Also, we do not have a
9121 special code to use for "long long", so anything wider than
9122 long is treated the same. Note that we can't distinguish
9123 between "int" and "long" in this code if they are the same
9124 size, but that's fine, since neither can the assembler. */
9125
9126 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9127 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9128
9129 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9130 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9131
9132 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9133 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9134
9135 else
9136 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9137
9138 case REAL_TYPE:
9139 /* If this is a range type, consider it to be the underlying
9140 type. */
9141 if (TREE_TYPE (type) != 0)
9142 break;
9143
9144 /* Carefully distinguish all the standard types of C,
9145 without messing up if the language is not C. */
9146
9147 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9148 return (qualifiers | 6);
9149
9150 else
9151 return (qualifiers | 7);
9152
9153 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9154 /* ??? We need to distinguish between double and float complex types,
9155 but I don't know how yet because I can't reach this code from
9156 existing front-ends. */
9157 return (qualifiers | 7); /* Who knows? */
9158
9159 case VECTOR_TYPE:
9160 case BOOLEAN_TYPE: /* Boolean truth value type. */
9161 case LANG_TYPE:
9162 case NULLPTR_TYPE:
9163 return qualifiers;
9164
9165 default:
9166 gcc_unreachable (); /* Not a type! */
9167 }
9168 }
9169
9170 return qualifiers;
9171 }
9172 \f
9173 /* Nested function support. */
9174
9175 /* Emit RTL insns to initialize the variable parts of a trampoline.
9176 FNADDR is an RTX for the address of the function's pure code.
9177 CXT is an RTX for the static chain value for the function.
9178
9179 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9180 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9181 (to store insns). This is a bit excessive. Perhaps a different
9182 mechanism would be better here.
9183
9184 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9185
9186 static void
9187 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9188 {
9189 /* SPARC 32-bit trampoline:
9190
9191 sethi %hi(fn), %g1
9192 sethi %hi(static), %g2
9193 jmp %g1+%lo(fn)
9194 or %g2, %lo(static), %g2
9195
9196 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9197 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9198 */
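/* Mapping of the constants used below to the insns above, per the
   SETHI/JMPL encodings just given:

	0x03000000 | (fn >> 10)		sethi	%hi(fn), %g1
	0x05000000 | (cxt >> 10)	sethi	%hi(static), %g2
	0x81c06000 | (fn & 0x3ff)	jmp	%g1+%lo(fn)
	0x8410a000 | (cxt & 0x3ff)	or	%g2, %lo(static), %g2  */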
9199
9200 emit_move_insn
9201 (adjust_address (m_tramp, SImode, 0),
9202 expand_binop (SImode, ior_optab,
9203 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9204 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9205 NULL_RTX, 1, OPTAB_DIRECT));
9206
9207 emit_move_insn
9208 (adjust_address (m_tramp, SImode, 4),
9209 expand_binop (SImode, ior_optab,
9210 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9211 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9212 NULL_RTX, 1, OPTAB_DIRECT));
9213
9214 emit_move_insn
9215 (adjust_address (m_tramp, SImode, 8),
9216 expand_binop (SImode, ior_optab,
9217 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9218 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9219 NULL_RTX, 1, OPTAB_DIRECT));
9220
9221 emit_move_insn
9222 (adjust_address (m_tramp, SImode, 12),
9223 expand_binop (SImode, ior_optab,
9224 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9225 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9226 NULL_RTX, 1, OPTAB_DIRECT));
9227
9228 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9229 aligned on a 16 byte boundary so one flush clears it all. */
9230 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
9231 if (sparc_cpu != PROCESSOR_ULTRASPARC
9232 && sparc_cpu != PROCESSOR_ULTRASPARC3
9233 && sparc_cpu != PROCESSOR_NIAGARA
9234 && sparc_cpu != PROCESSOR_NIAGARA2
9235 && sparc_cpu != PROCESSOR_NIAGARA3
9236 && sparc_cpu != PROCESSOR_NIAGARA4)
9237 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
9238
9239 /* Call __enable_execute_stack after writing onto the stack to make sure
9240 the stack address is accessible. */
9241 #ifdef HAVE_ENABLE_EXECUTE_STACK
9242 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9243 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9244 #endif
9245
9246 }
9247
9248 /* The 64-bit version is simpler because it makes more sense to load the
9249 values as "immediate" data out of the trampoline. It's also easier since
9250 we can read the PC without clobbering a register. */
9251
9252 static void
9253 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9254 {
9255 /* SPARC 64-bit trampoline:
9256
9257 rd %pc, %g1
9258 ldx [%g1+24], %g5
9259 jmp %g5
9260 ldx [%g1+16], %g5
9261 +16 bytes data
9262 */
9263
9264 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9265 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9266 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9267 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9268 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9269 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9270 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9271 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9272 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9273 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9274 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9275
9276 if (sparc_cpu != PROCESSOR_ULTRASPARC
9277 && sparc_cpu != PROCESSOR_ULTRASPARC3
9278 && sparc_cpu != PROCESSOR_NIAGARA
9279 && sparc_cpu != PROCESSOR_NIAGARA2
9280 && sparc_cpu != PROCESSOR_NIAGARA3
9281 && sparc_cpu != PROCESSOR_NIAGARA4)
9282 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9283
9284 /* Call __enable_execute_stack after writing onto the stack to make sure
9285 the stack address is accessible. */
9286 #ifdef HAVE_ENABLE_EXECUTE_STACK
9287 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9288 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9289 #endif
9290 }
9291
9292 /* Worker for TARGET_TRAMPOLINE_INIT. */
9293
9294 static void
9295 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9296 {
9297 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9298 cxt = force_reg (Pmode, cxt);
9299 if (TARGET_ARCH64)
9300 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9301 else
9302 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9303 }
9304 \f
9305 /* Adjust the cost of a scheduling dependency. Return the new cost of
9306 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9307
9308 static int
9309 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9310 {
9311 enum attr_type insn_type;
9312
9313 if (! recog_memoized (insn))
9314 return 0;
9315
9316 insn_type = get_attr_type (insn);
9317
9318 if (REG_NOTE_KIND (link) == 0)
9319 {
9320 /* Data dependency; DEP_INSN writes a register that INSN reads some
9321 cycles later. */
9322
9323 /* If a load, then the dependence must be on the memory address;
9324 add an extra "cycle". Note that the cost could be two cycles
9325 if the reg was written late in an instruction group; we can't tell
9326 here. */
9327 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9328 return cost + 3;
9329
9330 /* Get the delay only if the address of the store is the dependence. */
9331 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9332 {
9333 rtx pat = PATTERN (insn);
9334 rtx dep_pat = PATTERN (dep_insn);
9335
9336 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9337 return cost; /* This should not happen! */
9338
9339 /* The dependency between the two instructions was on the data that
9340 is being stored. Assume that this implies that the address of the
9341 store is not dependent. */
9342 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9343 return cost;
9344
9345 return cost + 3; /* An approximation. */
9346 }
9347
9348 /* A shift instruction cannot receive its data from an instruction
9349 in the same cycle; add a three-cycle penalty. */
9350 if (insn_type == TYPE_SHIFT)
9351 return cost + 3; /* Split before cascade into shift. */
9352 }
9353 else
9354 {
9355 /* Anti- or output-dependency; DEP_INSN reads/writes a register that
9356 INSN writes some cycles later. */
9357
9358 /* These are only significant for the fpu unit; writing an fp reg before
9359 the fpu has finished with it stalls the processor. */
9360
9361 /* Reusing an integer register causes no problems. */
9362 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9363 return 0;
9364 }
9365
9366 return cost;
9367 }
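/* A worked example of the adjustment above, under the stated SuperSPARC
   model: if DEP_INSN sets %o1 and INSN is the load "ld [%o1], %o2", the
   dependence is necessarily on the address, so the reported cost grows
   by 3; if INSN is a store whose *data* comes from DEP_INSN, the cost
   is returned unchanged, since only an address dependence delays the
   store.  */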
9368
9369 static int
9370 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9371 {
9372 enum attr_type insn_type, dep_type;
9373 rtx pat = PATTERN (insn);
9374 rtx dep_pat = PATTERN (dep_insn);
9375
9376 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9377 return cost;
9378
9379 insn_type = get_attr_type (insn);
9380 dep_type = get_attr_type (dep_insn);
9381
9382 switch (REG_NOTE_KIND (link))
9383 {
9384 case 0:
9385 /* Data dependency; DEP_INSN writes a register that INSN reads some
9386 cycles later. */
9387
9388 switch (insn_type)
9389 {
9390 case TYPE_STORE:
9391 case TYPE_FPSTORE:
9392 /* Get the delay iff the address of the store is the dependence. */
9393 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9394 return cost;
9395
9396 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9397 return cost;
9398 return cost + 3;
9399
9400 case TYPE_LOAD:
9401 case TYPE_SLOAD:
9402 case TYPE_FPLOAD:
9403 /* If a load, then the dependence must be on the memory address. If
9404 the addresses aren't equal, then it might be a false dependency. */
9405 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9406 {
9407 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9408 || GET_CODE (SET_DEST (dep_pat)) != MEM
9409 || GET_CODE (SET_SRC (pat)) != MEM
9410 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9411 XEXP (SET_SRC (pat), 0)))
9412 return cost + 2;
9413
9414 return cost + 8;
9415 }
9416 break;
9417
9418 case TYPE_BRANCH:
9419 /* Compare to branch latency is 0. There is no benefit from
9420 separating compare and branch. */
9421 if (dep_type == TYPE_COMPARE)
9422 return 0;
9423 /* Floating point compare to branch latency is less than
9424 compare to conditional move. */
9425 if (dep_type == TYPE_FPCMP)
9426 return cost - 1;
9427 break;
9428 default:
9429 break;
9430 }
9431 break;
9432
9433 case REG_DEP_ANTI:
9434 /* Anti-dependencies only penalize the fpu unit. */
9435 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9436 return 0;
9437 break;
9438
9439 default:
9440 break;
9441 }
9442
9443 return cost;
9444 }
9445
9446 static int
9447 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9448 {
9449 switch (sparc_cpu)
9450 {
9451 case PROCESSOR_SUPERSPARC:
9452 cost = supersparc_adjust_cost (insn, link, dep, cost);
9453 break;
9454 case PROCESSOR_HYPERSPARC:
9455 case PROCESSOR_SPARCLITE86X:
9456 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9457 break;
9458 default:
9459 break;
9460 }
9461 return cost;
9462 }
9463
9464 static void
9465 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9466 int sched_verbose ATTRIBUTE_UNUSED,
9467 int max_ready ATTRIBUTE_UNUSED)
9468 {}
9469
9470 static int
9471 sparc_use_sched_lookahead (void)
9472 {
9473 if (sparc_cpu == PROCESSOR_NIAGARA
9474 || sparc_cpu == PROCESSOR_NIAGARA2
9475 || sparc_cpu == PROCESSOR_NIAGARA3)
9476 return 0;
9477 if (sparc_cpu == PROCESSOR_NIAGARA4)
9478 return 2;
9479 if (sparc_cpu == PROCESSOR_ULTRASPARC
9480 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9481 return 4;
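  /* Bitmask membership test: equivalent to comparing sparc_cpu against
     SUPERSPARC, HYPERSPARC and SPARCLITE86X in turn.  */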
9482 if ((1 << sparc_cpu) &
9483 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9484 (1 << PROCESSOR_SPARCLITE86X)))
9485 return 3;
9486 return 0;
9487 }
9488
9489 static int
9490 sparc_issue_rate (void)
9491 {
9492 switch (sparc_cpu)
9493 {
9494 case PROCESSOR_NIAGARA:
9495 case PROCESSOR_NIAGARA2:
9496 case PROCESSOR_NIAGARA3:
9497 default:
9498 return 1;
9499 case PROCESSOR_NIAGARA4:
9500 case PROCESSOR_V9:
9501 /* Assume V9 processors are capable of at least dual-issue. */
9502 return 2;
9503 case PROCESSOR_SUPERSPARC:
9504 return 3;
9505 case PROCESSOR_HYPERSPARC:
9506 case PROCESSOR_SPARCLITE86X:
9507 return 2;
9508 case PROCESSOR_ULTRASPARC:
9509 case PROCESSOR_ULTRASPARC3:
9510 return 4;
9511 }
9512 }
9513
9514 static int
9515 set_extends (rtx insn)
9516 {
9517 register rtx pat = PATTERN (insn);
9518
9519 switch (GET_CODE (SET_SRC (pat)))
9520 {
9521 /* Load and some shift instructions zero extend. */
9522 case MEM:
9523 case ZERO_EXTEND:
9524 /* sethi clears the high bits. */
9525 case HIGH:
9526 /* LO_SUM is used with sethi. sethi cleared the high
9527 bits and the values used with lo_sum are positive. */
9528 case LO_SUM:
9529 /* Store flag stores 0 or 1. */
9530 case LT: case LTU:
9531 case GT: case GTU:
9532 case LE: case LEU:
9533 case GE: case GEU:
9534 case EQ:
9535 case NE:
9536 return 1;
9537 case AND:
9538 {
9539 rtx op0 = XEXP (SET_SRC (pat), 0);
9540 rtx op1 = XEXP (SET_SRC (pat), 1);
9541 if (GET_CODE (op1) == CONST_INT)
9542 return INTVAL (op1) >= 0;
9543 if (GET_CODE (op0) != REG)
9544 return 0;
9545 if (sparc_check_64 (op0, insn) == 1)
9546 return 1;
9547 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9548 }
9549 case IOR:
9550 case XOR:
9551 {
9552 rtx op0 = XEXP (SET_SRC (pat), 0);
9553 rtx op1 = XEXP (SET_SRC (pat), 1);
9554 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9555 return 0;
9556 if (GET_CODE (op1) == CONST_INT)
9557 return INTVAL (op1) >= 0;
9558 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9559 }
9560 case LSHIFTRT:
9561 return GET_MODE (SET_SRC (pat)) == SImode;
9562 /* Positive integers leave the high bits zero. */
9563 case CONST_DOUBLE:
9564 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9565 case CONST_INT:
9566 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9567 case ASHIFTRT:
9568 case SIGN_EXTEND:
9569 return - (GET_MODE (SET_SRC (pat)) == SImode);
9570 case REG:
9571 return sparc_check_64 (SET_SRC (pat), insn);
9572 default:
9573 return 0;
9574 }
9575 }
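/* The tri-state convention shared by set_extends and sparc_check_64:
   1 means the high 32 bits are known to be zero, -1 means the value is
   known to be sign-extended from 32 bits, and 0 means nothing is known.
   The ASHIFTRT/SIGN_EXTEND case above, for instance, evaluates to
   - (1) = -1 for an SImode operation, since an arithmetic right shift
   leaves a sign-extended result.  */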
9576
9577 /* We _ought_ to have only one kind per function, but... */
9578 static GTY(()) rtx sparc_addr_diff_list;
9579 static GTY(()) rtx sparc_addr_list;
9580
9581 void
9582 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9583 {
9584 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9585 if (diff)
9586 sparc_addr_diff_list
9587 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9588 else
9589 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9590 }
9591
9592 static void
9593 sparc_output_addr_vec (rtx vec)
9594 {
9595 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9596 int idx, vlen = XVECLEN (body, 0);
9597
9598 #ifdef ASM_OUTPUT_ADDR_VEC_START
9599 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9600 #endif
9601
9602 #ifdef ASM_OUTPUT_CASE_LABEL
9603 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9604 NEXT_INSN (lab));
9605 #else
9606 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9607 #endif
9608
9609 for (idx = 0; idx < vlen; idx++)
9610 {
9611 ASM_OUTPUT_ADDR_VEC_ELT
9612 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9613 }
9614
9615 #ifdef ASM_OUTPUT_ADDR_VEC_END
9616 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9617 #endif
9618 }
9619
9620 static void
9621 sparc_output_addr_diff_vec (rtx vec)
9622 {
9623 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9624 rtx base = XEXP (XEXP (body, 0), 0);
9625 int idx, vlen = XVECLEN (body, 1);
9626
9627 #ifdef ASM_OUTPUT_ADDR_VEC_START
9628 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9629 #endif
9630
9631 #ifdef ASM_OUTPUT_CASE_LABEL
9632 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9633 NEXT_INSN (lab));
9634 #else
9635 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9636 #endif
9637
9638 for (idx = 0; idx < vlen; idx++)
9639 {
9640 ASM_OUTPUT_ADDR_DIFF_ELT
9641 (asm_out_file,
9642 body,
9643 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9644 CODE_LABEL_NUMBER (base));
9645 }
9646
9647 #ifdef ASM_OUTPUT_ADDR_VEC_END
9648 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9649 #endif
9650 }
9651
9652 static void
9653 sparc_output_deferred_case_vectors (void)
9654 {
9655 rtx t;
9656 int align;
9657
9658 if (sparc_addr_list == NULL_RTX
9659 && sparc_addr_diff_list == NULL_RTX)
9660 return;
9661
9662 /* Align to cache line in the function's code section. */
9663 switch_to_section (current_function_section ());
9664
9665 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9666 if (align > 0)
9667 ASM_OUTPUT_ALIGN (asm_out_file, align);
9668
9669 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9670 sparc_output_addr_vec (XEXP (t, 0));
9671 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9672 sparc_output_addr_diff_vec (XEXP (t, 0));
9673
9674 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9675 }
9676
9677 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9678 unknown. Return 1 if the high bits are zero, -1 if the register is
9679 sign extended. */
9680 int
9681 sparc_check_64 (rtx x, rtx insn)
9682 {
9683 /* If a register is set only once, it is safe to ignore insns this
9684 code does not know how to handle. The loop will either recognize
9685 the single set and return the correct value or fail to recognize
9686 it and return 0. */
9687 int set_once = 0;
9688 rtx y = x;
9689
9690 gcc_assert (GET_CODE (x) == REG);
9691
9692 if (GET_MODE (x) == DImode)
9693 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9694
9695 if (flag_expensive_optimizations
9696 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9697 set_once = 1;
9698
9699 if (insn == 0)
9700 {
9701 if (set_once)
9702 insn = get_last_insn_anywhere ();
9703 else
9704 return 0;
9705 }
9706
9707 while ((insn = PREV_INSN (insn)))
9708 {
9709 switch (GET_CODE (insn))
9710 {
9711 case JUMP_INSN:
9712 case NOTE:
9713 break;
9714 case CODE_LABEL:
9715 case CALL_INSN:
9716 default:
9717 if (! set_once)
9718 return 0;
9719 break;
9720 case INSN:
9721 {
9722 rtx pat = PATTERN (insn);
9723 if (GET_CODE (pat) != SET)
9724 return 0;
9725 if (rtx_equal_p (x, SET_DEST (pat)))
9726 return set_extends (insn);
9727 if (y && rtx_equal_p (y, SET_DEST (pat)))
9728 return set_extends (insn);
9729 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9730 return 0;
9731 }
9732 }
9733 }
9734 return 0;
9735 }
9736
9737 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9738 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9739
9740 const char *
9741 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9742 {
9743 static char asm_code[60];
9744
9745 /* The scratch register is only required when the destination
9746 register is not a 64-bit global or out register. */
9747 if (which_alternative != 2)
9748 operands[3] = operands[0];
9749
9750 /* We can only shift by constants <= 63. */
9751 if (GET_CODE (operands[2]) == CONST_INT)
9752 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9753
9754 if (GET_CODE (operands[1]) == CONST_INT)
9755 {
9756 output_asm_insn ("mov\t%1, %3", operands);
9757 }
9758 else
9759 {
9760 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9761 if (sparc_check_64 (operands[1], insn) <= 0)
9762 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9763 output_asm_insn ("or\t%L1, %3, %3", operands);
9764 }
9765
9766 strcpy (asm_code, opcode);
9767
9768 if (which_alternative != 2)
9769 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9770 else
9771 return
9772 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9773 }
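/* What the three-instruction build-up above computes, expressed as
   plain C (a sketch of the semantics, not compiler code): the 64-bit
   source is reassembled from its 32-bit halves before the shift.  */
#include <stdint.h>

static uint64_t
v8plus_combine_halves (uint32_t hi, uint32_t lo)
{
  /* sllx %H1, 32, %3   -> (uint64_t) hi << 32
     srl  %L1, 0, %L1   -> zero-extend lo, clearing stale high bits
     or   %L1, %3, %3   -> merge the halves  */
  return ((uint64_t) hi << 32) | lo;
}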
9774 \f
9775 /* Output rtl to increment the profiler label LABELNO
9776 for profiling a function entry. */
9777
9778 void
9779 sparc_profile_hook (int labelno)
9780 {
9781 char buf[32];
9782 rtx lab, fun;
9783
9784 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9785 if (NO_PROFILE_COUNTERS)
9786 {
9787 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9788 }
9789 else
9790 {
9791 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9792 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9793 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9794 }
9795 }
9796 \f
9797 #ifdef TARGET_SOLARIS
9798 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9799
9800 static void
9801 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9802 tree decl ATTRIBUTE_UNUSED)
9803 {
9804 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9805 {
9806 solaris_elf_asm_comdat_section (name, flags, decl);
9807 return;
9808 }
9809
9810 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9811
9812 if (!(flags & SECTION_DEBUG))
9813 fputs (",#alloc", asm_out_file);
9814 if (flags & SECTION_WRITE)
9815 fputs (",#write", asm_out_file);
9816 if (flags & SECTION_TLS)
9817 fputs (",#tls", asm_out_file);
9818 if (flags & SECTION_CODE)
9819 fputs (",#execinstr", asm_out_file);
9820
9821 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9822 if (HAVE_AS_SPARC_NOBITS)
9823 {
9824 if (flags & SECTION_BSS)
9825 fputs (",#nobits", asm_out_file);
9826 else
9827 fputs (",#progbits", asm_out_file);
9828 }
9829
9830 fputc ('\n', asm_out_file);
9831 }
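/* For illustration (a sketch of the directive shape, not captured
   compiler output): a writable TLS data section on a new-enough Sun as
   would come out roughly as

       .section  ".tdata",#alloc,#write,#tls,#progbits  */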
9832 #endif /* TARGET_SOLARIS */
9833
9834 /* We do not allow indirect calls to be optimized into sibling calls.
9835
9836 We cannot use sibling calls when delayed branches are disabled
9837 because they will likely require the call delay slot to be filled.
9838
9839 Also, on SPARC 32-bit we cannot emit a sibling call when the
9840 current function returns a structure. This is because the "unimp
9841 after call" convention would cause the callee to return to the
9842 wrong place. The generic code already disallows cases where the
9843 function being called returns a structure.
9844
9845 It may seem strange how this last case could occur. Usually there
9846 is code after the call which jumps to epilogue code which dumps the
9847 return value into the struct return area. That ought to invalidate
9848 the sibling call, right? Well, in the C++ case we can end up passing
9849 the pointer to the struct return area to a constructor (which returns
9850 void) and then nothing else happens. Such a sibling call would look
9851 valid without the added check here.
9852
9853 VxWorks PIC PLT entries require the global pointer to be initialized
9854 on entry. We therefore can't emit sibling calls to them. */
9855 static bool
9856 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9857 {
9858 return (decl
9859 && flag_delayed_branch
9860 && (TARGET_ARCH64 || ! cfun->returns_struct)
9861 && !(TARGET_VXWORKS_RTP
9862 && flag_pic
9863 && !targetm.binds_local_p (decl)));
9864 }
9865 \f
9866 /* libfunc renaming. */
9867
9868 static void
9869 sparc_init_libfuncs (void)
9870 {
9871 if (TARGET_ARCH32)
9872 {
9873 /* Use the subroutines that Sun's library provides for integer
9874 multiply and divide. The `*' prevents an underscore from
9875 being prepended by the compiler. .umul is a little faster
9876 than .mul. */
9877 set_optab_libfunc (smul_optab, SImode, "*.umul");
9878 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9879 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9880 set_optab_libfunc (smod_optab, SImode, "*.rem");
9881 set_optab_libfunc (umod_optab, SImode, "*.urem");
9882
9883 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9884 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9885 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9886 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9887 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9888 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9889
9890 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9891 is because with soft-float, the SFmode and DFmode sqrt
9892 instructions will be absent, and the compiler will notice and
9893 try to use the TFmode sqrt instruction for calls to the
9894 builtin function sqrt, but this fails. */
9895 if (TARGET_FPU)
9896 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9897
9898 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9899 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9900 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9901 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9902 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9903 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9904
9905 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9906 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9907 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9908 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9909
9910 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9911 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9912 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9913 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9914
9915 if (DITF_CONVERSION_LIBFUNCS)
9916 {
9917 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9918 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9919 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9920 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9921 }
9922
9923 if (SUN_CONVERSION_LIBFUNCS)
9924 {
9925 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9926 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9927 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9928 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9929 }
9930 }
9931 if (TARGET_ARCH64)
9932 {
9933 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9934 do not exist in the library. Make sure the compiler does not
9935 emit calls to them by accident. (It should always use the
9936 hardware instructions.) */
9937 set_optab_libfunc (smul_optab, SImode, 0);
9938 set_optab_libfunc (sdiv_optab, SImode, 0);
9939 set_optab_libfunc (udiv_optab, SImode, 0);
9940 set_optab_libfunc (smod_optab, SImode, 0);
9941 set_optab_libfunc (umod_optab, SImode, 0);
9942
9943 if (SUN_INTEGER_MULTIPLY_64)
9944 {
9945 set_optab_libfunc (smul_optab, DImode, "__mul64");
9946 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9947 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9948 set_optab_libfunc (smod_optab, DImode, "__rem64");
9949 set_optab_libfunc (umod_optab, DImode, "__urem64");
9950 }
9951
9952 if (SUN_CONVERSION_LIBFUNCS)
9953 {
9954 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9955 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9956 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9957 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9958 }
9959 }
9960 }
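/* As an illustration of the ARCH32 bindings above: a plain long double
   addition, e.g.

       long double f (long double x, long double y) { return x + y; }

   compiles to a call to _Q_add instead of inline code, since TFmode
   arithmetic is provided by the library on 32-bit SPARC (a sketch; the
   exact sequence depends on the options in effect).  */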
9961 \f
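/* Small helpers for registering SPARC builtins; def_builtin_const
   additionally marks the declaration TREE_READONLY, allowing calls
   with constant arguments to be folded or CSEd.  */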
9962 static tree def_builtin (const char *name, int code, tree type)
9963 {
9964 return add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
9965 NULL_TREE);
9966 }
9967
9968 static tree def_builtin_const (const char *name, int code, tree type)
9969 {
9970 tree t = def_builtin (name, code, type);
9971
9972 if (t)
9973 TREE_READONLY (t) = 1;
9974
9975 return t;
9976 }
9977
9978 /* Implement the TARGET_INIT_BUILTINS target hook.
9979 Create builtin functions for special SPARC instructions. */
9980
9981 static void
9982 sparc_init_builtins (void)
9983 {
9984 if (TARGET_VIS)
9985 sparc_vis_init_builtins ();
9986 }
9987
9988 /* Create builtin functions for VIS 1.0 instructions. */
9989
9990 static void
9991 sparc_vis_init_builtins (void)
9992 {
9993 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9994 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9995 tree v4hi = build_vector_type (intHI_type_node, 4);
9996 tree v2hi = build_vector_type (intHI_type_node, 2);
9997 tree v2si = build_vector_type (intSI_type_node, 2);
9998 tree v1si = build_vector_type (intSI_type_node, 1);
9999
10000 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10001 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10002 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10003 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10004 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10005 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10006 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10007 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10008 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10009 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10010 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10011 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10012 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10013 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10014 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10015 v8qi, v8qi,
10016 intDI_type_node, 0);
10017 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10018 v8qi, v8qi, 0);
10019 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10020 v8qi, v8qi, 0);
10021 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10022 intDI_type_node,
10023 intDI_type_node, 0);
10024 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10025 intSI_type_node,
10026 intSI_type_node, 0);
10027 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10028 ptr_type_node,
10029 intSI_type_node, 0);
10030 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10031 ptr_type_node,
10032 intDI_type_node, 0);
10033 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10034 ptr_type_node,
10035 ptr_type_node, 0);
10036 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10037 ptr_type_node,
10038 ptr_type_node, 0);
10039 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10040 v4hi, v4hi, 0);
10041 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10042 v2si, v2si, 0);
10043 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10044 v4hi, v4hi, 0);
10045 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10046 v2si, v2si, 0);
10047 tree void_ftype_di = build_function_type_list (void_type_node,
10048 intDI_type_node, 0);
10049 tree di_ftype_void = build_function_type_list (intDI_type_node,
10050 void_type_node, 0);
10051 tree void_ftype_si = build_function_type_list (void_type_node,
10052 intSI_type_node, 0);
10053 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10054 float_type_node,
10055 float_type_node, 0);
10056 tree df_ftype_df_df = build_function_type_list (double_type_node,
10057 double_type_node,
10058 double_type_node, 0);
10059
10060 /* Packing and expanding vectors. */
10061 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10062 v4qi_ftype_v4hi);
10063 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10064 v8qi_ftype_v2si_v8qi);
10065 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10066 v2hi_ftype_v2si);
10067 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10068 v4hi_ftype_v4qi);
10069 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10070 v8qi_ftype_v4qi_v4qi);
10071
10072 /* Multiplications. */
10073 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10074 v4hi_ftype_v4qi_v4hi);
10075 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10076 v4hi_ftype_v4qi_v2hi);
10077 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10078 v4hi_ftype_v4qi_v2hi);
10079 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10080 v4hi_ftype_v8qi_v4hi);
10081 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10082 v4hi_ftype_v8qi_v4hi);
10083 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10084 v2si_ftype_v4qi_v2hi);
10085 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10086 v2si_ftype_v4qi_v2hi);
10087
10088 /* Data aligning. */
10089 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10090 v4hi_ftype_v4hi_v4hi);
10091 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10092 v8qi_ftype_v8qi_v8qi);
10093 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10094 v2si_ftype_v2si_v2si);
10095 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10096 di_ftype_di_di);
10097
10098 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10099 void_ftype_di);
10100 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10101 di_ftype_void);
10102
10103 if (TARGET_ARCH64)
10104 {
10105 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10106 ptr_ftype_ptr_di);
10107 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10108 ptr_ftype_ptr_di);
10109 }
10110 else
10111 {
10112 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10113 ptr_ftype_ptr_si);
10114 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10115 ptr_ftype_ptr_si);
10116 }
10117
10118 /* Pixel distance. */
10119 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10120 di_ftype_v8qi_v8qi_di);
10121
10122 /* Edge handling. */
10123 if (TARGET_ARCH64)
10124 {
10125 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10126 di_ftype_ptr_ptr);
10127 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10128 di_ftype_ptr_ptr);
10129 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10130 di_ftype_ptr_ptr);
10131 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10132 di_ftype_ptr_ptr);
10133 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10134 di_ftype_ptr_ptr);
10135 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10136 di_ftype_ptr_ptr);
10137 if (TARGET_VIS2)
10138 {
10139 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10140 di_ftype_ptr_ptr);
10141 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10142 di_ftype_ptr_ptr);
10143 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10144 di_ftype_ptr_ptr);
10145 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10146 di_ftype_ptr_ptr);
10147 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10148 di_ftype_ptr_ptr);
10149 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10150 di_ftype_ptr_ptr);
10151 }
10152 }
10153 else
10154 {
10155 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10156 si_ftype_ptr_ptr);
10157 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10158 si_ftype_ptr_ptr);
10159 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10160 si_ftype_ptr_ptr);
10161 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10162 si_ftype_ptr_ptr);
10163 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10164 si_ftype_ptr_ptr);
10165 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10166 si_ftype_ptr_ptr);
10167 if (TARGET_VIS2)
10168 {
10169 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10170 si_ftype_ptr_ptr);
10171 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10172 si_ftype_ptr_ptr);
10173 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10174 si_ftype_ptr_ptr);
10175 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10176 si_ftype_ptr_ptr);
10177 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10178 si_ftype_ptr_ptr);
10179 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10180 si_ftype_ptr_ptr);
10181 }
10182 }
10183
10184 /* Pixel compare. */
10185 if (TARGET_ARCH64)
10186 {
10187 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10188 di_ftype_v4hi_v4hi);
10189 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10190 di_ftype_v2si_v2si);
10191 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10192 di_ftype_v4hi_v4hi);
10193 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10194 di_ftype_v2si_v2si);
10195 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10196 di_ftype_v4hi_v4hi);
10197 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10198 di_ftype_v2si_v2si);
10199 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10200 di_ftype_v4hi_v4hi);
10201 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10202 di_ftype_v2si_v2si);
10203 }
10204 else
10205 {
10206 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10207 si_ftype_v4hi_v4hi);
10208 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10209 si_ftype_v2si_v2si);
10210 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10211 si_ftype_v4hi_v4hi);
10212 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10213 si_ftype_v2si_v2si);
10214 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10215 si_ftype_v4hi_v4hi);
10216 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10217 si_ftype_v2si_v2si);
10218 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10219 si_ftype_v4hi_v4hi);
10220 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10221 si_ftype_v2si_v2si);
10222 }
10223
10224 /* Addition and subtraction. */
10225 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10226 v4hi_ftype_v4hi_v4hi);
10227 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10228 v2hi_ftype_v2hi_v2hi);
10229 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10230 v2si_ftype_v2si_v2si);
10231 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10232 v1si_ftype_v1si_v1si);
10233 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10234 v4hi_ftype_v4hi_v4hi);
10235 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10236 v2hi_ftype_v2hi_v2hi);
10237 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10238 v2si_ftype_v2si_v2si);
10239 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10240 v1si_ftype_v1si_v1si);
10241
10242 /* Three-dimensional array addressing. */
10243 if (TARGET_ARCH64)
10244 {
10245 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10246 di_ftype_di_di);
10247 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10248 di_ftype_di_di);
10249 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10250 di_ftype_di_di);
10251 }
10252 else
10253 {
10254 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10255 si_ftype_si_si);
10256 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10257 si_ftype_si_si);
10258 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10259 si_ftype_si_si);
10260 }
10261
10262 if (TARGET_VIS2)
10263 {
10264 /* Byte mask and shuffle. */
10265 if (TARGET_ARCH64)
10266 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10267 di_ftype_di_di);
10268 else
10269 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10270 si_ftype_si_si);
10271 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10272 v4hi_ftype_v4hi_v4hi);
10273 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10274 v8qi_ftype_v8qi_v8qi);
10275 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10276 v2si_ftype_v2si_v2si);
10277 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10278 di_ftype_di_di);
10279 }
10280
10281 if (TARGET_VIS3)
10282 {
10283 if (TARGET_ARCH64)
10284 {
10285 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10286 void_ftype_di);
10287 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10288 void_ftype_di);
10289 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10290 void_ftype_di);
10291 }
10292 else
10293 {
10294 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10295 void_ftype_si);
10296 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10297 void_ftype_si);
10298 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10299 void_ftype_si);
10300 }
10301
10302 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10303 v4hi_ftype_v4hi_v4hi);
10304
10305 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10306 v4hi_ftype_v4hi_v4hi);
10307 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10308 v4hi_ftype_v4hi_v4hi);
10309 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10310 v4hi_ftype_v4hi_v4hi);
10311 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10312 v4hi_ftype_v4hi_v4hi);
10313 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10314 v2si_ftype_v2si_v2si);
10315 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10316 v2si_ftype_v2si_v2si);
10317 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10318 v2si_ftype_v2si_v2si);
10319 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10320 v2si_ftype_v2si_v2si);
10321
10322 if (TARGET_ARCH64)
10323 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10324 di_ftype_v8qi_v8qi);
10325 else
10326 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10327 si_ftype_v8qi_v8qi);
10328
10329 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10330 v4hi_ftype_v4hi_v4hi);
10331 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10332 di_ftype_di_di);
10333 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10334 di_ftype_di_di);
10335
10336 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10337 v4hi_ftype_v4hi_v4hi);
10338 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10339 v2hi_ftype_v2hi_v2hi);
10340 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10341 v4hi_ftype_v4hi_v4hi);
10342 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10343 v2hi_ftype_v2hi_v2hi);
10344 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10345 v2si_ftype_v2si_v2si);
10346 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10347 v1si_ftype_v1si_v1si);
10348 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10349 v2si_ftype_v2si_v2si);
10350 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10351 v1si_ftype_v1si_v1si);
10352
10353 if (TARGET_ARCH64)
10354 {
10355 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10356 di_ftype_v8qi_v8qi);
10357 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10358 di_ftype_v8qi_v8qi);
10359 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10360 di_ftype_v8qi_v8qi);
10361 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10362 di_ftype_v8qi_v8qi);
10363 }
10364 else
10365 {
10366 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10367 si_ftype_v8qi_v8qi);
10368 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10369 si_ftype_v8qi_v8qi);
10370 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10371 si_ftype_v8qi_v8qi);
10372 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10373 si_ftype_v8qi_v8qi);
10374 }
10375
10376 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10377 sf_ftype_sf_sf);
10378 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10379 df_ftype_df_df);
10380 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10381 sf_ftype_sf_sf);
10382 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10383 df_ftype_df_df);
10384 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10385 sf_ftype_sf_sf);
10386 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10387 df_ftype_df_df);
10388
10389 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10390 di_ftype_di_di);
10391 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10392 di_ftype_di_di);
10393 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10394 di_ftype_di_di);
10395 }
10396 }
10397
10398 /* Handle TARGET_EXPAND_BUILTIN target hook.
10399 Expand builtin functions for SPARC intrinsics. */
10400
10401 static rtx
10402 sparc_expand_builtin (tree exp, rtx target,
10403 rtx subtarget ATTRIBUTE_UNUSED,
10404 enum machine_mode tmode ATTRIBUTE_UNUSED,
10405 int ignore ATTRIBUTE_UNUSED)
10406 {
10407 tree arg;
10408 call_expr_arg_iterator iter;
10409 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10410 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10411 rtx pat, op[4];
10412 int arg_count = 0;
10413 bool nonvoid;
10414
10415 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10416
10417 if (nonvoid)
10418 {
10419 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10420 if (!target
10421 || GET_MODE (target) != tmode
10422 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10423 op[0] = gen_reg_rtx (tmode);
10424 else
10425 op[0] = target;
10426 }
10427 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10428 {
10429 const struct insn_operand_data *insn_op;
10430 int idx;
10431
10432 if (arg == error_mark_node)
10433 return NULL_RTX;
10434
10435 arg_count++;
10436 idx = arg_count - !nonvoid;
10437 insn_op = &insn_data[icode].operand[idx];
10438 op[arg_count] = expand_normal (arg);
10439
10440 if (insn_op->mode == V1DImode
10441 && GET_MODE (op[arg_count]) == DImode)
10442 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10443 else if (insn_op->mode == V1SImode
10444 && GET_MODE (op[arg_count]) == SImode)
10445 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10446
10447 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10448 insn_op->mode))
10449 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10450 }
10451
10452 switch (arg_count)
10453 {
10454 case 0:
10455 pat = GEN_FCN (icode) (op[0]);
10456 break;
10457 case 1:
10458 if (nonvoid)
10459 pat = GEN_FCN (icode) (op[0], op[1]);
10460 else
10461 pat = GEN_FCN (icode) (op[1]);
10462 break;
10463 case 2:
10464 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10465 break;
10466 case 3:
10467 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10468 break;
10469 default:
10470 gcc_unreachable ();
10471 }
10472
10473 if (!pat)
10474 return NULL_RTX;
10475
10476 emit_insn (pat);
10477
10478 if (nonvoid)
10479 return op[0];
10480 else
10481 return const0_rtx;
10482 }
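/* Example of what this expander handles (user code compiled with -mvis;
   a sketch, with the typedef name chosen here for illustration):

       typedef short vec16 __attribute__ ((vector_size (8)));

       vec16 sum (vec16 a, vec16 b)
       {
         return __builtin_vis_fpadd16 (a, b);
       }

   Here DECL_FUNCTION_CODE yields CODE_FOR_addv4hi3, registered above,
   and the argument loop maps A and B onto that insn's operand
   predicates before emitting the pattern.  */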
10483
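/* The helper below computes (E8 * E16 + 128) / 256, i.e. the 8-bit by
   16-bit fixed-point product of the VIS fmul8x16 family, scaled back
   with round-to-nearest.  Worked example: e8 = 200, e16 = 300 gives
   (60000 + 128) / 256 = 234.  */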
10484 static int
10485 sparc_vis_mul8x16 (int e8, int e16)
10486 {
10487 return (e8 * e16 + 128) / 256;
10488 }
10489
10490 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10491 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10492
10493 static void
10494 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10495 tree cst0, tree cst1)
10496 {
10497 unsigned i, num = VECTOR_CST_NELTS (cst0);
10498 int scale;
10499
10500 switch (fncode)
10501 {
10502 case CODE_FOR_fmul8x16_vis:
10503 for (i = 0; i < num; ++i)
10504 {
10505 int val
10506 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10507 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10508 n_elts[i] = build_int_cst (inner_type, val);
10509 }
10510 break;
10511
10512 case CODE_FOR_fmul8x16au_vis:
10513 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10514
10515 for (i = 0; i < num; ++i)
10516 {
10517 int val
10518 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10519 scale);
10520 n_elts[i] = build_int_cst (inner_type, val);
10521 }
10522 break;
10523
10524 case CODE_FOR_fmul8x16al_vis:
10525 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10526
10527 for (i = 0; i < num; ++i)
10528 {
10529 int val
10530 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10531 scale);
10532 n_elts[i] = build_int_cst (inner_type, val);
10533 }
10534 break;
10535
10536 default:
10537 gcc_unreachable ();
10538 }
10539 }
10540
10541 /* Handle TARGET_FOLD_BUILTIN target hook.
10542 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10543 result of the function call is ignored. NULL_TREE is returned if the
10544 function could not be folded. */
10545
10546 static tree
10547 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10548 tree *args, bool ignore)
10549 {
10550 tree arg0, arg1, arg2;
10551 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10552 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10553
10554 if (ignore)
10555 {
10556 /* Note that a switch statement instead of the sequence of tests would
10557 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10558 and that would yield multiple alternatives with identical values. */
10559 if (icode == CODE_FOR_alignaddrsi_vis
10560 || icode == CODE_FOR_alignaddrdi_vis
10561 || icode == CODE_FOR_wrgsr_vis
10562 || icode == CODE_FOR_bmasksi_vis
10563 || icode == CODE_FOR_bmaskdi_vis
10564 || icode == CODE_FOR_cmask8si_vis
10565 || icode == CODE_FOR_cmask8di_vis
10566 || icode == CODE_FOR_cmask16si_vis
10567 || icode == CODE_FOR_cmask16di_vis
10568 || icode == CODE_FOR_cmask32si_vis
10569 || icode == CODE_FOR_cmask32di_vis)
10570 ;
10571 else
10572 return build_zero_cst (rtype);
10573 }
10574
10575 switch (icode)
10576 {
10577 case CODE_FOR_fexpand_vis:
10578 arg0 = args[0];
10579 STRIP_NOPS (arg0);
10580
10581 if (TREE_CODE (arg0) == VECTOR_CST)
10582 {
10583 tree inner_type = TREE_TYPE (rtype);
10584 tree *n_elts;
10585 unsigned i;
10586
10587 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10588 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10589 n_elts[i] = build_int_cst (inner_type,
10590 TREE_INT_CST_LOW
10591 (VECTOR_CST_ELT (arg0, i)) << 4);
10592 return build_vector (rtype, n_elts);
10593 }
10594 break;
10595
10596 case CODE_FOR_fmul8x16_vis:
10597 case CODE_FOR_fmul8x16au_vis:
10598 case CODE_FOR_fmul8x16al_vis:
10599 arg0 = args[0];
10600 arg1 = args[1];
10601 STRIP_NOPS (arg0);
10602 STRIP_NOPS (arg1);
10603
10604 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10605 {
10606 tree inner_type = TREE_TYPE (rtype);
10607 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10608 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10609 return build_vector (rtype, n_elts);
10610 }
10611 break;
10612
10613 case CODE_FOR_fpmerge_vis:
10614 arg0 = args[0];
10615 arg1 = args[1];
10616 STRIP_NOPS (arg0);
10617 STRIP_NOPS (arg1);
10618
10619 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10620 {
10621 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10622 unsigned i;
10623 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10624 {
10625 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10626 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10627 }
10628
10629 return build_vector (rtype, n_elts);
10630 }
10631 break;
10632
10633 case CODE_FOR_pdist_vis:
10634 arg0 = args[0];
10635 arg1 = args[1];
10636 arg2 = args[2];
10637 STRIP_NOPS (arg0);
10638 STRIP_NOPS (arg1);
10639 STRIP_NOPS (arg2);
10640
10641 if (TREE_CODE (arg0) == VECTOR_CST
10642 && TREE_CODE (arg1) == VECTOR_CST
10643 && TREE_CODE (arg2) == INTEGER_CST)
10644 {
10645 bool overflow = false;
10646 double_int result = TREE_INT_CST (arg2);
10647 double_int tmp;
10648 unsigned i;
10649
10650 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10651 {
10652 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10653 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10654
10655 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10656
10657 tmp = e1.neg_with_overflow (&neg1_ovf);
10658 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10659 if (tmp.is_negative ())
10660 tmp = tmp.neg_with_overflow (&neg2_ovf);
10661
10662 result = result.add_with_sign (tmp, false, &add2_ovf);
10663 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10664 }
10665
10666 gcc_assert (!overflow);
10667
10668 return build_int_cst_wide (rtype, result.low, result.high);
10669 }
10670
10671 default:
10672 break;
10673 }
10674
10675 return NULL_TREE;
10676 }
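/* Example of the folding performed above (a sketch): with constant
   input, __builtin_vis_fexpand applied to the vector { 1, 2, 3, 4 }
   folds at compile time to { 16, 32, 48, 64 }, each 8-bit element
   being widened to 16 bits and shifted left by 4.  */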
10677 \f
10678 /* ??? This duplicates information provided to the compiler by the
10679 ??? scheduler description. Some day, teach genautomata to output
10680 ??? the latencies and then CSE will just use that. */
10681
10682 static bool
10683 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10684 int *total, bool speed ATTRIBUTE_UNUSED)
10685 {
10686 enum machine_mode mode = GET_MODE (x);
10687 bool float_mode_p = FLOAT_MODE_P (mode);
10688
10689 switch (code)
10690 {
10691 case CONST_INT:
10692 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10693 {
10694 *total = 0;
10695 return true;
10696 }
10697 /* FALLTHRU */
10698
10699 case HIGH:
10700 *total = 2;
10701 return true;
10702
10703 case CONST:
10704 case LABEL_REF:
10705 case SYMBOL_REF:
10706 *total = 4;
10707 return true;
10708
10709 case CONST_DOUBLE:
10710 if (GET_MODE (x) == VOIDmode
10711 && ((CONST_DOUBLE_HIGH (x) == 0
10712 && CONST_DOUBLE_LOW (x) < 0x1000)
10713 || (CONST_DOUBLE_HIGH (x) == -1
10714 && CONST_DOUBLE_LOW (x) < 0
10715 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10716 *total = 0;
10717 else
10718 *total = 8;
10719 return true;
10720
10721 case MEM:
10722 /* If outer-code was a sign or zero extension, a cost
10723 of COSTS_N_INSNS (1) was already added in. This is
10724 why we are subtracting it back out. */
10725 if (outer_code == ZERO_EXTEND)
10726 {
10727 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10728 }
10729 else if (outer_code == SIGN_EXTEND)
10730 {
10731 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10732 }
10733 else if (float_mode_p)
10734 {
10735 *total = sparc_costs->float_load;
10736 }
10737 else
10738 {
10739 *total = sparc_costs->int_load;
10740 }
10741
10742 return true;
10743
10744 case PLUS:
10745 case MINUS:
10746 if (float_mode_p)
10747 *total = sparc_costs->float_plusminus;
10748 else
10749 *total = COSTS_N_INSNS (1);
10750 return false;
10751
10752 case FMA:
10753 {
10754 rtx sub;
10755
10756 gcc_assert (float_mode_p);
10757 *total = sparc_costs->float_mul;
10758
10759 sub = XEXP (x, 0);
10760 if (GET_CODE (sub) == NEG)
10761 sub = XEXP (sub, 0);
10762 *total += rtx_cost (sub, FMA, 0, speed);
10763
10764 sub = XEXP (x, 2);
10765 if (GET_CODE (sub) == NEG)
10766 sub = XEXP (sub, 0);
10767 *total += rtx_cost (sub, FMA, 2, speed);
10768 return true;
10769 }
10770
10771 case MULT:
10772 if (float_mode_p)
10773 *total = sparc_costs->float_mul;
10774 else if (! TARGET_HARD_MUL)
10775 *total = COSTS_N_INSNS (25);
10776 else
10777 {
10778 int bit_cost;
10779
10780 bit_cost = 0;
10781 if (sparc_costs->int_mul_bit_factor)
10782 {
10783 int nbits;
10784
10785 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10786 {
10787 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
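                  /* Kernighan's bit-count: value &= value - 1 clears the
                     lowest set bit, so the loop runs once per set bit.  */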
10788 for (nbits = 0; value != 0; value &= value - 1)
10789 nbits++;
10790 }
10791 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10792 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10793 {
10794 rtx x1 = XEXP (x, 1);
10795 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10796 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10797
10798 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10799 nbits++;
10800 for (; value2 != 0; value2 &= value2 - 1)
10801 nbits++;
10802 }
10803 else
10804 nbits = 7;
10805
10806 if (nbits < 3)
10807 nbits = 3;
10808 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10809 bit_cost = COSTS_N_INSNS (bit_cost);
10810 }
10811
10812 if (mode == DImode)
10813 *total = sparc_costs->int_mulX + bit_cost;
10814 else
10815 *total = sparc_costs->int_mul + bit_cost;
10816 }
10817 return false;
10818
10819 case ASHIFT:
10820 case ASHIFTRT:
10821 case LSHIFTRT:
10822 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10823 return false;
10824
10825 case DIV:
10826 case UDIV:
10827 case MOD:
10828 case UMOD:
10829 if (float_mode_p)
10830 {
10831 if (mode == DFmode)
10832 *total = sparc_costs->float_div_df;
10833 else
10834 *total = sparc_costs->float_div_sf;
10835 }
10836 else
10837 {
10838 if (mode == DImode)
10839 *total = sparc_costs->int_divX;
10840 else
10841 *total = sparc_costs->int_div;
10842 }
10843 return false;
10844
10845 case NEG:
10846 if (! float_mode_p)
10847 {
10848 *total = COSTS_N_INSNS (1);
10849 return false;
10850 }
10851 /* FALLTHRU */
10852
10853 case ABS:
10854 case FLOAT:
10855 case UNSIGNED_FLOAT:
10856 case FIX:
10857 case UNSIGNED_FIX:
10858 case FLOAT_EXTEND:
10859 case FLOAT_TRUNCATE:
10860 *total = sparc_costs->float_move;
10861 return false;
10862
10863 case SQRT:
10864 if (mode == DFmode)
10865 *total = sparc_costs->float_sqrt_df;
10866 else
10867 *total = sparc_costs->float_sqrt_sf;
10868 return false;
10869
10870 case COMPARE:
10871 if (float_mode_p)
10872 *total = sparc_costs->float_cmp;
10873 else
10874 *total = COSTS_N_INSNS (1);
10875 return false;
10876
10877 case IF_THEN_ELSE:
10878 if (float_mode_p)
10879 *total = sparc_costs->float_cmove;
10880 else
10881 *total = sparc_costs->int_cmove;
10882 return false;
10883
10884 case IOR:
10885 /* Handle the NAND vector patterns. */
10886 if (sparc_vector_mode_supported_p (GET_MODE (x))
10887 && GET_CODE (XEXP (x, 0)) == NOT
10888 && GET_CODE (XEXP (x, 1)) == NOT)
10889 {
10890 *total = COSTS_N_INSNS (1);
10891 return true;
10892 }
10893 else
10894 return false;
10895
10896 default:
10897 return false;
10898 }
10899 }
10900
10901 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10902
10903 static inline bool
10904 general_or_i64_p (reg_class_t rclass)
10905 {
10906 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10907 }
10908
10909 /* Implement TARGET_REGISTER_MOVE_COST. */
10910
10911 static int
10912 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10913 reg_class_t from, reg_class_t to)
10914 {
10915 bool need_memory = false;
10916
10917 if (from == FPCC_REGS || to == FPCC_REGS)
10918 need_memory = true;
10919 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10920 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10921 {
10922 if (TARGET_VIS3)
10923 {
10924 int size = GET_MODE_SIZE (mode);
10925 if (size == 8 || size == 4)
10926 {
10927 if (! TARGET_ARCH32 || size == 4)
10928 return 4;
10929 else
10930 return 6;
10931 }
10932 }
10933 need_memory = true;
10934 }
10935
10936 if (need_memory)
10937 {
10938 if (sparc_cpu == PROCESSOR_ULTRASPARC
10939 || sparc_cpu == PROCESSOR_ULTRASPARC3
10940 || sparc_cpu == PROCESSOR_NIAGARA
10941 || sparc_cpu == PROCESSOR_NIAGARA2
10942 || sparc_cpu == PROCESSOR_NIAGARA3
10943 || sparc_cpu == PROCESSOR_NIAGARA4)
10944 return 12;
10945
10946 return 6;
10947 }
10948
10949 return 2;
10950 }
10951
10952 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10953 This is achieved by means of a manual dynamic stack space allocation in
10954 the current frame. We make the assumption that SEQ doesn't contain any
10955 function calls, with the possible exception of calls to the GOT helper. */
10956
10957 static void
10958 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10959 {
10960 /* We must preserve the lowest 16 words for the register save area. */
10961 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10962 /* We really need only 2 words of fresh stack space. */
10963 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10964
10965 rtx slot
10966 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10967 SPARC_STACK_BIAS + offset));
10968
10969 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10970 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10971 if (reg2)
10972 emit_insn (gen_rtx_SET (VOIDmode,
10973 adjust_address (slot, word_mode, UNITS_PER_WORD),
10974 reg2));
10975 emit_insn (seq);
10976 if (reg2)
10977 emit_insn (gen_rtx_SET (VOIDmode,
10978 reg2,
10979 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10980 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10981 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10982 }
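/* Stack picture while SEQ executes, in a 32-bit sketch where
   UNITS_PER_WORD == 4 (offsets from the biased %sp):

       +  0 .. 63   16-word register save area, left untouched
       + 64         REG spilled here
       + 68         REG2 spilled here, when given

   SIZE is that span rounded up by SPARC_STACK_ALIGN, and the stack
   pointer is bumped down and back up around SEQ.  */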
10983
10984 /* Output the assembler code for a thunk function. THUNK_DECL is the
10985 declaration for the thunk function itself, FUNCTION is the decl for
10986 the target function. DELTA is an immediate constant offset to be
10987 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10988 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10989
10990 static void
10991 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10992 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10993 tree function)
10994 {
10995 rtx this_rtx, insn, funexp;
10996 unsigned int int_arg_first;
10997
10998 reload_completed = 1;
10999 epilogue_completed = 1;
11000
11001 emit_note (NOTE_INSN_PROLOGUE_END);
11002
11003 if (TARGET_FLAT)
11004 {
11005 sparc_leaf_function_p = 1;
11006
11007 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11008 }
11009 else if (flag_delayed_branch)
11010 {
11011 /* We will emit a regular sibcall below, so we need to instruct
11012 output_sibcall that we are in a leaf function. */
11013 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11014
11015 /* This will cause final.c to invoke leaf_renumber_regs so we
11016 must behave as if we were in a not-yet-leafified function. */
11017 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11018 }
11019 else
11020 {
11021 /* We will emit the sibcall manually below, so we will need to
11022 manually spill non-leaf registers. */
11023 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11024
11025 /* We really are in a leaf function. */
11026 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11027 }
11028
11029 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11030 returns a structure, the structure return pointer is there instead. */
11031 if (TARGET_ARCH64
11032 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11033 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11034 else
11035 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11036
11037 /* Add DELTA. When possible use a plain add, otherwise load it into
11038 a register first. */
11039 if (delta)
11040 {
11041 rtx delta_rtx = GEN_INT (delta);
11042
11043 if (! SPARC_SIMM13_P (delta))
11044 {
11045 rtx scratch = gen_rtx_REG (Pmode, 1);
11046 emit_move_insn (scratch, delta_rtx);
11047 delta_rtx = scratch;
11048 }
11049
11050 /* THIS_RTX += DELTA. */
11051 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11052 }
11053
11054 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11055 if (vcall_offset)
11056 {
11057 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11058 rtx scratch = gen_rtx_REG (Pmode, 1);
11059
11060 gcc_assert (vcall_offset < 0);
11061
11062 /* SCRATCH = *THIS_RTX. */
11063 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11064
11065 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11066 may not have any available scratch register at this point. */
11067 if (SPARC_SIMM13_P (vcall_offset))
11068 ;
11069 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11070 else if (! fixed_regs[5]
11071 /* The below sequence is made up of at least 2 insns,
11072 while the default method may need only one. */
11073 && vcall_offset < -8192)
11074 {
11075 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11076 emit_move_insn (scratch2, vcall_offset_rtx);
11077 vcall_offset_rtx = scratch2;
11078 }
11079 else
11080 {
11081 rtx increment = GEN_INT (-4096);
11082
11083 /* VCALL_OFFSET is a negative number whose typical range can be
11084 estimated as -32768..0 in 32-bit mode. In almost all cases
11085 it is therefore cheaper to emit multiple add insns than
11086 spilling and loading the constant into a register (at least
11087 6 insns). */
11088 while (! SPARC_SIMM13_P (vcall_offset))
11089 {
11090 emit_insn (gen_add2_insn (scratch, increment));
11091 vcall_offset += 4096;
11092 }
11093 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11094 }
11095
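/* For example, with VCALL_OFFSET == -20000 and %g5 unavailable, the
   loop above emits four add-immediate insns of -4096 each, leaving a
   residual offset of -3616 which satisfies SPARC_SIMM13_P and is
   folded directly into the memory access below.  */
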
11096 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11097 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11098 gen_rtx_PLUS (Pmode,
11099 scratch,
11100 vcall_offset_rtx)));
11101
11102 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11103 emit_insn (gen_add2_insn (this_rtx, scratch));
11104 }
11105
11106 /* Generate a tail call to the target function. */
11107 if (! TREE_USED (function))
11108 {
11109 assemble_external (function);
11110 TREE_USED (function) = 1;
11111 }
11112 funexp = XEXP (DECL_RTL (function), 0);
11113
11114 if (flag_delayed_branch)
11115 {
11116 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11117 insn = emit_call_insn (gen_sibcall (funexp));
11118 SIBLING_CALL_P (insn) = 1;
11119 }
11120 else
11121 {
11122 /* The hoops we have to jump through in order to generate a sibcall
11123 without using delay slots... */
11124 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11125
11126 if (flag_pic)
11127 {
11128 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11129 start_sequence ();
11130 load_got_register (); /* clobbers %o7 */
11131 scratch = sparc_legitimize_pic_address (funexp, scratch);
11132 seq = get_insns ();
11133 end_sequence ();
11134 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11135 }
11136 else if (TARGET_ARCH32)
11137 {
11138 emit_insn (gen_rtx_SET (VOIDmode,
11139 scratch,
11140 gen_rtx_HIGH (SImode, funexp)));
11141 emit_insn (gen_rtx_SET (VOIDmode,
11142 scratch,
11143 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11144 }
11145 else /* TARGET_ARCH64 */
11146 {
11147 switch (sparc_cmodel)
11148 {
11149 case CM_MEDLOW:
11150 case CM_MEDMID:
11151 /* The destination can serve as a temporary. */
11152 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11153 break;
11154
11155 case CM_MEDANY:
11156 case CM_EMBMEDANY:
11157 /* The destination cannot serve as a temporary. */
11158 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11159 start_sequence ();
11160 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11161 seq = get_insns ();
11162 end_sequence ();
11163 emit_and_preserve (seq, spill_reg, 0);
11164 break;
11165
11166 default:
11167 gcc_unreachable ();
11168 }
11169 }
11170
11171 emit_jump_insn (gen_indirect_jump (scratch));
11172 }
11173
11174 emit_barrier ();
11175
11176 /* Run just enough of rest_of_compilation to get the insns emitted.
11177 There's not really enough bulk here to make other passes such as
11178 instruction scheduling worthwhile. Note that use_thunk calls
11179 assemble_start_function and assemble_end_function. */
11180 insn = get_insns ();
11181 shorten_branches (insn);
11182 final_start_function (insn, file, 1);
11183 final (insn, file, 1);
11184 final_end_function ();
11185
11186 reload_completed = 0;
11187 epilogue_completed = 0;
11188 }
11189
11190 /* Return true if sparc_output_mi_thunk would be able to output the
11191 assembler code for the thunk function specified by the arguments
11192 it is passed, and false otherwise. */
11193 static bool
11194 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11195 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11196 HOST_WIDE_INT vcall_offset,
11197 const_tree function ATTRIBUTE_UNUSED)
11198 {
11199 /* Bound the loop used in the default method above. */
11200 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11201 }
11202
11203 /* How to allocate a 'struct machine_function'. */
11204
11205 static struct machine_function *
11206 sparc_init_machine_status (void)
11207 {
11208 return ggc_alloc_cleared_machine_function ();
11209 }
11210
11211 /* Locate some local-dynamic symbol still in use by this function
11212 so that we can print its name in local-dynamic base patterns. */
11213
11214 static const char *
11215 get_some_local_dynamic_name (void)
11216 {
11217 rtx insn;
11218
11219 if (cfun->machine->some_ld_name)
11220 return cfun->machine->some_ld_name;
11221
11222 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11223 if (INSN_P (insn)
11224 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11225 return cfun->machine->some_ld_name;
11226
11227 gcc_unreachable ();
11228 }
11229
11230 static int
11231 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11232 {
11233 rtx x = *px;
11234
11235 if (x
11236 && GET_CODE (x) == SYMBOL_REF
11237 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11238 {
11239 cfun->machine->some_ld_name = XSTR (x, 0);
11240 return 1;
11241 }
11242
11243 return 0;
11244 }
11245
11246 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11247 We need to emit DTP-relative relocations. */
11248
11249 static void
11250 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11251 {
11252 switch (size)
11253 {
11254 case 4:
11255 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11256 break;
11257 case 8:
11258 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11259 break;
11260 default:
11261 gcc_unreachable ();
11262 }
11263 output_addr_const (file, x);
11264 fputs (")", file);
11265 }
11266
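/* For instance, for a 32-bit DTP-relative reference to a symbol foo,
   the code above emits

       .word   %r_tls_dtpoff32(foo)

   which becomes an R_SPARC_TLS_DTPOFF32 relocation, i.e. foo's offset
   from the start of its module's dynamic TLS block.  */
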
11267 /* Do whatever processing is required at the end of a file. */
11268
11269 static void
11270 sparc_file_end (void)
11271 {
11272 /* If we need to emit the special GOT helper function, do so now. */
11273 if (got_helper_rtx)
11274 {
11275 const char *name = XSTR (got_helper_rtx, 0);
11276 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11277 #ifdef DWARF2_UNWIND_INFO
11278 bool do_cfi;
11279 #endif
11280
11281 if (USE_HIDDEN_LINKONCE)
11282 {
11283 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11284 get_identifier (name),
11285 build_function_type_list (void_type_node,
11286 NULL_TREE));
11287 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11288 NULL_TREE, void_type_node);
11289 TREE_PUBLIC (decl) = 1;
11290 TREE_STATIC (decl) = 1;
11291 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11292 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11293 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11294 resolve_unique_section (decl, 0, flag_function_sections);
11295 allocate_struct_function (decl, true);
11296 cfun->is_thunk = 1;
11297 current_function_decl = decl;
11298 init_varasm_status ();
11299 assemble_start_function (decl, name);
11300 }
11301 else
11302 {
11303 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11304 switch_to_section (text_section);
11305 if (align > 0)
11306 ASM_OUTPUT_ALIGN (asm_out_file, align);
11307 ASM_OUTPUT_LABEL (asm_out_file, name);
11308 }
11309
11310 #ifdef DWARF2_UNWIND_INFO
11311 do_cfi = dwarf2out_do_cfi_asm ();
11312 if (do_cfi)
11313 fprintf (asm_out_file, "\t.cfi_startproc\n");
11314 #endif
11315 if (flag_delayed_branch)
11316 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11317 reg_name, reg_name);
11318 else
11319 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11320 reg_name, reg_name);
11321 #ifdef DWARF2_UNWIND_INFO
11322 if (do_cfi)
11323 fprintf (asm_out_file, "\t.cfi_endproc\n");
11324 #endif
11325 }
11326
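/* To make the output above concrete: with %l7 as the GOT register and
   delayed branches enabled, the helper (typically named
   __sparc_get_pc_thunk.l7) consists of

       jmp     %o7+8
        add    %o7, %l7, %l7

   so the add in the delay slot turns the pc-relative GOT displacement
   preloaded into %l7 by the caller into the absolute GOT address, and
   the jmp returns past the call.  */
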
11327 if (NEED_INDICATE_EXEC_STACK)
11328 file_end_indicate_exec_stack ();
11329
11330 #ifdef TARGET_SOLARIS
11331 solaris_file_end ();
11332 #endif
11333 }
11334
11335 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11336 /* Implement TARGET_MANGLE_TYPE. */
11337
11338 static const char *
11339 sparc_mangle_type (const_tree type)
11340 {
11341 if (!TARGET_64BIT
11342 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11343 && TARGET_LONG_DOUBLE_128)
11344 return "g";
11345
11346 /* For all other types, use normal C++ mangling. */
11347 return NULL;
11348 }
11349 #endif
11350
11351 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11352 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11353 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11354
11355 void
11356 sparc_emit_membar_for_model (enum memmodel model,
11357 int load_store, int before_after)
11358 {
11359 /* Bits for the MEMBAR mmask field. */
11360 const int LoadLoad = 1;
11361 const int StoreLoad = 2;
11362 const int LoadStore = 4;
11363 const int StoreStore = 8;
11364
11365 int mm = 0, implied = 0;
11366
11367 switch (sparc_memory_model)
11368 {
11369 case SMM_SC:
11370 /* Sequential Consistency. All memory transactions are immediately
11371 visible in sequential execution order. No barriers needed. */
11372 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11373 break;
11374
11375 case SMM_TSO:
11376 /* Total Store Ordering: all memory transactions with store semantics
11377 are followed by an implied StoreStore. */
11378 implied |= StoreStore;
11379
11380 /* If we're not looking for a raw barrier (before+after), then atomic
11381 operations get the benefit of being both load and store. */
11382 if (load_store == 3 && before_after == 1)
11383 implied |= StoreLoad;
11384 /* FALLTHRU */
11385
11386 case SMM_PSO:
11387 /* Partial Store Ordering: all memory transactions with load semantics
11388 are followed by an implied LoadLoad | LoadStore. */
11389 implied |= LoadLoad | LoadStore;
11390
11391 /* If we're not looking for a raw barrier (before+after), then atomic
11392 operations get the benefit of being both load and store. */
11393 if (load_store == 3 && before_after == 2)
11394 implied |= StoreLoad | StoreStore;
11395 /* FALLTHRU */
11396
11397 case SMM_RMO:
11398 /* Relaxed Memory Ordering: no implicit bits. */
11399 break;
11400
11401 default:
11402 gcc_unreachable ();
11403 }
11404
11405 if (before_after & 1)
11406 {
11407 if (model == MEMMODEL_RELEASE
11408 || model == MEMMODEL_ACQ_REL
11409 || model == MEMMODEL_SEQ_CST)
11410 {
11411 if (load_store & 1)
11412 mm |= LoadLoad | StoreLoad;
11413 if (load_store & 2)
11414 mm |= LoadStore | StoreStore;
11415 }
11416 }
11417 if (before_after & 2)
11418 {
11419 if (model == MEMMODEL_ACQUIRE
11420 || model == MEMMODEL_ACQ_REL
11421 || model == MEMMODEL_SEQ_CST)
11422 {
11423 if (load_store & 1)
11424 mm |= LoadLoad | LoadStore;
11425 if (load_store & 2)
11426 mm |= StoreLoad | StoreStore;
11427 }
11428 }
11429
11430 /* Remove the bits implied by the system memory model. */
11431 mm &= ~implied;
11432
11433 /* For raw barriers (before+after), always emit a barrier.
11434 This will become a compile-time barrier if needed. */
11435 if (mm || before_after == 3)
11436 emit_insn (gen_membar (GEN_INT (mm)));
11437 }
11438
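/* Worked example: under the default TSO model, a sequentially
   consistent full fence (load_store == 3, before_after == 3) computes
   mm = LoadLoad | StoreLoad | LoadStore | StoreStore while
   implied = LoadLoad | LoadStore | StoreStore, so the only barrier
   actually emitted is the classic

       membar  #StoreLoad

   whereas under SMM_SC the mask is empty and the membar degenerates
   into a compile-time barrier.  */
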
11439 /* Expand code to perform an 8- or 16-bit compare and swap by doing a 32-bit
11440 compare and swap on the word containing the byte or half-word. */
11441
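/* As a host-side illustration of the address arithmetic used below
   (an expository sketch, not compiler code), the QImode case on this
   big-endian target locates the byte within its containing word as

       #include <stdint.h>

       static void
       locate_byte (uintptr_t addr, uintptr_t *word_addr, unsigned *shift)
       {
         *word_addr = addr & ~(uintptr_t) 3;   // aligned 32-bit word
         *shift = ((addr & 3) ^ 3) << 3;       // big-endian byte slot
       }

   (for HImode the XOR constant is 2 instead of 3).  The loop below
   then keeps retrying the 32-bit CAS for as long as the failure was
   caused by the bytes outside the mask changing underneath it.  */
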
11442 static void
11443 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11444 rtx oldval, rtx newval)
11445 {
11446 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11447 rtx addr = gen_reg_rtx (Pmode);
11448 rtx off = gen_reg_rtx (SImode);
11449 rtx oldv = gen_reg_rtx (SImode);
11450 rtx newv = gen_reg_rtx (SImode);
11451 rtx oldvalue = gen_reg_rtx (SImode);
11452 rtx newvalue = gen_reg_rtx (SImode);
11453 rtx res = gen_reg_rtx (SImode);
11454 rtx resv = gen_reg_rtx (SImode);
11455 rtx memsi, val, mask, end_label, loop_label, cc;
11456
11457 emit_insn (gen_rtx_SET (VOIDmode, addr,
11458 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11459
11460 if (Pmode != SImode)
11461 addr1 = gen_lowpart (SImode, addr1);
11462 emit_insn (gen_rtx_SET (VOIDmode, off,
11463 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11464
11465 memsi = gen_rtx_MEM (SImode, addr);
11466 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11467 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11468
11469 val = copy_to_reg (memsi);
11470
11471 emit_insn (gen_rtx_SET (VOIDmode, off,
11472 gen_rtx_XOR (SImode, off,
11473 GEN_INT (GET_MODE (mem) == QImode
11474 ? 3 : 2))));
11475
11476 emit_insn (gen_rtx_SET (VOIDmode, off,
11477 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11478
11479 if (GET_MODE (mem) == QImode)
11480 mask = force_reg (SImode, GEN_INT (0xff));
11481 else
11482 mask = force_reg (SImode, GEN_INT (0xffff));
11483
11484 emit_insn (gen_rtx_SET (VOIDmode, mask,
11485 gen_rtx_ASHIFT (SImode, mask, off)));
11486
11487 emit_insn (gen_rtx_SET (VOIDmode, val,
11488 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11489 val)));
11490
11491 oldval = gen_lowpart (SImode, oldval);
11492 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11493 gen_rtx_ASHIFT (SImode, oldval, off)));
11494
11495 newval = gen_lowpart_common (SImode, newval);
11496 emit_insn (gen_rtx_SET (VOIDmode, newv,
11497 gen_rtx_ASHIFT (SImode, newval, off)));
11498
11499 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11500 gen_rtx_AND (SImode, oldv, mask)));
11501
11502 emit_insn (gen_rtx_SET (VOIDmode, newv,
11503 gen_rtx_AND (SImode, newv, mask)));
11504
11505 end_label = gen_label_rtx ();
11506 loop_label = gen_label_rtx ();
11507 emit_label (loop_label);
11508
11509 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11510 gen_rtx_IOR (SImode, oldv, val)));
11511
11512 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11513 gen_rtx_IOR (SImode, newv, val)));
11514
11515 emit_move_insn (bool_result, const1_rtx);
11516
11517 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11518
11519 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11520
11521 emit_insn (gen_rtx_SET (VOIDmode, resv,
11522 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11523 res)));
11524
11525 emit_move_insn (bool_result, const0_rtx);
11526
11527 cc = gen_compare_reg_1 (NE, resv, val);
11528 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11529
11530 /* Use cbranchcc4 to separate the compare and branch! */
11531 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11532 cc, const0_rtx, loop_label));
11533
11534 emit_label (end_label);
11535
11536 emit_insn (gen_rtx_SET (VOIDmode, res,
11537 gen_rtx_AND (SImode, res, mask)));
11538
11539 emit_insn (gen_rtx_SET (VOIDmode, res,
11540 gen_rtx_LSHIFTRT (SImode, res, off)));
11541
11542 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11543 }
11544
11545 /* Expand code to perform a compare-and-swap. */
11546
11547 void
11548 sparc_expand_compare_and_swap (rtx operands[])
11549 {
11550 rtx bval, retval, mem, oldval, newval;
11551 enum machine_mode mode;
11552 enum memmodel model;
11553
11554 bval = operands[0];
11555 retval = operands[1];
11556 mem = operands[2];
11557 oldval = operands[3];
11558 newval = operands[4];
11559 model = (enum memmodel) INTVAL (operands[6]);
11560 mode = GET_MODE (mem);
11561
11562 sparc_emit_membar_for_model (model, 3, 1);
11563
11564 if (reg_overlap_mentioned_p (retval, oldval))
11565 oldval = copy_to_reg (oldval);
11566
11567 if (mode == QImode || mode == HImode)
11568 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11569 else
11570 {
11571 rtx (*gen) (rtx, rtx, rtx, rtx);
11572 rtx x;
11573
11574 if (mode == SImode)
11575 gen = gen_atomic_compare_and_swapsi_1;
11576 else
11577 gen = gen_atomic_compare_and_swapdi_1;
11578 emit_insn (gen (retval, mem, oldval, newval));
11579
11580 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11581 if (x != bval)
11582 convert_move (bval, x, 1);
11583 }
11584
11585 sparc_emit_membar_for_model (model, 3, 2);
11586 }
11587
11588 void
11589 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11590 {
11591 rtx t_1, t_2, t_3;
11592
11593 sel = gen_lowpart (DImode, sel);
11594 switch (vmode)
11595 {
11596 case V2SImode:
11597 /* inp = xxxxxxxAxxxxxxxB */
11598 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11599 NULL_RTX, 1, OPTAB_DIRECT);
11600 /* t_1 = ....xxxxxxxAxxxx */
11601 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11602 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11603 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11604 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11605 /* sel = .......B */
11606 /* t_1 = ...A.... */
11607 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11608 /* sel = ...A...B */
11609 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11610 /* sel = AAAABBBB * 4 */
11611 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11612 /* sel = { A*4, A*4+1, A*4+2, ... } */
11613 break;
11614
11615 case V4HImode:
11616 /* inp = xxxAxxxBxxxCxxxD */
11617 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11618 NULL_RTX, 1, OPTAB_DIRECT);
11619 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11620 NULL_RTX, 1, OPTAB_DIRECT);
11621 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11622 NULL_RTX, 1, OPTAB_DIRECT);
11623 /* t_1 = ..xxxAxxxBxxxCxx */
11624 /* t_2 = ....xxxAxxxBxxxC */
11625 /* t_3 = ......xxxAxxxBxx */
11626 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11627 GEN_INT (0x07),
11628 NULL_RTX, 1, OPTAB_DIRECT);
11629 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11630 GEN_INT (0x0700),
11631 NULL_RTX, 1, OPTAB_DIRECT);
11632 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11633 GEN_INT (0x070000),
11634 NULL_RTX, 1, OPTAB_DIRECT);
11635 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11636 GEN_INT (0x07000000),
11637 NULL_RTX, 1, OPTAB_DIRECT);
11638 /* sel = .......D */
11639 /* t_1 = .....C.. */
11640 /* t_2 = ...B.... */
11641 /* t_3 = .A...... */
11642 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11643 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11644 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11645 /* sel = .A.B.C.D */
11646 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11647 /* sel = AABBCCDD * 2 */
11648 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11649 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11650 break;
11651
11652 case V8QImode:
11653 /* input = xAxBxCxDxExFxGxH */
11654 sel = expand_simple_binop (DImode, AND, sel,
11655 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11656 | 0x0f0f0f0f),
11657 NULL_RTX, 1, OPTAB_DIRECT);
11658 /* sel = .A.B.C.D.E.F.G.H */
11659 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11660 NULL_RTX, 1, OPTAB_DIRECT);
11661 /* t_1 = ..A.B.C.D.E.F.G. */
11662 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11663 NULL_RTX, 1, OPTAB_DIRECT);
11664 /* sel = .AABBCCDDEEFFGGH */
11665 sel = expand_simple_binop (DImode, AND, sel,
11666 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11667 | 0xff00ff),
11668 NULL_RTX, 1, OPTAB_DIRECT);
11669 /* sel = ..AB..CD..EF..GH */
11670 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11671 NULL_RTX, 1, OPTAB_DIRECT);
11672 /* t_1 = ....AB..CD..EF.. */
11673 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11674 NULL_RTX, 1, OPTAB_DIRECT);
11675 /* sel = ..ABABCDCDEFEFGH */
11676 sel = expand_simple_binop (DImode, AND, sel,
11677 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11678 NULL_RTX, 1, OPTAB_DIRECT);
11679 /* sel = ....ABCD....EFGH */
11680 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11681 NULL_RTX, 1, OPTAB_DIRECT);
11682 /* t_1 = ........ABCD.... */
11683 sel = gen_lowpart (SImode, sel);
11684 t_1 = gen_lowpart (SImode, t_1);
11685 break;
11686
11687 default:
11688 gcc_unreachable ();
11689 }
11690
11691 /* Always perform the final addition/merge within the bmask insn. */
11692 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11693 }
11694
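/* A host-side model of the V8QImode nibble packing above (a sketch
   assuming a 64-bit unsigned type; not compiler code):

       #include <stdint.h>

       static uint32_t
       pack_sel_v8qi (uint64_t sel)
       {
         sel &= 0x0f0f0f0f0f0f0f0fULL;   // .A.B.C.D.E.F.G.H
         sel |= sel >> 4;                // .AABBCCDDEEFFGGH
         sel &= 0x00ff00ff00ff00ffULL;   // ..AB..CD..EF..GH
         sel |= sel >> 8;                // ..ABABCDCDEFEFGH
         sel &= 0x0000ffff0000ffffULL;   // ....ABCD....EFGH
         return (uint32_t) sel + (uint32_t) (sel >> 16);   // ABCDEFGH
       }

   The final addition is exactly the merge that the bmask insn emitted
   above performs on SEL and T_1.  */
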
11695 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11696
11697 static bool
11698 sparc_frame_pointer_required (void)
11699 {
11700 /* If the stack pointer is dynamically modified in the function, it cannot
11701 serve as the frame pointer. */
11702 if (cfun->calls_alloca)
11703 return true;
11704
11705 /* If the function receives nonlocal gotos, it needs to save the frame
11706 pointer in the nonlocal_goto_save_area object. */
11707 if (cfun->has_nonlocal_label)
11708 return true;
11709
11710 /* In flat mode, that's it. */
11711 if (TARGET_FLAT)
11712 return false;
11713
11714 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11715 return !(crtl->is_leaf && only_leaf_regs_used ());
11716 }
11717
11718 /* The way this is structured, we can't eliminate SFP in favor of SP
11719 if the frame pointer is required: we want to use the SFP->HFP elimination
11720 in that case. But the test in update_eliminables doesn't know we are
11721 assuming below that we only do the former elimination. */
11722
11723 static bool
11724 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11725 {
11726 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11727 }
11728
11729 /* Return the hard frame pointer directly to bypass the stack bias. */
11730
11731 static rtx
11732 sparc_builtin_setjmp_frame_value (void)
11733 {
11734 return hard_frame_pointer_rtx;
11735 }
11736
11737 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11738 they won't be allocated. */
11739
11740 static void
11741 sparc_conditional_register_usage (void)
11742 {
11743 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11744 {
11745 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11746 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11747 }
11748 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
11749 then honor it. */
11750 if (TARGET_ARCH32 && fixed_regs[5])
11751 fixed_regs[5] = 1;
11752 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11753 fixed_regs[5] = 0;
11754 if (! TARGET_V9)
11755 {
11756 int regno;
11757 for (regno = SPARC_FIRST_V9_FP_REG;
11758 regno <= SPARC_LAST_V9_FP_REG;
11759 regno++)
11760 fixed_regs[regno] = 1;
11761 /* %fcc0 is used by v8 and v9. */
11762 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11763 regno <= SPARC_LAST_V9_FCC_REG;
11764 regno++)
11765 fixed_regs[regno] = 1;
11766 }
11767 if (! TARGET_FPU)
11768 {
11769 int regno;
11770 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11771 fixed_regs[regno] = 1;
11772 }
11773 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
11774 then honor it. Likewise with g3 and g4. */
11775 if (fixed_regs[2] == 2)
11776 fixed_regs[2] = ! TARGET_APP_REGS;
11777 if (fixed_regs[3] == 2)
11778 fixed_regs[3] = ! TARGET_APP_REGS;
11779 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11780 fixed_regs[4] = ! TARGET_APP_REGS;
11781 else if (TARGET_CM_EMBMEDANY)
11782 fixed_regs[4] = 1;
11783 else if (fixed_regs[4] == 2)
11784 fixed_regs[4] = 0;
11785 if (TARGET_FLAT)
11786 {
11787 int regno;
11788 /* Disable leaf functions. */
11789 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11790 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11791 leaf_reg_remap [regno] = regno;
11792 }
11793 if (TARGET_VIS)
11794 global_regs[SPARC_GSR_REG] = 1;
11795 }
11796
11797 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11798
11799 - We can't load constants into FP registers.
11800 - We can't load FP constants into integer registers when soft-float,
11801 because there is no soft-float pattern with a r/F constraint.
11802 - We can't load FP constants into integer registers for TFmode unless
11803 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11804 - Try and reload integer constants (symbolic or otherwise) back into
11805 registers directly, rather than having them dumped to memory. */
11806
11807 static reg_class_t
11808 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11809 {
11810 enum machine_mode mode = GET_MODE (x);
11811 if (CONSTANT_P (x))
11812 {
11813 if (FP_REG_CLASS_P (rclass)
11814 || rclass == GENERAL_OR_FP_REGS
11815 || rclass == GENERAL_OR_EXTRA_FP_REGS
11816 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11817 || (mode == TFmode && ! const_zero_operand (x, mode)))
11818 return NO_REGS;
11819
11820 if (GET_MODE_CLASS (mode) == MODE_INT)
11821 return GENERAL_REGS;
11822
11823 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11824 {
11825 if (! FP_REG_CLASS_P (rclass)
11826 || !(const_zero_operand (x, mode)
11827 || const_all_ones_operand (x, mode)))
11828 return NO_REGS;
11829 }
11830 }
11831
11832 if (TARGET_VIS3
11833 && ! TARGET_ARCH64
11834 && (rclass == EXTRA_FP_REGS
11835 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11836 {
11837 int regno = true_regnum (x);
11838
11839 if (SPARC_INT_REG_P (regno))
11840 return (rclass == EXTRA_FP_REGS
11841 ? FP_REGS : GENERAL_OR_FP_REGS);
11842 }
11843
11844 return rclass;
11845 }
11846
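/* For instance, a request to reload the TFmode constant 1.0L comes
   back as NO_REGS above, since no movtf alternative accepts it, so
   reload leaves it in memory; the TFmode constant 0.0L passes the
   check and the requested class is returned unchanged.  */
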
11847 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11848 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11849
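/* To illustrate the general case below (distinct register operands,
   OPCODE assumed to be "mulx" for the example), the emitted V8+
   sequence is of the form

       srl     %L1, 0, %L1     ! zero-extend low words when needed
       srl     %L2, 0, %L2
       sllx    %H1, 32, %3     ! glue each input into a 64-bit scratch
       sllx    %H2, 32, %4
       or      %L1, %3, %3
       or      %L2, %4, %4
       mulx    %3, %4, %3      ! full 64-bit multiply
       srlx    %3, 32, %H0     ! split the product back into halves
       mov     %3, %L0

   where %3 and %4 are scratch operands supplied by the pattern.  */
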
11850 const char *
11851 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11852 {
11853 char mulstr[32];
11854
11855 gcc_assert (! TARGET_ARCH64);
11856
11857 if (sparc_check_64 (operands[1], insn) <= 0)
11858 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11859 if (which_alternative == 1)
11860 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11861 if (GET_CODE (operands[2]) == CONST_INT)
11862 {
11863 if (which_alternative == 1)
11864 {
11865 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11866 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11867 output_asm_insn (mulstr, operands);
11868 return "srlx\t%L0, 32, %H0";
11869 }
11870 else
11871 {
11872 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11873 output_asm_insn ("or\t%L1, %3, %3", operands);
11874 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11875 output_asm_insn (mulstr, operands);
11876 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11877 return "mov\t%3, %L0";
11878 }
11879 }
11880 else if (rtx_equal_p (operands[1], operands[2]))
11881 {
11882 if (which_alternative == 1)
11883 {
11884 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11885 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11886 output_asm_insn (mulstr, operands);
11887 return "srlx\t%L0, 32, %H0";
11888 }
11889 else
11890 {
11891 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11892 output_asm_insn ("or\t%L1, %3, %3", operands);
11893 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11894 output_asm_insn (mulstr, operands);
11895 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11896 return "mov\t%3, %L0";
11897 }
11898 }
11899 if (sparc_check_64 (operands[2], insn) <= 0)
11900 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11901 if (which_alternative == 1)
11902 {
11903 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11904 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11905 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11906 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11907 output_asm_insn (mulstr, operands);
11908 return "srlx\t%L0, 32, %H0";
11909 }
11910 else
11911 {
11912 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11913 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11914 output_asm_insn ("or\t%L1, %3, %3", operands);
11915 output_asm_insn ("or\t%L2, %4, %4", operands);
11916 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11917 output_asm_insn (mulstr, operands);
11918 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11919 return "mov\t%3, %L0";
11920 }
11921 }
11922
11923 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11924 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
11925 and INNER_MODE are the modes describing TARGET. */
11926
11927 static void
11928 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11929 enum machine_mode inner_mode)
11930 {
11931 rtx t1, final_insn, sel;
11932 int bmask;
11933
11934 t1 = gen_reg_rtx (mode);
11935
11936 elt = convert_modes (SImode, inner_mode, elt, true);
11937 emit_move_insn (gen_lowpart(SImode, t1), elt);
11938
11939 switch (mode)
11940 {
11941 case V2SImode:
11942 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11943 bmask = 0x45674567;
11944 break;
11945 case V4HImode:
11946 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11947 bmask = 0x67676767;
11948 break;
11949 case V8QImode:
11950 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11951 bmask = 0x77777777;
11952 break;
11953 default:
11954 gcc_unreachable ();
11955 }
11956
11957 sel = force_reg (SImode, GEN_INT (bmask));
11958 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
11959 emit_insn (final_insn);
11960 }
11961
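/* The bmask constants above encode, per result byte, which byte of the
   concatenated source pair BSHUFFLE should copy.  The SImode value of
   ELT lands in bytes 4-7 of T1 on this big-endian target, hence
   0x45674567 duplicates the 4-byte element twice, 0x67676767
   duplicates the halfword in bytes 6-7 four times, and 0x77777777
   duplicates the byte in byte 7 eight times.  */
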
11962 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11963 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
11964
11965 static void
11966 vector_init_fpmerge (rtx target, rtx elt)
11967 {
11968 rtx t1, t2, t2_low, t3, t3_low;
11969
11970 t1 = gen_reg_rtx (V4QImode);
11971 elt = convert_modes (SImode, QImode, elt, true);
11972 emit_move_insn (gen_lowpart (SImode, t1), elt);
11973
11974 t2 = gen_reg_rtx (V8QImode);
11975 t2_low = gen_lowpart (V4QImode, t2);
11976 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11977
11978 t3 = gen_reg_rtx (V8QImode);
11979 t3_low = gen_lowpart (V4QImode, t3);
11980 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
11981
11982 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
11983 }
11984
11985 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11986 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
11987
11988 static void
11989 vector_init_faligndata (rtx target, rtx elt)
11990 {
11991 rtx t1 = gen_reg_rtx (V4HImode);
11992 int i;
11993
11994 elt = convert_modes (SImode, HImode, elt, true);
11995 emit_move_insn (gen_lowpart (SImode, t1), elt);
11996
11997 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
11998 force_reg (SImode, GEN_INT (6)),
11999 const0_rtx));
12000
12001 for (i = 0; i < 4; i++)
12002 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12003 }
12004
12005 /* Emit code to initialize TARGET to values for individual fields VALS. */
12006
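/* An overview of the dispatch below: an all-constant vector becomes a
   single CONST_VECTOR move; a vector with one element, or with two
   word-sized elements, is built with plain word moves; an 8-byte
   splat of a variable element uses the VIS2 bshuffle path when
   available, else FPMERGE/FALIGNDATA; everything else is spilled
   element by element to a stack temporary and loaded back whole.  */
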
12007 void
12008 sparc_expand_vector_init (rtx target, rtx vals)
12009 {
12010 const enum machine_mode mode = GET_MODE (target);
12011 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12012 const int n_elts = GET_MODE_NUNITS (mode);
12013 int i, n_var = 0;
12014 bool all_same;
12015 rtx mem;
12016
12017 all_same = true;
12018 for (i = 0; i < n_elts; i++)
12019 {
12020 rtx x = XVECEXP (vals, 0, i);
12021 if (!CONSTANT_P (x))
12022 n_var++;
12023
12024 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12025 all_same = false;
12026 }
12027
12028 if (n_var == 0)
12029 {
12030 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12031 return;
12032 }
12033
12034 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12035 {
12036 if (GET_MODE_SIZE (inner_mode) == 4)
12037 {
12038 emit_move_insn (gen_lowpart (SImode, target),
12039 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12040 return;
12041 }
12042 else if (GET_MODE_SIZE (inner_mode) == 8)
12043 {
12044 emit_move_insn (gen_lowpart (DImode, target),
12045 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12046 return;
12047 }
12048 }
12049 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12050 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12051 {
12052 emit_move_insn (gen_highpart (word_mode, target),
12053 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12054 emit_move_insn (gen_lowpart (word_mode, target),
12055 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12056 return;
12057 }
12058
12059 if (all_same && GET_MODE_SIZE (mode) == 8)
12060 {
12061 if (TARGET_VIS2)
12062 {
12063 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12064 return;
12065 }
12066 if (mode == V8QImode)
12067 {
12068 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12069 return;
12070 }
12071 if (mode == V4HImode)
12072 {
12073 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12074 return;
12075 }
12076 }
12077
12078 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12079 for (i = 0; i < n_elts; i++)
12080 emit_move_insn (adjust_address_nv (mem, inner_mode,
12081 i * GET_MODE_SIZE (inner_mode)),
12082 XVECEXP (vals, 0, i));
12083 emit_move_insn (target, mem);
12084 }
12085
12086 /* Implement TARGET_SECONDARY_RELOAD. */
12087
12088 static reg_class_t
12089 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12090 enum machine_mode mode, secondary_reload_info *sri)
12091 {
12092 enum reg_class rclass = (enum reg_class) rclass_i;
12093
12094 sri->icode = CODE_FOR_nothing;
12095 sri->extra_cost = 0;
12096
12097 /* We need a temporary when loading/storing a HImode/QImode value
12098 between memory and the FPU registers. This can happen when combine puts
12099 a paradoxical subreg in a float/fix conversion insn. */
12100 if (FP_REG_CLASS_P (rclass)
12101 && (mode == HImode || mode == QImode)
12102 && (GET_CODE (x) == MEM
12103 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12104 && true_regnum (x) == -1)))
12105 return GENERAL_REGS;
12106
12107 /* On 32-bit we need a temporary when loading/storing a DFmode value
12108 between unaligned memory and the upper FPU registers. */
12109 if (TARGET_ARCH32
12110 && rclass == EXTRA_FP_REGS
12111 && mode == DFmode
12112 && GET_CODE (x) == MEM
12113 && ! mem_min_alignment (x, 8))
12114 return FP_REGS;
12115
12116 if (((TARGET_CM_MEDANY
12117 && symbolic_operand (x, mode))
12118 || (TARGET_CM_EMBMEDANY
12119 && text_segment_operand (x, mode)))
12120 && ! flag_pic)
12121 {
12122 if (in_p)
12123 sri->icode = direct_optab_handler (reload_in_optab, mode);
12124 else
12125 sri->icode = direct_optab_handler (reload_out_optab, mode);
12126 return NO_REGS;
12127 }
12128
12129 if (TARGET_VIS3 && TARGET_ARCH32)
12130 {
12131 int regno = true_regnum (x);
12132
12133 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12134 to move 8-byte values in 4-byte pieces. This only works via
12135 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12136 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12137 an FP_REGS intermediate move. */
12138 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12139 || ((general_or_i64_p (rclass)
12140 || rclass == GENERAL_OR_FP_REGS)
12141 && SPARC_FP_REG_P (regno)))
12142 {
12143 sri->extra_cost = 2;
12144 return FP_REGS;
12145 }
12146 }
12147
12148 return NO_REGS;
12149 }
12150
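/* Two examples of the tests above: a QImode value moving between
   memory and an FP register must go through GENERAL_REGS, because the
   FPU has no 8/16-bit loads or stores; and on 32-bit an unaligned
   DFmode access involving the upper FP registers is routed through
   FP_REGS, since only the lower registers can be accessed in 4-byte
   halves.  */
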
12151 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12152 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12153
12154 bool
12155 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12156 {
12157 enum rtx_code rc = GET_CODE (operands[1]);
12158 enum machine_mode cmp_mode;
12159 rtx cc_reg, dst, cmp;
12160
12161 cmp = operands[1];
12162 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12163 return false;
12164
12165 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12166 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12167
12168 cmp_mode = GET_MODE (XEXP (cmp, 0));
12169 rc = GET_CODE (cmp);
12170
12171 dst = operands[0];
12172 if (! rtx_equal_p (operands[2], dst)
12173 && ! rtx_equal_p (operands[3], dst))
12174 {
12175 if (reg_overlap_mentioned_p (dst, cmp))
12176 dst = gen_reg_rtx (mode);
12177
12178 emit_move_insn (dst, operands[3]);
12179 }
12180 else if (operands[2] == dst)
12181 {
12182 operands[2] = operands[3];
12183
12184 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12185 rc = reverse_condition_maybe_unordered (rc);
12186 else
12187 rc = reverse_condition (rc);
12188 }
12189
12190 if (XEXP (cmp, 1) == const0_rtx
12191 && GET_CODE (XEXP (cmp, 0)) == REG
12192 && cmp_mode == DImode
12193 && v9_regcmp_p (rc))
12194 cc_reg = XEXP (cmp, 0);
12195 else
12196 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12197
12198 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12199
12200 emit_insn (gen_rtx_SET (VOIDmode, dst,
12201 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12202
12203 if (dst != operands[0])
12204 emit_move_insn (operands[0], dst);
12205
12206 return true;
12207 }
12208
12209 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12210 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12211 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12212 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12213 code to be used for the condition mask. */
12214
12215 void
12216 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12217 {
12218 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12219 enum rtx_code code = GET_CODE (operands[3]);
12220
12221 mask = gen_reg_rtx (Pmode);
12222 cop0 = operands[4];
12223 cop1 = operands[5];
12224 if (code == LT || code == GE)
12225 {
12226 rtx t;
12227
12228 code = swap_condition (code);
12229 t = cop0; cop0 = cop1; cop1 = t;
12230 }
12231
12232 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12233
12234 fcmp = gen_rtx_UNSPEC (Pmode,
12235 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12236 fcode);
12237
12238 cmask = gen_rtx_UNSPEC (DImode,
12239 gen_rtvec (2, mask, gsr),
12240 ccode);
12241
12242 bshuf = gen_rtx_UNSPEC (mode,
12243 gen_rtvec (3, operands[1], operands[2], gsr),
12244 UNSPEC_BSHUFFLE);
12245
12246 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12247 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12248
12249 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12250 }
12251
12252 /* On sparc, any mode which naturally allocates into the float
12253 registers should return 4 here. */
12254
12255 unsigned int
12256 sparc_regmode_natural_size (enum machine_mode mode)
12257 {
12258 int size = UNITS_PER_WORD;
12259
12260 if (TARGET_ARCH64)
12261 {
12262 enum mode_class mclass = GET_MODE_CLASS (mode);
12263
12264 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12265 size = 4;
12266 }
12267
12268 return size;
12269 }
12270
12271 /* Return TRUE if it is a good idea to tie two pseudo registers
12272 when one has mode MODE1 and one has mode MODE2.
12273 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12274 for any hard reg, then this must be FALSE for correct output.
12275
12276 For V9 we have to deal with the fact that only the lower 32 floating
12277 point registers are 32-bit addressable. */
12278
12279 bool
12280 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12281 {
12282 enum mode_class mclass1, mclass2;
12283 unsigned short size1, size2;
12284
12285 if (mode1 == mode2)
12286 return true;
12287
12288 mclass1 = GET_MODE_CLASS (mode1);
12289 mclass2 = GET_MODE_CLASS (mode2);
12290 if (mclass1 != mclass2)
12291 return false;
12292
12293 if (! TARGET_V9)
12294 return true;
12295
12296 /* Classes are the same and we are V9 so we have to deal with upper
12297 vs. lower floating point registers. If one of the modes is a
12298 4-byte mode, and the other is not, we have to mark them as not
12299 tieable because only the lower 32 floating point register are
12300 addressable 32-bits at a time.
12301
12302 We can't just test explicitly for SFmode, otherwise we won't
12303 cover the vector mode cases properly. */
12304
12305 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12306 return true;
12307
12308 size1 = GET_MODE_SIZE (mode1);
12309 size2 = GET_MODE_SIZE (mode2);
12310 if ((size1 > 4 && size2 == 4)
12311 || (size2 > 4 && size1 == 4))
12312 return false;
12313
12314 return true;
12315 }
12316
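/* For example, on V9 SFmode and DFmode are not tieable (sizes 4 and 8
   within MODE_FLOAT), while DFmode and V4HImode fall in different
   classes and are rejected earlier; modes of 8 bytes and more, such
   as DFmode and TFmode, remain tieable.  */
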
12317 static enum machine_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12318 {
12319 return (TARGET_ARCH64 ? DImode : SImode);
12320 }
12321
12322 #include "gt-sparc.h"