/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2016 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
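
  /* Worked example (values from ultrasparc_costs below): with
     int_mul_bit_factor == 2 and a multiplier whose highest set bit is
     bit 11, the cost comes out as int_mul + (11 - 3) / 2, i.e. the base
     multiply cost plus 4 extra units.  */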

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

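/* Note: all entries in the cost tables below are wrapped in COSTS_N_INSNS
   so that the latencies are expressed in the units used by the generic
   rtx cost machinery.  */
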
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
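
/* For example, leaf_reg_remap[24] == 8: incoming register %i0 is mapped
   onto outgoing register %o0 when the leaf function optimization applies,
   while entries of -1 denote registers that are not remapped.  */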

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

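/* Shorthand accessors for the fields of the current function's
   machine_function structure.  */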
#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

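/* The global target hook vector; TARGET_INITIALIZER (from target-def.h)
   assembles it from the TARGET_xxx macro overrides above and falls back
   to the defaults for every hook not redefined here.  */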
struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
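	  /* E.g. a load into %f5 (hard reg 37) pairs with its sibling %f4
	     (hard reg 36): x ^ 1 flips the low bit of the register number.  */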
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit architectures to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass, /* pass */
      "dbr", /* reference_pass_name */
      1, /* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER /* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}
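
/* For instance, 1.0f has bit pattern 0x3f800000: out of simm13 range but
   with the low 10 bits clear, so fp_sethi_p accepts it and a single sethi
   can materialize it in an integer register.  */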

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning them into
     hot and cold sections, this might happen for any label.  */
1707 if (flag_reorder_blocks_and_partition)
1708 return false;
1709
1710 return true;
1711 }
1712
1713 /* Expand a move instruction. Return true if all work is done. */
1714
1715 bool
1716 sparc_expand_move (machine_mode mode, rtx *operands)
1717 {
1718 /* Handle sets of MEM first. */
1719 if (GET_CODE (operands[0]) == MEM)
1720 {
1721 /* 0 is a register (namely %g0, or a pair of registers) on SPARC. */
1722 if (register_or_zero_operand (operands[1], mode))
1723 return false;
1724
1725 if (!reload_in_progress)
1726 {
1727 operands[0] = validize_mem (operands[0]);
1728 operands[1] = force_reg (mode, operands[1]);
1729 }
1730 }
1731
1732 /* Fixup TLS cases. */
1733 if (TARGET_HAVE_TLS
1734 && CONSTANT_P (operands[1])
1735 && sparc_tls_referenced_p (operands[1]))
1736 {
1737 operands[1] = sparc_legitimize_tls_address (operands[1]);
1738 return false;
1739 }
1740
1741 /* Fixup PIC cases. */
1742 if (flag_pic && CONSTANT_P (operands[1]))
1743 {
1744 if (pic_address_needs_scratch (operands[1]))
1745 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1746
1747 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1748 if (GET_CODE (operands[1]) == LABEL_REF
1749 && can_use_mov_pic_label_ref (operands[1]))
1750 {
1751 if (mode == SImode)
1752 {
1753 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1754 return true;
1755 }
1756
1757 if (mode == DImode)
1758 {
1759 gcc_assert (TARGET_ARCH64);
1760 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1761 return true;
1762 }
1763 }
1764
1765 if (symbolic_operand (operands[1], mode))
1766 {
1767 operands[1]
1768 = sparc_legitimize_pic_address (operands[1],
1769 reload_in_progress
1770 ? operands[0] : NULL_RTX);
1771 return false;
1772 }
1773 }
1774
1775 /* If we are trying to toss an integer constant into FP registers,
1776 or loading an FP or vector constant, force it into memory. */
1777 if (CONSTANT_P (operands[1])
1778 && REG_P (operands[0])
1779 && (SPARC_FP_REG_P (REGNO (operands[0]))
1780 || SCALAR_FLOAT_MODE_P (mode)
1781 || VECTOR_MODE_P (mode)))
1782 {
1783 /* emit_group_store will send such bogosity to us when it is
1784 not storing directly into memory. So fix this up to avoid
1785 crashes in output_constant_pool. */
1786 if (operands[1] == const0_rtx)
1787 operands[1] = CONST0_RTX (mode);
1788
1789 /* If TARGET_VIS, we can clear FP registers or set them to all-ones,
1790 and we can always do so for other regs. */
1791 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1792 && (const_zero_operand (operands[1], mode)
1793 || const_all_ones_operand (operands[1], mode)))
1794 return false;
1795
1796 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1797 /* We are able to build any SF constant in integer registers
1798 with at most 2 instructions. */
1799 && (mode == SFmode
1800 /* And any DF constant in integer registers. */
1801 || (mode == DFmode
1802 && ! can_create_pseudo_p ())))
1803 return false;
1804
1805 operands[1] = force_const_mem (mode, operands[1]);
1806 if (!reload_in_progress)
1807 operands[1] = validize_mem (operands[1]);
1808 return false;
1809 }
1810
1811 /* Accept non-constants and valid constants unmodified. */
1812 if (!CONSTANT_P (operands[1])
1813 || GET_CODE (operands[1]) == HIGH
1814 || input_operand (operands[1], mode))
1815 return false;
1816
1817 switch (mode)
1818 {
1819 case QImode:
1820 /* All QImode constants require only one insn, so proceed. */
1821 break;
1822
1823 case HImode:
1824 case SImode:
1825 sparc_emit_set_const32 (operands[0], operands[1]);
1826 return true;
1827
1828 case DImode:
1829 /* input_operand should have filtered out 32-bit mode. */
1830 sparc_emit_set_const64 (operands[0], operands[1]);
1831 return true;
1832
1833 case TImode:
1834 {
1835 rtx high, low;
1836 /* TImode isn't available in 32-bit mode. */
1837 split_double (operands[1], &high, &low);
1838 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1839 high));
1840 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1841 low));
1842 }
1843 return true;
1844
1845 default:
1846 gcc_unreachable ();
1847 }
1848
1849 return false;
1850 }
1851
1852 /* Load OP1, a 32-bit constant, into OP0, a register.
1853 We know it can't be done in one insn when we get
1854 here; the move expander guarantees this. */
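/* For instance (illustrative only), for op1 == (const_int 0x12345678)
   the CONST_INT path below emits

     (set (reg temp) (const_int 0x12345400))             ! sethi %hi(0x12345678)
     (set (reg op0) (ior (reg temp) (const_int 0x278)))  ! or %temp, 0x278, %reg

   since 0x12345678 & ~0x3ff == 0x12345400 and 0x12345678 & 0x3ff == 0x278.  */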
1855
1856 static void
1857 sparc_emit_set_const32 (rtx op0, rtx op1)
1858 {
1859 machine_mode mode = GET_MODE (op0);
1860 rtx temp = op0;
1861
1862 if (can_create_pseudo_p ())
1863 temp = gen_reg_rtx (mode);
1864
1865 if (GET_CODE (op1) == CONST_INT)
1866 {
1867 gcc_assert (!small_int_operand (op1, mode)
1868 && !const_high_operand (op1, mode));
1869
1870 /* Emit them as real moves instead of a HIGH/LO_SUM;
1871 this way CSE can see everything and reuse intermediate
1872 values if it wants. */
1873 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1874 & ~(HOST_WIDE_INT) 0x3ff)));
1875
1876 emit_insn (gen_rtx_SET (op0,
1877 gen_rtx_IOR (mode, temp,
1878 GEN_INT (INTVAL (op1) & 0x3ff))));
1879 }
1880 else
1881 {
1882 /* A symbol; emit it in the traditional way. */
1883 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1884 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1885 }
1886 }
1887
1888 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1889 If TEMP is nonzero, we are forbidden to use any other scratch
1890 registers. Otherwise, we are allowed to generate them as needed.
1891
1892 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1893 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1894
1895 void
1896 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1897 {
1898 rtx temp1, temp2, temp3, temp4, temp5;
1899 rtx ti_temp = 0;
1900
1901 if (temp && GET_MODE (temp) == TImode)
1902 {
1903 ti_temp = temp;
1904 temp = gen_rtx_REG (DImode, REGNO (temp));
1905 }
1906
1907 /* SPARC-V9 code-model support. */
1908 switch (sparc_cmodel)
1909 {
1910 case CM_MEDLOW:
1911 /* The range spanned by all instructions in the object is less
1912 than 2^31 bytes (2GB) and the distance from any instruction
1913 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1914 than 2^31 bytes (2GB).
1915
1916 The executable must be in the low 4TB of the virtual address
1917 space.
1918
1919 sethi %hi(symbol), %temp1
1920 or %temp1, %lo(symbol), %reg */
1921 if (temp)
1922 temp1 = temp; /* op0 is allowed. */
1923 else
1924 temp1 = gen_reg_rtx (DImode);
1925
1926 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
1927 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1928 break;
1929
1930 case CM_MEDMID:
1931 /* The range spanned by all instructions in the object is less
1932 than 2^31 bytes (2GB) and the distance from any instruction
1933 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1934 than 2^31 bytes (2GB).
1935
1936 The executable must be in the low 16TB of the virtual address
1937 space.
1938
1939 sethi %h44(symbol), %temp1
1940 or %temp1, %m44(symbol), %temp2
1941 sllx %temp2, 12, %temp3
1942 or %temp3, %l44(symbol), %reg */
1943 if (temp)
1944 {
1945 temp1 = op0;
1946 temp2 = op0;
1947 temp3 = temp; /* op0 is allowed. */
1948 }
1949 else
1950 {
1951 temp1 = gen_reg_rtx (DImode);
1952 temp2 = gen_reg_rtx (DImode);
1953 temp3 = gen_reg_rtx (DImode);
1954 }
1955
1956 emit_insn (gen_seth44 (temp1, op1));
1957 emit_insn (gen_setm44 (temp2, temp1, op1));
1958 emit_insn (gen_rtx_SET (temp3,
1959 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1960 emit_insn (gen_setl44 (op0, temp3, op1));
1961 break;
1962
1963 case CM_MEDANY:
1964 /* The range spanned by all instructions in the object is less
1965 than 2^31 bytes (2GB) and the distance from any instruction
1966 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1967 than 2^31 bytes (2GB).
1968
1969 The executable can be placed anywhere in the virtual address
1970 space.
1971
1972 sethi %hh(symbol), %temp1
1973 sethi %lm(symbol), %temp2
1974 or %temp1, %hm(symbol), %temp3
1975 sllx %temp3, 32, %temp4
1976 or %temp4, %temp2, %temp5
1977 or %temp5, %lo(symbol), %reg */
1978 if (temp)
1979 {
1980 /* It is possible that one of the registers we got for operands[2]
1981 might coincide with that of operands[0] (which is why we made
1982 it TImode). Pick the other one to use as our scratch. */
1983 if (rtx_equal_p (temp, op0))
1984 {
1985 gcc_assert (ti_temp);
1986 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1987 }
1988 temp1 = op0;
1989 temp2 = temp; /* op0 is _not_ allowed, see above. */
1990 temp3 = op0;
1991 temp4 = op0;
1992 temp5 = op0;
1993 }
1994 else
1995 {
1996 temp1 = gen_reg_rtx (DImode);
1997 temp2 = gen_reg_rtx (DImode);
1998 temp3 = gen_reg_rtx (DImode);
1999 temp4 = gen_reg_rtx (DImode);
2000 temp5 = gen_reg_rtx (DImode);
2001 }
2002
2003 emit_insn (gen_sethh (temp1, op1));
2004 emit_insn (gen_setlm (temp2, op1));
2005 emit_insn (gen_sethm (temp3, temp1, op1));
2006 emit_insn (gen_rtx_SET (temp4,
2007 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2008 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2009 emit_insn (gen_setlo (op0, temp5, op1));
2010 break;
2011
2012 case CM_EMBMEDANY:
2013 /* Old old old backwards-compatibility cruft here.
2014 Essentially it is MEDLOW with a fixed 64-bit
2015 virtual base added to all data segment addresses.
2016 Text-segment stuff is computed like MEDANY; we can't
2017 reuse the code above because the relocation knobs
2018 look different.
2019
2020 Data segment: sethi %hi(symbol), %temp1
2021 add %temp1, EMBMEDANY_BASE_REG, %temp2
2022 or %temp2, %lo(symbol), %reg */
2023 if (data_segment_operand (op1, GET_MODE (op1)))
2024 {
2025 if (temp)
2026 {
2027 temp1 = temp; /* op0 is allowed. */
2028 temp2 = op0;
2029 }
2030 else
2031 {
2032 temp1 = gen_reg_rtx (DImode);
2033 temp2 = gen_reg_rtx (DImode);
2034 }
2035
2036 emit_insn (gen_embmedany_sethi (temp1, op1));
2037 emit_insn (gen_embmedany_brsum (temp2, temp1));
2038 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2039 }
2040
2041 /* Text segment: sethi %uhi(symbol), %temp1
2042 sethi %hi(symbol), %temp2
2043 or %temp1, %ulo(symbol), %temp3
2044 sllx %temp3, 32, %temp4
2045 or %temp4, %temp2, %temp5
2046 or %temp5, %lo(symbol), %reg */
2047 else
2048 {
2049 if (temp)
2050 {
2051 /* It is possible that one of the registers we got for operands[2]
2052 might coincide with that of operands[0] (which is why we made
2053 it TImode). Pick the other one to use as our scratch. */
2054 if (rtx_equal_p (temp, op0))
2055 {
2056 gcc_assert (ti_temp);
2057 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2058 }
2059 temp1 = op0;
2060 temp2 = temp; /* op0 is _not_ allowed, see above. */
2061 temp3 = op0;
2062 temp4 = op0;
2063 temp5 = op0;
2064 }
2065 else
2066 {
2067 temp1 = gen_reg_rtx (DImode);
2068 temp2 = gen_reg_rtx (DImode);
2069 temp3 = gen_reg_rtx (DImode);
2070 temp4 = gen_reg_rtx (DImode);
2071 temp5 = gen_reg_rtx (DImode);
2072 }
2073
2074 emit_insn (gen_embmedany_textuhi (temp1, op1));
2075 emit_insn (gen_embmedany_texthi (temp2, op1));
2076 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2077 emit_insn (gen_rtx_SET (temp4,
2078 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2079 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2080 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2081 }
2082 break;
2083
2084 default:
2085 gcc_unreachable ();
2086 }
2087 }
2088
2089 /* These avoid problems when cross-compiling.  If we do not
2090 go through all this hair then the optimizer will see
2091 invalid REG_EQUAL notes or in some cases none at all. */
2092 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2093 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2094 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2095 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2096
2097 /* The optimizer is not to assume anything about exactly
2098 which bits are set for a HIGH; they are unspecified.
2099 Unfortunately this leads to many missed optimizations
2100 during CSE.  We mask out the non-HIGH bits and match
2101 a plain movdi, to alleviate this problem. */
2102 static rtx
2103 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2104 {
2105 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2106 }
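/* For example (illustrative only), gen_safe_HIGH64 (dest, 0x12345678)
   yields (set dest (const_int 0x12345400)), i.e. exactly the bits that
   "sethi %hi(0x12345678), dest" would leave in the register, but exposed
   to CSE as an ordinary constant move.  */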
2107
2108 static rtx
2109 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2110 {
2111 return gen_rtx_SET (dest, GEN_INT (val));
2112 }
2113
2114 static rtx
2115 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2116 {
2117 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2118 }
2119
2120 static rtx
2121 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2122 {
2123 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2124 }
2125
2126 /* Worker routines for 64-bit constant formation on arch64.
2127 One of the key things to be doing in these emissions is
2128 to create as many temp REGs as possible.  This makes it
2129 possible for half-built constants to be reused later on,
2130 when similar values are required.
2131 Without doing this, the optimizer cannot see such
2132 opportunities. */
2133
2134 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2135 unsigned HOST_WIDE_INT, int);
2136
2137 static void
2138 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2139 unsigned HOST_WIDE_INT low_bits, int is_neg)
2140 {
2141 unsigned HOST_WIDE_INT high_bits;
2142
2143 if (is_neg)
2144 high_bits = (~low_bits) & 0xffffffff;
2145 else
2146 high_bits = low_bits;
2147
2148 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2149 if (!is_neg)
2150 {
2151 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2152 }
2153 else
2154 {
2155 /* If we are XOR'ing with -1, then we should emit a one's complement
2156 instead. This way the combiner will notice logical operations
2157 such as ANDN later on and substitute. */
2158 if ((low_bits & 0x3ff) == 0x3ff)
2159 {
2160 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2161 }
2162 else
2163 {
2164 emit_insn (gen_rtx_SET (op0,
2165 gen_safe_XOR64 (temp,
2166 (-(HOST_WIDE_INT)0x400
2167 | (low_bits & 0x3ff)))));
2168 }
2169 }
2170 }
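/* A worked example of the negated path above (illustrative only):
   loading 0xffffffff12345678 arrives here with is_neg set and
   low_bits == 0x12345678, so high_bits == ~0x12345678 == 0xedcba987.
   The emitted sequence is then

     temp = 0xedcba800                    ! sethi %hi(0xedcba987)
     op0  = temp ^ 0xfffffffffffffe78     ! xor with -0x400 | 0x278

   and indeed 0x00000000edcba800 ^ 0xfffffffffffffe78
   == 0xffffffff12345678.  */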
2171
2172 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2173 unsigned HOST_WIDE_INT, int);
2174
2175 static void
2176 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2177 unsigned HOST_WIDE_INT high_bits,
2178 unsigned HOST_WIDE_INT low_immediate,
2179 int shift_count)
2180 {
2181 rtx temp2 = op0;
2182
2183 if ((high_bits & 0xfffffc00) != 0)
2184 {
2185 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2186 if ((high_bits & ~0xfffffc00) != 0)
2187 emit_insn (gen_rtx_SET (op0,
2188 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2189 else
2190 temp2 = temp;
2191 }
2192 else
2193 {
2194 emit_insn (gen_safe_SET64 (temp, high_bits));
2195 temp2 = temp;
2196 }
2197
2198 /* Now shift it up into place. */
2199 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2200 GEN_INT (shift_count))));
2201
2202 /* If there is a low immediate part piece, finish up by
2203 putting that in as well. */
2204 if (low_immediate != 0)
2205 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2206 }
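/* A worked example (illustrative only): quick2 with
   high_bits == 0x12345678, low_immediate == 0xabc and shift_count == 32
   emits

     sethi %hi(0x12345678), %temp    ! temp = 0x12345400
     or    %temp, 0x278, %op0        ! op0  = 0x12345678
     sllx  %op0, 32, %op0            ! op0  = 0x1234567800000000
     or    %op0, 0xabc, %op0         ! op0  = 0x1234567800000abc

   i.e. the full 64-bit constant in four instructions.  */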
2207
2208 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2209 unsigned HOST_WIDE_INT);
2210
2211 /* Full 64-bit constant decomposition. Even though this is the
2212 'worst' case, we still optimize a few things away. */
2213 static void
2214 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2215 unsigned HOST_WIDE_INT high_bits,
2216 unsigned HOST_WIDE_INT low_bits)
2217 {
2218 rtx sub_temp = op0;
2219
2220 if (can_create_pseudo_p ())
2221 sub_temp = gen_reg_rtx (DImode);
2222
2223 if ((high_bits & 0xfffffc00) != 0)
2224 {
2225 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2226 if ((high_bits & ~0xfffffc00) != 0)
2227 emit_insn (gen_rtx_SET (sub_temp,
2228 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2229 else
2230 sub_temp = temp;
2231 }
2232 else
2233 {
2234 emit_insn (gen_safe_SET64 (temp, high_bits));
2235 sub_temp = temp;
2236 }
2237
2238 if (can_create_pseudo_p ())
2239 {
2240 rtx temp2 = gen_reg_rtx (DImode);
2241 rtx temp3 = gen_reg_rtx (DImode);
2242 rtx temp4 = gen_reg_rtx (DImode);
2243
2244 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2245 GEN_INT (32))));
2246
2247 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2248 if ((low_bits & ~0xfffffc00) != 0)
2249 {
2250 emit_insn (gen_rtx_SET (temp3,
2251 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2252 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2253 }
2254 else
2255 {
2256 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2257 }
2258 }
2259 else
2260 {
2261 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2262 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2263 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2264 int to_shift = 12;
2265
2266 /* We are in the middle of reload, so this is really
2267 painful.  However, we still make an attempt to
2268 avoid emitting truly stupid code. */
2269 if (low1 != const0_rtx)
2270 {
2271 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2272 GEN_INT (to_shift))));
2273 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2274 sub_temp = op0;
2275 to_shift = 12;
2276 }
2277 else
2278 {
2279 to_shift += 12;
2280 }
2281 if (low2 != const0_rtx)
2282 {
2283 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2284 GEN_INT (to_shift))));
2285 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2286 sub_temp = op0;
2287 to_shift = 8;
2288 }
2289 else
2290 {
2291 to_shift += 8;
2292 }
2293 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2294 GEN_INT (to_shift))));
2295 if (low3 != const0_rtx)
2296 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2297 /* phew... */
2298 }
2299 }
2300
2301 /* Analyze a 64-bit constant for certain properties. */
2302 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2303 unsigned HOST_WIDE_INT,
2304 int *, int *, int *);
2305
2306 static void
2307 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2308 unsigned HOST_WIDE_INT low_bits,
2309 int *hbsp, int *lbsp, int *abbasp)
2310 {
2311 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2312 int i;
2313
2314 lowest_bit_set = highest_bit_set = -1;
2315 i = 0;
2316 do
2317 {
2318 if ((lowest_bit_set == -1)
2319 && ((low_bits >> i) & 1))
2320 lowest_bit_set = i;
2321 if ((highest_bit_set == -1)
2322 && ((high_bits >> (32 - i - 1)) & 1))
2323 highest_bit_set = (64 - i - 1);
2324 }
2325 while (++i < 32
2326 && ((highest_bit_set == -1)
2327 || (lowest_bit_set == -1)));
2328 if (i == 32)
2329 {
2330 i = 0;
2331 do
2332 {
2333 if ((lowest_bit_set == -1)
2334 && ((high_bits >> i) & 1))
2335 lowest_bit_set = i + 32;
2336 if ((highest_bit_set == -1)
2337 && ((low_bits >> (32 - i - 1)) & 1))
2338 highest_bit_set = 32 - i - 1;
2339 }
2340 while (++i < 32
2341 && ((highest_bit_set == -1)
2342 || (lowest_bit_set == -1)));
2343 }
2344 /* If there are no bits set, this should have gone out
2345 as one instruction! */
2346 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2347 all_bits_between_are_set = 1;
2348 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2349 {
2350 if (i < 32)
2351 {
2352 if ((low_bits & (1 << i)) != 0)
2353 continue;
2354 }
2355 else
2356 {
2357 if ((high_bits & (1 << (i - 32))) != 0)
2358 continue;
2359 }
2360 all_bits_between_are_set = 0;
2361 break;
2362 }
2363 *hbsp = highest_bit_set;
2364 *lbsp = lowest_bit_set;
2365 *abbasp = all_bits_between_are_set;
2366 }
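/* For example (illustrative only): for the constant 0x00000003fffffc00
   we have high_bits == 0x3 and low_bits == 0xfffffc00, so the loops
   above find lowest_bit_set == 10 and highest_bit_set == 33, and since
   bits 10..33 are contiguous, all_bits_between_are_set == 1.  */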
2367
2368 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2369
2370 static int
2371 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2372 unsigned HOST_WIDE_INT low_bits)
2373 {
2374 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2375
2376 if (high_bits == 0
2377 || high_bits == 0xffffffff)
2378 return 1;
2379
2380 analyze_64bit_constant (high_bits, low_bits,
2381 &highest_bit_set, &lowest_bit_set,
2382 &all_bits_between_are_set);
2383
2384 if ((highest_bit_set == 63
2385 || lowest_bit_set == 0)
2386 && all_bits_between_are_set != 0)
2387 return 1;
2388
2389 if ((highest_bit_set - lowest_bit_set) < 21)
2390 return 1;
2391
2392 return 0;
2393 }
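/* Illustrative only: 0x00000003ffffffff is a 2-insn constant because
   lowest_bit_set == 0 and every bit up to bit 33 is set, so it can be
   built as "mov -1, %reg; srlx %reg, 30, %reg".  Anything whose
   high_bits are 0 or 0xffffffff is likewise 2 insns, via the sethi/or
   or sethi/xor pairs of sparc_emit_set_const64_quick1.  */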
2394
2395 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2396 unsigned HOST_WIDE_INT,
2397 int, int);
2398
2399 static unsigned HOST_WIDE_INT
2400 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2401 unsigned HOST_WIDE_INT low_bits,
2402 int lowest_bit_set, int shift)
2403 {
2404 HOST_WIDE_INT hi, lo;
2405
2406 if (lowest_bit_set < 32)
2407 {
2408 lo = (low_bits >> lowest_bit_set) << shift;
2409 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2410 }
2411 else
2412 {
2413 lo = 0;
2414 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2415 }
2416 gcc_assert (! (hi & lo));
2417 return (hi | lo);
2418 }
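/* A small example (illustrative only): with low_bits == 0x7fe00000
   (bits 21..30 set), lowest_bit_set == 21 and shift == 10, the result
   is 0x000ffc00, i.e. the 10-bit group relocated so that it starts at
   bit 10, ready to be loaded by a single sethi and shifted back up.  */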
2419
2420 /* Here we are sure to be arch64 and this is an integer constant
2421 being loaded into a register. Emit the most efficient
2422 insn sequence possible. Detection of all the 1-insn cases
2423 has been done already. */
2424 static void
2425 sparc_emit_set_const64 (rtx op0, rtx op1)
2426 {
2427 unsigned HOST_WIDE_INT high_bits, low_bits;
2428 int lowest_bit_set, highest_bit_set;
2429 int all_bits_between_are_set;
2430 rtx temp = 0;
2431
2432 /* Sanity check that we know what we are working with. */
2433 gcc_assert (TARGET_ARCH64
2434 && (GET_CODE (op0) == SUBREG
2435 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2436
2437 if (! can_create_pseudo_p ())
2438 temp = op0;
2439
2440 if (GET_CODE (op1) != CONST_INT)
2441 {
2442 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2443 return;
2444 }
2445
2446 if (! temp)
2447 temp = gen_reg_rtx (DImode);
2448
2449 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2450 low_bits = (INTVAL (op1) & 0xffffffff);
2451
2452 /* low_bits bits 0 --> 31
2453 high_bits bits 32 --> 63 */
2454
2455 analyze_64bit_constant (high_bits, low_bits,
2456 &highest_bit_set, &lowest_bit_set,
2457 &all_bits_between_are_set);
2458
2459 /* First try for a 2-insn sequence. */
2460
2461 /* These situations are preferred because the optimizer can
2462 * do more things with them:
2463 * 1) mov -1, %reg
2464 * sllx %reg, shift, %reg
2465 * 2) mov -1, %reg
2466 * srlx %reg, shift, %reg
2467 * 3) mov some_small_const, %reg
2468 * sllx %reg, shift, %reg
2469 */
2470 if (((highest_bit_set == 63
2471 || lowest_bit_set == 0)
2472 && all_bits_between_are_set != 0)
2473 || ((highest_bit_set - lowest_bit_set) < 12))
2474 {
2475 HOST_WIDE_INT the_const = -1;
2476 int shift = lowest_bit_set;
2477
2478 if ((highest_bit_set != 63
2479 && lowest_bit_set != 0)
2480 || all_bits_between_are_set == 0)
2481 {
2482 the_const =
2483 create_simple_focus_bits (high_bits, low_bits,
2484 lowest_bit_set, 0);
2485 }
2486 else if (lowest_bit_set == 0)
2487 shift = -(63 - highest_bit_set);
2488
2489 gcc_assert (SPARC_SIMM13_P (the_const));
2490 gcc_assert (shift != 0);
2491
2492 emit_insn (gen_safe_SET64 (temp, the_const));
2493 if (shift > 0)
2494 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2495 GEN_INT (shift))));
2496 else if (shift < 0)
2497 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2498 GEN_INT (-shift))));
2499 return;
2500 }
2501
2502 /* Now a range of 22 or fewer bits set somewhere.
2503 * 1) sethi %hi(focus_bits), %reg
2504 * sllx %reg, shift, %reg
2505 * 2) sethi %hi(focus_bits), %reg
2506 * srlx %reg, shift, %reg
2507 */
2508 if ((highest_bit_set - lowest_bit_set) < 21)
2509 {
2510 unsigned HOST_WIDE_INT focus_bits =
2511 create_simple_focus_bits (high_bits, low_bits,
2512 lowest_bit_set, 10);
2513
2514 gcc_assert (SPARC_SETHI_P (focus_bits));
2515 gcc_assert (lowest_bit_set != 10);
2516
2517 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2518
2519 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2520 if (lowest_bit_set < 10)
2521 emit_insn (gen_rtx_SET (op0,
2522 gen_rtx_LSHIFTRT (DImode, temp,
2523 GEN_INT (10 - lowest_bit_set))));
2524 else if (lowest_bit_set > 10)
2525 emit_insn (gen_rtx_SET (op0,
2526 gen_rtx_ASHIFT (DImode, temp,
2527 GEN_INT (lowest_bit_set - 10))));
2528 return;
2529 }
2530
2531 /* 1) sethi %hi(low_bits), %reg
2532 * or %reg, %lo(low_bits), %reg
2533 * 2) sethi %hi(~low_bits), %reg
2534 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2535 */
2536 if (high_bits == 0
2537 || high_bits == 0xffffffff)
2538 {
2539 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2540 (high_bits == 0xffffffff));
2541 return;
2542 }
2543
2544 /* Now, try 3-insn sequences. */
2545
2546 /* 1) sethi %hi(high_bits), %reg
2547 * or %reg, %lo(high_bits), %reg
2548 * sllx %reg, 32, %reg
2549 */
2550 if (low_bits == 0)
2551 {
2552 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2553 return;
2554 }
2555
2556 /* We may be able to do something quick
2557 when the constant is negated, so try that. */
2558 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2559 (~low_bits) & 0xfffffc00))
2560 {
2561 /* NOTE: The trailing bits get XOR'd so we need the
2562 non-negated bits, not the negated ones. */
2563 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2564
2565 if ((((~high_bits) & 0xffffffff) == 0
2566 && ((~low_bits) & 0x80000000) == 0)
2567 || (((~high_bits) & 0xffffffff) == 0xffffffff
2568 && ((~low_bits) & 0x80000000) != 0))
2569 {
2570 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2571
2572 if ((SPARC_SETHI_P (fast_int)
2573 && (~high_bits & 0xffffffff) == 0)
2574 || SPARC_SIMM13_P (fast_int))
2575 emit_insn (gen_safe_SET64 (temp, fast_int));
2576 else
2577 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2578 }
2579 else
2580 {
2581 rtx negated_const;
2582 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2583 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff)) << 32));
2584 sparc_emit_set_const64 (temp, negated_const);
2585 }
2586
2587 /* If we are XOR'ing with -1, then we should emit a one's complement
2588 instead. This way the combiner will notice logical operations
2589 such as ANDN later on and substitute. */
2590 if (trailing_bits == 0x3ff)
2591 {
2592 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2593 }
2594 else
2595 {
2596 emit_insn (gen_rtx_SET (op0,
2597 gen_safe_XOR64 (temp,
2598 (-0x400 | trailing_bits))));
2599 }
2600 return;
2601 }
2602
2603 /* 1) sethi %hi(xxx), %reg
2604 * or %reg, %lo(xxx), %reg
2605 * sllx %reg, yyy, %reg
2606 *
2607 * ??? This is just a generalized version of the low_bits==0
2608 * thing above, FIXME...
2609 */
2610 if ((highest_bit_set - lowest_bit_set) < 32)
2611 {
2612 unsigned HOST_WIDE_INT focus_bits =
2613 create_simple_focus_bits (high_bits, low_bits,
2614 lowest_bit_set, 0);
2615
2616 /* We can't get here in this state. */
2617 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2618
2619 /* So what we know is that the set bits straddle the
2620 middle of the 64-bit word. */
2621 sparc_emit_set_const64_quick2 (op0, temp,
2622 focus_bits, 0,
2623 lowest_bit_set);
2624 return;
2625 }
2626
2627 /* 1) sethi %hi(high_bits), %reg
2628 * or %reg, %lo(high_bits), %reg
2629 * sllx %reg, 32, %reg
2630 * or %reg, low_bits, %reg
2631 */
2632 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2633 {
2634 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2635 return;
2636 }
2637
2638 /* The easiest way, when all else fails, is full decomposition. */
2639 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2640 }
2641
2642 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2643 return the mode to be used for the comparison. For floating-point,
2644 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2645 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2646 processing is needed. */
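/* For instance (illustrative only), an SImode (plus ...) compared
   against zero gets CC_NOOVmode, telling the rest of the backend that
   the overflow bit left by the addcc is not meaningful; and for floats,
   (eq:SF x y) only needs CCFPmode (fcmps), while (lt:SF x y) needs
   CCFPEmode (fcmpes, which signals on NaN operands).  */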
2647
2648 machine_mode
2649 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2650 {
2651 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2652 {
2653 switch (op)
2654 {
2655 case EQ:
2656 case NE:
2657 case UNORDERED:
2658 case ORDERED:
2659 case UNLT:
2660 case UNLE:
2661 case UNGT:
2662 case UNGE:
2663 case UNEQ:
2664 case LTGT:
2665 return CCFPmode;
2666
2667 case LT:
2668 case LE:
2669 case GT:
2670 case GE:
2671 return CCFPEmode;
2672
2673 default:
2674 gcc_unreachable ();
2675 }
2676 }
2677 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2678 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2679 {
2680 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2681 return CCX_NOOVmode;
2682 else
2683 return CC_NOOVmode;
2684 }
2685 else
2686 {
2687 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2688 return CCXmode;
2689 else
2690 return CCmode;
2691 }
2692 }
2693
2694 /* Emit the compare insn and return the CC reg for a CODE comparison
2695 with operands X and Y. */
2696
2697 static rtx
2698 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2699 {
2700 machine_mode mode;
2701 rtx cc_reg;
2702
2703 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2704 return x;
2705
2706 mode = SELECT_CC_MODE (code, x, y);
2707
2708 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2709 fcc regs (cse can't tell they're really call-clobbered regs and will
2710 remove a duplicate comparison even if there is an intervening function
2711 call - it will then try to reload the cc reg via an int reg, which is why
2712 we need the movcc patterns). It is possible to provide the movcc
2713 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2714 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2715 to tell cse that CCFPE mode registers (even pseudos) are call
2716 clobbered. */
2717
2718 /* ??? This is an experiment. Rather than making changes to cse which may
2719 or may not be easy/clean, we do our own cse. This is possible because
2720 we will generate hard registers. Cse knows they're call clobbered (it
2721 doesn't know the same thing about pseudos). If we guess wrong, no big
2722 deal, but if we win, great! */
2723
2724 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2725 #if 1 /* experiment */
2726 {
2727 int reg;
2728 /* We cycle through the registers to ensure they're all exercised. */
2729 static int next_fcc_reg = 0;
2730 /* Previous x,y for each fcc reg. */
2731 static rtx prev_args[4][2];
2732
2733 /* Scan prev_args for x,y. */
2734 for (reg = 0; reg < 4; reg++)
2735 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2736 break;
2737 if (reg == 4)
2738 {
2739 reg = next_fcc_reg;
2740 prev_args[reg][0] = x;
2741 prev_args[reg][1] = y;
2742 next_fcc_reg = (next_fcc_reg + 1) & 3;
2743 }
2744 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2745 }
2746 #else
2747 cc_reg = gen_reg_rtx (mode);
2748 #endif /* ! experiment */
2749 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2750 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2751 else
2752 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2753
2754 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
2755 will only result in an unrecognizable insn so no point in asserting. */
2756 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2757
2758 return cc_reg;
2759 }
2760
2761
2762 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2763
2764 rtx
2765 gen_compare_reg (rtx cmp)
2766 {
2767 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2768 }
2769
2770 /* This function is used for v9 only.
2771 DEST is the target of the Scc insn.
2772 CODE is the code for an Scc's comparison.
2773 X and Y are the values we compare.
2774
2775 This function is needed to turn
2776
2777 (set (reg:SI 110)
2778 (gt (reg:CCX 100 %icc)
2779 (const_int 0)))
2780 into
2781 (set (reg:SI 110)
2782 (gt:DI (reg:CCX 100 %icc)
2783 (const_int 0)))
2784
2785 I.e., the instruction recognizer needs to see the mode of the comparison to
2786 find the right instruction. We could use "gt:DI" right in the
2787 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2788
2789 static int
2790 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2791 {
2792 if (! TARGET_ARCH64
2793 && (GET_MODE (x) == DImode
2794 || GET_MODE (dest) == DImode))
2795 return 0;
2796
2797 /* Try to use the movrCC insns. */
2798 if (TARGET_ARCH64
2799 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2800 && y == const0_rtx
2801 && v9_regcmp_p (compare_code))
2802 {
2803 rtx op0 = x;
2804 rtx temp;
2805
2806 /* Special case for op0 != 0. This can be done with one instruction if
2807 dest == x. */
2808
2809 if (compare_code == NE
2810 && GET_MODE (dest) == DImode
2811 && rtx_equal_p (op0, dest))
2812 {
2813 emit_insn (gen_rtx_SET (dest,
2814 gen_rtx_IF_THEN_ELSE (DImode,
2815 gen_rtx_fmt_ee (compare_code, DImode,
2816 op0, const0_rtx),
2817 const1_rtx,
2818 dest)));
2819 return 1;
2820 }
2821
2822 if (reg_overlap_mentioned_p (dest, op0))
2823 {
2824 /* Handle the case where dest == x.
2825 We "early clobber" the result. */
2826 op0 = gen_reg_rtx (GET_MODE (x));
2827 emit_move_insn (op0, x);
2828 }
2829
2830 emit_insn (gen_rtx_SET (dest, const0_rtx));
2831 if (GET_MODE (op0) != DImode)
2832 {
2833 temp = gen_reg_rtx (DImode);
2834 convert_move (temp, op0, 0);
2835 }
2836 else
2837 temp = op0;
2838 emit_insn (gen_rtx_SET (dest,
2839 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2840 gen_rtx_fmt_ee (compare_code, DImode,
2841 temp, const0_rtx),
2842 const1_rtx,
2843 dest)));
2844 return 1;
2845 }
2846 else
2847 {
2848 x = gen_compare_reg_1 (compare_code, x, y);
2849 y = const0_rtx;
2850
2851 gcc_assert (GET_MODE (x) != CC_NOOVmode
2852 && GET_MODE (x) != CCX_NOOVmode);
2853
2854 emit_insn (gen_rtx_SET (dest, const0_rtx));
2855 emit_insn (gen_rtx_SET (dest,
2856 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2857 gen_rtx_fmt_ee (compare_code,
2858 GET_MODE (x), x, y),
2859 const1_rtx, dest)));
2860 return 1;
2861 }
2862 }
2863
2864
2865 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2866 without jumps using the addx/subx instructions. */
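/* A sketch of the trick (illustrative only): for LTU the carry flag
   left behind by a subtract is precisely the result, e.g.

     subcc %o0, %o1, %g0    ! sets C iff %o0 < %o1 (unsigned)
     addx  %g0, 0, %o2      ! %o2 = 0 + 0 + C

   and GEU is the complementary "subx %g0, -1, %o2" form, computing
   1 - C.  */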
2867
2868 bool
2869 emit_scc_insn (rtx operands[])
2870 {
2871 rtx tem;
2872 rtx x;
2873 rtx y;
2874 enum rtx_code code;
2875
2876 /* The quad-word fp compare library routines all return nonzero to indicate
2877 true, which is different from the equivalent libgcc routines, so we must
2878 handle them specially here. */
2879 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2880 {
2881 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2882 GET_CODE (operands[1]));
2883 operands[2] = XEXP (operands[1], 0);
2884 operands[3] = XEXP (operands[1], 1);
2885 }
2886
2887 code = GET_CODE (operands[1]);
2888 x = operands[2];
2889 y = operands[3];
2890
2891 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2892 more applications). The exception to this is "reg != 0" which can
2893 be done in one instruction on v9 (so we do it). */
2894 if (code == EQ)
2895 {
2896 if (GET_MODE (x) == SImode)
2897 {
2898 rtx pat;
2899 if (TARGET_ARCH64)
2900 pat = gen_seqsidi_special (operands[0], x, y);
2901 else
2902 pat = gen_seqsisi_special (operands[0], x, y);
2903 emit_insn (pat);
2904 return true;
2905 }
2906 else if (GET_MODE (x) == DImode)
2907 {
2908 rtx pat = gen_seqdi_special (operands[0], x, y);
2909 emit_insn (pat);
2910 return true;
2911 }
2912 }
2913
2914 if (code == NE)
2915 {
2916 if (GET_MODE (x) == SImode)
2917 {
2918 rtx pat;
2919 if (TARGET_ARCH64)
2920 pat = gen_snesidi_special (operands[0], x, y);
2921 else
2922 pat = gen_snesisi_special (operands[0], x, y);
2923 emit_insn (pat);
2924 return true;
2925 }
2926 else if (GET_MODE (x) == DImode)
2927 {
2928 rtx pat;
2929 if (TARGET_VIS3)
2930 pat = gen_snedi_special_vis3 (operands[0], x, y);
2931 else
2932 pat = gen_snedi_special (operands[0], x, y);
2933 emit_insn (pat);
2934 return true;
2935 }
2936 }
2937
2938 if (TARGET_V9
2939 && TARGET_ARCH64
2940 && GET_MODE (x) == DImode
2941 && !(TARGET_VIS3
2942 && (code == GTU || code == LTU))
2943 && gen_v9_scc (operands[0], code, x, y))
2944 return true;
2945
2946 /* We can do LTU and GEU using the addx/subx instructions too. And
2947 for GTU/LEU, if both operands are registers, swap them and fall
2948 back to the easy case. */
2949 if (code == GTU || code == LEU)
2950 {
2951 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2952 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2953 {
2954 tem = x;
2955 x = y;
2956 y = tem;
2957 code = swap_condition (code);
2958 }
2959 }
2960
2961 if (code == LTU
2962 || (!TARGET_VIS3 && code == GEU))
2963 {
2964 emit_insn (gen_rtx_SET (operands[0],
2965 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2966 gen_compare_reg_1 (code, x, y),
2967 const0_rtx)));
2968 return true;
2969 }
2970
2971 /* All the possibilities to use addx/subx-based sequences have been
2972 exhausted; try for a 3-instruction sequence using v9 conditional
2973 moves. */
2974 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2975 return true;
2976
2977 /* Nope, do branches. */
2978 return false;
2979 }
2980
2981 /* Emit a conditional jump insn for the v9 architecture using comparison code
2982 CODE and jump target LABEL.
2983 This function exists to take advantage of the v9 brxx insns. */
2984
2985 static void
2986 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2987 {
2988 emit_jump_insn (gen_rtx_SET (pc_rtx,
2989 gen_rtx_IF_THEN_ELSE (VOIDmode,
2990 gen_rtx_fmt_ee (code, GET_MODE (op0),
2991 op0, const0_rtx),
2992 gen_rtx_LABEL_REF (VOIDmode, label),
2993 pc_rtx)));
2994 }
2995
2996 /* Emit a conditional jump insn for the UA2011 architecture using
2997 comparison code CODE and jump target LABEL. This function exists
2998 to take advantage of the UA2011 Compare and Branch insns. */
2999
3000 static void
3001 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3002 {
3003 rtx if_then_else;
3004
3005 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3006 gen_rtx_fmt_ee (code, GET_MODE (op0),
3007 op0, op1),
3008 gen_rtx_LABEL_REF (VOIDmode, label),
3009 pc_rtx);
3010
3011 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3012 }
3013
3014 void
3015 emit_conditional_branch_insn (rtx operands[])
3016 {
3017 /* The quad-word fp compare library routines all return nonzero to indicate
3018 true, which is different from the equivalent libgcc routines, so we must
3019 handle them specially here. */
3020 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3021 {
3022 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3023 GET_CODE (operands[0]));
3024 operands[1] = XEXP (operands[0], 0);
3025 operands[2] = XEXP (operands[0], 1);
3026 }
3027
3028 /* If we can tell early on that the comparison is against a constant
3029 that won't fit in the 5-bit signed immediate field of a cbcond,
3030 use one of the other v9 conditional branch sequences. */
3031 if (TARGET_CBCOND
3032 && GET_CODE (operands[1]) == REG
3033 && (GET_MODE (operands[1]) == SImode
3034 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3035 && (GET_CODE (operands[2]) != CONST_INT
3036 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3037 {
3038 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3039 return;
3040 }
3041
3042 if (TARGET_ARCH64 && operands[2] == const0_rtx
3043 && GET_CODE (operands[1]) == REG
3044 && GET_MODE (operands[1]) == DImode)
3045 {
3046 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3047 return;
3048 }
3049
3050 operands[1] = gen_compare_reg (operands[0]);
3051 operands[2] = const0_rtx;
3052 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3053 operands[1], operands[2]);
3054 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3055 operands[3]));
3056 }
3057
3058
3059 /* Generate a DFmode part of a hard TFmode register.
3060 REG is the TFmode hard register, LOW is 1 for the
3061 low 64 bits of the register and 0 otherwise. */
3063 rtx
3064 gen_df_reg (rtx reg, int low)
3065 {
3066 int regno = REGNO (reg);
3067
3068 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3069 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3070 return gen_rtx_REG (DFmode, regno);
3071 }
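/* For example (illustrative only): SPARC words are big-endian, so for
   a TFmode value in %f4 (viewed as the DFmode pair %f4/%f6),
   gen_df_reg (reg, 0) returns %f4, the half holding the high 64 bits,
   and gen_df_reg (reg, 1) returns %f6.  */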
3072 \f
3073 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3074 Unlike normal calls, TFmode operands are passed by reference. It is
3075 assumed that no more than 3 operands are required. */
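/* Concretely (illustrative only), the SPARC quad ABI entry points have
   prototypes along the lines of

     void _Qp_add (long double *res, const long double *a,
                   const long double *b);

   so a TFmode add becomes a call passing three addresses, and the code
   below materializes stack slots or constant-pool entries for any
   operand that is not already addressable memory.  */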
3076
3077 static void
3078 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3079 {
3080 rtx ret_slot = NULL, arg[3], func_sym;
3081 int i;
3082
3083 /* We only expect to be called for conversions, unary, and binary ops. */
3084 gcc_assert (nargs == 2 || nargs == 3);
3085
3086 for (i = 0; i < nargs; ++i)
3087 {
3088 rtx this_arg = operands[i];
3089 rtx this_slot;
3090
3091 /* TFmode arguments and return values are passed by reference. */
3092 if (GET_MODE (this_arg) == TFmode)
3093 {
3094 int force_stack_temp;
3095
3096 force_stack_temp = 0;
3097 if (TARGET_BUGGY_QP_LIB && i == 0)
3098 force_stack_temp = 1;
3099
3100 if (GET_CODE (this_arg) == MEM
3101 && ! force_stack_temp)
3102 {
3103 tree expr = MEM_EXPR (this_arg);
3104 if (expr)
3105 mark_addressable (expr);
3106 this_arg = XEXP (this_arg, 0);
3107 }
3108 else if (CONSTANT_P (this_arg)
3109 && ! force_stack_temp)
3110 {
3111 this_slot = force_const_mem (TFmode, this_arg);
3112 this_arg = XEXP (this_slot, 0);
3113 }
3114 else
3115 {
3116 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3117
3118 /* Operand 0 is the return value. We'll copy it out later. */
3119 if (i > 0)
3120 emit_move_insn (this_slot, this_arg);
3121 else
3122 ret_slot = this_slot;
3123
3124 this_arg = XEXP (this_slot, 0);
3125 }
3126 }
3127
3128 arg[i] = this_arg;
3129 }
3130
3131 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3132
3133 if (GET_MODE (operands[0]) == TFmode)
3134 {
3135 if (nargs == 2)
3136 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3137 arg[0], GET_MODE (arg[0]),
3138 arg[1], GET_MODE (arg[1]));
3139 else
3140 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3141 arg[0], GET_MODE (arg[0]),
3142 arg[1], GET_MODE (arg[1]),
3143 arg[2], GET_MODE (arg[2]));
3144
3145 if (ret_slot)
3146 emit_move_insn (operands[0], ret_slot);
3147 }
3148 else
3149 {
3150 rtx ret;
3151
3152 gcc_assert (nargs == 2);
3153
3154 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3155 GET_MODE (operands[0]), 1,
3156 arg[1], GET_MODE (arg[1]));
3157
3158 if (ret != operands[0])
3159 emit_move_insn (operands[0], ret);
3160 }
3161 }
3162
3163 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3164
3165 static void
3166 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3167 {
3168 const char *func;
3169
3170 switch (code)
3171 {
3172 case PLUS:
3173 func = "_Qp_add";
3174 break;
3175 case MINUS:
3176 func = "_Qp_sub";
3177 break;
3178 case MULT:
3179 func = "_Qp_mul";
3180 break;
3181 case DIV:
3182 func = "_Qp_div";
3183 break;
3184 default:
3185 gcc_unreachable ();
3186 }
3187
3188 emit_soft_tfmode_libcall (func, 3, operands);
3189 }
3190
3191 static void
3192 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3193 {
3194 const char *func;
3195
3196 gcc_assert (code == SQRT);
3197 func = "_Qp_sqrt";
3198
3199 emit_soft_tfmode_libcall (func, 2, operands);
3200 }
3201
3202 static void
3203 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3204 {
3205 const char *func;
3206
3207 switch (code)
3208 {
3209 case FLOAT_EXTEND:
3210 switch (GET_MODE (operands[1]))
3211 {
3212 case SFmode:
3213 func = "_Qp_stoq";
3214 break;
3215 case DFmode:
3216 func = "_Qp_dtoq";
3217 break;
3218 default:
3219 gcc_unreachable ();
3220 }
3221 break;
3222
3223 case FLOAT_TRUNCATE:
3224 switch (GET_MODE (operands[0]))
3225 {
3226 case SFmode:
3227 func = "_Qp_qtos";
3228 break;
3229 case DFmode:
3230 func = "_Qp_qtod";
3231 break;
3232 default:
3233 gcc_unreachable ();
3234 }
3235 break;
3236
3237 case FLOAT:
3238 switch (GET_MODE (operands[1]))
3239 {
3240 case SImode:
3241 func = "_Qp_itoq";
3242 if (TARGET_ARCH64)
3243 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3244 break;
3245 case DImode:
3246 func = "_Qp_xtoq";
3247 break;
3248 default:
3249 gcc_unreachable ();
3250 }
3251 break;
3252
3253 case UNSIGNED_FLOAT:
3254 switch (GET_MODE (operands[1]))
3255 {
3256 case SImode:
3257 func = "_Qp_uitoq";
3258 if (TARGET_ARCH64)
3259 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3260 break;
3261 case DImode:
3262 func = "_Qp_uxtoq";
3263 break;
3264 default:
3265 gcc_unreachable ();
3266 }
3267 break;
3268
3269 case FIX:
3270 switch (GET_MODE (operands[0]))
3271 {
3272 case SImode:
3273 func = "_Qp_qtoi";
3274 break;
3275 case DImode:
3276 func = "_Qp_qtox";
3277 break;
3278 default:
3279 gcc_unreachable ();
3280 }
3281 break;
3282
3283 case UNSIGNED_FIX:
3284 switch (GET_MODE (operands[0]))
3285 {
3286 case SImode:
3287 func = "_Qp_qtoui";
3288 break;
3289 case DImode:
3290 func = "_Qp_qtoux";
3291 break;
3292 default:
3293 gcc_unreachable ();
3294 }
3295 break;
3296
3297 default:
3298 gcc_unreachable ();
3299 }
3300
3301 emit_soft_tfmode_libcall (func, 2, operands);
3302 }
3303
3304 /* Expand a hard-float TFmode operation.  All arguments must be in
3305 registers. */
3306
3307 static void
3308 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3309 {
3310 rtx op, dest;
3311
3312 if (GET_RTX_CLASS (code) == RTX_UNARY)
3313 {
3314 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3315 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3316 }
3317 else
3318 {
3319 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3320 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3321 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3322 operands[1], operands[2]);
3323 }
3324
3325 if (register_operand (operands[0], VOIDmode))
3326 dest = operands[0];
3327 else
3328 dest = gen_reg_rtx (GET_MODE (operands[0]));
3329
3330 emit_insn (gen_rtx_SET (dest, op));
3331
3332 if (dest != operands[0])
3333 emit_move_insn (operands[0], dest);
3334 }
3335
3336 void
3337 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3338 {
3339 if (TARGET_HARD_QUAD)
3340 emit_hard_tfmode_operation (code, operands);
3341 else
3342 emit_soft_tfmode_binop (code, operands);
3343 }
3344
3345 void
3346 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3347 {
3348 if (TARGET_HARD_QUAD)
3349 emit_hard_tfmode_operation (code, operands);
3350 else
3351 emit_soft_tfmode_unop (code, operands);
3352 }
3353
3354 void
3355 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3356 {
3357 if (TARGET_HARD_QUAD)
3358 emit_hard_tfmode_operation (code, operands);
3359 else
3360 emit_soft_tfmode_cvt (code, operands);
3361 }
3362 \f
3363 /* Return nonzero if a branch/jump/call instruction will have a
3364 nop emitted into its delay slot. */
3365
3366 int
3367 empty_delay_slot (rtx_insn *insn)
3368 {
3369 rtx seq;
3370
3371 /* If there is no previous instruction (should not happen), return true. */
3372 if (PREV_INSN (insn) == NULL)
3373 return 1;
3374
3375 seq = NEXT_INSN (PREV_INSN (insn));
3376 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3377 return 0;
3378
3379 return 1;
3380 }
3381
3382 /* Return nonzero if we should emit a nop after a cbcond instruction.
3383 The cbcond instruction does not have a delay slot; however, there is
3384 a severe performance penalty if a control transfer appears right
3385 after a cbcond. Therefore we emit a nop when we detect this
3386 situation. */
3387
3388 int
3389 emit_cbcond_nop (rtx insn)
3390 {
3391 rtx next = next_active_insn (insn);
3392
3393 if (!next)
3394 return 1;
3395
3396 if (NONJUMP_INSN_P (next)
3397 && GET_CODE (PATTERN (next)) == SEQUENCE)
3398 next = XVECEXP (PATTERN (next), 0, 0);
3399 else if (CALL_P (next)
3400 && GET_CODE (PATTERN (next)) == PARALLEL)
3401 {
3402 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3403
3404 if (GET_CODE (delay) == RETURN)
3405 {
3406 /* It's a sibling call. Do not emit the nop if we're going
3407 to emit something other than the jump itself as the first
3408 instruction of the sibcall sequence. */
3409 if (sparc_leaf_function_p || TARGET_FLAT)
3410 return 0;
3411 }
3412 }
3413
3414 if (NONJUMP_INSN_P (next))
3415 return 0;
3416
3417 return 1;
3418 }
3419
3420 /* Return nonzero if TRIAL can go into the call delay slot. */
3421
3422 int
3423 eligible_for_call_delay (rtx_insn *trial)
3424 {
3425 rtx pat;
3426
3427 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3428 return 0;
3429
3430 /* Binutils allows
3431 call __tls_get_addr, %tgd_call (foo)
3432 add %l7, %o0, %o0, %tgd_add (foo)
3433 while Sun as/ld does not. */
3434 if (TARGET_GNU_TLS || !TARGET_TLS)
3435 return 1;
3436
3437 pat = PATTERN (trial);
3438
3439 /* We must reject tgd_add{32|64}, i.e.
3440 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3441 and tldm_add{32|64}, i.e.
3442 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3443 for Sun as/ld. */
3444 if (GET_CODE (pat) == SET
3445 && GET_CODE (SET_SRC (pat)) == PLUS)
3446 {
3447 rtx unspec = XEXP (SET_SRC (pat), 1);
3448
3449 if (GET_CODE (unspec) == UNSPEC
3450 && (XINT (unspec, 1) == UNSPEC_TLSGD
3451 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3452 return 0;
3453 }
3454
3455 return 1;
3456 }
3457
3458 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3459 instruction. RETURN_P is true if the v9 variant 'return' is to be
3460 considered in the test too.
3461
3462 TRIAL must be a SET whose destination is a REG appropriate for the
3463 'restore' instruction or, if RETURN_P is true, for the 'return'
3464 instruction. */
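/* A sketch of what "combined with a restore" means (illustrative
   only): a final insn such as "%i0 = %o1 + 1" can be folded into the
   epilogue as

     ret
     restore %o1, 1, %o0    ! reads %o1 in the old register window,
                            ! writes %o0 (the old %i0) in the new one

   which is why only sources that a restore can compute are accepted
   below.  */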
3465
3466 static int
3467 eligible_for_restore_insn (rtx trial, bool return_p)
3468 {
3469 rtx pat = PATTERN (trial);
3470 rtx src = SET_SRC (pat);
3471 bool src_is_freg = false;
3472 rtx src_reg;
3473
3474 /* Since we now can do moves between float and integer registers when
3475 VIS3 is enabled, we have to catch this case. We can allow such
3476 moves when doing a 'return', however. */
3477 src_reg = src;
3478 if (GET_CODE (src_reg) == SUBREG)
3479 src_reg = SUBREG_REG (src_reg);
3480 if (GET_CODE (src_reg) == REG
3481 && SPARC_FP_REG_P (REGNO (src_reg)))
3482 src_is_freg = true;
3483
3484 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3485 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3486 && arith_operand (src, GET_MODE (src))
3487 && ! src_is_freg)
3488 {
3489 if (TARGET_ARCH64)
3490 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3491 else
3492 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3493 }
3494
3495 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3496 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3497 && arith_double_operand (src, GET_MODE (src))
3498 && ! src_is_freg)
3499 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3500
3501 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3502 else if (! TARGET_FPU && register_operand (src, SFmode))
3503 return 1;
3504
3505 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3506 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3507 return 1;
3508
3509 /* If we have the 'return' instruction, anything that does not use
3510 local or output registers and can go into a delay slot wins. */
3511 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3512 return 1;
3513
3514 /* The 'restore src1,src2,dest' pattern for SImode. */
3515 else if (GET_CODE (src) == PLUS
3516 && register_operand (XEXP (src, 0), SImode)
3517 && arith_operand (XEXP (src, 1), SImode))
3518 return 1;
3519
3520 /* The 'restore src1,src2,dest' pattern for DImode. */
3521 else if (GET_CODE (src) == PLUS
3522 && register_operand (XEXP (src, 0), DImode)
3523 && arith_double_operand (XEXP (src, 1), DImode))
3524 return 1;
3525
3526 /* The 'restore src1,%lo(src2),dest' pattern. */
3527 else if (GET_CODE (src) == LO_SUM
3528 && ! TARGET_CM_MEDMID
3529 && ((register_operand (XEXP (src, 0), SImode)
3530 && immediate_operand (XEXP (src, 1), SImode))
3531 || (TARGET_ARCH64
3532 && register_operand (XEXP (src, 0), DImode)
3533 && immediate_operand (XEXP (src, 1), DImode))))
3534 return 1;
3535
3536 /* The 'restore src,src,dest' pattern. */
3537 else if (GET_CODE (src) == ASHIFT
3538 && (register_operand (XEXP (src, 0), SImode)
3539 || register_operand (XEXP (src, 0), DImode))
3540 && XEXP (src, 1) == const1_rtx)
3541 return 1;
3542
3543 return 0;
3544 }
3545
3546 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3547
3548 int
3549 eligible_for_return_delay (rtx_insn *trial)
3550 {
3551 int regno;
3552 rtx pat;
3553
3554 /* If the function uses __builtin_eh_return, the eh_return machinery
3555 occupies the delay slot. */
3556 if (crtl->calls_eh_return)
3557 return 0;
3558
3559 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3560 return 0;
3561
3562 /* In the case of a leaf or flat function, anything can go into the slot. */
3563 if (sparc_leaf_function_p || TARGET_FLAT)
3564 return 1;
3565
3566 if (!NONJUMP_INSN_P (trial))
3567 return 0;
3568
3569 pat = PATTERN (trial);
3570 if (GET_CODE (pat) == PARALLEL)
3571 {
3572 int i;
3573
3574 if (! TARGET_V9)
3575 return 0;
3576 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3577 {
3578 rtx expr = XVECEXP (pat, 0, i);
3579 if (GET_CODE (expr) != SET)
3580 return 0;
3581 if (GET_CODE (SET_DEST (expr)) != REG)
3582 return 0;
3583 regno = REGNO (SET_DEST (expr));
3584 if (regno >= 8 && regno < 24)
3585 return 0;
3586 }
3587 return !epilogue_renumber (&pat, 1);
3588 }
3589
3590 if (GET_CODE (pat) != SET)
3591 return 0;
3592
3593 if (GET_CODE (SET_DEST (pat)) != REG)
3594 return 0;
3595
3596 regno = REGNO (SET_DEST (pat));
3597
3598 /* Otherwise, only operations which can be done in tandem with
3599 a `restore' or `return' insn can go into the delay slot. */
3600 if (regno >= 8 && regno < 24)
3601 return 0;
3602
3603 /* If this instruction sets up a floating point register and we have a return
3604 instruction, it can probably go in. But restore will not work
3605 with FP_REGS. */
3606 if (! SPARC_INT_REG_P (regno))
3607 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3608
3609 return eligible_for_restore_insn (trial, true);
3610 }
3611
3612 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3613
3614 int
3615 eligible_for_sibcall_delay (rtx_insn *trial)
3616 {
3617 rtx pat;
3618
3619 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3620 return 0;
3621
3622 if (!NONJUMP_INSN_P (trial))
3623 return 0;
3624
3625 pat = PATTERN (trial);
3626
3627 if (sparc_leaf_function_p || TARGET_FLAT)
3628 {
3629 /* If the tail call is done using the call instruction,
3630 we have to restore %o7 in the delay slot. */
3631 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3632 return 0;
3633
3634 /* %g1 is used to build the function address.  */
3635 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3636 return 0;
3637
3638 return 1;
3639 }
3640
3641 if (GET_CODE (pat) != SET)
3642 return 0;
3643
3644 /* Otherwise, only operations which can be done in tandem with
3645 a `restore' insn can go into the delay slot. */
3646 if (GET_CODE (SET_DEST (pat)) != REG
3647 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3648 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3649 return 0;
3650
3651 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3652 in most cases. */
3653 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3654 return 0;
3655
3656 return eligible_for_restore_insn (trial, false);
3657 }
3658 \f
3659 /* Determine if it's legal to put X into the constant pool. This
3660 is not possible if X contains the address of a symbol that is
3661 not constant (TLS) or not known at final link time (PIC). */
3662
3663 static bool
3664 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3665 {
3666 switch (GET_CODE (x))
3667 {
3668 case CONST_INT:
3669 case CONST_WIDE_INT:
3670 case CONST_DOUBLE:
3671 case CONST_VECTOR:
3672 /* Accept all non-symbolic constants. */
3673 return false;
3674
3675 case LABEL_REF:
3676 /* Labels are OK iff we are non-PIC. */
3677 return flag_pic != 0;
3678
3679 case SYMBOL_REF:
3680 /* 'Naked' TLS symbol references are never OK;
3681 non-TLS symbols are OK iff we are non-PIC. */
3682 if (SYMBOL_REF_TLS_MODEL (x))
3683 return true;
3684 else
3685 return flag_pic != 0;
3686
3687 case CONST:
3688 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3689 case PLUS:
3690 case MINUS:
3691 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3692 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3693 case UNSPEC:
3694 return true;
3695 default:
3696 gcc_unreachable ();
3697 }
3698 }
3699 \f
3700 /* Global Offset Table support. */
3701 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3702 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3703
3704 /* Return the SYMBOL_REF for the Global Offset Table. */
3705
3706 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3707
3708 static rtx
3709 sparc_got (void)
3710 {
3711 if (!sparc_got_symbol)
3712 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3713
3714 return sparc_got_symbol;
3715 }
3716
3717 /* Ensure that we are not using patterns that are not OK with PIC. */
3718
3719 int
3720 check_pic (int i)
3721 {
3722 rtx op;
3723
3724 switch (flag_pic)
3725 {
3726 case 1:
3727 op = recog_data.operand[i];
3728 gcc_assert (GET_CODE (op) != SYMBOL_REF
3729 && (GET_CODE (op) != CONST
3730 || (GET_CODE (XEXP (op, 0)) == MINUS
3731 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3732 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
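/* An operand that passes the assertion is fine, so fall through and
   accept it just like the flag_pic == 2 case. */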
3733 case 2:
3734 default:
3735 return 1;
3736 }
3737 }
3738
3739 /* Return true if X is an address which needs a temporary register when
3740 reloaded while generating PIC code. */
3741
3742 int
3743 pic_address_needs_scratch (rtx x)
3744 {
3745 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3746 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3748 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3749 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3750 return 1;
3751
3752 return 0;
3753 }
3754
3755 /* Determine if a given RTX is a valid constant. We already know this
3756 satisfies CONSTANT_P. */
3757
3758 static bool
3759 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3760 {
3761 switch (GET_CODE (x))
3762 {
3763 case CONST:
3764 case SYMBOL_REF:
3765 if (sparc_tls_referenced_p (x))
3766 return false;
3767 break;
3768
3769 case CONST_DOUBLE:
3770 /* Floating point constants are generally not ok.
3771 The only exception is 0.0 and all-ones in VIS. */
3772 if (TARGET_VIS
3773 && SCALAR_FLOAT_MODE_P (mode)
3774 && (const_zero_operand (x, mode)
3775 || const_all_ones_operand (x, mode)))
3776 return true;
3777
3778 return false;
3779
3780 case CONST_VECTOR:
3781 /* Vector constants are generally not ok.
3782 The only exception is 0 or -1 in VIS. */
3783 if (TARGET_VIS
3784 && (const_zero_operand (x, mode)
3785 || const_all_ones_operand (x, mode)))
3786 return true;
3787
3788 return false;
3789
3790 default:
3791 break;
3792 }
3793
3794 return true;
3795 }
3796
3797 /* Determine if a given RTX is a valid constant address. */
3798
3799 bool
3800 constant_address_p (rtx x)
3801 {
3802 switch (GET_CODE (x))
3803 {
3804 case LABEL_REF:
3805 case CONST_INT:
3806 case HIGH:
3807 return true;
3808
3809 case CONST:
3810 if (flag_pic && pic_address_needs_scratch (x))
3811 return false;
3812 return sparc_legitimate_constant_p (Pmode, x);
3813
3814 case SYMBOL_REF:
3815 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3816
3817 default:
3818 return false;
3819 }
3820 }
3821
3822 /* Nonzero if the constant value X is a legitimate general operand
3823 when generating PIC code. It is given that flag_pic is on and
3824 that X satisfies CONSTANT_P. */
3825
3826 bool
3827 legitimate_pic_operand_p (rtx x)
3828 {
3829 if (pic_address_needs_scratch (x))
3830 return false;
3831 if (sparc_tls_referenced_p (x))
3832 return false;
3833 return true;
3834 }
3835
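/* Displacements must fit in the 13-bit signed immediate field of SPARC
   loads and stores, i.e. [-0x1000, 0xfff]; the upper bounds below also
   leave room for the highest byte accessed in MODE.  For OLO10, the %lo()
   part can contribute up to 0x3ff, hence the smaller 0xc00 bound. */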
3836 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3837 (CONST_INT_P (X) \
3838 && INTVAL (X) >= -0x1000 \
3839 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3840
3841 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3842 (CONST_INT_P (X) \
3843 && INTVAL (X) >= -0x1000 \
3844 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
3845
3846 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3847
3848 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3849 ordinarily. This changes a bit when generating PIC. */
3850
3851 static bool
3852 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3853 {
3854 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3855
3856 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3857 rs1 = addr;
3858 else if (GET_CODE (addr) == PLUS)
3859 {
3860 rs1 = XEXP (addr, 0);
3861 rs2 = XEXP (addr, 1);
3862
3863 /* Canonicalize: a REG comes first; if there are no REGs,
3864 a LO_SUM comes first. */
3865 if (!REG_P (rs1)
3866 && GET_CODE (rs1) != SUBREG
3867 && (REG_P (rs2)
3868 || GET_CODE (rs2) == SUBREG
3869 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3870 {
3871 rs1 = XEXP (addr, 1);
3872 rs2 = XEXP (addr, 0);
3873 }
3874
3875 if ((flag_pic == 1
3876 && rs1 == pic_offset_table_rtx
3877 && !REG_P (rs2)
3878 && GET_CODE (rs2) != SUBREG
3879 && GET_CODE (rs2) != LO_SUM
3880 && GET_CODE (rs2) != MEM
3881 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3882 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3883 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3884 || ((REG_P (rs1)
3885 || GET_CODE (rs1) == SUBREG)
3886 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3887 {
3888 imm1 = rs2;
3889 rs2 = NULL;
3890 }
3891 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3892 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3893 {
3894 /* We prohibit REG + REG for TFmode when there are no quad move insns
3895 and we consequently need to split. We do this because REG+REG
3896 is not an offsettable address. If we get the situation in reload
3897 where source and destination of a movtf pattern are both MEMs with
3898 REG+REG address, then only one of them gets converted to an
3899 offsettable address. */
3900 if (mode == TFmode
3901 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3902 return 0;
3903
3904 /* Likewise for TImode, but in all cases. */
3905 if (mode == TImode)
3906 return 0;
3907
3908 /* We prohibit REG + REG on ARCH32 if not optimizing for
3909 DFmode/DImode because then mem_min_alignment is likely to be zero
3910 after reload and the forced split would lack a matching splitter
3911 pattern. */
3912 if (TARGET_ARCH32 && !optimize
3913 && (mode == DFmode || mode == DImode))
3914 return 0;
3915 }
3916 else if (USE_AS_OFFSETABLE_LO10
3917 && GET_CODE (rs1) == LO_SUM
3918 && TARGET_ARCH64
3919 && ! TARGET_CM_MEDMID
3920 && RTX_OK_FOR_OLO10_P (rs2, mode))
3921 {
3922 rs2 = NULL;
3923 imm1 = XEXP (rs1, 1);
3924 rs1 = XEXP (rs1, 0);
3925 if (!CONSTANT_P (imm1)
3926 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3927 return 0;
3928 }
3929 }
3930 else if (GET_CODE (addr) == LO_SUM)
3931 {
3932 rs1 = XEXP (addr, 0);
3933 imm1 = XEXP (addr, 1);
3934
3935 if (!CONSTANT_P (imm1)
3936 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3937 return 0;
3938
3939 /* We can't allow TFmode in 32-bit mode, because an offset greater
3940 than the alignment (8) may cause the LO_SUM to overflow. */
3941 if (mode == TFmode && TARGET_ARCH32)
3942 return 0;
3943 }
3944 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3945 return 1;
3946 else
3947 return 0;
3948
3949 if (GET_CODE (rs1) == SUBREG)
3950 rs1 = SUBREG_REG (rs1);
3951 if (!REG_P (rs1))
3952 return 0;
3953
3954 if (rs2)
3955 {
3956 if (GET_CODE (rs2) == SUBREG)
3957 rs2 = SUBREG_REG (rs2);
3958 if (!REG_P (rs2))
3959 return 0;
3960 }
3961
3962 if (strict)
3963 {
3964 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3965 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3966 return 0;
3967 }
3968 else
3969 {
3970 if ((! SPARC_INT_REG_P (REGNO (rs1))
3971 && REGNO (rs1) != FRAME_POINTER_REGNUM
3972 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3973 || (rs2
3974 && (! SPARC_INT_REG_P (REGNO (rs2))
3975 && REGNO (rs2) != FRAME_POINTER_REGNUM
3976 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3977 return 0;
3978 }
3979 return 1;
3980 }
3981
3982 /* Return the SYMBOL_REF for the tls_get_addr function. */
3983
3984 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3985
3986 static rtx
3987 sparc_tls_get_addr (void)
3988 {
3989 if (!sparc_tls_symbol)
3990 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3991
3992 return sparc_tls_symbol;
3993 }
3994
3995 /* Return the Global Offset Table to be used in TLS mode. */
3996
3997 static rtx
3998 sparc_tls_got (void)
3999 {
4000 /* In PIC mode, this is just the PIC offset table. */
4001 if (flag_pic)
4002 {
4003 crtl->uses_pic_offset_table = 1;
4004 return pic_offset_table_rtx;
4005 }
4006
4007 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4008 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4009 if (TARGET_SUN_TLS && TARGET_ARCH32)
4010 {
4011 load_got_register ();
4012 return global_offset_table_rtx;
4013 }
4014
4015 /* In all other cases, we load a new pseudo with the GOT symbol. */
4016 return copy_to_reg (sparc_got ());
4017 }
4018
4019 /* Return true if X contains a thread-local symbol. */
4020
4021 static bool
4022 sparc_tls_referenced_p (rtx x)
4023 {
4024 if (!TARGET_HAVE_TLS)
4025 return false;
4026
4027 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4028 x = XEXP (XEXP (x, 0), 0);
4029
4030 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4031 return true;
4032
4033 /* That's all we handle in sparc_legitimize_tls_address for now. */
4034 return false;
4035 }
4036
4037 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4038 this (thread-local) address. */
4039
4040 static rtx
4041 sparc_legitimize_tls_address (rtx addr)
4042 {
4043 rtx temp1, temp2, temp3, ret, o0, got;
4044 rtx_insn *insn;
4045
4046 gcc_assert (can_create_pseudo_p ());
4047
4048 if (GET_CODE (addr) == SYMBOL_REF)
4049 switch (SYMBOL_REF_TLS_MODEL (addr))
4050 {
4051 case TLS_MODEL_GLOBAL_DYNAMIC:
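/* Global dynamic: compute the address of the symbol's GOT slot and call
   __tls_get_addr on it.  Roughly, per the SPARC TLS ABI (32-bit case;
   temp1/temp2 are the temporaries allocated below):
     sethi %tgd_hi22(addr), temp1
     add   temp1, %tgd_lo10(addr), temp2
     add   got, temp2, %o0, %tgd_add(addr)
     call  __tls_get_addr, %tgd_call(addr)  */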
4052 start_sequence ();
4053 temp1 = gen_reg_rtx (SImode);
4054 temp2 = gen_reg_rtx (SImode);
4055 ret = gen_reg_rtx (Pmode);
4056 o0 = gen_rtx_REG (Pmode, 8);
4057 got = sparc_tls_got ();
4058 emit_insn (gen_tgd_hi22 (temp1, addr));
4059 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4060 if (TARGET_ARCH32)
4061 {
4062 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4063 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4064 addr, const1_rtx));
4065 }
4066 else
4067 {
4068 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4069 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4070 addr, const1_rtx));
4071 }
4072 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4073 insn = get_insns ();
4074 end_sequence ();
4075 emit_libcall_block (insn, ret, o0, addr);
4076 break;
4077
4078 case TLS_MODEL_LOCAL_DYNAMIC:
4079 start_sequence ();
4080 temp1 = gen_reg_rtx (SImode);
4081 temp2 = gen_reg_rtx (SImode);
4082 temp3 = gen_reg_rtx (Pmode);
4083 ret = gen_reg_rtx (Pmode);
4084 o0 = gen_rtx_REG (Pmode, 8);
4085 got = sparc_tls_got ();
4086 emit_insn (gen_tldm_hi22 (temp1));
4087 emit_insn (gen_tldm_lo10 (temp2, temp1));
4088 if (TARGET_ARCH32)
4089 {
4090 emit_insn (gen_tldm_add32 (o0, got, temp2));
4091 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4092 const1_rtx));
4093 }
4094 else
4095 {
4096 emit_insn (gen_tldm_add64 (o0, got, temp2));
4097 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4098 const1_rtx));
4099 }
4100 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4101 insn = get_insns ();
4102 end_sequence ();
4103 emit_libcall_block (insn, temp3, o0,
4104 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4105 UNSPEC_TLSLD_BASE));
4106 temp1 = gen_reg_rtx (SImode);
4107 temp2 = gen_reg_rtx (SImode);
4108 emit_insn (gen_tldo_hix22 (temp1, addr));
4109 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4110 if (TARGET_ARCH32)
4111 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4112 else
4113 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4114 break;
4115
4116 case TLS_MODEL_INITIAL_EXEC:
4117 temp1 = gen_reg_rtx (SImode);
4118 temp2 = gen_reg_rtx (SImode);
4119 temp3 = gen_reg_rtx (Pmode);
4120 got = sparc_tls_got ();
4121 emit_insn (gen_tie_hi22 (temp1, addr));
4122 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4123 if (TARGET_ARCH32)
4124 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4125 else
4126 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4127 if (TARGET_SUN_TLS)
4128 {
4129 ret = gen_reg_rtx (Pmode);
4130 if (TARGET_ARCH32)
4131 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4132 temp3, addr));
4133 else
4134 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4135 temp3, addr));
4136 }
4137 else
4138 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4139 break;
4140
4141 case TLS_MODEL_LOCAL_EXEC:
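/* Local exec: the variable lives at a link-time constant offset from the
   thread pointer %g7, so no GOT access or call is needed.  Roughly, per
   the SPARC TLS ABI: sethi %tle_hix22(addr), temp1; xor temp1,
   %tle_lox10(addr), temp2; and the final address is %g7 + temp2. */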
4142 temp1 = gen_reg_rtx (Pmode);
4143 temp2 = gen_reg_rtx (Pmode);
4144 if (TARGET_ARCH32)
4145 {
4146 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4147 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4148 }
4149 else
4150 {
4151 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4152 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4153 }
4154 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4155 break;
4156
4157 default:
4158 gcc_unreachable ();
4159 }
4160
4161 else if (GET_CODE (addr) == CONST)
4162 {
4163 rtx base, offset;
4164
4165 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4166
4167 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4168 offset = XEXP (XEXP (addr, 0), 1);
4169
4170 base = force_operand (base, NULL_RTX);
4171 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4172 offset = force_reg (Pmode, offset);
4173 ret = gen_rtx_PLUS (Pmode, base, offset);
4174 }
4175
4176 else
4177 gcc_unreachable (); /* for now ... */
4178
4179 return ret;
4180 }
4181
4182 /* Legitimize PIC addresses. If the address is already position-independent,
4183 we return ORIG. Newly generated position-independent addresses go into a
4184 reg. This is REG if nonzero, otherwise we allocate register(s) as
4185 necessary. */
4186
4187 static rtx
4188 sparc_legitimize_pic_address (rtx orig, rtx reg)
4189 {
4190 bool gotdata_op = false;
4191
4192 if (GET_CODE (orig) == SYMBOL_REF
4193 /* See the comment in sparc_expand_move. */
4194 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4195 {
4196 rtx pic_ref, address;
4197 rtx_insn *insn;
4198
4199 if (reg == 0)
4200 {
4201 gcc_assert (can_create_pseudo_p ());
4202 reg = gen_reg_rtx (Pmode);
4203 }
4204
4205 if (flag_pic == 2)
4206 {
4207 /* If not during reload, allocate another temp reg here for loading
4208 in the address, so that these instructions can be optimized
4209 properly. */
4210 rtx temp_reg = (! can_create_pseudo_p ()
4211 ? reg : gen_reg_rtx (Pmode));
4212
4213 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4214 won't get confused into thinking that these two instructions
4215 are loading in the true address of the symbol. If in the
4216 future a PIC rtx exists, that should be used instead. */
4217 if (TARGET_ARCH64)
4218 {
4219 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4220 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4221 }
4222 else
4223 {
4224 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4225 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4226 }
4227 address = temp_reg;
4228 gotdata_op = true;
4229 }
4230 else
4231 address = orig;
4232
4233 crtl->uses_pic_offset_table = 1;
4234 if (gotdata_op)
4235 {
4236 if (TARGET_ARCH64)
4237 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4238 pic_offset_table_rtx,
4239 address, orig));
4240 else
4241 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4242 pic_offset_table_rtx,
4243 address, orig));
4244 }
4245 else
4246 {
4247 pic_ref
4248 = gen_const_mem (Pmode,
4249 gen_rtx_PLUS (Pmode,
4250 pic_offset_table_rtx, address));
4251 insn = emit_move_insn (reg, pic_ref);
4252 }
4253
4254 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4255 by the loop optimizer. */
4256 set_unique_reg_note (insn, REG_EQUAL, orig);
4257 return reg;
4258 }
4259 else if (GET_CODE (orig) == CONST)
4260 {
4261 rtx base, offset;
4262
4263 if (GET_CODE (XEXP (orig, 0)) == PLUS
4264 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4265 return orig;
4266
4267 if (reg == 0)
4268 {
4269 gcc_assert (can_create_pseudo_p ());
4270 reg = gen_reg_rtx (Pmode);
4271 }
4272
4273 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4274 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4275 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4276 base == reg ? NULL_RTX : reg);
4277
4278 if (GET_CODE (offset) == CONST_INT)
4279 {
4280 if (SMALL_INT (offset))
4281 return plus_constant (Pmode, base, INTVAL (offset));
4282 else if (can_create_pseudo_p ())
4283 offset = force_reg (Pmode, offset);
4284 else
4285 /* If we reach here, then something is seriously wrong. */
4286 gcc_unreachable ();
4287 }
4288 return gen_rtx_PLUS (Pmode, base, offset);
4289 }
4290 else if (GET_CODE (orig) == LABEL_REF)
4291 /* ??? We ought to be checking that the register is live instead, in case
4292 it is eliminated. */
4293 crtl->uses_pic_offset_table = 1;
4294
4295 return orig;
4296 }
4297
4298 /* Try machine-dependent ways of modifying an illegitimate address X
4299 to be legitimate. If we find one, return the new, valid address.
4300
4301 OLDX is the address as it was before break_out_memory_refs was called.
4302 In some cases it is useful to look at this to decide what needs to be done.
4303
4304 MODE is the mode of the operand pointed to by X.
4305
4306 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4307
4308 static rtx
4309 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4310 machine_mode mode)
4311 {
4312 rtx orig_x = x;
4313
4314 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4315 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4316 force_operand (XEXP (x, 0), NULL_RTX));
4317 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4318 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4319 force_operand (XEXP (x, 1), NULL_RTX));
4320 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4321 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4322 XEXP (x, 1));
4323 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4324 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4325 force_operand (XEXP (x, 1), NULL_RTX));
4326
4327 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4328 return x;
4329
4330 if (sparc_tls_referenced_p (x))
4331 x = sparc_legitimize_tls_address (x);
4332 else if (flag_pic)
4333 x = sparc_legitimize_pic_address (x, NULL_RTX);
4334 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4335 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4336 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4337 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4338 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4339 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4340 else if (GET_CODE (x) == SYMBOL_REF
4341 || GET_CODE (x) == CONST
4342 || GET_CODE (x) == LABEL_REF)
4343 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4344
4345 return x;
4346 }
4347
4348 /* Delegitimize an address that was legitimized by the above function. */
4349
4350 static rtx
4351 sparc_delegitimize_address (rtx x)
4352 {
4353 x = delegitimize_mem_from_attrs (x);
4354
4355 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4356 switch (XINT (XEXP (x, 1), 1))
4357 {
4358 case UNSPEC_MOVE_PIC:
4359 case UNSPEC_TLSLE:
4360 x = XVECEXP (XEXP (x, 1), 0, 0);
4361 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4362 break;
4363 default:
4364 break;
4365 }
4366
4367 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4368 if (GET_CODE (x) == MINUS
4369 && REG_P (XEXP (x, 0))
4370 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4371 && GET_CODE (XEXP (x, 1)) == LO_SUM
4372 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4373 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4374 {
4375 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4376 gcc_assert (GET_CODE (x) == LABEL_REF);
4377 }
4378
4379 return x;
4380 }
4381
4382 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4383 replace the input X, or the original X if no replacement is called for.
4384 The output parameter *WIN is 1 if the calling macro should goto WIN,
4385 0 if it should not.
4386
4387 For SPARC, we wish to handle addresses by splitting them into
4388 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4389 This cuts the number of extra insns by one.
4390
4391 Do nothing when generating PIC code and the address is a symbolic
4392 operand or requires a scratch register. */
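/* For instance, a non-PIC SImode constant address `foo' is rewritten below
   as (lo_sum (high foo) foo): the HIGH part is reloaded into a base
   register with a sethi %hi(foo) insn and the memory reference keeps the
   %lo(foo) displacement. */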
4393
4394 rtx
4395 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4396 int opnum, int type,
4397 int ind_levels ATTRIBUTE_UNUSED, int *win)
4398 {
4399 /* Decompose SImode constants into HIGH+LO_SUM. */
4400 if (CONSTANT_P (x)
4401 && (mode != TFmode || TARGET_ARCH64)
4402 && GET_MODE (x) == SImode
4403 && GET_CODE (x) != LO_SUM
4404 && GET_CODE (x) != HIGH
4405 && sparc_cmodel <= CM_MEDLOW
4406 && !(flag_pic
4407 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4408 {
4409 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4410 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4411 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4412 opnum, (enum reload_type)type);
4413 *win = 1;
4414 return x;
4415 }
4416
4417 /* We have to recognize what we have already generated above. */
4418 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4419 {
4420 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4421 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4422 opnum, (enum reload_type)type);
4423 *win = 1;
4424 return x;
4425 }
4426
4427 *win = 0;
4428 return x;
4429 }
4430
4431 /* Return true if ADDR (a legitimate address expression)
4432 has an effect that depends on the machine mode it is used for.
4433
4434 In PIC mode,
4435
4436 (mem:HI [%l7+a])
4437
4438 is not equivalent to
4439
4440 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4441
4442 because [%l7+a+1] is interpreted as the address of (a+1). */
4443
4444
4445 static bool
4446 sparc_mode_dependent_address_p (const_rtx addr,
4447 addr_space_t as ATTRIBUTE_UNUSED)
4448 {
4449 if (flag_pic && GET_CODE (addr) == PLUS)
4450 {
4451 rtx op0 = XEXP (addr, 0);
4452 rtx op1 = XEXP (addr, 1);
4453 if (op0 == pic_offset_table_rtx
4454 && symbolic_operand (op1, VOIDmode))
4455 return true;
4456 }
4457
4458 return false;
4459 }
4460
4461 #ifdef HAVE_GAS_HIDDEN
4462 # define USE_HIDDEN_LINKONCE 1
4463 #else
4464 # define USE_HIDDEN_LINKONCE 0
4465 #endif
4466
4467 static void
4468 get_pc_thunk_name (char name[32], unsigned int regno)
4469 {
4470 const char *reg_name = reg_names[regno];
4471
4472 /* Skip the leading '%' as that cannot be used in a
4473 symbol name. */
4474 reg_name += 1;
4475
4476 if (USE_HIDDEN_LINKONCE)
4477 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4478 else
4479 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4480 }
4481
4482 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4483
4484 static rtx
4485 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4486 {
4487 int orig_flag_pic = flag_pic;
4488 rtx insn;
4489
4490 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4491 flag_pic = 0;
4492 if (TARGET_ARCH64)
4493 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4494 else
4495 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4496 flag_pic = orig_flag_pic;
4497
4498 return insn;
4499 }
4500
4501 /* Emit code to load the GOT register. */
4502
4503 void
4504 load_got_register (void)
4505 {
4506 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4507 if (!global_offset_table_rtx)
4508 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4509
4510 if (TARGET_VXWORKS_RTP)
4511 emit_insn (gen_vxworks_load_got ());
4512 else
4513 {
4514 /* The GOT symbol is subject to a PC-relative relocation so we need a
4515 helper function to add the PC value and thus get the final value. */
4516 if (!got_helper_rtx)
4517 {
4518 char name[32];
4519 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4520 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4521 }
4522
4523 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4524 got_helper_rtx,
4525 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4526 }
4527
4528 /* Need to emit this whether or not we obey regdecls,
4529 since setjmp/longjmp can corrupt the liveness info.
4530 ??? In the case where we don't obey regdecls, this is not sufficient
4531 since we may not fall out the bottom. */
4532 emit_use (global_offset_table_rtx);
4533 }
4534
4535 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4536 address of the call target. */
4537
4538 void
4539 sparc_emit_call_insn (rtx pat, rtx addr)
4540 {
4541 rtx_insn *insn;
4542
4543 insn = emit_call_insn (pat);
4544
4545 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4546 if (TARGET_VXWORKS_RTP
4547 && flag_pic
4548 && GET_CODE (addr) == SYMBOL_REF
4549 && (SYMBOL_REF_DECL (addr)
4550 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4551 : !SYMBOL_REF_LOCAL_P (addr)))
4552 {
4553 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4554 crtl->uses_pic_offset_table = 1;
4555 }
4556 }
4557 \f
4558 /* Return 1 if RTX is a MEM which is known to be aligned to at
4559 least a DESIRED byte boundary. */
4560
4561 int
4562 mem_min_alignment (rtx mem, int desired)
4563 {
4564 rtx addr, base, offset;
4565
4566 /* If it's not a MEM we can't accept it. */
4567 if (GET_CODE (mem) != MEM)
4568 return 0;
4569
4570 /* Obviously... */
4571 if (!TARGET_UNALIGNED_DOUBLES
4572 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4573 return 1;
4574
4575 /* ??? The rest of the function predates MEM_ALIGN so
4576 there is probably a bit of redundancy. */
4577 addr = XEXP (mem, 0);
4578 base = offset = NULL_RTX;
4579 if (GET_CODE (addr) == PLUS)
4580 {
4581 if (GET_CODE (XEXP (addr, 0)) == REG)
4582 {
4583 base = XEXP (addr, 0);
4584
4585 /* The assumption here is that if the base
4586 REG is properly aligned, the compiler will make
4587 sure any REG-based index computed from it is
4588 aligned as well. */
4589 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4590 offset = XEXP (addr, 1);
4591 else
4592 offset = const0_rtx;
4593 }
4594 }
4595 else if (GET_CODE (addr) == REG)
4596 {
4597 base = addr;
4598 offset = const0_rtx;
4599 }
4600
4601 if (base != NULL_RTX)
4602 {
4603 int regno = REGNO (base);
4604
4605 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4606 {
4607 /* Check if the compiler has recorded some information
4608 about the alignment of the base REG. If reload has
4609 completed, we already matched with proper alignments.
4610 If not running global_alloc, reload might still give us an
4611 unaligned pointer to the local stack, though. */
4612 if (((cfun != 0
4613 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4614 || (optimize && reload_completed))
4615 && (INTVAL (offset) & (desired - 1)) == 0)
4616 return 1;
4617 }
4618 else
4619 {
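/* %sp and %fp are biased by SPARC_STACK_BIAS (2047 in 64-bit mode, 0
   otherwise), so remove the bias before testing the alignment. */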
4620 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4621 return 1;
4622 }
4623 }
4624 else if (! TARGET_UNALIGNED_DOUBLES
4625 || CONSTANT_P (addr)
4626 || GET_CODE (addr) == LO_SUM)
4627 {
4628 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4629 is true, in which case we can only assume that an access is aligned if
4630 it is to a constant address, or the address involves a LO_SUM. */
4631 return 1;
4632 }
4633
4634 /* An obviously unaligned address. */
4635 return 0;
4636 }
4637
4638 \f
4639 /* Vectors to keep interesting information about registers where it can
4640 easily be retrieved. We used to use the actual mode value as the bit
4641 number, but there are more than 32 modes now. Instead we use two tables:
4642 one indexed by hard register number, and one indexed by mode. */
4643
4644 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4645 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4646 mapped into one sparc_mode_class mode. */
4647
4648 enum sparc_mode_class {
4649 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4650 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4651 CC_MODE, CCFP_MODE
4652 };
4653
4654 /* Modes for single-word and smaller quantities. */
4655 #define S_MODES \
4656 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4657
4658 /* Modes for double-word and smaller quantities. */
4659 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4660
4661 /* Modes for quad-word and smaller quantities. */
4662 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4663
4664 /* Modes for 8-word and smaller quantities. */
4665 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4666
4667 /* Modes for single-float quantities. */
4668 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4669
4670 /* Modes for double-float and smaller quantities. */
4671 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4672
4673 /* Modes for quad-float and smaller quantities. */
4674 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4675
4676 /* Modes for quad-float pairs and smaller quantities. */
4677 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4678
4679 /* Modes for double-float only quantities. */
4680 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4681
4682 /* Modes for quad-float and double-float only quantities. */
4683 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4684
4685 /* Modes for quad-float pairs and double-float only quantities. */
4686 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4687
4688 /* Modes for condition codes. */
4689 #define CC_MODES (1 << (int) CC_MODE)
4690 #define CCFP_MODES (1 << (int) CCFP_MODE)
4691
4692 /* Value is 1 if register/mode pair is acceptable on sparc.
4693
4694 The funny mixture of D and T modes is because integer operations
4695 do not specially operate on tetra quantities, so non-quad-aligned
4696 registers can hold quadword quantities (except %o4 and %i4 because
4697 they cross fixed registers).
4698
4699 ??? Note that, despite the settings, non-double-aligned parameter
4700 registers can hold double-word quantities in 32-bit mode. */
4701
4702 /* This points to either the 32-bit or the 64-bit version. */
4703 const int *hard_regno_mode_classes;
4704
4705 static const int hard_32bit_mode_classes[] = {
4706 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4707 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4708 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4709 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4710
4711 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4712 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4713 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4714 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4715
4716 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4717 and none can hold SFmode/SImode values. */
4718 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4719 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4720 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4721 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4722
4723 /* %fcc[0123] */
4724 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4725
4726 /* %icc, %sfp, %gsr */
4727 CC_MODES, 0, D_MODES
4728 };
4729
4730 static const int hard_64bit_mode_classes[] = {
4731 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4732 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4733 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4734 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4735
4736 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4737 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4738 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4739 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4740
4741 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4742 and none can hold SFmode/SImode values. */
4743 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4744 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4745 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4746 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4747
4748 /* %fcc[0123] */
4749 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4750
4751 /* %icc, %sfp, %gsr */
4752 CC_MODES, 0, D_MODES
4753 };
4754
4755 int sparc_mode_class [NUM_MACHINE_MODES];
4756
4757 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4758
4759 static void
4760 sparc_init_modes (void)
4761 {
4762 int i;
4763
4764 for (i = 0; i < NUM_MACHINE_MODES; i++)
4765 {
4766 machine_mode m = (machine_mode) i;
4767 unsigned int size = GET_MODE_SIZE (m);
4768
4769 switch (GET_MODE_CLASS (m))
4770 {
4771 case MODE_INT:
4772 case MODE_PARTIAL_INT:
4773 case MODE_COMPLEX_INT:
4774 if (size < 4)
4775 sparc_mode_class[i] = 1 << (int) H_MODE;
4776 else if (size == 4)
4777 sparc_mode_class[i] = 1 << (int) S_MODE;
4778 else if (size == 8)
4779 sparc_mode_class[i] = 1 << (int) D_MODE;
4780 else if (size == 16)
4781 sparc_mode_class[i] = 1 << (int) T_MODE;
4782 else if (size == 32)
4783 sparc_mode_class[i] = 1 << (int) O_MODE;
4784 else
4785 sparc_mode_class[i] = 0;
4786 break;
4787 case MODE_VECTOR_INT:
4788 if (size == 4)
4789 sparc_mode_class[i] = 1 << (int) SF_MODE;
4790 else if (size == 8)
4791 sparc_mode_class[i] = 1 << (int) DF_MODE;
4792 else
4793 sparc_mode_class[i] = 0;
4794 break;
4795 case MODE_FLOAT:
4796 case MODE_COMPLEX_FLOAT:
4797 if (size == 4)
4798 sparc_mode_class[i] = 1 << (int) SF_MODE;
4799 else if (size == 8)
4800 sparc_mode_class[i] = 1 << (int) DF_MODE;
4801 else if (size == 16)
4802 sparc_mode_class[i] = 1 << (int) TF_MODE;
4803 else if (size == 32)
4804 sparc_mode_class[i] = 1 << (int) OF_MODE;
4805 else
4806 sparc_mode_class[i] = 0;
4807 break;
4808 case MODE_CC:
4809 if (m == CCFPmode || m == CCFPEmode)
4810 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4811 else
4812 sparc_mode_class[i] = 1 << (int) CC_MODE;
4813 break;
4814 default:
4815 sparc_mode_class[i] = 0;
4816 break;
4817 }
4818 }
4819
4820 if (TARGET_ARCH64)
4821 hard_regno_mode_classes = hard_64bit_mode_classes;
4822 else
4823 hard_regno_mode_classes = hard_32bit_mode_classes;
4824
4825 /* Initialize the array used by REGNO_REG_CLASS. */
4826 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4827 {
4828 if (i < 16 && TARGET_V8PLUS)
4829 sparc_regno_reg_class[i] = I64_REGS;
4830 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4831 sparc_regno_reg_class[i] = GENERAL_REGS;
4832 else if (i < 64)
4833 sparc_regno_reg_class[i] = FP_REGS;
4834 else if (i < 96)
4835 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4836 else if (i < 100)
4837 sparc_regno_reg_class[i] = FPCC_REGS;
4838 else
4839 sparc_regno_reg_class[i] = NO_REGS;
4840 }
4841 }
4842 \f
4843 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4844
4845 static inline bool
4846 save_global_or_fp_reg_p (unsigned int regno,
4847 int leaf_function ATTRIBUTE_UNUSED)
4848 {
4849 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4850 }
4851
4852 /* Return whether the return address register (%i7) is needed. */
4853
4854 static inline bool
4855 return_addr_reg_needed_p (int leaf_function)
4856 {
4857 /* If it is live, for example because of __builtin_return_address (0). */
4858 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4859 return true;
4860
4861 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4862 if (!leaf_function
4863 /* Loading the GOT register clobbers %o7. */
4864 || crtl->uses_pic_offset_table
4865 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4866 return true;
4867
4868 return false;
4869 }
4870
4871 /* Return whether REGNO, a local or in register, must be saved/restored. */
4872
4873 static bool
4874 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4875 {
4876 /* General case: call-saved registers live at some point. */
4877 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4878 return true;
4879
4880 /* Frame pointer register (%fp) if needed. */
4881 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4882 return true;
4883
4884 /* Return address register (%i7) if needed. */
4885 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4886 return true;
4887
4888 /* GOT register (%l7) if needed. */
4889 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4890 return true;
4891
4892 /* If the function accesses prior frames, the frame pointer and the return
4893 address of the previous frame must be saved on the stack. */
4894 if (crtl->accesses_prior_frames
4895 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4896 return true;
4897
4898 return false;
4899 }
4900
4901 /* Compute the frame size required by the function. This function is called
4902 during the reload pass and also by sparc_expand_prologue. */
4903
4904 HOST_WIDE_INT
4905 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4906 {
4907 HOST_WIDE_INT frame_size, apparent_frame_size;
4908 int args_size, n_global_fp_regs = 0;
4909 bool save_local_in_regs_p = false;
4910 unsigned int i;
4911
4912 /* If the function allocates dynamic stack space, the dynamic offset is
4913 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4914 if (leaf_function && !cfun->calls_alloca)
4915 args_size = 0;
4916 else
4917 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4918
4919 /* Calculate space needed for global registers. */
4920 if (TARGET_ARCH64)
4921 { for (i = 0; i < 8; i++)
4922 if (save_global_or_fp_reg_p (i, 0))
4923 n_global_fp_regs += 2; }
4924 else
4925 for (i = 0; i < 8; i += 2)
4926 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4927 n_global_fp_regs += 2;
4928
4929 /* In the flat window model, find out which local and in registers need to
4930 be saved. We don't reserve space in the current frame for them as they
4931 will be spilled into the register window save area of the caller's frame.
4932 However, as soon as we use this register window save area, we must create
4933 that of the current frame to make it the live one. */
4934 if (TARGET_FLAT)
4935 for (i = 16; i < 32; i++)
4936 if (save_local_or_in_reg_p (i, leaf_function))
4937 {
4938 save_local_in_regs_p = true;
4939 break;
4940 }
4941
4942 /* Calculate space needed for FP registers. */
4943 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4944 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4945 n_global_fp_regs += 2;
4946
4947 if (size == 0
4948 && n_global_fp_regs == 0
4949 && args_size == 0
4950 && !save_local_in_regs_p)
4951 frame_size = apparent_frame_size = 0;
4952 else
4953 {
4954 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4955 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
4956 apparent_frame_size += n_global_fp_regs * 4;
4957
4958 /* We need to add the size of the outgoing argument area. */
4959 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
4960
4961 /* And that of the register window save area. */
4962 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4963
4964 /* Finally, bump to the appropriate alignment. */
4965 frame_size = SPARC_STACK_ALIGN (frame_size);
4966 }
4967
4968 /* Set up values for use in prologue and epilogue. */
4969 sparc_frame_size = frame_size;
4970 sparc_apparent_frame_size = apparent_frame_size;
4971 sparc_n_global_fp_regs = n_global_fp_regs;
4972 sparc_save_local_in_regs_p = save_local_in_regs_p;
4973
4974 return frame_size;
4975 }
4976
4977 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4978
4979 int
4980 sparc_initial_elimination_offset (int to)
4981 {
4982 int offset;
4983
4984 if (to == STACK_POINTER_REGNUM)
4985 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4986 else
4987 offset = 0;
4988
4989 offset += SPARC_STACK_BIAS;
4990 return offset;
4991 }
4992
4993 /* Output any necessary .register pseudo-ops. */
4994
4995 void
4996 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4997 {
4998 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4999 int i;
5000
5001 if (TARGET_ARCH32)
5002 return;
5003
5004 /* Check if %g[2367] were used without
5005 .register being printed for them already. */
5006 for (i = 2; i < 8; i++)
5007 {
5008 if (df_regs_ever_live_p (i)
5009 && ! sparc_hard_reg_printed [i])
5010 {
5011 sparc_hard_reg_printed [i] = 1;
5012 /* %g7 is used as TLS base register, use #ignore
5013 for it instead of #scratch. */
5014 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5015 i == 7 ? "ignore" : "scratch");
5016 }
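/* Only %g2, %g3, %g6 and %g7 take a .register annotation,
   so skip %g4 and %g5. */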
5017 if (i == 3) i = 5;
5018 }
5019 #endif
5020 }
5021
5022 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5023
5024 #if PROBE_INTERVAL > 4096
5025 #error Cannot use indexed addressing mode for stack probing
5026 #endif
5027
5028 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5029 inclusive. These are offsets from the current stack pointer.
5030
5031 Note that we don't use the REG+REG addressing mode for the probes because
5032 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5033 so the advantages of having a single code path win here. */
5034
5035 static void
5036 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5037 {
5038 rtx g1 = gen_rtx_REG (Pmode, 1);
5039
5040 /* See if we have a constant small number of probes to generate. If so,
5041 that's the easy case. */
5042 if (size <= PROBE_INTERVAL)
5043 {
5044 emit_move_insn (g1, GEN_INT (first));
5045 emit_insn (gen_rtx_SET (g1,
5046 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5047 emit_stack_probe (plus_constant (Pmode, g1, -size));
5048 }
5049
5050 /* The run-time loop is made up of 9 insns in the generic case while the
5051 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5052 else if (size <= 4 * PROBE_INTERVAL)
5053 {
5054 HOST_WIDE_INT i;
5055
5056 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5057 emit_insn (gen_rtx_SET (g1,
5058 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5059 emit_stack_probe (g1);
5060
5061 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5062 it exceeds SIZE. If only two probes are needed, this will not
5063 generate any code. Then probe at FIRST + SIZE. */
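/* E.g., with the default PROBE_INTERVAL of 4096, FIRST = 4096 and
   SIZE = 12288 probe at SP - 8192 (above), then at SP - 12288 in this
   loop, and finally at SP - 16384 == SP - (FIRST + SIZE) below. */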
5064 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5065 {
5066 emit_insn (gen_rtx_SET (g1,
5067 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5068 emit_stack_probe (g1);
5069 }
5070
5071 emit_stack_probe (plus_constant (Pmode, g1,
5072 (i - PROBE_INTERVAL) - size));
5073 }
5074
5075 /* Otherwise, do the same as above, but in a loop. Note that we must be
5076 extra careful with variables wrapping around because we might be at
5077 the very top (or the very bottom) of the address space and we have
5078 to be able to handle this case properly; in particular, we use an
5079 equality test for the loop condition. */
5080 else
5081 {
5082 HOST_WIDE_INT rounded_size;
5083 rtx g4 = gen_rtx_REG (Pmode, 4);
5084
5085 emit_move_insn (g1, GEN_INT (first));
5086
5087
5088 /* Step 1: round SIZE to the previous multiple of the interval. */
5089
5090 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5091 emit_move_insn (g4, GEN_INT (rounded_size));
5092
5093
5094 /* Step 2: compute initial and final value of the loop counter. */
5095
5096 /* TEST_ADDR = SP + FIRST. */
5097 emit_insn (gen_rtx_SET (g1,
5098 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5099
5100 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5101 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5102
5103
5104 /* Step 3: the loop
5105
5106 while (TEST_ADDR != LAST_ADDR)
5107 {
5108 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5109 probe at TEST_ADDR
5110 }
5111
5112 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5113 until it is equal to ROUNDED_SIZE. */
5114
5115 if (TARGET_ARCH64)
5116 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5117 else
5118 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5119
5120
5121 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5122 that SIZE is equal to ROUNDED_SIZE. */
5123
5124 if (size != rounded_size)
5125 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5126 }
5127
5128 /* Make sure nothing is scheduled before we are done. */
5129 emit_insn (gen_blockage ());
5130 }
5131
5132 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5133 absolute addresses. */
5134
5135 const char *
5136 output_probe_stack_range (rtx reg1, rtx reg2)
5137 {
5138 static int labelno = 0;
5139 char loop_lab[32];
5140 rtx xops[2];
5141
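/* Roughly, the emitted loop is (assuming PROBE_INTERVAL == 4096, with
   REG1/REG2 being the %g1/%g4 set up by sparc_emit_probe_stack_range):
   .LPSRLn:
       add  %g1, -4096, %g1
       cmp  %g1, %g4
       bne  .LPSRLn
        st  %g0, [%g1+bias]   ! the probe sits in the branch delay slot  */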
5142 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5143
5144 /* Loop. */
5145 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5146
5147 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5148 xops[0] = reg1;
5149 xops[1] = GEN_INT (-PROBE_INTERVAL);
5150 output_asm_insn ("add\t%0, %1, %0", xops);
5151
5152 /* Test if TEST_ADDR == LAST_ADDR. */
5153 xops[1] = reg2;
5154 output_asm_insn ("cmp\t%0, %1", xops);
5155
5156 /* Probe at TEST_ADDR and branch. */
5157 if (TARGET_ARCH64)
5158 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5159 else
5160 fputs ("\tbne\t", asm_out_file);
5161 assemble_name_raw (asm_out_file, loop_lab);
5162 fputc ('\n', asm_out_file);
5163 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5164 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5165
5166 return "";
5167 }
5168
5169 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5170 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5171 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5172 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5173 the action to be performed if it returns false. Return the new offset. */
5174
5175 typedef bool (*sorr_pred_t) (unsigned int, int);
5176 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5177
5178 static int
5179 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5180 int offset, int leaf_function, sorr_pred_t save_p,
5181 sorr_act_t action_true, sorr_act_t action_false)
5182 {
5183 unsigned int i;
5184 rtx mem;
5185 rtx_insn *insn;
5186
5187 if (TARGET_ARCH64 && high <= 32)
5188 {
5189 int fp_offset = -1;
5190
5191 for (i = low; i < high; i++)
5192 {
5193 if (save_p (i, leaf_function))
5194 {
5195 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5196 base, offset));
5197 if (action_true == SORR_SAVE)
5198 {
5199 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5200 RTX_FRAME_RELATED_P (insn) = 1;
5201 }
5202 else /* action_true == SORR_RESTORE */
5203 {
5204 /* The frame pointer must be restored last since its old
5205 value may be used as base address for the frame. This
5206 is problematic in 64-bit mode only because of the lack
5207 of double-word load instruction. */
5208 if (i == HARD_FRAME_POINTER_REGNUM)
5209 fp_offset = offset;
5210 else
5211 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5212 }
5213 offset += 8;
5214 }
5215 else if (action_false == SORR_ADVANCE)
5216 offset += 8;
5217 }
5218
5219 if (fp_offset >= 0)
5220 {
5221 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5222 emit_move_insn (hard_frame_pointer_rtx, mem);
5223 }
5224 }
5225 else
5226 {
5227 for (i = low; i < high; i += 2)
5228 {
5229 bool reg0 = save_p (i, leaf_function);
5230 bool reg1 = save_p (i + 1, leaf_function);
5231 machine_mode mode;
5232 int regno;
5233
5234 if (reg0 && reg1)
5235 {
5236 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5237 regno = i;
5238 }
5239 else if (reg0)
5240 {
5241 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5242 regno = i;
5243 }
5244 else if (reg1)
5245 {
5246 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5247 regno = i + 1;
5248 offset += 4;
5249 }
5250 else
5251 {
5252 if (action_false == SORR_ADVANCE)
5253 offset += 8;
5254 continue;
5255 }
5256
5257 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5258 if (action_true == SORR_SAVE)
5259 {
5260 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5261 RTX_FRAME_RELATED_P (insn) = 1;
5262 if (mode == DImode)
5263 {
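/* Describe the DImode store to the unwinder as two SImode stores, one
   per word, so that each saved register can be found individually. */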
5264 rtx set1, set2;
5265 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5266 offset));
5267 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5268 RTX_FRAME_RELATED_P (set1) = 1;
5269 mem
5270 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5271 offset + 4));
5272 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5273 RTX_FRAME_RELATED_P (set2) = 1;
5274 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5275 gen_rtx_PARALLEL (VOIDmode,
5276 gen_rtvec (2, set1, set2)));
5277 }
5278 }
5279 else /* action_true == SORR_RESTORE */
5280 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5281
5282 /* Bump the offset and round it down to a double-word boundary
5283 in case we already bumped it by 4. */
5284 offset = ROUND_DOWN (offset + 8, 8);
5285 }
5286 }
5287
5288 return offset;
5289 }
5290
5291 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5292
5293 static rtx
5294 emit_adjust_base_to_offset (rtx base, int offset)
5295 {
5296 /* ??? This might be optimized a little as %g1 might already have a
5297 value close enough that a single add insn will do. */
5298 /* ??? Although, all of this is probably only a temporary fix because
5299 if %g1 can hold a function result, then sparc_expand_epilogue will
5300 lose (the result will be clobbered). */
5301 rtx new_base = gen_rtx_REG (Pmode, 1);
5302 emit_move_insn (new_base, GEN_INT (offset));
5303 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5304 return new_base;
5305 }
5306
5307 /* Emit code to save/restore call-saved global and FP registers. */
5308
5309 static void
5310 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5311 {
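/* If some save/restore slot would fall outside the 13-bit signed
   displacement range, compute the base address into %g1 first. */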
5312 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5313 {
5314 base = emit_adjust_base_to_offset (base, offset);
5315 offset = 0;
5316 }
5317
5318 offset
5319 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5320 save_global_or_fp_reg_p, action, SORR_NONE);
5321 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5322 save_global_or_fp_reg_p, action, SORR_NONE);
5323 }
5324
5325 /* Emit code to save/restore call-saved local and in registers. */
5326
5327 static void
5328 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5329 {
5330 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5331 {
5332 base = emit_adjust_base_to_offset (base, offset);
5333 offset = 0;
5334 }
5335
5336 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5337 save_local_or_in_reg_p, action, SORR_ADVANCE);
5338 }
5339
5340 /* Emit a window_save insn. */
5341
5342 static rtx_insn *
5343 emit_window_save (rtx increment)
5344 {
5345 rtx_insn *insn = emit_insn (gen_window_save (increment));
5346 RTX_FRAME_RELATED_P (insn) = 1;
5347
5348 /* The incoming return address (%o7) is saved in %i7. */
5349 add_reg_note (insn, REG_CFA_REGISTER,
5350 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5351 gen_rtx_REG (Pmode,
5352 INCOMING_RETURN_ADDR_REGNUM)));
5353
5354 /* The window save event. */
5355 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5356
5357 /* The CFA is %fp, the hard frame pointer. */
5358 add_reg_note (insn, REG_CFA_DEF_CFA,
5359 plus_constant (Pmode, hard_frame_pointer_rtx,
5360 INCOMING_FRAME_SP_OFFSET));
5361
5362 return insn;
5363 }
5364
5365 /* Generate an increment for the stack pointer. */
5366
5367 static rtx
5368 gen_stack_pointer_inc (rtx increment)
5369 {
5370 return gen_rtx_SET (stack_pointer_rtx,
5371 gen_rtx_PLUS (Pmode,
5372 stack_pointer_rtx,
5373 increment));
5374 }
5375
5376 /* Expand the function prologue. The prologue is responsible for reserving
5377 storage for the frame, saving the call-saved registers and loading the
5378 GOT register if needed. */
5379
5380 void
5381 sparc_expand_prologue (void)
5382 {
5383 HOST_WIDE_INT size;
5384 rtx_insn *insn;
5385
5386 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5387 on the final value of the flag means deferring the prologue/epilogue
5388 expansion until just before the second scheduling pass, which is too
5389 late to emit multiple epilogues or return insns.
5390
5391 Of course we are making the assumption that the value of the flag
5392 will not change between now and its final value. Of the three parts
5393 of the formula, only the last one can reasonably vary. Let's take a
5394 closer look, after assuming that the first two are set to true
5395 (otherwise the last value is effectively silenced).
5396
5397 If only_leaf_regs_used returns false, the global predicate will also
5398 be false so the actual frame size calculated below will be positive.
5399 As a consequence, the save_register_window insn will be emitted in
5400 the instruction stream; now this insn explicitly references %fp
5401 which is not a leaf register so only_leaf_regs_used will always
5402 return false subsequently.
5403
5404 If only_leaf_regs_used returns true, we hope that the subsequent
5405 optimization passes won't cause non-leaf registers to pop up. For
5406 example, the regrename pass has special provisions to not rename to
5407 non-leaf registers in a leaf function. */
5408 sparc_leaf_function_p
5409 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5410
5411 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5412
5413 if (flag_stack_usage_info)
5414 current_function_static_stack_size = size;
5415
5416 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5417 {
5418 if (crtl->is_leaf && !cfun->calls_alloca)
5419 {
5420 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5421 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5422 size - STACK_CHECK_PROTECT);
5423 }
5424 else if (size > 0)
5425 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5426 }
5427
5428 if (size == 0)
5429 ; /* do nothing. */
5430 else if (sparc_leaf_function_p)
5431 {
5432 rtx size_int_rtx = GEN_INT (-size);
5433
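/* -SIZE fits in the 13-bit signed immediate field of `add' only when
   SIZE <= 4096; up to 8192 two increments are used, the second one
   (4096 - SIZE) again fitting the field; beyond that the amount must
   first be loaded into %g1. */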
5434 if (size <= 4096)
5435 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5436 else if (size <= 8192)
5437 {
5438 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5439 RTX_FRAME_RELATED_P (insn) = 1;
5440
5441 /* %sp is still the CFA register. */
5442 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5443 }
5444 else
5445 {
5446 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5447 emit_move_insn (size_rtx, size_int_rtx);
5448 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5449 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5450 gen_stack_pointer_inc (size_int_rtx));
5451 }
5452
5453 RTX_FRAME_RELATED_P (insn) = 1;
5454 }
5455 else
5456 {
5457 rtx size_int_rtx = GEN_INT (-size);
5458
5459 if (size <= 4096)
5460 emit_window_save (size_int_rtx);
5461 else if (size <= 8192)
5462 {
5463 emit_window_save (GEN_INT (-4096));
5464
5465 /* %sp is not the CFA register anymore. */
5466 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5467
5468 /* Make sure no %fp-based store is issued until after the frame is
5469 established. The offset between the frame pointer and the stack
5470 pointer is calculated relative to the value of the stack pointer
5471 at the end of the function prologue, and moving instructions that
5472 access the stack via the frame pointer between the instructions
5473 that decrement the stack pointer could result in accessing the
5474 register window save area, which is volatile. */
5475 emit_insn (gen_frame_blockage ());
5476 }
5477 else
5478 {
5479 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5480 emit_move_insn (size_rtx, size_int_rtx);
5481 emit_window_save (size_rtx);
5482 }
5483 }
5484
5485 if (sparc_leaf_function_p)
5486 {
5487 sparc_frame_base_reg = stack_pointer_rtx;
5488 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5489 }
5490 else
5491 {
5492 sparc_frame_base_reg = hard_frame_pointer_rtx;
5493 sparc_frame_base_offset = SPARC_STACK_BIAS;
5494 }
5495
5496 if (sparc_n_global_fp_regs > 0)
5497 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5498 sparc_frame_base_offset
5499 - sparc_apparent_frame_size,
5500 SORR_SAVE);
5501
5502 /* Load the GOT register if needed. */
5503 if (crtl->uses_pic_offset_table)
5504 load_got_register ();
5505
5506 /* Advertise that the data calculated just above are now valid. */
5507 sparc_prologue_data_valid_p = true;
5508 }
5509
5510 /* Expand the function prologue. The prologue is responsible for reserving
5511 storage for the frame, saving the call-saved registers and loading the
5512 GOT register if needed. */
5513
5514 void
5515 sparc_flat_expand_prologue (void)
5516 {
5517 HOST_WIDE_INT size;
5518 rtx_insn *insn;
5519
5520 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5521
5522 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5523
5524 if (flag_stack_usage_info)
5525 current_function_static_stack_size = size;
5526
5527 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5528 {
5529 if (crtl->is_leaf && !cfun->calls_alloca)
5530 {
5531 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5532 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5533 size - STACK_CHECK_PROTECT);
5534 }
5535 else if (size > 0)
5536 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5537 }
5538
5539 if (sparc_save_local_in_regs_p)
5540 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5541 SORR_SAVE);
5542
5543 if (size == 0)
5544 ; /* do nothing. */
5545 else
5546 {
5547 rtx size_int_rtx, size_rtx;
5548
5549 size_rtx = size_int_rtx = GEN_INT (-size);
5550
5551 /* We establish the frame (i.e. decrement the stack pointer) first, even
5552 if we use a frame pointer, because we cannot clobber any call-saved
5553 registers, including the frame pointer, if we haven't created a new
5554 register save area, for the sake of compatibility with the ABI. */
5555 if (size <= 4096)
5556 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5557 else if (size <= 8192 && !frame_pointer_needed)
5558 {
5559 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5560 RTX_FRAME_RELATED_P (insn) = 1;
5561 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5562 }
5563 else
5564 {
5565 size_rtx = gen_rtx_REG (Pmode, 1);
5566 emit_move_insn (size_rtx, size_int_rtx);
5567 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5568 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5569 gen_stack_pointer_inc (size_int_rtx));
5570 }
5571 RTX_FRAME_RELATED_P (insn) = 1;
5572
5573 /* Ensure nothing is scheduled until after the frame is established. */
5574 emit_insn (gen_blockage ());
5575
5576 if (frame_pointer_needed)
5577 {
5578 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5579 gen_rtx_MINUS (Pmode,
5580 stack_pointer_rtx,
5581 size_rtx)));
5582 RTX_FRAME_RELATED_P (insn) = 1;
5583
5584 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5585 gen_rtx_SET (hard_frame_pointer_rtx,
5586 plus_constant (Pmode, stack_pointer_rtx,
5587 size)));
5588 }
5589
5590 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5591 {
5592 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5593 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5594
5595 insn = emit_move_insn (i7, o7);
5596 RTX_FRAME_RELATED_P (insn) = 1;
5597
5598 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5599
5600 /* Prevent this instruction from ever being considered dead,
5601 even if this function has no epilogue. */
5602 emit_use (i7);
5603 }
5604 }
5605
5606 if (frame_pointer_needed)
5607 {
5608 sparc_frame_base_reg = hard_frame_pointer_rtx;
5609 sparc_frame_base_offset = SPARC_STACK_BIAS;
5610 }
5611 else
5612 {
5613 sparc_frame_base_reg = stack_pointer_rtx;
5614 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5615 }
5616
5617 if (sparc_n_global_fp_regs > 0)
5618 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5619 sparc_frame_base_offset
5620 - sparc_apparent_frame_size,
5621 SORR_SAVE);
5622
5623 /* Load the GOT register if needed. */
5624 if (crtl->uses_pic_offset_table)
5625 load_got_register ();
5626
5627 /* Advertise that the data calculated just above are now valid. */
5628 sparc_prologue_data_valid_p = true;
5629 }
5630
5631 /* This function generates the assembly code for function entry, which boils
5632 down to emitting the necessary .register directives. */
5633
5634 static void
5635 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5636 {
5637 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5638 if (!TARGET_FLAT)
5639 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5640
5641 sparc_output_scratch_registers (file);
5642 }
5643
5644 /* Expand the function epilogue, either normal or part of a sibcall.
5645 We emit all the instructions except the return or the call. */
5646
5647 void
5648 sparc_expand_epilogue (bool for_eh)
5649 {
5650 HOST_WIDE_INT size = sparc_frame_size;
5651
5652 if (sparc_n_global_fp_regs > 0)
5653 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5654 sparc_frame_base_offset
5655 - sparc_apparent_frame_size,
5656 SORR_RESTORE);
5657
5658 if (size == 0 || for_eh)
5659 ; /* do nothing. */
5660 else if (sparc_leaf_function_p)
5661 {
5662 if (size <= 4096)
5663 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5664 else if (size <= 8192)
5665 {
5666 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5667 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5668 }
5669 else
5670 {
5671 rtx reg = gen_rtx_REG (Pmode, 1);
5672 emit_move_insn (reg, GEN_INT (size));
5673 emit_insn (gen_stack_pointer_inc (reg));
5674 }
5675 }
5676 }
5677
5678 /* Expand the function epilogue, either normal or part of a sibcall.
5679 We emit all the instructions except the return or the call. */
5680
5681 void
5682 sparc_flat_expand_epilogue (bool for_eh)
5683 {
5684 HOST_WIDE_INT size = sparc_frame_size;
5685
5686 if (sparc_n_global_fp_regs > 0)
5687 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5688 sparc_frame_base_offset
5689 - sparc_apparent_frame_size,
5690 SORR_RESTORE);
5691
5692 /* If we have a frame pointer, we'll need both to restore it before the
5693 frame is destroyed and to use its current value in destroying the frame.
5694 Since we don't have an atomic way to do that in the flat window model,
5695 we save the current value into a temporary register (%g1). */
5696 if (frame_pointer_needed && !for_eh)
5697 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5698
5699 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5700 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5701 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5702
5703 if (sparc_save_local_in_regs_p)
5704 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5705 sparc_frame_base_offset,
5706 SORR_RESTORE);
5707
5708 if (size == 0 || for_eh)
5709 ; /* do nothing. */
5710 else if (frame_pointer_needed)
5711 {
5712 /* Make sure the frame is destroyed after everything else is done. */
5713 emit_insn (gen_blockage ());
5714
5715 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5716 }
5717 else
5718 {
5719 /* Likewise. */
5720 emit_insn (gen_blockage ());
5721
5722 if (size <= 4096)
5723 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5724 else if (size <= 8192)
5725 {
5726 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5727 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5728 }
5729 else
5730 {
5731 rtx reg = gen_rtx_REG (Pmode, 1);
5732 emit_move_insn (reg, GEN_INT (size));
5733 emit_insn (gen_stack_pointer_inc (reg));
5734 }
5735 }
5736 }
5737
5738 /* Return true if it is appropriate to emit `return' instructions in the
5739 body of a function. */
5740
5741 bool
5742 sparc_can_use_return_insn_p (void)
5743 {
5744 return sparc_prologue_data_valid_p
5745 && sparc_n_global_fp_regs == 0
5746 && (TARGET_FLAT
5747 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5748 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5749 }
5750
5751 /* This function generates the assembly code for function exit. */
5752
5753 static void
5754 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5755 {
5756 /* If the last two instructions of a function are "call foo; dslot;"
5757 the return address might point to the first instruction in the next
5758 function and we have to output a dummy nop for the sake of sane
5759 backtraces in such cases. This is pointless for sibling calls since
5760 the return address is explicitly adjusted. */
5761
5762 rtx insn, last_real_insn;
5763
5764 insn = get_last_insn ();
5765
5766 last_real_insn = prev_real_insn (insn);
5767 if (last_real_insn
5768 && NONJUMP_INSN_P (last_real_insn)
5769 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5770 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5771
5772 if (last_real_insn
5773 && CALL_P (last_real_insn)
5774 && !SIBLING_CALL_P (last_real_insn))
5775 fputs("\tnop\n", file);
5776
5777 sparc_output_deferred_case_vectors ();
5778 }
5779
5780 /* Output a 'restore' instruction. */
5781
5782 static void
5783 output_restore (rtx pat)
5784 {
5785 rtx operands[3];
5786
5787 if (! pat)
5788 {
5789 fputs ("\t restore\n", asm_out_file);
5790 return;
5791 }
5792
5793 gcc_assert (GET_CODE (pat) == SET);
5794
5795 operands[0] = SET_DEST (pat);
5796 pat = SET_SRC (pat);
5797
5798 switch (GET_CODE (pat))
5799 {
5800 case PLUS:
5801 operands[1] = XEXP (pat, 0);
5802 operands[2] = XEXP (pat, 1);
5803 output_asm_insn (" restore %r1, %2, %Y0", operands);
5804 break;
5805 case LO_SUM:
5806 operands[1] = XEXP (pat, 0);
5807 operands[2] = XEXP (pat, 1);
5808 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5809 break;
5810 case ASHIFT:
5811 operands[1] = XEXP (pat, 0);
5812 gcc_assert (XEXP (pat, 1) == const1_rtx);
5813 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5814 break;
5815 default:
5816 operands[1] = pat;
5817 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5818 break;
5819 }
5820 }
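/* An illustrative sketch (not emitted verbatim): a delay-slot insn of
   the form (set (reg A) (plus (reg B) (reg C))) is folded into
   " restore B, C, A", with the destination printed through the %Y
   operand code so that it names the register as seen after the window
   shift that `restore' performs; a bare source operand becomes
   " restore %g0, src, A".  */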
5821
5822 /* Output a return. */
5823
5824 const char *
5825 output_return (rtx_insn *insn)
5826 {
5827 if (crtl->calls_eh_return)
5828 {
5829 /* If the function uses __builtin_eh_return, the eh_return
5830 machinery occupies the delay slot. */
5831 gcc_assert (!final_sequence);
5832
5833 if (flag_delayed_branch)
5834 {
5835 if (!TARGET_FLAT && TARGET_V9)
5836 fputs ("\treturn\t%i7+8\n", asm_out_file);
5837 else
5838 {
5839 if (!TARGET_FLAT)
5840 fputs ("\trestore\n", asm_out_file);
5841
5842 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5843 }
5844
5845 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5846 }
5847 else
5848 {
5849 if (!TARGET_FLAT)
5850 fputs ("\trestore\n", asm_out_file);
5851
5852 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5853 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5854 }
5855 }
5856 else if (sparc_leaf_function_p || TARGET_FLAT)
5857 {
5858 /* This is a leaf or flat function so we don't have to bother restoring
5859 the register window, which frees us from dealing with the convoluted
5860 semantics of restore/return. We simply output the jump to the
5861 return address and the insn in the delay slot (if any). */
5862
5863 return "jmp\t%%o7+%)%#";
5864 }
5865 else
5866 {
5867 /* This is a regular function so we have to restore the register window.
5868 We may have a pending insn for the delay slot, which will be either
5869 combined with the 'restore' instruction or put in the delay slot of
5870 the 'return' instruction. */
5871
5872 if (final_sequence)
5873 {
5874 rtx delay, pat;
5875
5876 delay = NEXT_INSN (insn);
5877 gcc_assert (delay);
5878
5879 pat = PATTERN (delay);
5880
5881 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5882 {
5883 epilogue_renumber (&pat, 0);
5884 return "return\t%%i7+%)%#";
5885 }
5886 else
5887 {
5888 output_asm_insn ("jmp\t%%i7+%)", NULL);
5889 output_restore (pat);
5890 PATTERN (delay) = gen_blockage ();
5891 INSN_CODE (delay) = -1;
5892 }
5893 }
5894 else
5895 {
5896 /* The delay slot is empty. */
5897 if (TARGET_V9)
5898 return "return\t%%i7+%)\n\t nop";
5899 else if (flag_delayed_branch)
5900 return "jmp\t%%i7+%)\n\t restore";
5901 else
5902 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5903 }
5904 }
5905
5906 return "";
5907 }
5908
5909 /* Output a sibling call. */
5910
5911 const char *
5912 output_sibcall (rtx_insn *insn, rtx call_operand)
5913 {
5914 rtx operands[1];
5915
5916 gcc_assert (flag_delayed_branch);
5917
5918 operands[0] = call_operand;
5919
5920 if (sparc_leaf_function_p || TARGET_FLAT)
5921 {
5922 /* This is a leaf or flat function so we don't have to bother restoring
5923 the register window. We simply output the jump to the function and
5924 the insn in the delay slot (if any). */
5925
5926 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5927
5928 if (final_sequence)
5929 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5930 operands);
5931 else
5932 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5933 it into a branch if possible. */
5934 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5935 operands);
5936 }
5937 else
5938 {
5939 /* This is a regular function so we have to restore the register window.
5940 We may have a pending insn for the delay slot, which will be combined
5941 with the 'restore' instruction. */
5942
5943 output_asm_insn ("call\t%a0, 0", operands);
5944
5945 if (final_sequence)
5946 {
5947 rtx_insn *delay = NEXT_INSN (insn);
5948 gcc_assert (delay);
5949
5950 output_restore (PATTERN (delay));
5951
5952 PATTERN (delay) = gen_blockage ();
5953 INSN_CODE (delay) = -1;
5954 }
5955 else
5956 output_restore (NULL_RTX);
5957 }
5958
5959 return "";
5960 }
5961 \f
5962 /* Functions for handling argument passing.
5963
5964 For 32-bit, the first 6 args are normally in registers and the rest are
5965 pushed. Any arg that starts within the first 6 words is at least
5966 partially passed in a register unless its data type forbids it.
5967
5968 For 64-bit, the argument registers are laid out as an array of 16 elements
5969 and arguments are added sequentially. The first 6 int args and up to the
5970 first 16 fp args (depending on size) are passed in regs.
5971
5972 Slot Stack Integral Float Float in structure Double Long Double
5973 ---- ----- -------- ----- ------------------ ------ -----------
5974 15 [SP+248] %f31 %f30,%f31 %d30
5975 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5976 13 [SP+232] %f27 %f26,%f27 %d26
5977 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5978 11 [SP+216] %f23 %f22,%f23 %d22
5979 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5980 9 [SP+200] %f19 %f18,%f19 %d18
5981 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5982 7 [SP+184] %f15 %f14,%f15 %d14
5983 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5984 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5985 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5986 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5987 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5988 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5989 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5990
5991 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5992
5993 Integral arguments are always passed as 64-bit quantities appropriately
5994 extended.
5995
5996 Passing of floating point values is handled as follows.
5997 If a prototype is in scope:
5998 If the value is in a named argument (i.e. it is not matched by the
5999 `...' of a stdarg function) then the value is passed in the appropriate
6000 fp reg.
6001 If the value is part of the `...' and is passed in one of the first 6
6002 slots then the value is passed in the appropriate int reg.
6003 If the value is part of the `...' and is not passed in one of the first 6
6004 slots then the value is passed in memory.
6005 If a prototype is not in scope:
6006 If the value is one of the first 6 arguments the value is passed in the
6007 appropriate integer reg and the appropriate fp reg.
6008 If the value is not one of the first 6 arguments the value is passed in
6009 the appropriate fp reg and in memory.
6010
6011
6012 Summary of the calling conventions implemented by GCC on the SPARC:
6013
6014 32-bit ABI:
6015 size argument return value
6016
6017 small integer <4 int. reg. int. reg.
6018 word 4 int. reg. int. reg.
6019 double word 8 int. reg. int. reg.
6020
6021 _Complex small integer <8 int. reg. int. reg.
6022 _Complex word 8 int. reg. int. reg.
6023 _Complex double word 16 memory int. reg.
6024
6025 vector integer <=8 int. reg. FP reg.
6026 vector integer >8 memory memory
6027
6028 float 4 int. reg. FP reg.
6029 double 8 int. reg. FP reg.
6030 long double 16 memory memory
6031
6032 _Complex float 8 memory FP reg.
6033 _Complex double 16 memory FP reg.
6034 _Complex long double 32 memory FP reg.
6035
6036 vector float any memory memory
6037
6038 aggregate any memory memory
6039
6040
6041
6042 64-bit ABI:
6043 size argument return value
6044
6045 small integer <8 int. reg. int. reg.
6046 word 8 int. reg. int. reg.
6047 double word 16 int. reg. int. reg.
6048
6049 _Complex small integer <16 int. reg. int. reg.
6050 _Complex word 16 int. reg. int. reg.
6051 _Complex double word 32 memory int. reg.
6052
6053 vector integer <=16 FP reg. FP reg.
6054 vector integer 16<s<=32 memory FP reg.
6055 vector integer >32 memory memory
6056
6057 float 4 FP reg. FP reg.
6058 double 8 FP reg. FP reg.
6059 long double 16 FP reg. FP reg.
6060
6061 _Complex float 8 FP reg. FP reg.
6062 _Complex double 16 FP reg. FP reg.
6063 _Complex long double 32 memory FP reg.
6064
6065 vector float <=16 FP reg. FP reg.
6066 vector float 16<s<=32 memory FP reg.
6067 vector float >32 memory memory
6068
6069 aggregate <=16 reg. reg.
6070 aggregate 16<s<=32 memory reg.
6071 aggregate >32 memory memory
6072
6073
6074
6075 Note #1: complex floating-point types follow the extended SPARC ABIs as
6076 implemented by the Sun compiler.
6077
6078 Note #2: integral vector types follow the scalar floating-point types
6079 conventions to match what is implemented by the Sun VIS SDK.
6080
6081 Note #3: floating-point vector types follow the aggregate types
6082 conventions. */
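/* A worked example of the 64-bit layout above (a sketch, for a
   hypothetical prototyped callee):

     void f (int a, double b, int c, double d);

   assigns a -> %o0 (slot 0), b -> %d2 (slot 1), c -> %o2 (slot 2) and
   d -> %d6 (slot 3): each argument consumes one 8-byte slot and the
   register class is read off the table for that slot.  */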
6083
6084
6085 /* Maximum number of int regs for args. */
6086 #define SPARC_INT_ARG_MAX 6
6087 /* Maximum number of fp regs for args. */
6088 #define SPARC_FP_ARG_MAX 16
6089
6090 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
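/* E.g., assuming UNITS_PER_WORD == 8 on 64-bit, ROUND_ADVANCE (12)
   is (12 + 7) / 8 == 2 argument words, and ROUND_ADVANCE (8) is 1.  */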
6091
6092 /* Handle the INIT_CUMULATIVE_ARGS macro.
6093 Initialize a variable CUM of type CUMULATIVE_ARGS
6094 for a call to a function whose data type is FNTYPE.
6095 For a library call, FNTYPE is 0. */
6096
6097 void
6098 init_cumulative_args (struct sparc_args *cum, tree fntype,
6099 rtx libname ATTRIBUTE_UNUSED,
6100 tree fndecl ATTRIBUTE_UNUSED)
6101 {
6102 cum->words = 0;
6103 cum->prototype_p = fntype && prototype_p (fntype);
6104 cum->libcall_p = fntype == 0;
6105 }
6106
6107 /* Handle promotion of pointer and integer arguments. */
6108
6109 static machine_mode
6110 sparc_promote_function_mode (const_tree type,
6111 machine_mode mode,
6112 int *punsignedp,
6113 const_tree fntype ATTRIBUTE_UNUSED,
6114 int for_return ATTRIBUTE_UNUSED)
6115 {
6116 if (type != NULL_TREE && POINTER_TYPE_P (type))
6117 {
6118 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6119 return Pmode;
6120 }
6121
6122 /* Integral arguments are passed as full words, as per the ABI. */
6123 if (GET_MODE_CLASS (mode) == MODE_INT
6124 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6125 return word_mode;
6126
6127 return mode;
6128 }
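/* Sketch of the effect, for a hypothetical prototype `long f (short s)':
   `s' has an integral mode narrower than UNITS_PER_WORD and is therefore
   promoted to word_mode; a pointer argument would instead be extended to
   Pmode with POINTERS_EXTEND_UNSIGNED signedness.  */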
6129
6130 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6131
6132 static bool
6133 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6134 {
6135 return TARGET_ARCH64;
6136 }
6137
6138 /* Scan the record type TYPE and return the following predicates:
6139 - INTREGS_P: the record contains at least one field or sub-field
6140 that is eligible for promotion in integer registers.
6141 - FPREGS_P: the record contains at least one field or sub-field
6142 that is eligible for promotion in floating-point registers.
6143 - PACKED_P: the record contains at least one field that is packed. */
6144
6145 static void
6146 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6147 int *packed_p)
6148 {
6149 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6150 {
6151 if (TREE_CODE (field) == FIELD_DECL)
6152 {
6153 tree field_type = TREE_TYPE (field);
6154
6155 if (TREE_CODE (field_type) == RECORD_TYPE)
6156 scan_record_type (field_type, intregs_p, fpregs_p, packed_p);
6157 else if ((FLOAT_TYPE_P (field_type)
6158 || TREE_CODE (field_type) == VECTOR_TYPE)
6159 && TARGET_FPU)
6160 *fpregs_p = 1;
6161 else
6162 *intregs_p = 1;
6163
6164 if (DECL_PACKED (field))
6165 *packed_p = 1;
6166 }
6167 }
6168 }
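/* For instance (a sketch), scanning `struct { float f; int i; }' sets
   both *FPREGS_P (for the float, when TARGET_FPU) and *INTREGS_P (for
   the int); if any field were declared packed, *PACKED_P would be set
   as well.  */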
6169
6170 /* Compute the slot number to pass an argument in.
6171 Return the slot number or -1 if passing on the stack.
6172
6173 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6174 the preceding args and about the function being called.
6175 MODE is the argument's machine mode.
6176 TYPE is the data type of the argument (as a tree).
6177 This is null for libcalls where that information may
6178 not be available.
6179 NAMED is nonzero if this argument is a named parameter
6180 (otherwise it is an extra parameter matching an ellipsis).
6181 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6182 *PREGNO records the register number to use if scalar type.
6183 *PPADDING records the amount of padding needed in words. */
6184
6185 static int
6186 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6187 const_tree type, bool named, bool incoming_p,
6188 int *pregno, int *ppadding)
6189 {
6190 int regbase = (incoming_p
6191 ? SPARC_INCOMING_INT_ARG_FIRST
6192 : SPARC_OUTGOING_INT_ARG_FIRST);
6193 int slotno = cum->words;
6194 enum mode_class mclass;
6195 int regno;
6196
6197 *ppadding = 0;
6198
6199 if (type && TREE_ADDRESSABLE (type))
6200 return -1;
6201
6202 if (TARGET_ARCH32
6203 && mode == BLKmode
6204 && type
6205 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6206 return -1;
6207
6208 /* For SPARC64, objects requiring 16-byte alignment get it. */
6209 if (TARGET_ARCH64
6210 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6211 && (slotno & 1) != 0)
6212 slotno++, *ppadding = 1;
6213
6214 mclass = GET_MODE_CLASS (mode);
6215 if (type && TREE_CODE (type) == VECTOR_TYPE)
6216 {
6217 /* Vector types deserve special treatment because they are
6218 polymorphic wrt their mode, depending upon whether VIS
6219 instructions are enabled. */
6220 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6221 {
6222 /* The SPARC port defines no floating-point vector modes. */
6223 gcc_assert (mode == BLKmode);
6224 }
6225 else
6226 {
6227 /* Integral vector types should either have a vector
6228 mode or an integral mode, because we are guaranteed
6229 by pass_by_reference that their size is not greater
6230 than 16 bytes and TImode is 16-byte wide. */
6231 gcc_assert (mode != BLKmode);
6232
6233 /* Vector integers are handled like floats according to
6234 the Sun VIS SDK. */
6235 mclass = MODE_FLOAT;
6236 }
6237 }
6238
6239 switch (mclass)
6240 {
6241 case MODE_FLOAT:
6242 case MODE_COMPLEX_FLOAT:
6243 case MODE_VECTOR_INT:
6244 if (TARGET_ARCH64 && TARGET_FPU && named)
6245 {
6246 if (slotno >= SPARC_FP_ARG_MAX)
6247 return -1;
6248 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6249 /* Arguments filling only a single FP register are
6250 right-justified in the outer double FP register. */
6251 if (GET_MODE_SIZE (mode) <= 4)
6252 regno++;
6253 break;
6254 }
6255 /* fallthrough */
6256
6257 case MODE_INT:
6258 case MODE_COMPLEX_INT:
6259 if (slotno >= SPARC_INT_ARG_MAX)
6260 return -1;
6261 regno = regbase + slotno;
6262 break;
6263
6264 case MODE_RANDOM:
6265 if (mode == VOIDmode)
6266 /* MODE is VOIDmode when generating the actual call. */
6267 return -1;
6268
6269 gcc_assert (mode == BLKmode);
6270
6271 if (TARGET_ARCH32
6272 || !type
6273 || (TREE_CODE (type) != VECTOR_TYPE
6274 && TREE_CODE (type) != RECORD_TYPE))
6275 {
6276 if (slotno >= SPARC_INT_ARG_MAX)
6277 return -1;
6278 regno = regbase + slotno;
6279 }
6280 else /* TARGET_ARCH64 && type */
6281 {
6282 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6283
6284 /* First see what kinds of registers we would need. */
6285 if (TREE_CODE (type) == VECTOR_TYPE)
6286 fpregs_p = 1;
6287 else
6288 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6289
6290 /* The ABI obviously doesn't specify how packed structures
6291 are passed. These are defined to be passed in int regs
6292 if possible, otherwise memory. */
6293 if (packed_p || !named)
6294 fpregs_p = 0, intregs_p = 1;
6295
6296 /* If all arg slots are filled, then must pass on stack. */
6297 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6298 return -1;
6299
6300 /* If there are only int args and all int arg slots are filled,
6301 then must pass on stack. */
6302 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6303 return -1;
6304
6305 /* Note that even if all int arg slots are filled, fp members may
6306 still be passed in regs if such regs are available.
6307 *PREGNO isn't set because there may be more than one, it's up
6308 to the caller to compute them. */
6309 return slotno;
6310 }
6311 break;
6312
6313 default:
6314 gcc_unreachable ();
6315 }
6316
6317 *pregno = regno;
6318 return slotno;
6319 }
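/* Worked example (a sketch): on 32-bit, the third word-sized argument
   of a call has CUM->words == 2, so it gets slot 2 and register
   SPARC_OUTGOING_INT_ARG_FIRST + 2, i.e. %o2; a seventh such argument
   would have slotno >= SPARC_INT_ARG_MAX, and the function returns -1,
   meaning it is passed on the stack.  */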
6320
6321 /* Handle recursive register counting for structure field layout. */
6322
6323 struct function_arg_record_value_parms
6324 {
6325 rtx ret; /* return expression being built. */
6326 int slotno; /* slot number of the argument. */
6327 int named; /* whether the argument is named. */
6328 int regbase; /* regno of the base register. */
6329 int stack; /* 1 if part of the argument is on the stack. */
6330 int intoffset; /* offset of the first pending integer field. */
6331 unsigned int nregs; /* number of words passed in registers. */
6332 };
6333
6334 static void function_arg_record_value_3
6335 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6336 static void function_arg_record_value_2
6337 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6338 static void function_arg_record_value_1
6339 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6340 static rtx function_arg_record_value (const_tree, machine_mode, int, int, int);
6341 static rtx function_arg_union_value (int, machine_mode, int, int);
6342
6343 /* A subroutine of function_arg_record_value. Traverse the structure
6344 recursively and determine how many registers will be required. */
6345
6346 static void
6347 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6348 struct function_arg_record_value_parms *parms,
6349 bool packed_p)
6350 {
6351 tree field;
6352
6353 /* We need to compute how many registers are needed so we can
6354 allocate the PARALLEL but before we can do that we need to know
6355 whether there are any packed fields. The ABI obviously doesn't
6356 specify how structures are passed in this case, so they are
6357 defined to be passed in int regs if possible, otherwise memory,
6358 regardless of whether there are fp values present. */
6359
6360 if (! packed_p)
6361 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6362 {
6363 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6364 {
6365 packed_p = true;
6366 break;
6367 }
6368 }
6369
6370 /* Compute how many registers we need. */
6371 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6372 {
6373 if (TREE_CODE (field) == FIELD_DECL)
6374 {
6375 HOST_WIDE_INT bitpos = startbitpos;
6376
6377 if (DECL_SIZE (field) != 0)
6378 {
6379 if (integer_zerop (DECL_SIZE (field)))
6380 continue;
6381
6382 if (tree_fits_uhwi_p (bit_position (field)))
6383 bitpos += int_bit_position (field);
6384 }
6385
6386 /* ??? FIXME: else assume zero offset. */
6387
6388 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6389 function_arg_record_value_1 (TREE_TYPE (field),
6390 bitpos,
6391 parms,
6392 packed_p);
6393 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6394 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6395 && TARGET_FPU
6396 && parms->named
6397 && ! packed_p)
6398 {
6399 if (parms->intoffset != -1)
6400 {
6401 unsigned int startbit, endbit;
6402 int intslots, this_slotno;
6403
6404 startbit = ROUND_DOWN (parms->intoffset, BITS_PER_WORD);
6405 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6406
6407 intslots = (endbit - startbit) / BITS_PER_WORD;
6408 this_slotno = parms->slotno + parms->intoffset
6409 / BITS_PER_WORD;
6410
6411 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6412 {
6413 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6414 /* We need to pass this field on the stack. */
6415 parms->stack = 1;
6416 }
6417
6418 parms->nregs += intslots;
6419 parms->intoffset = -1;
6420 }
6421
6422 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6423 If it weren't true we wouldn't be here. */
6424 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6425 && DECL_MODE (field) == BLKmode)
6426 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6427 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6428 parms->nregs += 2;
6429 else
6430 parms->nregs += 1;
6431 }
6432 else
6433 {
6434 if (parms->intoffset == -1)
6435 parms->intoffset = bitpos;
6436 }
6437 }
6438 }
6439 }
6440
6441 /* A subroutine of function_arg_record_value. Assign the bits of the
6442 structure between parms->intoffset and bitpos to integer registers. */
6443
6444 static void
6445 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6446 struct function_arg_record_value_parms *parms)
6447 {
6448 machine_mode mode;
6449 unsigned int regno;
6450 unsigned int startbit, endbit;
6451 int this_slotno, intslots, intoffset;
6452 rtx reg;
6453
6454 if (parms->intoffset == -1)
6455 return;
6456
6457 intoffset = parms->intoffset;
6458 parms->intoffset = -1;
6459
6460 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6461 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6462 intslots = (endbit - startbit) / BITS_PER_WORD;
6463 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6464
6465 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6466 if (intslots <= 0)
6467 return;
6468
6469 /* If this is the trailing part of a word, only load that much into
6470 the register. Otherwise load the whole register. Note that in
6471 the latter case we may pick up unwanted bits. It's not a problem
6472 at the moment, but we may wish to revisit this. */
6473
6474 if (intoffset % BITS_PER_WORD != 0)
6475 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6476 MODE_INT);
6477 else
6478 mode = word_mode;
6479
6480 intoffset /= BITS_PER_UNIT;
6481 do
6482 {
6483 regno = parms->regbase + this_slotno;
6484 reg = gen_rtx_REG (mode, regno);
6485 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6486 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6487
6488 this_slotno += 1;
6489 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6490 mode = word_mode;
6491 parms->nregs += 1;
6492 intslots -= 1;
6493 }
6494 while (intslots > 0);
6495 }
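/* For example (BITS_PER_WORD == 64), intoffset == 0 and bitpos == 128
   give startbit == 0, endbit == 128 and thus intslots == 2: two
   consecutive integer registers are appended to the PARALLEL at byte
   offsets 0 and 8.  */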
6496
6497 /* A subroutine of function_arg_record_value. Traverse the structure
6498 recursively and assign bits to floating point registers. Track which
6499 bits in between need integer registers; invoke function_arg_record_value_3
6500 to make that happen. */
6501
6502 static void
6503 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6504 struct function_arg_record_value_parms *parms,
6505 bool packed_p)
6506 {
6507 tree field;
6508
6509 if (! packed_p)
6510 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6511 {
6512 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6513 {
6514 packed_p = true;
6515 break;
6516 }
6517 }
6518
6519 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6520 {
6521 if (TREE_CODE (field) == FIELD_DECL)
6522 {
6523 HOST_WIDE_INT bitpos = startbitpos;
6524
6525 if (DECL_SIZE (field) != 0)
6526 {
6527 if (integer_zerop (DECL_SIZE (field)))
6528 continue;
6529
6530 if (tree_fits_uhwi_p (bit_position (field)))
6531 bitpos += int_bit_position (field);
6532 }
6533
6534 /* ??? FIXME: else assume zero offset. */
6535
6536 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6537 function_arg_record_value_2 (TREE_TYPE (field),
6538 bitpos,
6539 parms,
6540 packed_p);
6541 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6542 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6543 && TARGET_FPU
6544 && parms->named
6545 && ! packed_p)
6546 {
6547 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6548 int regno, nregs, pos;
6549 machine_mode mode = DECL_MODE (field);
6550 rtx reg;
6551
6552 function_arg_record_value_3 (bitpos, parms);
6553
6554 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6555 && mode == BLKmode)
6556 {
6557 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6558 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6559 }
6560 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6561 {
6562 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6563 nregs = 2;
6564 }
6565 else
6566 nregs = 1;
6567
6568 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6569 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6570 regno++;
6571 reg = gen_rtx_REG (mode, regno);
6572 pos = bitpos / BITS_PER_UNIT;
6573 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6574 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6575 parms->nregs += 1;
6576 while (--nregs > 0)
6577 {
6578 regno += GET_MODE_SIZE (mode) / 4;
6579 reg = gen_rtx_REG (mode, regno);
6580 pos += GET_MODE_SIZE (mode);
6581 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6582 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6583 parms->nregs += 1;
6584 }
6585 }
6586 else
6587 {
6588 if (parms->intoffset == -1)
6589 parms->intoffset = bitpos;
6590 }
6591 }
6592 }
6593 }
6594
6595 /* Used by function_arg and sparc_function_value_1 to implement the complex
6596 conventions of the 64-bit ABI for passing and returning structures.
6597 Return an expression valid as a return value for the FUNCTION_ARG
6598 and TARGET_FUNCTION_VALUE.
6599
6600 TYPE is the data type of the argument (as a tree).
6601 This is null for libcalls where that information may
6602 not be available.
6603 MODE is the argument's machine mode.
6604 SLOTNO is the index number of the argument's slot in the parameter array.
6605 NAMED is nonzero if this argument is a named parameter
6606 (otherwise it is an extra parameter matching an ellipsis).
6607 REGBASE is the regno of the base register for the parameter array. */
6608
6609 static rtx
6610 function_arg_record_value (const_tree type, machine_mode mode,
6611 int slotno, int named, int regbase)
6612 {
6613 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6614 struct function_arg_record_value_parms parms;
6615 unsigned int nregs;
6616
6617 parms.ret = NULL_RTX;
6618 parms.slotno = slotno;
6619 parms.named = named;
6620 parms.regbase = regbase;
6621 parms.stack = 0;
6622
6623 /* Compute how many registers we need. */
6624 parms.nregs = 0;
6625 parms.intoffset = 0;
6626 function_arg_record_value_1 (type, 0, &parms, false);
6627
6628 /* Take into account pending integer fields. */
6629 if (parms.intoffset != -1)
6630 {
6631 unsigned int startbit, endbit;
6632 int intslots, this_slotno;
6633
6634 startbit = ROUND_DOWN (parms.intoffset, BITS_PER_WORD);
6635 endbit = ROUND_UP (typesize * BITS_PER_UNIT, BITS_PER_WORD);
6636 intslots = (endbit - startbit) / BITS_PER_WORD;
6637 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6638
6639 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6640 {
6641 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6642 /* We need to pass this field on the stack. */
6643 parms.stack = 1;
6644 }
6645
6646 parms.nregs += intslots;
6647 }
6648
6649 /* Allocate the vector and handle some annoying special cases. */
6650 nregs = parms.nregs;
6651
6652 if (nregs == 0)
6653 {
6654 /* ??? Empty structure has no value? Duh? */
6655 if (typesize <= 0)
6656 {
6657 /* Though there's nothing really to store, return a word register
6658 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6659 leads to breakage due to the fact that there are zero bytes to
6660 load. */
6661 return gen_rtx_REG (mode, regbase);
6662 }
6663
6664 /* ??? C++ has structures with no fields, and yet a size. Give up
6665 for now and pass everything back in integer registers. */
6666 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6667 if (nregs + slotno > SPARC_INT_ARG_MAX)
6668 nregs = SPARC_INT_ARG_MAX - slotno;
6669 }
6670
6671 gcc_assert (nregs > 0);
6672
6673 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6674
6675 /* If at least one field must be passed on the stack, generate
6676 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6677 also be passed on the stack. We can't do much better because the
6678 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6679 of structures for which the fields passed exclusively in registers
6680 are not at the beginning of the structure. */
6681 if (parms.stack)
6682 XVECEXP (parms.ret, 0, 0)
6683 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6684
6685 /* Fill in the entries. */
6686 parms.nregs = 0;
6687 parms.intoffset = 0;
6688 function_arg_record_value_2 (type, 0, &parms, false);
6689 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6690
6691 gcc_assert (parms.nregs == nregs);
6692
6693 return parms.ret;
6694 }
6695
6696 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6697 of the 64-bit ABI for passing and returning unions.
6698 Return an expression valid as a return value for the FUNCTION_ARG
6699 and TARGET_FUNCTION_VALUE.
6700
6701 SIZE is the size in bytes of the union.
6702 MODE is the argument's machine mode.
6703 REGNO is the hard register the union will be passed in. */
6704
6705 static rtx
6706 function_arg_union_value (int size, machine_mode mode, int slotno,
6707 int regno)
6708 {
6709 int nwords = ROUND_ADVANCE (size), i;
6710 rtx regs;
6711
6712 /* See comment in previous function for empty structures. */
6713 if (nwords == 0)
6714 return gen_rtx_REG (mode, regno);
6715
6716 if (slotno == SPARC_INT_ARG_MAX - 1)
6717 nwords = 1;
6718
6719 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6720
6721 for (i = 0; i < nwords; i++)
6722 {
6723 /* Unions are passed left-justified. */
6724 XVECEXP (regs, 0, i)
6725 = gen_rtx_EXPR_LIST (VOIDmode,
6726 gen_rtx_REG (word_mode, regno),
6727 GEN_INT (UNITS_PER_WORD * i));
6728 regno++;
6729 }
6730
6731 return regs;
6732 }
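/* E.g. (a sketch) a 12-byte union starting in slot 0 has nwords == 2
   and yields a PARALLEL of two word_mode registers at byte offsets 0
   and 8; starting in the last slot (SPARC_INT_ARG_MAX - 1), it is cut
   down to the single remaining register, the rest going to memory.  */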
6733
6734 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6735 for passing and returning BLKmode vectors.
6736 Return an expression valid as a return value for the FUNCTION_ARG
6737 and TARGET_FUNCTION_VALUE.
6738
6739 SIZE is the size in bytes of the vector.
6740 REGNO is the FP hard register the vector will be passed in. */
6741
6742 static rtx
6743 function_arg_vector_value (int size, int regno)
6744 {
6745 const int nregs = MAX (1, size / 8);
6746 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6747
6748 if (size < 8)
6749 XVECEXP (regs, 0, 0)
6750 = gen_rtx_EXPR_LIST (VOIDmode,
6751 gen_rtx_REG (SImode, regno),
6752 const0_rtx);
6753 else
6754 for (int i = 0; i < nregs; i++)
6755 XVECEXP (regs, 0, i)
6756 = gen_rtx_EXPR_LIST (VOIDmode,
6757 gen_rtx_REG (DImode, regno + 2*i),
6758 GEN_INT (i*8));
6759
6760 return regs;
6761 }
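/* E.g. a 16-byte vector passed starting at FP register REGNO yields
   two DImode pieces in REGNO and REGNO + 2 at byte offsets 0 and 8
   (FP register numbers count 4-byte quantities); a 4-byte vector is a
   single SImode piece at offset 0.  */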
6762
6763 /* Determine where to put an argument to a function.
6764 Value is zero to push the argument on the stack,
6765 or a hard register in which to store the argument.
6766
6767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6768 the preceding args and about the function being called.
6769 MODE is the argument's machine mode.
6770 TYPE is the data type of the argument (as a tree).
6771 This is null for libcalls where that information may
6772 not be available.
6773 NAMED is true if this argument is a named parameter
6774 (otherwise it is an extra parameter matching an ellipsis).
6775 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6776 TARGET_FUNCTION_INCOMING_ARG. */
6777
6778 static rtx
6779 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6780 const_tree type, bool named, bool incoming_p)
6781 {
6782 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6783
6784 int regbase = (incoming_p
6785 ? SPARC_INCOMING_INT_ARG_FIRST
6786 : SPARC_OUTGOING_INT_ARG_FIRST);
6787 int slotno, regno, padding;
6788 enum mode_class mclass = GET_MODE_CLASS (mode);
6789
6790 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6791 &regno, &padding);
6792 if (slotno == -1)
6793 return 0;
6794
6795 /* Vector types deserve special treatment because they are polymorphic wrt
6796 their mode, depending upon whether VIS instructions are enabled. */
6797 if (type && TREE_CODE (type) == VECTOR_TYPE)
6798 {
6799 HOST_WIDE_INT size = int_size_in_bytes (type);
6800 gcc_assert ((TARGET_ARCH32 && size <= 8)
6801 || (TARGET_ARCH64 && size <= 16));
6802
6803 if (mode == BLKmode)
6804 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6805
6806 mclass = MODE_FLOAT;
6807 }
6808
6809 if (TARGET_ARCH32)
6810 return gen_rtx_REG (mode, regno);
6811
6812 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6813 and are promoted to registers if possible. */
6814 if (type && TREE_CODE (type) == RECORD_TYPE)
6815 {
6816 HOST_WIDE_INT size = int_size_in_bytes (type);
6817 gcc_assert (size <= 16);
6818
6819 return function_arg_record_value (type, mode, slotno, named, regbase);
6820 }
6821
6822 /* Unions up to 16 bytes in size are passed in integer registers. */
6823 else if (type && TREE_CODE (type) == UNION_TYPE)
6824 {
6825 HOST_WIDE_INT size = int_size_in_bytes (type);
6826 gcc_assert (size <= 16);
6827
6828 return function_arg_union_value (size, mode, slotno, regno);
6829 }
6830
6831 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6832 but also have the slot allocated for them.
6833 If no prototype is in scope fp values in register slots get passed
6834 in two places, either fp regs and int regs or fp regs and memory. */
6835 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6836 && SPARC_FP_REG_P (regno))
6837 {
6838 rtx reg = gen_rtx_REG (mode, regno);
6839 if (cum->prototype_p || cum->libcall_p)
6840 {
6841 /* "* 2" because fp reg numbers are recorded in 4 byte
6842 quantities. */
6843 #if 0
6844 /* ??? This will cause the value to be passed in the fp reg and
6845 in the stack. When a prototype exists we want to pass the
6846 value in the reg but reserve space on the stack. That's an
6847 optimization, and is deferred [for a bit]. */
6848 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6849 return gen_rtx_PARALLEL (mode,
6850 gen_rtvec (2,
6851 gen_rtx_EXPR_LIST (VOIDmode,
6852 NULL_RTX, const0_rtx),
6853 gen_rtx_EXPR_LIST (VOIDmode,
6854 reg, const0_rtx)));
6855 else
6856 #else
6857 /* ??? It seems that passing back a register even when past
6858 the area declared by REG_PARM_STACK_SPACE will allocate
6859 space appropriately, and will not copy the data onto the
6860 stack, exactly as we desire.
6861
6862 This is due to locate_and_pad_parm being called in
6863 expand_call whenever reg_parm_stack_space > 0, which,
6864 while beneficial to our example here, would seem to be
6865 in error from what had been intended. Ho hum... -- r~ */
6866 #endif
6867 return reg;
6868 }
6869 else
6870 {
6871 rtx v0, v1;
6872
6873 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6874 {
6875 int intreg;
6876
6877 /* On incoming, we don't need to know that the value
6878 is passed in %f0 and %i0, and it confuses other parts
6879 causing needless spillage even on the simplest cases. */
6880 if (incoming_p)
6881 return reg;
6882
6883 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6884 + (regno - SPARC_FP_ARG_FIRST) / 2);
6885
6886 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6887 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6888 const0_rtx);
6889 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6890 }
6891 else
6892 {
6893 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6894 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6895 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6896 }
6897 }
6898 }
6899
6900 /* All other aggregate types are passed in an integer register in a mode
6901 corresponding to the size of the type. */
6902 else if (type && AGGREGATE_TYPE_P (type))
6903 {
6904 HOST_WIDE_INT size = int_size_in_bytes (type);
6905 gcc_assert (size <= 16);
6906
6907 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6908 }
6909
6910 return gen_rtx_REG (mode, regno);
6911 }
6912
6913 /* Handle the TARGET_FUNCTION_ARG target hook. */
6914
6915 static rtx
6916 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6917 const_tree type, bool named)
6918 {
6919 return sparc_function_arg_1 (cum, mode, type, named, false);
6920 }
6921
6922 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6923
6924 static rtx
6925 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6926 const_tree type, bool named)
6927 {
6928 return sparc_function_arg_1 (cum, mode, type, named, true);
6929 }
6930
6931 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
6932
6933 static unsigned int
6934 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6935 {
6936 return ((TARGET_ARCH64
6937 && (GET_MODE_ALIGNMENT (mode) == 128
6938 || (type && TYPE_ALIGN (type) == 128)))
6939 ? 128
6940 : PARM_BOUNDARY);
6941 }
6942
6943 /* For an arg passed partly in registers and partly in memory,
6944 this is the number of bytes of registers used.
6945 For args passed entirely in registers or entirely in memory, zero.
6946
6947 Any arg that starts in the first 6 regs but won't entirely fit in them
6948 needs partial registers on v8. On v9, structures with integer
6949 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6950 values that begin in the last fp reg [where "last fp reg" varies with the
6951 mode] will be split between that reg and memory. */
6952
6953 static int
6954 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
6955 tree type, bool named)
6956 {
6957 int slotno, regno, padding;
6958
6959 /* We pass false for incoming_p here; it doesn't matter. */
6960 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6961 false, &regno, &padding);
6962
6963 if (slotno == -1)
6964 return 0;
6965
6966 if (TARGET_ARCH32)
6967 {
6968 if ((slotno + (mode == BLKmode
6969 ? ROUND_ADVANCE (int_size_in_bytes (type))
6970 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6971 > SPARC_INT_ARG_MAX)
6972 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6973 }
6974 else
6975 {
6976 /* We are guaranteed by pass_by_reference that the size of the
6977 argument is not greater than 16 bytes, so we only need to return
6978 one word if the argument is partially passed in registers. */
6979
6980 if (type && AGGREGATE_TYPE_P (type))
6981 {
6982 int size = int_size_in_bytes (type);
6983
6984 if (size > UNITS_PER_WORD
6985 && slotno == SPARC_INT_ARG_MAX - 1)
6986 return UNITS_PER_WORD;
6987 }
6988 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6989 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6990 && ! (TARGET_FPU && named)))
6991 {
6992 /* The complex types are passed as packed types. */
6993 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6994 && slotno == SPARC_INT_ARG_MAX - 1)
6995 return UNITS_PER_WORD;
6996 }
6997 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6998 {
6999 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7000 > SPARC_FP_ARG_MAX)
7001 return UNITS_PER_WORD;
7002 }
7003 }
7004
7005 return 0;
7006 }
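/* Worked example (a sketch): on 32-bit, a `double' starting in slot 5
   needs ROUND_ADVANCE (8) == 2 slots, and 5 + 2 > SPARC_INT_ARG_MAX,
   so (SPARC_INT_ARG_MAX - 5) * UNITS_PER_WORD == 4 bytes go in %o5 and
   the remaining 4 bytes go on the stack.  */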
7007
7008 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7009 Specify whether to pass the argument by reference. */
7010
7011 static bool
7012 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7013 machine_mode mode, const_tree type,
7014 bool named ATTRIBUTE_UNUSED)
7015 {
7016 if (TARGET_ARCH32)
7017 /* Original SPARC 32-bit ABI says that structures and unions,
7018 and quad-precision floats are passed by reference. For Pascal,
7019 also pass arrays by reference. All other base types are passed
7020 in registers.
7021
7022 Extended ABI (as implemented by the Sun compiler) says that all
7023 complex floats are passed by reference. Pass complex integers
7024 in registers up to 8 bytes. More generally, enforce the 2-word
7025 cap for passing arguments in registers.
7026
7027 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7028 integers are passed like floats of the same size, that is in
7029 registers up to 8 bytes. Pass all vector floats by reference
7030 like structure and unions. */
7031 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7032 || mode == SCmode
7033 /* Catch CDImode, TFmode, DCmode and TCmode. */
7034 || GET_MODE_SIZE (mode) > 8
7035 || (type
7036 && TREE_CODE (type) == VECTOR_TYPE
7037 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7038 else
7039 /* Original SPARC 64-bit ABI says that structures and unions
7040 smaller than 16 bytes are passed in registers, as well as
7041 all other base types.
7042
7043 Extended ABI (as implemented by the Sun compiler) says that
7044 complex floats are passed in registers up to 16 bytes. Pass
7045 all complex integers in registers up to 16 bytes. More generally,
7046 enforce the 2-word cap for passing arguments in registers.
7047
7048 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7049 integers are passed like floats of the same size, that is in
7050 registers (up to 16 bytes). Pass all vector floats like structure
7051 and unions. */
7052 return ((type
7053 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7054 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7055 /* Catch CTImode and TCmode. */
7056 || GET_MODE_SIZE (mode) > 16);
7057 }
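/* Illustrative consequences (a sketch): on 32-bit, aggregates, TFmode
   `long double', SCmode `_Complex float' and anything wider than 8
   bytes (e.g. CDImode `_Complex long long') go by reference; on 64-bit,
   only objects wider than 16 bytes (e.g. TCmode `_Complex long double'
   or a 24-byte structure) do.  */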
7058
7059 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7060 Update the data in CUM to advance over an argument
7061 of mode MODE and data type TYPE.
7062 TYPE is null for libcalls where that information may not be available. */
7063
7064 static void
7065 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7066 const_tree type, bool named)
7067 {
7068 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7069 int regno, padding;
7070
7071 /* We pass false for incoming_p here; it doesn't matter. */
7072 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7073
7074 /* If argument requires leading padding, add it. */
7075 cum->words += padding;
7076
7077 if (TARGET_ARCH32)
7078 {
7079 cum->words += (mode != BLKmode
7080 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7081 : ROUND_ADVANCE (int_size_in_bytes (type)));
7082 }
7083 else
7084 {
7085 if (type && AGGREGATE_TYPE_P (type))
7086 {
7087 int size = int_size_in_bytes (type);
7088
7089 if (size <= 8)
7090 ++cum->words;
7091 else if (size <= 16)
7092 cum->words += 2;
7093 else /* passed by reference */
7094 ++cum->words;
7095 }
7096 else
7097 {
7098 cum->words += (mode != BLKmode
7099 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7100 : ROUND_ADVANCE (int_size_in_bytes (type)));
7101 }
7102 }
7103 }
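/* E.g. (a sketch) on 64-bit: a 12-byte structure advances CUM->words
   by 2, while a 24-byte structure, being passed by reference, advances
   it by 1 (the pointer); on 32-bit a `double' advances it by
   ROUND_ADVANCE (8) == 2.  */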
7104
7105 /* Handle the FUNCTION_ARG_PADDING macro.
7106 For the 64-bit ABI, structs are always stored left-shifted in their
7107 argument slot. */
7108
7109 enum direction
7110 function_arg_padding (machine_mode mode, const_tree type)
7111 {
7112 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7113 return upward;
7114
7115 /* Fall back to the default. */
7116 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7117 }
7118
7119 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7120 Specify whether to return the return value in memory. */
7121
7122 static bool
7123 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7124 {
7125 if (TARGET_ARCH32)
7126 /* Original SPARC 32-bit ABI says that structures and unions,
7127 and quad-precision floats are returned in memory. All other
7128 base types are returned in registers.
7129
7130 Extended ABI (as implemented by the Sun compiler) says that
7131 all complex floats are returned in registers (8 FP registers
7132 at most for '_Complex long double'). Return all complex integers
7133 in registers (4 at most for '_Complex long long').
7134
7135 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7136 integers are returned like floats of the same size, that is in
7137 registers up to 8 bytes and in memory otherwise. Return all
7138 vector floats in memory like structure and unions; note that
7139 they always have BLKmode like the latter. */
7140 return (TYPE_MODE (type) == BLKmode
7141 || TYPE_MODE (type) == TFmode
7142 || (TREE_CODE (type) == VECTOR_TYPE
7143 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7144 else
7145 /* Original SPARC 64-bit ABI says that structures and unions
7146 smaller than 32 bytes are returned in registers, as well as
7147 all other base types.
7148
7149 Extended ABI (as implemented by the Sun compiler) says that all
7150 complex floats are returned in registers (8 FP registers at most
7151 for '_Complex long double'). Return all complex integers in
7152 registers (4 at most for '_Complex TItype').
7153
7154 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7155 integers are returned like floats of the same size, that is in
7156 registers. Return all vector floats like structure and unions;
7157 note that they always have BLKmode like the latter. */
7158 return (TYPE_MODE (type) == BLKmode
7159 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7160 }
7161
7162 /* Handle the TARGET_STRUCT_VALUE target hook.
7163 Return where to find the structure return value address. */
7164
7165 static rtx
7166 sparc_struct_value_rtx (tree fndecl, int incoming)
7167 {
7168 if (TARGET_ARCH64)
7169 return 0;
7170 else
7171 {
7172 rtx mem;
7173
7174 if (incoming)
7175 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7176 STRUCT_VALUE_OFFSET));
7177 else
7178 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7179 STRUCT_VALUE_OFFSET));
7180
7181 /* Only follow the SPARC ABI for fixed-size structure returns.
7182 Variable size structure returns are handled per the normal
7183 procedures in GCC. This is enabled by -mstd-struct-return. */
7184 if (incoming == 2
7185 && sparc_std_struct_return
7186 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7187 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7188 {
7189 /* We must check and adjust the return address, since it is optional
7190 whether the return object is really provided. */
7191 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7192 rtx scratch = gen_reg_rtx (SImode);
7193 rtx_code_label *endlab = gen_label_rtx ();
7194
7195 /* Calculate the return object size. */
7196 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7197 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7198 /* Construct a temporary return value. */
7199 rtx temp_val
7200 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7201
7202 /* Implement SPARC 32-bit psABI callee return struct checking:
7203
7204 Fetch the instruction where we will return to and see if
7205 it's an unimp instruction (the most significant 10 bits
7206 will be zero). */
7207 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7208 plus_constant (Pmode,
7209 ret_reg, 8)));
7210 /* Assume the size is valid and pre-adjust. */
7211 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7212 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7213 0, endlab);
7214 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7215 /* Write the address of the memory pointed to by temp_val into
7216 the memory pointed to by mem. */
7217 emit_move_insn (mem, XEXP (temp_val, 0));
7218 emit_label (endlab);
7219 }
7220
7221 return mem;
7222 }
7223 }
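/* For reference, a sketch of the caller-side sequence the code above
   checks against (32-bit psABI):

     call  f
      nop              ! delay slot
     unimp SIZE        ! low 12 bits hold the expected struct size

   The callee fetches the word at the return address plus 8 and compares
   it against SIZE & 0xfff before trusting the caller-supplied return
   area; on a match the return address is bumped past the unimp.  */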
7224
7225 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7226 For v9, function return values are subject to the same rules as arguments,
7227 except that up to 32 bytes may be returned in registers. */
7228
7229 static rtx
7230 sparc_function_value_1 (const_tree type, machine_mode mode,
7231 bool outgoing)
7232 {
7233 /* Beware that the two values are swapped here wrt function_arg. */
7234 int regbase = (outgoing
7235 ? SPARC_INCOMING_INT_ARG_FIRST
7236 : SPARC_OUTGOING_INT_ARG_FIRST);
7237 enum mode_class mclass = GET_MODE_CLASS (mode);
7238 int regno;
7239
7240 /* Vector types deserve special treatment because they are polymorphic wrt
7241 their mode, depending upon whether VIS instructions are enabled. */
7242 if (type && TREE_CODE (type) == VECTOR_TYPE)
7243 {
7244 HOST_WIDE_INT size = int_size_in_bytes (type);
7245 gcc_assert ((TARGET_ARCH32 && size <= 8)
7246 || (TARGET_ARCH64 && size <= 32));
7247
7248 if (mode == BLKmode)
7249 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7250
7251 mclass = MODE_FLOAT;
7252 }
7253
7254 if (TARGET_ARCH64 && type)
7255 {
7256 /* Structures up to 32 bytes in size are returned in registers. */
7257 if (TREE_CODE (type) == RECORD_TYPE)
7258 {
7259 HOST_WIDE_INT size = int_size_in_bytes (type);
7260 gcc_assert (size <= 32);
7261
7262 return function_arg_record_value (type, mode, 0, 1, regbase);
7263 }
7264
7265 /* Unions up to 32 bytes in size are returned in integer registers. */
7266 else if (TREE_CODE (type) == UNION_TYPE)
7267 {
7268 HOST_WIDE_INT size = int_size_in_bytes (type);
7269 gcc_assert (size <= 32);
7270
7271 return function_arg_union_value (size, mode, 0, regbase);
7272 }
7273
7274 /* Objects that require it are returned in FP registers. */
7275 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7276 ;
7277
7278 /* All other aggregate types are returned in an integer register in a
7279 mode corresponding to the size of the type. */
7280 else if (AGGREGATE_TYPE_P (type))
7281 {
7284 HOST_WIDE_INT size = int_size_in_bytes (type);
7285 gcc_assert (size <= 32);
7286
7287 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7288
7289 /* ??? We probably should have made the same ABI change in
7290 3.4.0 as the one we made for unions. The latter was
7291 required by the SCD though, while the former is not
7292 specified, so we favored compatibility and efficiency.
7293
7294 Now we're stuck for aggregates larger than 16 bytes,
7295 because OImode vanished in the meantime. Let's not
7296 try to be unduly clever, and simply follow the ABI
7297 for unions in that case. */
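/* As a concrete illustration: a 24-byte struct has no integer mode
   of its own once OImode is gone, so it takes the union path above
   and comes back as a PARALLEL of three word-sized pieces in
   consecutive integer registers.  */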
7298 if (mode == BLKmode)
7299 return function_arg_union_value (size, mode, 0, regbase);
7300 else
7301 mclass = MODE_INT;
7302 }
7303
7304 /* We should only have pointer and integer types at this point. This
7305 must match sparc_promote_function_mode. */
7306 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7307 mode = word_mode;
7308 }
7309
7310 /* We should only have pointer and integer types at this point, except with
7311 -freg-struct-return. This must match sparc_promote_function_mode. */
7312 else if (TARGET_ARCH32
7313 && !(type && AGGREGATE_TYPE_P (type))
7314 && mclass == MODE_INT
7315 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7316 mode = word_mode;
7317
7318 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7319 regno = SPARC_FP_ARG_FIRST;
7320 else
7321 regno = regbase;
7322
7323 return gen_rtx_REG (mode, regno);
7324 }
7325
7326 /* Handle TARGET_FUNCTION_VALUE.
7327 On the SPARC, the value is found in the first "output" register, but the
7328 called function leaves it in the first "input" register. */
7329
7330 static rtx
7331 sparc_function_value (const_tree valtype,
7332 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7333 bool outgoing)
7334 {
7335 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7336 }
7337
7338 /* Handle TARGET_LIBCALL_VALUE. */
7339
7340 static rtx
7341 sparc_libcall_value (machine_mode mode,
7342 const_rtx fun ATTRIBUTE_UNUSED)
7343 {
7344 return sparc_function_value_1 (NULL_TREE, mode, false);
7345 }
7346
7347 /* Handle FUNCTION_VALUE_REGNO_P.
7348 On the SPARC, the first "output" reg is used for integer values, and the
7349 first floating point register is used for floating point values. */
7350
7351 static bool
7352 sparc_function_value_regno_p (const unsigned int regno)
7353 {
7354 return (regno == 8 || (TARGET_FPU && regno == 32));
7355 }
7356
7357 /* Do what is necessary for `va_start'. We look at the current function
7358 to determine if stdarg or varargs is used and return the address of
7359 the first unnamed parameter. */
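/* For instance, a sketch of the effect: with two named argument words,
   first_reg is 2 and the loop below stores %i2 through %i5 at
   %fp + FIRST_PARM_OFFSET (0) + 2 * UNITS_PER_WORD and up, so the
   variadic words sit contiguously with any stack-passed arguments.  */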
7360
7361 static rtx
7362 sparc_builtin_saveregs (void)
7363 {
7364 int first_reg = crtl->args.info.words;
7365 rtx address;
7366 int regno;
7367
7368 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7369 emit_move_insn (gen_rtx_MEM (word_mode,
7370 gen_rtx_PLUS (Pmode,
7371 frame_pointer_rtx,
7372 GEN_INT (FIRST_PARM_OFFSET (0)
7373 + (UNITS_PER_WORD
7374 * regno)))),
7375 gen_rtx_REG (word_mode,
7376 SPARC_INCOMING_INT_ARG_FIRST + regno));
7377
7378 address = gen_rtx_PLUS (Pmode,
7379 frame_pointer_rtx,
7380 GEN_INT (FIRST_PARM_OFFSET (0)
7381 + UNITS_PER_WORD * first_reg));
7382
7383 return address;
7384 }
7385
7386 /* Implement `va_start' for stdarg. */
7387
7388 static void
7389 sparc_va_start (tree valist, rtx nextarg)
7390 {
7391 nextarg = expand_builtin_saveregs ();
7392 std_expand_builtin_va_start (valist, nextarg);
7393 }
7394
7395 /* Implement `va_arg' for stdarg. */
7396
7397 static tree
7398 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7399 gimple_seq *post_p)
7400 {
7401 HOST_WIDE_INT size, rsize, align;
7402 tree addr, incr;
7403 bool indirect;
7404 tree ptrtype = build_pointer_type (type);
7405
7406 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7407 {
7408 indirect = true;
7409 size = rsize = UNITS_PER_WORD;
7410 align = 0;
7411 }
7412 else
7413 {
7414 indirect = false;
7415 size = int_size_in_bytes (type);
7416 rsize = ROUND_UP (size, UNITS_PER_WORD);
7417 align = 0;
7418
7419 if (TARGET_ARCH64)
7420 {
7421 /* For SPARC64, objects requiring 16-byte alignment get it. */
7422 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7423 align = 2 * UNITS_PER_WORD;
7424
7425 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7426 are left-justified in their slots. */
7427 if (AGGREGATE_TYPE_P (type))
7428 {
7429 if (size == 0)
7430 size = rsize = UNITS_PER_WORD;
7431 else
7432 size = rsize;
7433 }
7434 }
7435 }
7436
7437 incr = valist;
7438 if (align)
7439 {
7440 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7441 incr = fold_convert (sizetype, incr);
7442 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7443 size_int (-align));
7444 incr = fold_convert (ptr_type_node, incr);
7445 }
7446
7447 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7448 addr = incr;
7449
7450 if (BYTES_BIG_ENDIAN && size < rsize)
7451 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7452
7453 if (indirect)
7454 {
7455 addr = fold_convert (build_pointer_type (ptrtype), addr);
7456 addr = build_va_arg_indirect_ref (addr);
7457 }
7458
7459 /* If the address isn't aligned properly for the type, we need a temporary.
7460 FIXME: This is inefficient; usually we can do this in registers. */
7461 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7462 {
7463 tree tmp = create_tmp_var (type, "va_arg_tmp");
7464 tree dest_addr = build_fold_addr_expr (tmp);
7465 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7466 3, dest_addr, addr, size_int (rsize));
7467 TREE_ADDRESSABLE (tmp) = 1;
7468 gimplify_and_add (copy, pre_p);
7469 addr = dest_addr;
7470 }
7471
7472 else
7473 addr = fold_convert (ptrtype, addr);
7474
7475 incr = fold_build_pointer_plus_hwi (incr, rsize);
7476 gimplify_assign (valist, incr, post_p);
7477
7478 return build_va_arg_indirect_ref (addr);
7479 }
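/* Two worked examples of the slot arithmetic above, assuming
   TARGET_ARCH64: a 4-byte int has size 4 and rsize 8, and since the
   target is big-endian it is fetched from incr + 4, the high half of
   its 8-byte slot; a 16-byte-aligned type first has incr rounded up
   to a multiple of 16 via the BIT_AND_EXPR mask.  */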
7480 \f
7481 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7482 Specify whether the vector mode is supported by the hardware. */
7483
7484 static bool
7485 sparc_vector_mode_supported_p (machine_mode mode)
7486 {
7487 return TARGET_VIS && VECTOR_MODE_P (mode);
7488 }
7489 \f
7490 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7491
7492 static machine_mode
7493 sparc_preferred_simd_mode (machine_mode mode)
7494 {
7495 if (TARGET_VIS)
7496 switch (mode)
7497 {
7498 case SImode:
7499 return V2SImode;
7500 case HImode:
7501 return V4HImode;
7502 case QImode:
7503 return V8QImode;
7504
7505 default:;
7506 }
7507
7508 return word_mode;
7509 }
7510 \f
7511 /* Return the string to output an unconditional branch to LABEL, which is
7512 the operand number of the label.
7513
7514 DEST is the destination insn (i.e. the label), INSN is the source. */
7515
7516 const char *
7517 output_ubranch (rtx dest, rtx_insn *insn)
7518 {
7519 static char string[64];
7520 bool v9_form = false;
7521 int delta;
7522 char *p;
7523
7524 /* Even if we are trying to use cbcond for this, evaluate
7525 whether we can use V9 branches as our backup plan. */
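/* A sketch of the possible outputs: in range, the cbcond form is
   "cwbe %g0, %g0, %l0" (an always-true compare-and-branch); out of
   range we fall back to "ba,a,pt %xcc, %l0" when the V9 form
   reaches, or a plain "b,a %l0" otherwise.  */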
7526
7527 delta = 5000000;
7528 if (INSN_ADDRESSES_SET_P ())
7529 delta = (INSN_ADDRESSES (INSN_UID (dest))
7530 - INSN_ADDRESSES (INSN_UID (insn)));
7531
7532 /* Leave some instructions for "slop". */
7533 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7534 v9_form = true;
7535
7536 if (TARGET_CBCOND)
7537 {
7538 bool emit_nop = emit_cbcond_nop (insn);
7539 bool far = false;
7540 const char *rval;
7541
7542 if (delta < -500 || delta > 500)
7543 far = true;
7544
7545 if (far)
7546 {
7547 if (v9_form)
7548 rval = "ba,a,pt\t%%xcc, %l0";
7549 else
7550 rval = "b,a\t%l0";
7551 }
7552 else
7553 {
7554 if (emit_nop)
7555 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7556 else
7557 rval = "cwbe\t%%g0, %%g0, %l0";
7558 }
7559 return rval;
7560 }
7561
7562 if (v9_form)
7563 strcpy (string, "ba%*,pt\t%%xcc, ");
7564 else
7565 strcpy (string, "b%*\t");
7566
7567 p = strchr (string, '\0');
7568 *p++ = '%';
7569 *p++ = 'l';
7570 *p++ = '0';
7571 *p++ = '%';
7572 *p++ = '(';
7573 *p = '\0';
7574
7575 return string;
7576 }
7577
7578 /* Return the string to output a conditional branch to LABEL, which is
7579 the operand number of the label. OP is the conditional expression.
7580 XEXP (OP, 0) is assumed to be a condition code register (integer or
7581 floating point) and its mode specifies what kind of comparison we made.
7582
7583 DEST is the destination insn (i.e. the label), INSN is the source.
7584
7585 REVERSED is nonzero if we should reverse the sense of the comparison.
7586
7587 ANNUL is nonzero if we should generate an annulling branch. */
7588
7589 const char *
7590 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7591 rtx_insn *insn)
7592 {
7593 static char string[64];
7594 enum rtx_code code = GET_CODE (op);
7595 rtx cc_reg = XEXP (op, 0);
7596 machine_mode mode = GET_MODE (cc_reg);
7597 const char *labelno, *branch;
7598 int spaces = 8, far;
7599 char *p;
7600
7601 /* v9 branches are limited to +-1MB. If it is too far away,
7602 change
7603
7604 bne,pt %xcc, .LC30
7605
7606 to
7607
7608 be,pn %xcc, .+12
7609 nop
7610 ba .LC30
7611
7612 and
7613
7614 fbne,a,pn %fcc2, .LC29
7615
7616 to
7617
7618 fbe,pt %fcc2, .+16
7619 nop
7620 ba .LC29 */
7621
7622 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7623 if (reversed ^ far)
7624 {
7625 /* Reversal of FP compares takes care -- an ordered compare
7626 becomes an unordered compare and vice versa. */
7627 if (mode == CCFPmode || mode == CCFPEmode)
7628 code = reverse_condition_maybe_unordered (code);
7629 else
7630 code = reverse_condition (code);
7631 }
7632
7633 /* Start by writing the branch condition. */
7634 if (mode == CCFPmode || mode == CCFPEmode)
7635 {
7636 switch (code)
7637 {
7638 case NE:
7639 branch = "fbne";
7640 break;
7641 case EQ:
7642 branch = "fbe";
7643 break;
7644 case GE:
7645 branch = "fbge";
7646 break;
7647 case GT:
7648 branch = "fbg";
7649 break;
7650 case LE:
7651 branch = "fble";
7652 break;
7653 case LT:
7654 branch = "fbl";
7655 break;
7656 case UNORDERED:
7657 branch = "fbu";
7658 break;
7659 case ORDERED:
7660 branch = "fbo";
7661 break;
7662 case UNGT:
7663 branch = "fbug";
7664 break;
7665 case UNLT:
7666 branch = "fbul";
7667 break;
7668 case UNEQ:
7669 branch = "fbue";
7670 break;
7671 case UNGE:
7672 branch = "fbuge";
7673 break;
7674 case UNLE:
7675 branch = "fbule";
7676 break;
7677 case LTGT:
7678 branch = "fblg";
7679 break;
7680
7681 default:
7682 gcc_unreachable ();
7683 }
7684
7685 /* ??? !v9: FP branches cannot be preceded by another floating point
7686 insn. Because there is currently no concept of pre-delay slots,
7687 we can fix this only by always emitting a nop before a floating
7688 point branch. */
7689
7690 string[0] = '\0';
7691 if (! TARGET_V9)
7692 strcpy (string, "nop\n\t");
7693 strcat (string, branch);
7694 }
7695 else
7696 {
7697 switch (code)
7698 {
7699 case NE:
7700 branch = "bne";
7701 break;
7702 case EQ:
7703 branch = "be";
7704 break;
7705 case GE:
7706 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7707 branch = "bpos";
7708 else
7709 branch = "bge";
7710 break;
7711 case GT:
7712 branch = "bg";
7713 break;
7714 case LE:
7715 branch = "ble";
7716 break;
7717 case LT:
7718 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7719 branch = "bneg";
7720 else
7721 branch = "bl";
7722 break;
7723 case GEU:
7724 branch = "bgeu";
7725 break;
7726 case GTU:
7727 branch = "bgu";
7728 break;
7729 case LEU:
7730 branch = "bleu";
7731 break;
7732 case LTU:
7733 branch = "blu";
7734 break;
7735
7736 default:
7737 gcc_unreachable ();
7738 }
7739 strcpy (string, branch);
7740 }
7741 spaces -= strlen (branch);
7742 p = strchr (string, '\0');
7743
7744 /* Now add the annulling, the label, and a possible noop. */
7745 if (annul && ! far)
7746 {
7747 strcpy (p, ",a");
7748 p += 2;
7749 spaces -= 2;
7750 }
7751
7752 if (TARGET_V9)
7753 {
7754 rtx note;
7755 int v8 = 0;
7756
7757 if (! far && insn && INSN_ADDRESSES_SET_P ())
7758 {
7759 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7760 - INSN_ADDRESSES (INSN_UID (insn)));
7761 /* Leave some instructions for "slop". */
7762 if (delta < -260000 || delta >= 260000)
7763 v8 = 1;
7764 }
7765
7766 if (mode == CCFPmode || mode == CCFPEmode)
7767 {
7768 static char v9_fcc_labelno[] = "%%fccX, ";
7769 /* Set the char indicating the number of the fcc reg to use. */
7770 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7771 labelno = v9_fcc_labelno;
7772 if (v8)
7773 {
7774 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7775 labelno = "";
7776 }
7777 }
7778 else if (mode == CCXmode || mode == CCX_NOOVmode)
7779 {
7780 labelno = "%%xcc, ";
7781 gcc_assert (! v8);
7782 }
7783 else
7784 {
7785 labelno = "%%icc, ";
7786 if (v8)
7787 labelno = "";
7788 }
7789
7790 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7791 {
7792 strcpy (p,
7793 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7794 ? ",pt" : ",pn");
7795 p += 3;
7796 spaces -= 3;
7797 }
7798 }
7799 else
7800 labelno = "";
7801
7802 if (spaces > 0)
7803 *p++ = '\t';
7804 else
7805 *p++ = ' ';
7806 strcpy (p, labelno);
7807 p = strchr (p, '\0');
7808 if (far)
7809 {
7810 strcpy (p, ".+12\n\t nop\n\tb\t");
7811 /* Skip the next insn if requested or
7812 if we know that it will be a nop. */
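/* Overwriting p[3] below rewrites the ".+12" just stored into
   ".+16", lengthening the forward hop by one instruction.  */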
7813 if (annul || ! final_sequence)
7814 p[3] = '6';
7815 p += 14;
7816 }
7817 *p++ = '%';
7818 *p++ = 'l';
7819 *p++ = label + '0';
7820 *p++ = '%';
7821 *p++ = '#';
7822 *p = '\0';
7823
7824 return string;
7825 }
7826
7827 /* Emit a library call comparison between floating point X and Y.
7828 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7829 Return the new operator to be used in the comparison sequence.
7830
7831 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7832 values as arguments instead of the TFmode registers themselves,
7833 that's why we cannot call emit_float_lib_cmp. */
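/* A sketch of the result encoding the masking below relies on: the
   _Q_cmp/_Qp_cmp routines return 0 for equal, 1 for less, 2 for
   greater and 3 for unordered.  Hence e.g. UNLT tests
   (result & 1) != 0, true exactly for "less" and "unordered", and
   UNORDERED simply compares the result against 3.  */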
7834
7835 rtx
7836 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7837 {
7838 const char *qpfunc;
7839 rtx slot0, slot1, result, tem, tem2, libfunc;
7840 machine_mode mode;
7841 enum rtx_code new_comparison;
7842
7843 switch (comparison)
7844 {
7845 case EQ:
7846 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7847 break;
7848
7849 case NE:
7850 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7851 break;
7852
7853 case GT:
7854 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7855 break;
7856
7857 case GE:
7858 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7859 break;
7860
7861 case LT:
7862 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7863 break;
7864
7865 case LE:
7866 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7867 break;
7868
7869 case ORDERED:
7870 case UNORDERED:
7871 case UNGT:
7872 case UNLT:
7873 case UNEQ:
7874 case UNGE:
7875 case UNLE:
7876 case LTGT:
7877 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7878 break;
7879
7880 default:
7881 gcc_unreachable ();
7882 }
7883
7884 if (TARGET_ARCH64)
7885 {
7886 if (MEM_P (x))
7887 {
7888 tree expr = MEM_EXPR (x);
7889 if (expr)
7890 mark_addressable (expr);
7891 slot0 = x;
7892 }
7893 else
7894 {
7895 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7896 emit_move_insn (slot0, x);
7897 }
7898
7899 if (MEM_P (y))
7900 {
7901 tree expr = MEM_EXPR (y);
7902 if (expr)
7903 mark_addressable (expr);
7904 slot1 = y;
7905 }
7906 else
7907 {
7908 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7909 emit_move_insn (slot1, y);
7910 }
7911
7912 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7913 emit_library_call (libfunc, LCT_NORMAL,
7914 DImode, 2,
7915 XEXP (slot0, 0), Pmode,
7916 XEXP (slot1, 0), Pmode);
7917 mode = DImode;
7918 }
7919 else
7920 {
7921 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7922 emit_library_call (libfunc, LCT_NORMAL,
7923 SImode, 2,
7924 x, TFmode, y, TFmode);
7925 mode = SImode;
7926 }
7927
7928
7929 /* Immediately move the result of the libcall into a pseudo
7930 register so reload doesn't clobber the value if it needs
7931 the return register for a spill reg. */
7932 result = gen_reg_rtx (mode);
7933 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7934
7935 switch (comparison)
7936 {
7937 default:
7938 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7939 case ORDERED:
7940 case UNORDERED:
7941 new_comparison = (comparison == UNORDERED ? EQ : NE);
7942 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7943 case UNGT:
7944 case UNGE:
7945 new_comparison = (comparison == UNGT ? GT : NE);
7946 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7947 case UNLE:
7948 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7949 case UNLT:
7950 tem = gen_reg_rtx (mode);
7951 if (TARGET_ARCH32)
7952 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7953 else
7954 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7955 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7956 case UNEQ:
7957 case LTGT:
7958 tem = gen_reg_rtx (mode);
7959 if (TARGET_ARCH32)
7960 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7961 else
7962 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7963 tem2 = gen_reg_rtx (mode);
7964 if (TARGET_ARCH32)
7965 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7966 else
7967 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7968 new_comparison = (comparison == UNEQ ? EQ : NE);
7969 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7970 }
7971
7972 gcc_unreachable ();
7973 }
7974
7975 /* Generate an unsigned DImode to FP conversion. This is the same code
7976 optabs would emit if we didn't have TFmode patterns. */
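/* A sketch of the expansion below in C terms, with FLOAT_TYPE a
   placeholder for the float type of MODE:

     if ((long long) in >= 0)
       out = (FLOAT_TYPE) in;	// high bit clear: direct conversion
     else
       {
	 // Halve, keeping the shifted-out bit so rounding stays
	 // correct, then convert and double the result.
	 unsigned long long half = (in >> 1) | (in & 1);
	 out = (FLOAT_TYPE) half * 2;
       }
*/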
7977
7978 void
7979 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
7980 {
7981 rtx i0, i1, f0, in, out;
7982
7983 out = operands[0];
7984 in = force_reg (DImode, operands[1]);
7985 rtx_code_label *neglab = gen_label_rtx ();
7986 rtx_code_label *donelab = gen_label_rtx ();
7987 i0 = gen_reg_rtx (DImode);
7988 i1 = gen_reg_rtx (DImode);
7989 f0 = gen_reg_rtx (mode);
7990
7991 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7992
7993 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
7994 emit_jump_insn (gen_jump (donelab));
7995 emit_barrier ();
7996
7997 emit_label (neglab);
7998
7999 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8000 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8001 emit_insn (gen_iordi3 (i0, i0, i1));
8002 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8003 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8004
8005 emit_label (donelab);
8006 }
8007
8008 /* Generate an FP to unsigned DImode conversion. This is the same code
8009 optabs would emit if we didn't have TFmode patterns. */
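/* In C terms, a sketch of the expansion below, where LIMIT stands
   for the 2^63 constant loaded into "limit":

     if (in < LIMIT)		// value fits in signed DImode
       out = (long long) in;
     else			// fold into range, then flip the sign bit
       out = ((long long) (in - LIMIT)) ^ (1ULL << 63);
*/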
8010
8011 void
8012 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8013 {
8014 rtx i0, i1, f0, in, out, limit;
8015
8016 out = operands[0];
8017 in = force_reg (mode, operands[1]);
8018 rtx_code_label *neglab = gen_label_rtx ();
8019 rtx_code_label *donelab = gen_label_rtx ();
8020 i0 = gen_reg_rtx (DImode);
8021 i1 = gen_reg_rtx (DImode);
8022 limit = gen_reg_rtx (mode);
8023 f0 = gen_reg_rtx (mode);
8024
8025 emit_move_insn (limit,
8026 const_double_from_real_value (
8027 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8028 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8029
8030 emit_insn (gen_rtx_SET (out,
8031 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8032 emit_jump_insn (gen_jump (donelab));
8033 emit_barrier ();
8034
8035 emit_label (neglab);
8036
8037 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8038 emit_insn (gen_rtx_SET (i0,
8039 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8040 emit_insn (gen_movdi (i1, const1_rtx));
8041 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8042 emit_insn (gen_xordi3 (out, i0, i1));
8043
8044 emit_label (donelab);
8045 }
8046
8047 /* Return the string to output a compare and branch instruction to DEST.
8048 DEST is the destination insn (i.e. the label), INSN is the source,
8049 and OP is the conditional expression. */
8050
8051 const char *
8052 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8053 {
8054 machine_mode mode = GET_MODE (XEXP (op, 0));
8055 enum rtx_code code = GET_CODE (op);
8056 const char *cond_str, *tmpl;
8057 int far, emit_nop, len;
8058 static char string[64];
8059 char size_char;
8060
8061 /* Compare and Branch is limited to +-2KB. If it is too far away,
8062 change
8063
8064 cxbne X, Y, .LC30
8065
8066 to
8067
8068 cxbe X, Y, .+16
8069 nop
8070 ba,pt xcc, .LC30
8071 nop */
8072
8073 len = get_attr_length (insn);
8074
8075 far = len == 4;
8076 emit_nop = len == 2;
8077
8078 if (far)
8079 code = reverse_condition (code);
8080
8081 size_char = ((mode == SImode) ? 'w' : 'x');
8082
8083 switch (code)
8084 {
8085 case NE:
8086 cond_str = "ne";
8087 break;
8088
8089 case EQ:
8090 cond_str = "e";
8091 break;
8092
8093 case GE:
8094 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8095 cond_str = "pos";
8096 else
8097 cond_str = "ge";
8098 break;
8099
8100 case GT:
8101 cond_str = "g";
8102 break;
8103
8104 case LE:
8105 cond_str = "le";
8106 break;
8107
8108 case LT:
8109 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8110 cond_str = "neg";
8111 else
8112 cond_str = "l";
8113 break;
8114
8115 case GEU:
8116 cond_str = "cc";
8117 break;
8118
8119 case GTU:
8120 cond_str = "gu";
8121 break;
8122
8123 case LEU:
8124 cond_str = "leu";
8125 break;
8126
8127 case LTU:
8128 cond_str = "cs";
8129 break;
8130
8131 default:
8132 gcc_unreachable ();
8133 }
8134
8135 if (far)
8136 {
8137 int veryfar = 1, delta;
8138
8139 if (INSN_ADDRESSES_SET_P ())
8140 {
8141 delta = (INSN_ADDRESSES (INSN_UID (dest))
8142 - INSN_ADDRESSES (INSN_UID (insn)));
8143 /* Leave some instructions for "slop". */
8144 if (delta >= -260000 && delta < 260000)
8145 veryfar = 0;
8146 }
8147
8148 if (veryfar)
8149 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8150 else
8151 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8152 }
8153 else
8154 {
8155 if (emit_nop)
8156 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8157 else
8158 tmpl = "c%cb%s\t%%1, %%2, %%3";
8159 }
8160
8161 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8162
8163 return string;
8164 }
8165
8166 /* Return the string to output a conditional branch to LABEL, testing
8167 register REG. LABEL is the operand number of the label; REG is the
8168 operand number of the reg. OP is the conditional expression. The mode
8169 of REG says what kind of comparison we made.
8170
8171 DEST is the destination insn (i.e. the label), INSN is the source.
8172
8173 REVERSED is nonzero if we should reverse the sense of the comparison.
8174
8175 ANNUL is nonzero if we should generate an annulling branch. */
8176
8177 const char *
8178 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8179 int annul, rtx_insn *insn)
8180 {
8181 static char string[64];
8182 enum rtx_code code = GET_CODE (op);
8183 machine_mode mode = GET_MODE (XEXP (op, 0));
8184 rtx note;
8185 int far;
8186 char *p;
8187
8188 /* Branches on a register are limited to +-128KB. If it is too far away,
8189 change
8190
8191 brnz,pt %g1, .LC30
8192
8193 to
8194
8195 brz,pn %g1, .+12
8196 nop
8197 ba,pt %xcc, .LC30
8198
8199 and
8200
8201 brgez,a,pn %o1, .LC29
8202
8203 to
8204
8205 brlz,pt %o1, .+16
8206 nop
8207 ba,pt %xcc, .LC29 */
8208
8209 far = get_attr_length (insn) >= 3;
8210
8211 /* These are integer branches on a register, so the condition can always be reversed directly. */
8212 if (reversed ^ far)
8213 code = reverse_condition (code);
8214
8215 /* Only 64 bit versions of these instructions exist. */
8216 gcc_assert (mode == DImode);
8217
8218 /* Start by writing the branch condition. */
8219
8220 switch (code)
8221 {
8222 case NE:
8223 strcpy (string, "brnz");
8224 break;
8225
8226 case EQ:
8227 strcpy (string, "brz");
8228 break;
8229
8230 case GE:
8231 strcpy (string, "brgez");
8232 break;
8233
8234 case LT:
8235 strcpy (string, "brlz");
8236 break;
8237
8238 case LE:
8239 strcpy (string, "brlez");
8240 break;
8241
8242 case GT:
8243 strcpy (string, "brgz");
8244 break;
8245
8246 default:
8247 gcc_unreachable ();
8248 }
8249
8250 p = strchr (string, '\0');
8251
8252 /* Now add the annulling, reg, label, and nop. */
8253 if (annul && ! far)
8254 {
8255 strcpy (p, ",a");
8256 p += 2;
8257 }
8258
8259 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8260 {
8261 strcpy (p,
8262 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8263 ? ",pt" : ",pn");
8264 p += 3;
8265 }
8266
8267 *p = p < string + 8 ? '\t' : ' ';
8268 p++;
8269 *p++ = '%';
8270 *p++ = '0' + reg;
8271 *p++ = ',';
8272 *p++ = ' ';
8273 if (far)
8274 {
8275 int veryfar = 1, delta;
8276
8277 if (INSN_ADDRESSES_SET_P ())
8278 {
8279 delta = (INSN_ADDRESSES (INSN_UID (dest))
8280 - INSN_ADDRESSES (INSN_UID (insn)));
8281 /* Leave some instructions for "slop". */
8282 if (delta >= -260000 && delta < 260000)
8283 veryfar = 0;
8284 }
8285
8286 strcpy (p, ".+12\n\t nop\n\t");
8287 /* Skip the next insn if requested or
8288 if we know that it will be a nop. */
8289 if (annul || ! final_sequence)
8290 p[3] = '6';
8291 p += 12;
8292 if (veryfar)
8293 {
8294 strcpy (p, "b\t");
8295 p += 2;
8296 }
8297 else
8298 {
8299 strcpy (p, "ba,pt\t%%xcc, ");
8300 p += 13;
8301 }
8302 }
8303 *p++ = '%';
8304 *p++ = 'l';
8305 *p++ = '0' + label;
8306 *p++ = '%';
8307 *p++ = '#';
8308 *p = '\0';
8309
8310 return string;
8311 }
8312
8313 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8314 Such instructions cannot be used in the delay slot of the return insn on V9.
8315 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8316 */
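/* For example, with TEST == 0 a delay-slot candidate such as
   (set (reg:SI 24 %i0) (reg:SI 25 %i1)) is rewritten to use %o0 and
   %o1: the V9 "return" insn executes its delay slot after the window
   restore, where the old %i registers are visible as %o.  */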
8317
8318 static int
8319 epilogue_renumber (register rtx *where, int test)
8320 {
8321 register const char *fmt;
8322 register int i;
8323 register enum rtx_code code;
8324
8325 if (*where == 0)
8326 return 0;
8327
8328 code = GET_CODE (*where);
8329
8330 switch (code)
8331 {
8332 case REG:
8333 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8334 return 1;
8335 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8336 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
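/* FALLTHRU: a global, float, or renamed register is safe, so we
   drop into the shared "return 0" cases below.  */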
8337 case SCRATCH:
8338 case CC0:
8339 case PC:
8340 case CONST_INT:
8341 case CONST_WIDE_INT:
8342 case CONST_DOUBLE:
8343 return 0;
8344
8345 /* Do not replace the frame pointer with the stack pointer because
8346 it can cause the delayed instruction to load below the stack.
8347 This occurs when instructions like:
8348
8349 (set (reg/i:SI 24 %i0)
8350 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8351 (const_int -20 [0xffffffec])) 0))
8352
8353 are in the return delay slot. */
8354 case PLUS:
8355 if (GET_CODE (XEXP (*where, 0)) == REG
8356 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8357 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8358 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8359 return 1;
8360 break;
8361
8362 case MEM:
8363 if (SPARC_STACK_BIAS
8364 && GET_CODE (XEXP (*where, 0)) == REG
8365 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8366 return 1;
8367 break;
8368
8369 default:
8370 break;
8371 }
8372
8373 fmt = GET_RTX_FORMAT (code);
8374
8375 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8376 {
8377 if (fmt[i] == 'E')
8378 {
8379 register int j;
8380 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8381 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8382 return 1;
8383 }
8384 else if (fmt[i] == 'e'
8385 && epilogue_renumber (&(XEXP (*where, i)), test))
8386 return 1;
8387 }
8388 return 0;
8389 }
8390 \f
8391 /* Leaf functions and non-leaf functions have different needs. */
8392
8393 static const int
8394 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8395
8396 static const int
8397 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8398
8399 static const int *const reg_alloc_orders[] = {
8400 reg_leaf_alloc_order,
8401 reg_nonleaf_alloc_order};
8402
8403 void
8404 order_regs_for_local_alloc (void)
8405 {
8406 static int last_order_nonleaf = 1;
8407
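/* A sketch of the intent: hard register 15 is %o7, which is
   clobbered by call instructions, so once a call has been emitted
   the function is no longer a leaf and the non-leaf order applies.  */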
8408 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8409 {
8410 last_order_nonleaf = !last_order_nonleaf;
8411 memcpy ((char *) reg_alloc_order,
8412 (const char *) reg_alloc_orders[last_order_nonleaf],
8413 FIRST_PSEUDO_REGISTER * sizeof (int));
8414 }
8415 }
8416 \f
8417 /* Return 1 if REG and MEM are legitimate enough to allow the various
8418 mem<-->reg splits to be run. */
8419
8420 int
8421 sparc_splitdi_legitimate (rtx reg, rtx mem)
8422 {
8423 /* Punt if we are here by mistake. */
8424 gcc_assert (reload_completed);
8425
8426 /* We must have an offsettable memory reference. */
8427 if (! offsettable_memref_p (mem))
8428 return 0;
8429
8430 /* If we have legitimate args for ldd/std, we do not want
8431 the split to happen. */
8432 if ((REGNO (reg) % 2) == 0
8433 && mem_min_alignment (mem, 8))
8434 return 0;
8435
8436 /* Success. */
8437 return 1;
8438 }
8439
8440 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8441
8442 int
8443 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8444 {
8445 int regno1, regno2;
8446
8447 if (GET_CODE (reg1) == SUBREG)
8448 reg1 = SUBREG_REG (reg1);
8449 if (GET_CODE (reg1) != REG)
8450 return 0;
8451 regno1 = REGNO (reg1);
8452
8453 if (GET_CODE (reg2) == SUBREG)
8454 reg2 = SUBREG_REG (reg2);
8455 if (GET_CODE (reg2) != REG)
8456 return 0;
8457 regno2 = REGNO (reg2);
8458
8459 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8460 return 1;
8461
8462 if (TARGET_VIS3)
8463 {
8464 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8465 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8466 return 1;
8467 }
8468
8469 return 0;
8470 }
8471
8472 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8473 This makes them candidates for using ldd and std insns.
8474
8475 Note reg1 and reg2 *must* be hard registers. */
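/* For instance, (%o0, %o1) qualifies on pre-V9 targets (regno 8 is
   even and regno 9 follows it), whereas (%o1, %o2) does not.  */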
8476
8477 int
8478 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8479 {
8480 /* We might have been passed a SUBREG. */
8481 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8482 return 0;
8483
8484 if (REGNO (reg1) % 2 != 0)
8485 return 0;
8486
8487 /* Integer ldd is deprecated in SPARC V9. */
8488 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8489 return 0;
8490
8491 return (REGNO (reg1) == REGNO (reg2) - 1);
8492 }
8493
8494 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8495 an ldd or std insn.
8496
8497 This can only happen when addr1 and addr2, the addresses in mem1
8498 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8499 addr1 must also be aligned on a 64-bit boundary.
8500
8501 Also, if dependent_reg_rtx is not null, it should not be used to
8502 compute the address for mem1, i.e. we cannot optimize a sequence
8503 like:
8504 ld [%o0], %o0
8505 ld [%o0 + 4], %o1
8506 to
8507 ldd [%o0], %o0
8508 nor:
8509 ld [%g3 + 4], %g3
8510 ld [%g3], %g2
8511 to
8512 ldd [%g3], %g2
8513
8514 But, note that the transformation from:
8515 ld [%g2 + 4], %g3
8516 ld [%g2], %g2
8517 to
8518 ldd [%g2], %g2
8519 is perfectly fine. Thus, the peephole2 patterns always pass us
8520 the destination register of the first load, never the second one.
8521
8522 For stores we don't have a similar problem, so dependent_reg_rtx is
8523 NULL_RTX. */
8524
8525 int
8526 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8527 {
8528 rtx addr1, addr2;
8529 unsigned int reg1;
8530 HOST_WIDE_INT offset1;
8531
8532 /* The mems cannot be volatile. */
8533 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8534 return 0;
8535
8536 /* MEM1 should be aligned on a 64-bit boundary. */
8537 if (MEM_ALIGN (mem1) < 64)
8538 return 0;
8539
8540 addr1 = XEXP (mem1, 0);
8541 addr2 = XEXP (mem2, 0);
8542
8543 /* Extract a register number and offset (if used) from the first addr. */
8544 if (GET_CODE (addr1) == PLUS)
8545 {
8546 /* If not a REG, return zero. */
8547 if (GET_CODE (XEXP (addr1, 0)) != REG)
8548 return 0;
8549 else
8550 {
8551 reg1 = REGNO (XEXP (addr1, 0));
8552 /* The offset must be constant! */
8553 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8554 return 0;
8555 offset1 = INTVAL (XEXP (addr1, 1));
8556 }
8557 }
8558 else if (GET_CODE (addr1) != REG)
8559 return 0;
8560 else
8561 {
8562 reg1 = REGNO (addr1);
8563 /* This was a simple (mem (reg)) expression. Offset is 0. */
8564 offset1 = 0;
8565 }
8566
8567 /* Make sure the second address is of the form (plus (reg) (const_int)). */
8568 if (GET_CODE (addr2) != PLUS)
8569 return 0;
8570
8571 if (GET_CODE (XEXP (addr2, 0)) != REG
8572 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8573 return 0;
8574
8575 if (reg1 != REGNO (XEXP (addr2, 0)))
8576 return 0;
8577
8578 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8579 return 0;
8580
8581 /* The first offset must be evenly divisible by 8 to ensure the
8582 address is 64 bit aligned. */
8583 if (offset1 % 8 != 0)
8584 return 0;
8585
8586 /* The offset for the second addr must be 4 more than the first addr. */
8587 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8588 return 0;
8589
8590 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8591 instructions. */
8592 return 1;
8593 }
8594
8595 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8596
8597 rtx
8598 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8599 {
8600 rtx x = widen_memory_access (mem1, mode, 0);
8601 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8602 return x;
8603 }
8604
8605 /* Return 1 if reg is a pseudo, or is the first register in
8606 a hard register pair. This makes it suitable for use in
8607 ldd and std insns. */
8608
8609 int
8610 register_ok_for_ldd (rtx reg)
8611 {
8612 /* We might have been passed a SUBREG. */
8613 if (!REG_P (reg))
8614 return 0;
8615
8616 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8617 return (REGNO (reg) % 2 == 0);
8618
8619 return 1;
8620 }
8621
8622 /* Return 1 if OP, a MEM, has an address which is known to be
8623 aligned to an 8-byte boundary. */
8624
8625 int
8626 memory_ok_for_ldd (rtx op)
8627 {
8628 /* In 64-bit mode, we assume that the address is word-aligned. */
8629 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8630 return 0;
8631
8632 if (! can_create_pseudo_p ()
8633 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8634 return 0;
8635
8636 return 1;
8637 }
8638 \f
8639 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8640
8641 static bool
8642 sparc_print_operand_punct_valid_p (unsigned char code)
8643 {
8644 if (code == '#'
8645 || code == '*'
8646 || code == '('
8647 || code == ')'
8648 || code == '_'
8649 || code == '&')
8650 return true;
8651
8652 return false;
8653 }
8654
8655 /* Implement TARGET_PRINT_OPERAND.
8656 Print operand X (an rtx) in assembler syntax to file FILE.
8657 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8658 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8659
8660 static void
8661 sparc_print_operand (FILE *file, rtx x, int code)
8662 {
8663 switch (code)
8664 {
8665 case '#':
8666 /* Output an insn in a delay slot. */
8667 if (final_sequence)
8668 sparc_indent_opcode = 1;
8669 else
8670 fputs ("\n\t nop", file);
8671 return;
8672 case '*':
8673 /* Output an annul flag if there's nothing for the delay slot and we
8674 are optimizing. This is always used with '(' below.
8675 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8676 this is a dbx bug. So, we only do this when optimizing.
8677 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8678 Always emit a nop in case the next instruction is a branch. */
8679 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8680 fputs (",a", file);
8681 return;
8682 case '(':
8683 /* Output a 'nop' if there's nothing for the delay slot and we are
8684 not optimizing. This is always used with '*' above. */
8685 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8686 fputs ("\n\t nop", file);
8687 else if (final_sequence)
8688 sparc_indent_opcode = 1;
8689 return;
8690 case ')':
8691 /* Output the right displacement from the saved PC on function return.
8692 The caller may have placed an "unimp" insn immediately after the call
8693 so we have to account for it. This insn is used in the 32-bit ABI
8694 when calling a function that returns a non zero-sized structure. The
8695 64-bit ABI doesn't have it. Be careful to have this test be the same
8696 as that for the call. The exception is when sparc_std_struct_return
8697 is enabled, the psABI is followed exactly and the adjustment is made
8698 by the code in sparc_struct_value_rtx. The call emitted is the same
8699 when sparc_std_struct_return is enabled. */
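/* Concretely: with the unimp word present the return becomes
   "jmp %i7+12", hopping over it, instead of the usual
   "jmp %i7+8".  */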
8700 if (!TARGET_ARCH64
8701 && cfun->returns_struct
8702 && !sparc_std_struct_return
8703 && DECL_SIZE (DECL_RESULT (current_function_decl))
8704 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8705 == INTEGER_CST
8706 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8707 fputs ("12", file);
8708 else
8709 fputc ('8', file);
8710 return;
8711 case '_':
8712 /* Output the Embedded Medium/Anywhere code model base register. */
8713 fputs (EMBMEDANY_BASE_REG, file);
8714 return;
8715 case '&':
8716 /* Print some local dynamic TLS name. */
8717 if (const char *name = get_some_local_dynamic_name ())
8718 assemble_name (file, name);
8719 else
8720 output_operand_lossage ("'%%&' used without any "
8721 "local dynamic TLS references");
8722 return;
8723
8724 case 'Y':
8725 /* Adjust the operand to take into account a RESTORE operation. */
8726 if (GET_CODE (x) == CONST_INT)
8727 break;
8728 else if (GET_CODE (x) != REG)
8729 output_operand_lossage ("invalid %%Y operand");
8730 else if (REGNO (x) < 8)
8731 fputs (reg_names[REGNO (x)], file);
8732 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8733 fputs (reg_names[REGNO (x)-16], file);
8734 else
8735 output_operand_lossage ("invalid %%Y operand");
8736 return;
8737 case 'L':
8738 /* Print out the low order register name of a register pair. */
8739 if (WORDS_BIG_ENDIAN)
8740 fputs (reg_names[REGNO (x)+1], file);
8741 else
8742 fputs (reg_names[REGNO (x)], file);
8743 return;
8744 case 'H':
8745 /* Print out the high order register name of a register pair. */
8746 if (WORDS_BIG_ENDIAN)
8747 fputs (reg_names[REGNO (x)], file);
8748 else
8749 fputs (reg_names[REGNO (x)+1], file);
8750 return;
8751 case 'R':
8752 /* Print out the second register name of a register pair or quad.
8753 I.e., R (%o0) => %o1. */
8754 fputs (reg_names[REGNO (x)+1], file);
8755 return;
8756 case 'S':
8757 /* Print out the third register name of a register quad.
8758 I.e., S (%o0) => %o2. */
8759 fputs (reg_names[REGNO (x)+2], file);
8760 return;
8761 case 'T':
8762 /* Print out the fourth register name of a register quad.
8763 I.e., T (%o0) => %o3. */
8764 fputs (reg_names[REGNO (x)+3], file);
8765 return;
8766 case 'x':
8767 /* Print a condition code register. */
8768 if (REGNO (x) == SPARC_ICC_REG)
8769 {
8770 /* We don't handle CC[X]_NOOVmode because they're not supposed
8771 to occur here. */
8772 if (GET_MODE (x) == CCmode)
8773 fputs ("%icc", file);
8774 else if (GET_MODE (x) == CCXmode)
8775 fputs ("%xcc", file);
8776 else
8777 gcc_unreachable ();
8778 }
8779 else
8780 /* %fccN register */
8781 fputs (reg_names[REGNO (x)], file);
8782 return;
8783 case 'm':
8784 /* Print the operand's address only. */
8785 output_address (GET_MODE (x), XEXP (x, 0));
8786 return;
8787 case 'r':
8788 /* In this case we need a register. Use %g0 if the
8789 operand is const0_rtx. */
8790 if (x == const0_rtx
8791 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8792 {
8793 fputs ("%g0", file);
8794 return;
8795 }
8796 else
8797 break;
8798
8799 case 'A':
8800 switch (GET_CODE (x))
8801 {
8802 case IOR: fputs ("or", file); break;
8803 case AND: fputs ("and", file); break;
8804 case XOR: fputs ("xor", file); break;
8805 default: output_operand_lossage ("invalid %%A operand");
8806 }
8807 return;
8808
8809 case 'B':
8810 switch (GET_CODE (x))
8811 {
8812 case IOR: fputs ("orn", file); break;
8813 case AND: fputs ("andn", file); break;
8814 case XOR: fputs ("xnor", file); break;
8815 default: output_operand_lossage ("invalid %%B operand");
8816 }
8817 return;
8818
8819 /* This is used by the conditional move instructions. */
8820 case 'C':
8821 {
8822 enum rtx_code rc = GET_CODE (x);
8823
8824 switch (rc)
8825 {
8826 case NE: fputs ("ne", file); break;
8827 case EQ: fputs ("e", file); break;
8828 case GE: fputs ("ge", file); break;
8829 case GT: fputs ("g", file); break;
8830 case LE: fputs ("le", file); break;
8831 case LT: fputs ("l", file); break;
8832 case GEU: fputs ("geu", file); break;
8833 case GTU: fputs ("gu", file); break;
8834 case LEU: fputs ("leu", file); break;
8835 case LTU: fputs ("lu", file); break;
8836 case LTGT: fputs ("lg", file); break;
8837 case UNORDERED: fputs ("u", file); break;
8838 case ORDERED: fputs ("o", file); break;
8839 case UNLT: fputs ("ul", file); break;
8840 case UNLE: fputs ("ule", file); break;
8841 case UNGT: fputs ("ug", file); break;
8842 case UNGE: fputs ("uge", file); break;
8843 case UNEQ: fputs ("ue", file); break;
8844 default: output_operand_lossage ("invalid %%C operand");
8845 }
8846 return;
8847 }
8848
8849 /* These are used by the movr instruction pattern. */
8850 case 'D':
8851 {
8852 enum rtx_code rc = GET_CODE (x);
8853 switch (rc)
8854 {
8855 case NE: fputs ("ne", file); break;
8856 case EQ: fputs ("e", file); break;
8857 case GE: fputs ("gez", file); break;
8858 case LT: fputs ("lz", file); break;
8859 case LE: fputs ("lez", file); break;
8860 case GT: fputs ("gz", file); break;
8861 default: output_operand_lossage ("invalid %%D operand");
8862 }
8863 return;
8864 }
8865
8866 case 'b':
8867 {
8868 /* Print a sign-extended character. */
8869 int i = trunc_int_for_mode (INTVAL (x), QImode);
8870 fprintf (file, "%d", i);
8871 return;
8872 }
8873
8874 case 'f':
8875 /* Operand must be a MEM; write its address. */
8876 if (GET_CODE (x) != MEM)
8877 output_operand_lossage ("invalid %%f operand");
8878 output_address (GET_MODE (x), XEXP (x, 0));
8879 return;
8880
8881 case 's':
8882 {
8883 /* Print a sign-extended 32-bit value. */
8884 HOST_WIDE_INT i;
8885 if (GET_CODE(x) == CONST_INT)
8886 i = INTVAL (x);
8887 else
8888 {
8889 output_operand_lossage ("invalid %%s operand");
8890 return;
8891 }
8892 i = trunc_int_for_mode (i, SImode);
8893 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8894 return;
8895 }
8896
8897 case 0:
8898 /* Do nothing special. */
8899 break;
8900
8901 default:
8902 /* Undocumented flag. */
8903 output_operand_lossage ("invalid operand output code");
8904 }
8905
8906 if (GET_CODE (x) == REG)
8907 fputs (reg_names[REGNO (x)], file);
8908 else if (GET_CODE (x) == MEM)
8909 {
8910 fputc ('[', file);
8911 /* Poor Sun assembler doesn't understand absolute addressing. */
8912 if (CONSTANT_P (XEXP (x, 0)))
8913 fputs ("%g0+", file);
8914 output_address (GET_MODE (x), XEXP (x, 0));
8915 fputc (']', file);
8916 }
8917 else if (GET_CODE (x) == HIGH)
8918 {
8919 fputs ("%hi(", file);
8920 output_addr_const (file, XEXP (x, 0));
8921 fputc (')', file);
8922 }
8923 else if (GET_CODE (x) == LO_SUM)
8924 {
8925 sparc_print_operand (file, XEXP (x, 0), 0);
8926 if (TARGET_CM_MEDMID)
8927 fputs ("+%l44(", file);
8928 else
8929 fputs ("+%lo(", file);
8930 output_addr_const (file, XEXP (x, 1));
8931 fputc (')', file);
8932 }
8933 else if (GET_CODE (x) == CONST_DOUBLE)
8934 output_operand_lossage ("floating-point constant not a valid immediate operand");
8935 else
8936 output_addr_const (file, x);
8937 }
8938
8939 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8940
8941 static void
8942 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
8943 {
8944 register rtx base, index = 0;
8945 int offset = 0;
8946 register rtx addr = x;
8947
8948 if (REG_P (addr))
8949 fputs (reg_names[REGNO (addr)], file);
8950 else if (GET_CODE (addr) == PLUS)
8951 {
8952 if (CONST_INT_P (XEXP (addr, 0)))
8953 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8954 else if (CONST_INT_P (XEXP (addr, 1)))
8955 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8956 else
8957 base = XEXP (addr, 0), index = XEXP (addr, 1);
8958 if (GET_CODE (base) == LO_SUM)
8959 {
8960 gcc_assert (USE_AS_OFFSETABLE_LO10
8961 && TARGET_ARCH64
8962 && ! TARGET_CM_MEDMID);
8963 output_operand (XEXP (base, 0), 0);
8964 fputs ("+%lo(", file);
8965 output_address (VOIDmode, XEXP (base, 1));
8966 fprintf (file, ")+%d", offset);
8967 }
8968 else
8969 {
8970 fputs (reg_names[REGNO (base)], file);
8971 if (index == 0)
8972 fprintf (file, "%+d", offset);
8973 else if (REG_P (index))
8974 fprintf (file, "+%s", reg_names[REGNO (index)]);
8975 else if (GET_CODE (index) == SYMBOL_REF
8976 || GET_CODE (index) == LABEL_REF
8977 || GET_CODE (index) == CONST)
8978 fputc ('+', file), output_addr_const (file, index);
8979 else gcc_unreachable ();
8980 }
8981 }
8982 else if (GET_CODE (addr) == MINUS
8983 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8984 {
8985 output_addr_const (file, XEXP (addr, 0));
8986 fputs ("-(", file);
8987 output_addr_const (file, XEXP (addr, 1));
8988 fputs ("-.)", file);
8989 }
8990 else if (GET_CODE (addr) == LO_SUM)
8991 {
8992 output_operand (XEXP (addr, 0), 0);
8993 if (TARGET_CM_MEDMID)
8994 fputs ("+%l44(", file);
8995 else
8996 fputs ("+%lo(", file);
8997 output_address (VOIDmode, XEXP (addr, 1));
8998 fputc (')', file);
8999 }
9000 else if (flag_pic
9001 && GET_CODE (addr) == CONST
9002 && GET_CODE (XEXP (addr, 0)) == MINUS
9003 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9004 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9005 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9006 {
9007 addr = XEXP (addr, 0);
9008 output_addr_const (file, XEXP (addr, 0));
9009 /* Group the args of the second CONST in parentheses. */
9010 fputs ("-(", file);
9011 /* Skip past the second CONST--it does nothing for us. */
9012 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9013 /* Close the parenthesis. */
9014 fputc (')', file);
9015 }
9016 else
9017 {
9018 output_addr_const (file, addr);
9019 }
9020 }
9021 \f
9022 /* Target hook for assembling integer objects. The sparc version has
9023 special handling for aligned DI-mode objects. */
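/* A sketch of the output: under TARGET_V9 an aligned 8-byte symbolic
   value is emitted with the .xword directive; otherwise it is split
   into a zero upper word followed by a 4-byte word for the symbol.  */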
9024
9025 static bool
9026 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9027 {
9028 /* ??? We only output .xword's for symbols and only then in environments
9029 where the assembler can handle them. */
9030 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9031 {
9032 if (TARGET_V9)
9033 {
9034 assemble_integer_with_op ("\t.xword\t", x);
9035 return true;
9036 }
9037 else
9038 {
9039 assemble_aligned_integer (4, const0_rtx);
9040 assemble_aligned_integer (4, x);
9041 return true;
9042 }
9043 }
9044 return default_assemble_integer (x, size, aligned_p);
9045 }
9046 \f
9047 /* Return the value of a code used in the .proc pseudo-op that says
9048 what kind of result this function returns. For non-C types, we pick
9049 the closest C type. */
9050
9051 #ifndef SHORT_TYPE_SIZE
9052 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9053 #endif
9054
9055 #ifndef INT_TYPE_SIZE
9056 #define INT_TYPE_SIZE BITS_PER_WORD
9057 #endif
9058
9059 #ifndef LONG_TYPE_SIZE
9060 #define LONG_TYPE_SIZE BITS_PER_WORD
9061 #endif
9062
9063 #ifndef LONG_LONG_TYPE_SIZE
9064 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9065 #endif
9066
9067 #ifndef FLOAT_TYPE_SIZE
9068 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9069 #endif
9070
9071 #ifndef DOUBLE_TYPE_SIZE
9072 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9073 #endif
9074
9075 #ifndef LONG_DOUBLE_TYPE_SIZE
9076 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9077 #endif
9078
9079 unsigned long
9080 sparc_type_code (register tree type)
9081 {
9082 register unsigned long qualifiers = 0;
9083 register unsigned shift;
9084
9085 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9086 setting more, since some assemblers will give an error for this. Also,
9087 we must be careful to avoid shifts of 32 bits or more to avoid getting
9088 unpredictable results. */
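/* A worked example, purely illustrative: for "int **" the loop takes
   the POINTER_TYPE case twice, OR-ing in (1 << 6) and (1 << 8), and
   the INTEGER_TYPE case then returns the base code 4, for a final
   value of 0x144.  */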
9089
9090 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9091 {
9092 switch (TREE_CODE (type))
9093 {
9094 case ERROR_MARK:
9095 return qualifiers;
9096
9097 case ARRAY_TYPE:
9098 qualifiers |= (3 << shift);
9099 break;
9100
9101 case FUNCTION_TYPE:
9102 case METHOD_TYPE:
9103 qualifiers |= (2 << shift);
9104 break;
9105
9106 case POINTER_TYPE:
9107 case REFERENCE_TYPE:
9108 case OFFSET_TYPE:
9109 qualifiers |= (1 << shift);
9110 break;
9111
9112 case RECORD_TYPE:
9113 return (qualifiers | 8);
9114
9115 case UNION_TYPE:
9116 case QUAL_UNION_TYPE:
9117 return (qualifiers | 9);
9118
9119 case ENUMERAL_TYPE:
9120 return (qualifiers | 10);
9121
9122 case VOID_TYPE:
9123 return (qualifiers | 16);
9124
9125 case INTEGER_TYPE:
9126 /* If this is a range type, consider it to be the underlying
9127 type. */
9128 if (TREE_TYPE (type) != 0)
9129 break;
9130
9131 /* Carefully distinguish all the standard types of C,
9132 without messing up if the language is not C. We do this by
9133 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9134 look at both the names and the above fields, but that's redundant.
9135 Any type whose size is between two C types will be considered
9136 to be the wider of the two types. Also, we do not have a
9137 special code to use for "long long", so anything wider than
9138 long is treated the same. Note that we can't distinguish
9139 between "int" and "long" in this code if they are the same
9140 size, but that's fine, since neither can the assembler. */
9141
9142 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9143 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9144
9145 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9146 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9147
9148 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9149 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9150
9151 else
9152 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9153
9154 case REAL_TYPE:
9155 /* If this is a range type, consider it to be the underlying
9156 type. */
9157 if (TREE_TYPE (type) != 0)
9158 break;
9159
9160 /* Carefully distinguish all the standard types of C,
9161 without messing up if the language is not C. */
9162
9163 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9164 return (qualifiers | 6);
9165
9166 else
9167 return (qualifiers | 7);
9168
9169 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9170 /* ??? We need to distinguish between double and float complex types,
9171 but I don't know how yet because I can't reach this code from
9172 existing front-ends. */
9173 return (qualifiers | 7); /* Who knows? */
9174
9175 case VECTOR_TYPE:
9176 case BOOLEAN_TYPE: /* Boolean truth value type. */
9177 case LANG_TYPE:
9178 case NULLPTR_TYPE:
9179 return qualifiers;
9180
9181 default:
9182 gcc_unreachable (); /* Not a type! */
9183 }
9184 }
9185
9186 return qualifiers;
9187 }
9188 \f
9189 /* Nested function support. */
9190
9191 /* Emit RTL insns to initialize the variable parts of a trampoline.
9192 FNADDR is an RTX for the address of the function's pure code.
9193 CXT is an RTX for the static chain value for the function.
9194
9195 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9196 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9197 (to store insns). This is a bit excessive. Perhaps a different
9198 mechanism would be better here.
9199
9200 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9201
9202 static void
9203 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9204 {
9205 /* SPARC 32-bit trampoline:
9206
9207 sethi %hi(fn), %g1
9208 sethi %hi(static), %g2
9209 jmp %g1+%lo(fn)
9210 or %g2, %lo(static), %g2
9211
9212 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9213 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9214 */
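/* Decoding the magic constants used below, as a sketch of the
   encodings above: 0x03000000 is "sethi %hi(0), %g1" and 0x05000000
   is "sethi %hi(0), %g2", so OR-ing in (address >> 10) fills the
   22-bit immediate; 0x81c06000 is "jmpl %g1+0, %g0" and 0x8410a000
   is "or %g2, 0, %g2", each taking the low 10 bits of an address in
   its simm13 field.  */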
9215
9216 emit_move_insn
9217 (adjust_address (m_tramp, SImode, 0),
9218 expand_binop (SImode, ior_optab,
9219 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9220 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9221 NULL_RTX, 1, OPTAB_DIRECT));
9222
9223 emit_move_insn
9224 (adjust_address (m_tramp, SImode, 4),
9225 expand_binop (SImode, ior_optab,
9226 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9227 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9228 NULL_RTX, 1, OPTAB_DIRECT));
9229
9230 emit_move_insn
9231 (adjust_address (m_tramp, SImode, 8),
9232 expand_binop (SImode, ior_optab,
9233 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9234 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9235 NULL_RTX, 1, OPTAB_DIRECT));
9236
9237 emit_move_insn
9238 (adjust_address (m_tramp, SImode, 12),
9239 expand_binop (SImode, ior_optab,
9240 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9241 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9242 NULL_RTX, 1, OPTAB_DIRECT));
9243
9244 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9245 aligned on a 16 byte boundary so one flush clears it all. */
9246 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9247 if (sparc_cpu != PROCESSOR_ULTRASPARC
9248 && sparc_cpu != PROCESSOR_ULTRASPARC3
9249 && sparc_cpu != PROCESSOR_NIAGARA
9250 && sparc_cpu != PROCESSOR_NIAGARA2
9251 && sparc_cpu != PROCESSOR_NIAGARA3
9252 && sparc_cpu != PROCESSOR_NIAGARA4)
9253 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9254
9255 /* Call __enable_execute_stack after writing onto the stack to make sure
9256 the stack address is accessible. */
9257 #ifdef HAVE_ENABLE_EXECUTE_STACK
9258 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9259 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9260 #endif
9261
9262 }
9263
9264 /* The 64-bit version is simpler because it makes more sense to load the
9265 values as "immediate" data out of the trampoline. It's also easier since
9266 we can read the PC without clobbering a register. */
9267
9268 static void
9269 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9270 {
9271 /* SPARC 64-bit trampoline:
9272
9273 rd %pc, %g1
9274 ldx [%g1+24], %g5
9275 jmp %g5
9276 ldx [%g1+16], %g5
9277 +16 bytes data
9278 */
9279
9280 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9281 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9282 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9283 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9284 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9285 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9286 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9287 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9288 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9289 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9290 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9291
9292 if (sparc_cpu != PROCESSOR_ULTRASPARC
9293 && sparc_cpu != PROCESSOR_ULTRASPARC3
9294 && sparc_cpu != PROCESSOR_NIAGARA
9295 && sparc_cpu != PROCESSOR_NIAGARA2
9296 && sparc_cpu != PROCESSOR_NIAGARA3
9297 && sparc_cpu != PROCESSOR_NIAGARA4)
9298 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9299
9300 /* Call __enable_execute_stack after writing onto the stack to make sure
9301 the stack address is accessible. */
9302 #ifdef HAVE_ENABLE_EXECUTE_STACK
9303 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9304 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9305 #endif
9306 }
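/* Illustrative sketch only: the resulting trampoline viewed as a host-side
   layout.  The struct is hypothetical; the real code simply stores through
   adjust_address as above.  */
#if 0
struct sparc64_trampoline
{
  unsigned int insn[4];		/* rd %pc,%g1 / ldx [%g1+24],%g5
				   / jmp %g5 / ldx [%g1+16],%g5  */
  unsigned long long cxt;	/* +16: static chain, loaded in the delay slot.  */
  unsigned long long fnaddr;	/* +24: target address, loaded before the jmp.  */
};
#endif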
9307
9308 /* Worker for TARGET_TRAMPOLINE_INIT. */
9309
9310 static void
9311 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9312 {
9313 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9314 cxt = force_reg (Pmode, cxt);
9315 if (TARGET_ARCH64)
9316 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9317 else
9318 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9319 }
9320 \f
9321 /* Adjust the cost of a scheduling dependency. Return the new cost of
9322 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9323
9324 static int
9325 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9326 {
9327 enum attr_type insn_type;
9328
9329 if (recog_memoized (insn) < 0)
9330 return cost;
9331
9332 insn_type = get_attr_type (insn);
9333
9334 if (REG_NOTE_KIND (link) == 0)
9335 {
9336 /* Data dependency; DEP_INSN writes a register that INSN reads some
9337 cycles later. */
9338
9339 /* If a load, then the dependence must be on the memory address;
9340 add an extra "cycle". Note that the cost could be two cycles
9341 if the reg was written late in an instruction group; we cannot tell
9342 here. */
9343 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9344 return cost + 3;
9345
9346 /* Get the delay only if the address of the store is the dependence. */
9347 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9348 {
9349 rtx pat = PATTERN (insn);
9350 rtx dep_pat = PATTERN (dep_insn);
9351
9352 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9353 return cost; /* This should not happen! */
9354
9355 /* The dependency between the two instructions was on the data that
9356 is being stored. Assume that this implies that the address of the
9357 store is not dependent. */
9358 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9359 return cost;
9360
9361 return cost + 3; /* An approximation. */
9362 }
9363
9364 /* A shift instruction cannot receive its data from an instruction
9365 in the same cycle; add a one cycle penalty. */
9366 if (insn_type == TYPE_SHIFT)
9367 return cost + 3; /* Split before cascade into shift. */
9368 }
9369 else
9370 {
9371 /* Anti- or output-dependency; DEP_INSN reads/writes a register that
9372 INSN writes some cycles later. */
9373
9374 /* These are only significant for the fpu unit; writing a fp reg before
9375 the fpu has finished with it stalls the processor. */
9376
9377 /* Reusing an integer register causes no problems. */
9378 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9379 return 0;
9380 }
9381
9382 return cost;
9383 }
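/* An illustrative (hypothetical) RTL pair for the store check above:

     dep_insn:  (set (reg 10) (plus (reg 11) (reg 12)))
     insn:      (set (mem (reg 13)) (reg 10))

   SET_DEST (dep_pat) and SET_SRC (pat) are both (reg 10), so the dependence
   is on the stored data and the cost is left alone; had (reg 10) instead fed
   the address (reg 13), the +3 penalty would apply.  */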
9384
9385 static int
9386 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9387 {
9388 enum attr_type insn_type, dep_type;
9389 rtx pat = PATTERN (insn);
9390 rtx dep_pat = PATTERN (dep_insn);
9391
9392 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9393 return cost;
9394
9395 insn_type = get_attr_type (insn);
9396 dep_type = get_attr_type (dep_insn);
9397
9398 switch (REG_NOTE_KIND (link))
9399 {
9400 case 0:
9401 /* Data dependency; DEP_INSN writes a register that INSN reads some
9402 cycles later. */
9403
9404 switch (insn_type)
9405 {
9406 case TYPE_STORE:
9407 case TYPE_FPSTORE:
9408 /* Get the delay iff the address of the store is the dependence. */
9409 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9410 return cost;
9411
9412 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9413 return cost;
9414 return cost + 3;
9415
9416 case TYPE_LOAD:
9417 case TYPE_SLOAD:
9418 case TYPE_FPLOAD:
9419 /* If a load, then the dependence must be on the memory address. If
9420 the addresses aren't equal, then it might be a false dependency. */
9421 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9422 {
9423 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9424 || GET_CODE (SET_DEST (dep_pat)) != MEM
9425 || GET_CODE (SET_SRC (pat)) != MEM
9426 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9427 XEXP (SET_SRC (pat), 0)))
9428 return cost + 2;
9429
9430 return cost + 8;
9431 }
9432 break;
9433
9434 case TYPE_BRANCH:
9435 /* Compare to branch latency is 0. There is no benefit from
9436 separating compare and branch. */
9437 if (dep_type == TYPE_COMPARE)
9438 return 0;
9439 /* Floating point compare to branch latency is less than
9440 compare to conditional move. */
9441 if (dep_type == TYPE_FPCMP)
9442 return cost - 1;
9443 break;
9444 default:
9445 break;
9446 }
9447 break;
9448
9449 case REG_DEP_ANTI:
9450 /* Anti-dependencies only penalize the fpu unit. */
9451 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9452 return 0;
9453 break;
9454
9455 default:
9456 break;
9457 }
9458
9459 return cost;
9460 }
9461
9462 static int
9463 sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9464 {
9465 switch (sparc_cpu)
9466 {
9467 case PROCESSOR_SUPERSPARC:
9468 cost = supersparc_adjust_cost (insn, link, dep, cost);
9469 break;
9470 case PROCESSOR_HYPERSPARC:
9471 case PROCESSOR_SPARCLITE86X:
9472 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9473 break;
9474 default:
9475 break;
9476 }
9477 return cost;
9478 }
9479
9480 static void
9481 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9482 int sched_verbose ATTRIBUTE_UNUSED,
9483 int max_ready ATTRIBUTE_UNUSED)
9484 {}
9485
9486 static int
9487 sparc_use_sched_lookahead (void)
9488 {
9489 if (sparc_cpu == PROCESSOR_NIAGARA
9490 || sparc_cpu == PROCESSOR_NIAGARA2
9491 || sparc_cpu == PROCESSOR_NIAGARA3)
9492 return 0;
9493 if (sparc_cpu == PROCESSOR_NIAGARA4)
9494 return 2;
9495 if (sparc_cpu == PROCESSOR_ULTRASPARC
9496 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9497 return 4;
9498 if ((1 << sparc_cpu) &
9499 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9500 (1 << PROCESSOR_SPARCLITE86X)))
9501 return 3;
9502 return 0;
9503 }
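/* The (1 << sparc_cpu) expression above is just a compact set-membership
   test; an equivalent spelled-out form (illustrative only) is:  */
#if 0
  if (sparc_cpu == PROCESSOR_SUPERSPARC
      || sparc_cpu == PROCESSOR_HYPERSPARC
      || sparc_cpu == PROCESSOR_SPARCLITE86X)
    return 3;
#endif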
9504
9505 static int
9506 sparc_issue_rate (void)
9507 {
9508 switch (sparc_cpu)
9509 {
9510 case PROCESSOR_NIAGARA:
9511 case PROCESSOR_NIAGARA2:
9512 case PROCESSOR_NIAGARA3:
9513 default:
9514 return 1;
9515 case PROCESSOR_NIAGARA4:
9516 case PROCESSOR_V9:
9517 /* Assume V9 processors are capable of at least dual-issue. */
9518 return 2;
9519 case PROCESSOR_SUPERSPARC:
9520 return 3;
9521 case PROCESSOR_HYPERSPARC:
9522 case PROCESSOR_SPARCLITE86X:
9523 return 2;
9524 case PROCESSOR_ULTRASPARC:
9525 case PROCESSOR_ULTRASPARC3:
9526 return 4;
9527 }
9528 }
9529
9530 static int
9531 set_extends (rtx_insn *insn)
9532 {
9533 register rtx pat = PATTERN (insn);
9534
9535 switch (GET_CODE (SET_SRC (pat)))
9536 {
9537 /* Load and some shift instructions zero extend. */
9538 case MEM:
9539 case ZERO_EXTEND:
9540 /* sethi clears the high bits. */
9541 case HIGH:
9542 /* LO_SUM is used with sethi; sethi cleared the high
9543 bits and the values used with lo_sum are positive. */
9544 case LO_SUM:
9545 /* Store flag stores 0 or 1. */
9546 case LT: case LTU:
9547 case GT: case GTU:
9548 case LE: case LEU:
9549 case GE: case GEU:
9550 case EQ:
9551 case NE:
9552 return 1;
9553 case AND:
9554 {
9555 rtx op0 = XEXP (SET_SRC (pat), 0);
9556 rtx op1 = XEXP (SET_SRC (pat), 1);
9557 if (GET_CODE (op1) == CONST_INT)
9558 return INTVAL (op1) >= 0;
9559 if (GET_CODE (op0) != REG)
9560 return 0;
9561 if (sparc_check_64 (op0, insn) == 1)
9562 return 1;
9563 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9564 }
9565 case IOR:
9566 case XOR:
9567 {
9568 rtx op0 = XEXP (SET_SRC (pat), 0);
9569 rtx op1 = XEXP (SET_SRC (pat), 1);
9570 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9571 return 0;
9572 if (GET_CODE (op1) == CONST_INT)
9573 return INTVAL (op1) >= 0;
9574 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9575 }
9576 case LSHIFTRT:
9577 return GET_MODE (SET_SRC (pat)) == SImode;
9578 /* Positive integers leave the high bits zero. */
9579 case CONST_INT:
9580 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9581 case ASHIFTRT:
9582 case SIGN_EXTEND:
9583 return - (GET_MODE (SET_SRC (pat)) == SImode);
9584 case REG:
9585 return sparc_check_64 (SET_SRC (pat), insn);
9586 default:
9587 return 0;
9588 }
9589 }
9590
9591 /* We _ought_ to have only one kind per function, but... */
9592 static GTY(()) rtx sparc_addr_diff_list;
9593 static GTY(()) rtx sparc_addr_list;
9594
9595 void
9596 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9597 {
9598 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9599 if (diff)
9600 sparc_addr_diff_list
9601 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9602 else
9603 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9604 }
9605
9606 static void
9607 sparc_output_addr_vec (rtx vec)
9608 {
9609 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9610 int idx, vlen = XVECLEN (body, 0);
9611
9612 #ifdef ASM_OUTPUT_ADDR_VEC_START
9613 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9614 #endif
9615
9616 #ifdef ASM_OUTPUT_CASE_LABEL
9617 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9618 NEXT_INSN (lab));
9619 #else
9620 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9621 #endif
9622
9623 for (idx = 0; idx < vlen; idx++)
9624 {
9625 ASM_OUTPUT_ADDR_VEC_ELT
9626 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9627 }
9628
9629 #ifdef ASM_OUTPUT_ADDR_VEC_END
9630 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9631 #endif
9632 }
9633
9634 static void
9635 sparc_output_addr_diff_vec (rtx vec)
9636 {
9637 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9638 rtx base = XEXP (XEXP (body, 0), 0);
9639 int idx, vlen = XVECLEN (body, 1);
9640
9641 #ifdef ASM_OUTPUT_ADDR_VEC_START
9642 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9643 #endif
9644
9645 #ifdef ASM_OUTPUT_CASE_LABEL
9646 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9647 NEXT_INSN (lab));
9648 #else
9649 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9650 #endif
9651
9652 for (idx = 0; idx < vlen; idx++)
9653 {
9654 ASM_OUTPUT_ADDR_DIFF_ELT
9655 (asm_out_file,
9656 body,
9657 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9658 CODE_LABEL_NUMBER (base));
9659 }
9660
9661 #ifdef ASM_OUTPUT_ADDR_VEC_END
9662 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9663 #endif
9664 }
9665
9666 static void
9667 sparc_output_deferred_case_vectors (void)
9668 {
9669 rtx t;
9670 int align;
9671
9672 if (sparc_addr_list == NULL_RTX
9673 && sparc_addr_diff_list == NULL_RTX)
9674 return;
9675
9676 /* Align to cache line in the function's code section. */
9677 switch_to_section (current_function_section ());
9678
9679 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9680 if (align > 0)
9681 ASM_OUTPUT_ALIGN (asm_out_file, align);
9682
9683 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9684 sparc_output_addr_vec (XEXP (t, 0));
9685 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9686 sparc_output_addr_diff_vec (XEXP (t, 0));
9687
9688 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9689 }
9690
9691 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9692 unknown. Return 1 if the high bits are zero, -1 if the register is
9693 sign extended. */
9694 int
9695 sparc_check_64 (rtx x, rtx_insn *insn)
9696 {
9697 /* If a register is set only once it is safe to ignore insns this
9698 code does not know how to handle. The loop will either recognize
9699 the single set and return the correct value or fail to recognize
9700 it and return 0. */
9701 int set_once = 0;
9702 rtx y = x;
9703
9704 gcc_assert (GET_CODE (x) == REG);
9705
9706 if (GET_MODE (x) == DImode)
9707 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9708
9709 if (flag_expensive_optimizations
9710 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9711 set_once = 1;
9712
9713 if (insn == 0)
9714 {
9715 if (set_once)
9716 insn = get_last_insn_anywhere ();
9717 else
9718 return 0;
9719 }
9720
9721 while ((insn = PREV_INSN (insn)))
9722 {
9723 switch (GET_CODE (insn))
9724 {
9725 case JUMP_INSN:
9726 case NOTE:
9727 break;
9728 case CODE_LABEL:
9729 case CALL_INSN:
9730 default:
9731 if (! set_once)
9732 return 0;
9733 break;
9734 case INSN:
9735 {
9736 rtx pat = PATTERN (insn);
9737 if (GET_CODE (pat) != SET)
9738 return 0;
9739 if (rtx_equal_p (x, SET_DEST (pat)))
9740 return set_extends (insn);
9741 if (y && rtx_equal_p (y, SET_DEST (pat)))
9742 return set_extends (insn);
9743 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9744 return 0;
9745 }
9746 }
9747 }
9748 return 0;
9749 }
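/* Illustrative examples of the tri-state result (assuming SImode sets):

     (set (reg) (high ...))       e.g. sethi  -> returns  1 (high bits zero)
     (set (reg) (ashiftrt ...))   e.g. sra    -> returns -1 (sign extended)
     (set (reg) (plus ...))       e.g. add    -> returns  0 (unknown)  */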
9750
9751 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9752 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9753
9754 const char *
9755 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9756 {
9757 static char asm_code[60];
9758
9759 /* The scratch register is only required when the destination
9760 register is not a 64-bit global or out register. */
9761 if (which_alternative != 2)
9762 operands[3] = operands[0];
9763
9764 /* We can only shift by constants <= 63. */
9765 if (GET_CODE (operands[2]) == CONST_INT)
9766 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9767
9768 if (GET_CODE (operands[1]) == CONST_INT)
9769 {
9770 output_asm_insn ("mov\t%1, %3", operands);
9771 }
9772 else
9773 {
9774 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9775 if (sparc_check_64 (operands[1], insn) <= 0)
9776 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9777 output_asm_insn ("or\t%L1, %3, %3", operands);
9778 }
9779
9780 strcpy (asm_code, opcode);
9781
9782 if (which_alternative != 2)
9783 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9784 else
9785 return
9786 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9787 }
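/* For example (illustrative), a "sllx" with a 64-bit value split across a
   register pair and alternative 0 selected emits a sequence like:

     sllx  %H1, 32, %0	! merge the two 32-bit halves into %0...
     srl   %L1, 0, %L1	! ...zero-extending the low word if need be
     or    %L1, %0, %0
     sllx  %0, %2, %L0	! the shift proper
     srlx  %L0, 32, %H0	! split the result back into the pair  */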
9788 \f
9789 /* Output rtl to increment the profiler label LABELNO
9790 for profiling a function entry. */
9791
9792 void
9793 sparc_profile_hook (int labelno)
9794 {
9795 char buf[32];
9796 rtx lab, fun;
9797
9798 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9799 if (NO_PROFILE_COUNTERS)
9800 {
9801 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9802 }
9803 else
9804 {
9805 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9806 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9807 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9808 }
9809 }
9810 \f
9811 #ifdef TARGET_SOLARIS
9812 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9813
9814 static void
9815 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9816 tree decl ATTRIBUTE_UNUSED)
9817 {
9818 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9819 {
9820 solaris_elf_asm_comdat_section (name, flags, decl);
9821 return;
9822 }
9823
9824 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9825
9826 if (!(flags & SECTION_DEBUG))
9827 fputs (",#alloc", asm_out_file);
9828 if (flags & SECTION_WRITE)
9829 fputs (",#write", asm_out_file);
9830 if (flags & SECTION_TLS)
9831 fputs (",#tls", asm_out_file);
9832 if (flags & SECTION_CODE)
9833 fputs (",#execinstr", asm_out_file);
9834
9835 if (flags & SECTION_NOTYPE)
9836 ;
9837 else if (flags & SECTION_BSS)
9838 fputs (",#nobits", asm_out_file);
9839 else
9840 fputs (",#progbits", asm_out_file);
9841
9842 fputc ('\n', asm_out_file);
9843 }
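/* For instance (illustrative), a plain writable data section comes out as:

	.section	".my_data",#alloc,#write,#progbits

   where the section name is hypothetical and the flag list follows the
   SECTION_* bits tested above.  */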
9844 #endif /* TARGET_SOLARIS */
9845
9846 /* We do not allow indirect calls to be optimized into sibling calls.
9847
9848 We cannot use sibling calls when delayed branches are disabled
9849 because they will likely require the call delay slot to be filled.
9850
9851 Also, on SPARC 32-bit we cannot emit a sibling call when the
9852 current function returns a structure. This is because the "unimp
9853 after call" convention would cause the callee to return to the
9854 wrong place. The generic code already disallows cases where the
9855 function being called returns a structure.
9856
9857 It may seem strange how this last case could occur. Usually there
9858 is code after the call which jumps to epilogue code which dumps the
9859 return value into the struct return area. That ought to invalidate
9860 the sibling call, right? Well, in the C++ case we can end up passing
9861 the pointer to the struct return area to a constructor (which returns
9862 void) and then nothing else happens. Such a sibling call would look
9863 valid without the added check here.
9864
9865 VxWorks PIC PLT entries require the global pointer to be initialized
9866 on entry. We therefore can't emit sibling calls to them. */
9867 static bool
9868 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9869 {
9870 return (decl
9871 && flag_delayed_branch
9872 && (TARGET_ARCH64 || ! cfun->returns_struct)
9873 && !(TARGET_VXWORKS_RTP
9874 && flag_pic
9875 && !targetm.binds_local_p (decl)));
9876 }
9877 \f
9878 /* libfunc renaming. */
9879
9880 static void
9881 sparc_init_libfuncs (void)
9882 {
9883 if (TARGET_ARCH32)
9884 {
9885 /* Use the subroutines that Sun's library provides for integer
9886 multiply and divide. The `*' prevents an underscore from
9887 being prepended by the compiler. .umul is a little faster
9888 than .mul. */
9889 set_optab_libfunc (smul_optab, SImode, "*.umul");
9890 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9891 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9892 set_optab_libfunc (smod_optab, SImode, "*.rem");
9893 set_optab_libfunc (umod_optab, SImode, "*.urem");
9894
9895 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9896 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9897 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9898 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9899 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9900 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9901
9902 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9903 is because with soft-float, the SFmode and DFmode sqrt
9904 instructions will be absent, and the compiler will notice and
9905 try to use the TFmode sqrt instruction for calls to the
9906 builtin function sqrt, but this fails. */
9907 if (TARGET_FPU)
9908 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9909
9910 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9911 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9912 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9913 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9914 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9915 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9916
9917 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9918 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9919 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9920 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9921
9922 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9923 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9924 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9925 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9926
9927 if (DITF_CONVERSION_LIBFUNCS)
9928 {
9929 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9930 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9931 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9932 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9933 }
9934
9935 if (SUN_CONVERSION_LIBFUNCS)
9936 {
9937 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9938 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9939 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9940 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9941 }
9942 }
9943 if (TARGET_ARCH64)
9944 {
9945 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9946 do not exist in the library. Make sure the compiler does not
9947 emit calls to them by accident. (It should always use the
9948 hardware instructions.) */
9949 set_optab_libfunc (smul_optab, SImode, 0);
9950 set_optab_libfunc (sdiv_optab, SImode, 0);
9951 set_optab_libfunc (udiv_optab, SImode, 0);
9952 set_optab_libfunc (smod_optab, SImode, 0);
9953 set_optab_libfunc (umod_optab, SImode, 0);
9954
9955 if (SUN_INTEGER_MULTIPLY_64)
9956 {
9957 set_optab_libfunc (smul_optab, DImode, "__mul64");
9958 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9959 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9960 set_optab_libfunc (smod_optab, DImode, "__rem64");
9961 set_optab_libfunc (umod_optab, DImode, "__urem64");
9962 }
9963
9964 if (SUN_CONVERSION_LIBFUNCS)
9965 {
9966 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9967 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9968 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9969 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9970 }
9971 }
9972 }
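/* Illustrative effect of the renaming above: on 32-bit SPARC,

     long double f (long double a, long double b) { return a + b; }

   compiles to a call to _Q_add instead of the default __addtf3, and an
   SImode multiply, when a libcall is needed at all, calls .umul rather
   than __mulsi3.  */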
9973 \f
9974 /* SPARC builtins. */
9975 enum sparc_builtins
9976 {
9977 /* FPU builtins. */
9978 SPARC_BUILTIN_LDFSR,
9979 SPARC_BUILTIN_STFSR,
9980
9981 /* VIS 1.0 builtins. */
9982 SPARC_BUILTIN_FPACK16,
9983 SPARC_BUILTIN_FPACK32,
9984 SPARC_BUILTIN_FPACKFIX,
9985 SPARC_BUILTIN_FEXPAND,
9986 SPARC_BUILTIN_FPMERGE,
9987 SPARC_BUILTIN_FMUL8X16,
9988 SPARC_BUILTIN_FMUL8X16AU,
9989 SPARC_BUILTIN_FMUL8X16AL,
9990 SPARC_BUILTIN_FMUL8SUX16,
9991 SPARC_BUILTIN_FMUL8ULX16,
9992 SPARC_BUILTIN_FMULD8SUX16,
9993 SPARC_BUILTIN_FMULD8ULX16,
9994 SPARC_BUILTIN_FALIGNDATAV4HI,
9995 SPARC_BUILTIN_FALIGNDATAV8QI,
9996 SPARC_BUILTIN_FALIGNDATAV2SI,
9997 SPARC_BUILTIN_FALIGNDATADI,
9998 SPARC_BUILTIN_WRGSR,
9999 SPARC_BUILTIN_RDGSR,
10000 SPARC_BUILTIN_ALIGNADDR,
10001 SPARC_BUILTIN_ALIGNADDRL,
10002 SPARC_BUILTIN_PDIST,
10003 SPARC_BUILTIN_EDGE8,
10004 SPARC_BUILTIN_EDGE8L,
10005 SPARC_BUILTIN_EDGE16,
10006 SPARC_BUILTIN_EDGE16L,
10007 SPARC_BUILTIN_EDGE32,
10008 SPARC_BUILTIN_EDGE32L,
10009 SPARC_BUILTIN_FCMPLE16,
10010 SPARC_BUILTIN_FCMPLE32,
10011 SPARC_BUILTIN_FCMPNE16,
10012 SPARC_BUILTIN_FCMPNE32,
10013 SPARC_BUILTIN_FCMPGT16,
10014 SPARC_BUILTIN_FCMPGT32,
10015 SPARC_BUILTIN_FCMPEQ16,
10016 SPARC_BUILTIN_FCMPEQ32,
10017 SPARC_BUILTIN_FPADD16,
10018 SPARC_BUILTIN_FPADD16S,
10019 SPARC_BUILTIN_FPADD32,
10020 SPARC_BUILTIN_FPADD32S,
10021 SPARC_BUILTIN_FPSUB16,
10022 SPARC_BUILTIN_FPSUB16S,
10023 SPARC_BUILTIN_FPSUB32,
10024 SPARC_BUILTIN_FPSUB32S,
10025 SPARC_BUILTIN_ARRAY8,
10026 SPARC_BUILTIN_ARRAY16,
10027 SPARC_BUILTIN_ARRAY32,
10028
10029 /* VIS 2.0 builtins. */
10030 SPARC_BUILTIN_EDGE8N,
10031 SPARC_BUILTIN_EDGE8LN,
10032 SPARC_BUILTIN_EDGE16N,
10033 SPARC_BUILTIN_EDGE16LN,
10034 SPARC_BUILTIN_EDGE32N,
10035 SPARC_BUILTIN_EDGE32LN,
10036 SPARC_BUILTIN_BMASK,
10037 SPARC_BUILTIN_BSHUFFLEV4HI,
10038 SPARC_BUILTIN_BSHUFFLEV8QI,
10039 SPARC_BUILTIN_BSHUFFLEV2SI,
10040 SPARC_BUILTIN_BSHUFFLEDI,
10041
10042 /* VIS 3.0 builtins. */
10043 SPARC_BUILTIN_CMASK8,
10044 SPARC_BUILTIN_CMASK16,
10045 SPARC_BUILTIN_CMASK32,
10046 SPARC_BUILTIN_FCHKSM16,
10047 SPARC_BUILTIN_FSLL16,
10048 SPARC_BUILTIN_FSLAS16,
10049 SPARC_BUILTIN_FSRL16,
10050 SPARC_BUILTIN_FSRA16,
10051 SPARC_BUILTIN_FSLL32,
10052 SPARC_BUILTIN_FSLAS32,
10053 SPARC_BUILTIN_FSRL32,
10054 SPARC_BUILTIN_FSRA32,
10055 SPARC_BUILTIN_PDISTN,
10056 SPARC_BUILTIN_FMEAN16,
10057 SPARC_BUILTIN_FPADD64,
10058 SPARC_BUILTIN_FPSUB64,
10059 SPARC_BUILTIN_FPADDS16,
10060 SPARC_BUILTIN_FPADDS16S,
10061 SPARC_BUILTIN_FPSUBS16,
10062 SPARC_BUILTIN_FPSUBS16S,
10063 SPARC_BUILTIN_FPADDS32,
10064 SPARC_BUILTIN_FPADDS32S,
10065 SPARC_BUILTIN_FPSUBS32,
10066 SPARC_BUILTIN_FPSUBS32S,
10067 SPARC_BUILTIN_FUCMPLE8,
10068 SPARC_BUILTIN_FUCMPNE8,
10069 SPARC_BUILTIN_FUCMPGT8,
10070 SPARC_BUILTIN_FUCMPEQ8,
10071 SPARC_BUILTIN_FHADDS,
10072 SPARC_BUILTIN_FHADDD,
10073 SPARC_BUILTIN_FHSUBS,
10074 SPARC_BUILTIN_FHSUBD,
10075 SPARC_BUILTIN_FNHADDS,
10076 SPARC_BUILTIN_FNHADDD,
10077 SPARC_BUILTIN_UMULXHI,
10078 SPARC_BUILTIN_XMULX,
10079 SPARC_BUILTIN_XMULXHI,
10080
10081 SPARC_BUILTIN_MAX
10082 };
10083
10084 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10085 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10086
10087 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10088 function decl or NULL_TREE if the builtin was not added. */
10089
10090 static tree
10091 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10092 tree type)
10093 {
10094 tree t
10095 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10096
10097 if (t)
10098 {
10099 sparc_builtins[code] = t;
10100 sparc_builtins_icode[code] = icode;
10101 }
10102
10103 return t;
10104 }
10105
10106 /* Likewise, but also marks the function as "const". */
10107
10108 static tree
10109 def_builtin_const (const char *name, enum insn_code icode,
10110 enum sparc_builtins code, tree type)
10111 {
10112 tree t = def_builtin (name, icode, code, type);
10113
10114 if (t)
10115 TREE_READONLY (t) = 1;
10116
10117 return t;
10118 }
10119
10120 /* Implement the TARGET_INIT_BUILTINS target hook.
10121 Create builtin functions for special SPARC instructions. */
10122
10123 static void
10124 sparc_init_builtins (void)
10125 {
10126 if (TARGET_FPU)
10127 sparc_fpu_init_builtins ();
10128
10129 if (TARGET_VIS)
10130 sparc_vis_init_builtins ();
10131 }
10132
10133 /* Create builtin functions for FPU instructions. */
10134
10135 static void
10136 sparc_fpu_init_builtins (void)
10137 {
10138 tree ftype
10139 = build_function_type_list (void_type_node,
10140 build_pointer_type (unsigned_type_node), 0);
10141 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10142 SPARC_BUILTIN_LDFSR, ftype);
10143 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10144 SPARC_BUILTIN_STFSR, ftype);
10145 }
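/* Illustrative use of the two builtins above from user code; the mask
   value is hypothetical.  */
#if 0
static void
example_fsr_roundtrip (void)
{
  unsigned int fsr;
  __builtin_store_fsr (&fsr);	/* stfsr: copy %fsr out to memory.  */
  fsr &= ~0x0f800000u;		/* Hypothetical control-bit tweak.  */
  __builtin_load_fsr (&fsr);	/* ldfsr: load %fsr back from memory.  */
}
#endif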
10146
10147 /* Create builtin functions for VIS instructions. */
10148
10149 static void
10150 sparc_vis_init_builtins (void)
10151 {
10152 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10153 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10154 tree v4hi = build_vector_type (intHI_type_node, 4);
10155 tree v2hi = build_vector_type (intHI_type_node, 2);
10156 tree v2si = build_vector_type (intSI_type_node, 2);
10157 tree v1si = build_vector_type (intSI_type_node, 1);
10158
10159 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10160 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10161 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10162 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10163 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10164 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10165 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10166 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10167 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10168 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10169 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10170 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10171 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10172 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10173 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10174 v8qi, v8qi,
10175 intDI_type_node, 0);
10176 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10177 v8qi, v8qi, 0);
10178 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10179 v8qi, v8qi, 0);
10180 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10181 intDI_type_node,
10182 intDI_type_node, 0);
10183 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10184 intSI_type_node,
10185 intSI_type_node, 0);
10186 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10187 ptr_type_node,
10188 intSI_type_node, 0);
10189 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10190 ptr_type_node,
10191 intDI_type_node, 0);
10192 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10193 ptr_type_node,
10194 ptr_type_node, 0);
10195 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10196 ptr_type_node,
10197 ptr_type_node, 0);
10198 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10199 v4hi, v4hi, 0);
10200 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10201 v2si, v2si, 0);
10202 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10203 v4hi, v4hi, 0);
10204 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10205 v2si, v2si, 0);
10206 tree void_ftype_di = build_function_type_list (void_type_node,
10207 intDI_type_node, 0);
10208 tree di_ftype_void = build_function_type_list (intDI_type_node,
10209 void_type_node, 0);
10210 tree void_ftype_si = build_function_type_list (void_type_node,
10211 intSI_type_node, 0);
10212 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10213 float_type_node,
10214 float_type_node, 0);
10215 tree df_ftype_df_df = build_function_type_list (double_type_node,
10216 double_type_node,
10217 double_type_node, 0);
10218
10219 /* Packing and expanding vectors. */
10220 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10221 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10222 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10223 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10224 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10225 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10226 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10227 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10228 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10229 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10230
10231 /* Multiplications. */
10232 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10233 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10234 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10235 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10236 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10237 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10238 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10239 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10240 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10241 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10242 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10243 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10244 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10245 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10246
10247 /* Data aligning. */
10248 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10249 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10250 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10251 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10252 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10253 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10254 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10255 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10256
10257 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10258 SPARC_BUILTIN_WRGSR, void_ftype_di);
10259 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10260 SPARC_BUILTIN_RDGSR, di_ftype_void);
10261
10262 if (TARGET_ARCH64)
10263 {
10264 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10265 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10266 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10267 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10268 }
10269 else
10270 {
10271 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10272 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10273 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10274 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10275 }
10276
10277 /* Pixel distance. */
10278 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10279 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10280
10281 /* Edge handling. */
10282 if (TARGET_ARCH64)
10283 {
10284 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10285 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10286 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10287 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10288 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10289 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10290 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10291 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10292 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10293 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10294 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10295 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10296 }
10297 else
10298 {
10299 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10300 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10301 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10302 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10303 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10304 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10305 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10306 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10307 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10308 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10309 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10310 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10311 }
10312
10313 /* Pixel compare. */
10314 if (TARGET_ARCH64)
10315 {
10316 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10317 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10318 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10319 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10320 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10321 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10322 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10323 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10324 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10325 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10326 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10327 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10328 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10329 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10330 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10331 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10332 }
10333 else
10334 {
10335 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10336 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10337 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10338 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10339 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10340 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10341 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10342 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10343 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10344 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10345 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10346 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10347 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10348 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10349 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10350 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10351 }
10352
10353 /* Addition and subtraction. */
10354 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10355 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10356 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10357 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10358 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10359 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10360 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10361 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10362 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10363 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10364 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10365 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10366 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10367 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10368 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10369 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10370
10371 /* Three-dimensional array addressing. */
10372 if (TARGET_ARCH64)
10373 {
10374 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10375 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10376 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10377 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10378 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10379 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10380 }
10381 else
10382 {
10383 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10384 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10385 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10386 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10387 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10388 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10389 }
10390
10391 if (TARGET_VIS2)
10392 {
10393 /* Edge handling. */
10394 if (TARGET_ARCH64)
10395 {
10396 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10397 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10398 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10399 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10400 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10401 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10402 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10403 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10404 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10405 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10406 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10407 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10408 }
10409 else
10410 {
10411 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10412 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10413 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10414 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10415 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10416 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10417 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10418 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10419 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10420 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10421 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10422 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10423 }
10424
10425 /* Byte mask and shuffle. */
10426 if (TARGET_ARCH64)
10427 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10428 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10429 else
10430 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10431 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10432 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10433 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10434 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10435 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10436 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10437 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10438 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10439 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10440 }
10441
10442 if (TARGET_VIS3)
10443 {
10444 if (TARGET_ARCH64)
10445 {
10446 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10447 SPARC_BUILTIN_CMASK8, void_ftype_di);
10448 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10449 SPARC_BUILTIN_CMASK16, void_ftype_di);
10450 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10451 SPARC_BUILTIN_CMASK32, void_ftype_di);
10452 }
10453 else
10454 {
10455 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10456 SPARC_BUILTIN_CMASK8, void_ftype_si);
10457 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10458 SPARC_BUILTIN_CMASK16, void_ftype_si);
10459 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10460 SPARC_BUILTIN_CMASK32, void_ftype_si);
10461 }
10462
10463 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10464 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10465
10466 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10467 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10468 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10469 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10470 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10471 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10472 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10473 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10474 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10475 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10476 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10477 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10478 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10479 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10480 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10481 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10482
10483 if (TARGET_ARCH64)
10484 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10485 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10486 else
10487 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10488 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10489
10490 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10491 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10492 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10493 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10494 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10495 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10496
10497 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10498 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10499 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10500 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10501 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10502 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10503 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10504 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10505 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10506 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10507 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10508 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10509 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10510 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10511 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10512 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10513
10514 if (TARGET_ARCH64)
10515 {
10516 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10517 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10518 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10519 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10520 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10521 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10522 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10523 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10524 }
10525 else
10526 {
10527 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10528 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10529 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10530 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10531 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10532 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10533 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10534 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10535 }
10536
10537 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10538 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10539 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10540 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10541 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10542 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10543 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10544 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10545 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10546 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10547 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10548 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10549
10550 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10551 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10552 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10553 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10554 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10555 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10556 }
10557 }
10558
10559 /* Implement TARGET_BUILTIN_DECL hook. */
10560
10561 static tree
10562 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10563 {
10564 if (code >= SPARC_BUILTIN_MAX)
10565 return error_mark_node;
10566
10567 return sparc_builtins[code];
10568 }
10569
10570 /* Implement TARGET_EXPAND_BUILTIN hook. */
10571
10572 static rtx
10573 sparc_expand_builtin (tree exp, rtx target,
10574 rtx subtarget ATTRIBUTE_UNUSED,
10575 machine_mode tmode ATTRIBUTE_UNUSED,
10576 int ignore ATTRIBUTE_UNUSED)
10577 {
10578 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10579 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10580 enum insn_code icode = sparc_builtins_icode[code];
10581 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10582 call_expr_arg_iterator iter;
10583 int arg_count = 0;
10584 rtx pat, op[4];
10585 tree arg;
10586
10587 if (nonvoid)
10588 {
10589 machine_mode tmode = insn_data[icode].operand[0].mode;
10590 if (!target
10591 || GET_MODE (target) != tmode
10592 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10593 op[0] = gen_reg_rtx (tmode);
10594 else
10595 op[0] = target;
10596 }
10597
10598 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10599 {
10600 const struct insn_operand_data *insn_op;
10601 int idx;
10602
10603 if (arg == error_mark_node)
10604 return NULL_RTX;
10605
10606 arg_count++;
10607 idx = arg_count - !nonvoid;
10608 insn_op = &insn_data[icode].operand[idx];
10609 op[arg_count] = expand_normal (arg);
10610
10611 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10612 {
10613 if (!address_operand (op[arg_count], SImode))
10614 {
10615 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10616 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10617 }
10618 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10619 }
10620
10621 else if (insn_op->mode == V1DImode
10622 && GET_MODE (op[arg_count]) == DImode)
10623 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10624
10625 else if (insn_op->mode == V1SImode
10626 && GET_MODE (op[arg_count]) == SImode)
10627 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10628
10629 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10630 insn_op->mode))
10631 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10632 }
10633
10634 switch (arg_count)
10635 {
10636 case 0:
10637 pat = GEN_FCN (icode) (op[0]);
10638 break;
10639 case 1:
10640 if (nonvoid)
10641 pat = GEN_FCN (icode) (op[0], op[1]);
10642 else
10643 pat = GEN_FCN (icode) (op[1]);
10644 break;
10645 case 2:
10646 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10647 break;
10648 case 3:
10649 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10650 break;
10651 default:
10652 gcc_unreachable ();
10653 }
10654
10655 if (!pat)
10656 return NULL_RTX;
10657
10658 emit_insn (pat);
10659
10660 return (nonvoid ? op[0] : const0_rtx);
10661 }
10662
10663 /* Return the upper 16 bits of the 8x16 multiplication. */
10664
10665 static int
10666 sparc_vis_mul8x16 (int e8, int e16)
10667 {
10668 return (e8 * e16 + 128) / 256;
10669 }
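/* Worked example: with e8 = 200 and e16 = 256 (i.e. 1.0 if the 16-bit
   operand is read as 8.8 fixed point), (200 * 256 + 128) / 256 = 200, so
   multiplying by 1.0 is the identity; the +128 rounds the truncating
   division to nearest.  */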
10670
10671 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10672 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10673
10674 static void
10675 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10676 tree inner_type, tree cst0, tree cst1)
10677 {
10678 unsigned i, num = VECTOR_CST_NELTS (cst0);
10679 int scale;
10680
10681 switch (fncode)
10682 {
10683 case SPARC_BUILTIN_FMUL8X16:
10684 for (i = 0; i < num; ++i)
10685 {
10686 int val
10687 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10688 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10689 n_elts[i] = build_int_cst (inner_type, val);
10690 }
10691 break;
10692
10693 case SPARC_BUILTIN_FMUL8X16AU:
10694 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10695
10696 for (i = 0; i < num; ++i)
10697 {
10698 int val
10699 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10700 scale);
10701 n_elts[i] = build_int_cst (inner_type, val);
10702 }
10703 break;
10704
10705 case SPARC_BUILTIN_FMUL8X16AL:
10706 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10707
10708 for (i = 0; i < num; ++i)
10709 {
10710 int val
10711 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10712 scale);
10713 n_elts[i] = build_int_cst (inner_type, val);
10714 }
10715 break;
10716
10717 default:
10718 gcc_unreachable ();
10719 }
10720 }
10721
10722 /* Implement TARGET_FOLD_BUILTIN hook.
10723
10724 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10725 result of the function call is ignored. NULL_TREE is returned if the
10726 function could not be folded. */
10727
10728 static tree
10729 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10730 tree *args, bool ignore)
10731 {
10732 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10733 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10734 tree arg0, arg1, arg2;
10735
10736 if (ignore)
10737 switch (code)
10738 {
10739 case SPARC_BUILTIN_LDFSR:
10740 case SPARC_BUILTIN_STFSR:
10741 case SPARC_BUILTIN_ALIGNADDR:
10742 case SPARC_BUILTIN_WRGSR:
10743 case SPARC_BUILTIN_BMASK:
10744 case SPARC_BUILTIN_CMASK8:
10745 case SPARC_BUILTIN_CMASK16:
10746 case SPARC_BUILTIN_CMASK32:
10747 break;
10748
10749 default:
10750 return build_zero_cst (rtype);
10751 }
10752
10753 switch (code)
10754 {
10755 case SPARC_BUILTIN_FEXPAND:
10756 arg0 = args[0];
10757 STRIP_NOPS (arg0);
10758
10759 if (TREE_CODE (arg0) == VECTOR_CST)
10760 {
10761 tree inner_type = TREE_TYPE (rtype);
10762 tree *n_elts;
10763 unsigned i;
10764
10765 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10766 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10767 n_elts[i] = build_int_cst (inner_type,
10768 TREE_INT_CST_LOW
10769 (VECTOR_CST_ELT (arg0, i)) << 4);
10770 return build_vector (rtype, n_elts);
10771 }
10772 break;
10773
10774 case SPARC_BUILTIN_FMUL8X16:
10775 case SPARC_BUILTIN_FMUL8X16AU:
10776 case SPARC_BUILTIN_FMUL8X16AL:
10777 arg0 = args[0];
10778 arg1 = args[1];
10779 STRIP_NOPS (arg0);
10780 STRIP_NOPS (arg1);
10781
10782 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10783 {
10784 tree inner_type = TREE_TYPE (rtype);
10785 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10786 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10787 return build_vector (rtype, n_elts);
10788 }
10789 break;
10790
10791 case SPARC_BUILTIN_FPMERGE:
10792 arg0 = args[0];
10793 arg1 = args[1];
10794 STRIP_NOPS (arg0);
10795 STRIP_NOPS (arg1);
10796
10797 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10798 {
10799 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10800 unsigned i;
10801 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10802 {
10803 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10804 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10805 }
10806
10807 return build_vector (rtype, n_elts);
10808 }
10809 break;
10810
10811 case SPARC_BUILTIN_PDIST:
10812 case SPARC_BUILTIN_PDISTN:
10813 arg0 = args[0];
10814 arg1 = args[1];
10815 STRIP_NOPS (arg0);
10816 STRIP_NOPS (arg1);
10817 if (code == SPARC_BUILTIN_PDIST)
10818 {
10819 arg2 = args[2];
10820 STRIP_NOPS (arg2);
10821 }
10822 else
10823 arg2 = integer_zero_node;
10824
10825 if (TREE_CODE (arg0) == VECTOR_CST
10826 && TREE_CODE (arg1) == VECTOR_CST
10827 && TREE_CODE (arg2) == INTEGER_CST)
10828 {
10829 bool overflow = false;
10830 widest_int result = wi::to_widest (arg2);
10831 widest_int tmp;
10832 unsigned i;
10833
10834 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10835 {
10836 tree e0 = VECTOR_CST_ELT (arg0, i);
10837 tree e1 = VECTOR_CST_ELT (arg1, i);
10838
10839 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10840
10841 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10842 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10843 if (wi::neg_p (tmp))
10844 tmp = wi::neg (tmp, &neg2_ovf);
10845 else
10846 neg2_ovf = false;
10847 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10848 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10849 }
10850
10851 gcc_assert (!overflow);
10852
10853 return wide_int_to_tree (rtype, result);
10854 }
10855
10856 default:
10857 break;
10858 }
10859
10860 return NULL_TREE;
10861 }
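/* Worked example for the PDIST folding above: with constant vectors
   arg0 = {3, 5, 0, 0, 0, 0, 0, 0}, arg1 = {5, 2, 0, 0, 0, 0, 0, 0} and an
   accumulator arg2 = 100, the loop sums |3-5| + |5-2| = 5 and the call is
   folded to the constant 105.  */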
10862 \f
10863 /* ??? This duplicates information provided to the compiler by the
10864 ??? scheduler description. Some day, teach genautomata to output
10865 ??? the latencies and then CSE will just use that. */
10866
10867 static bool
10868 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
10869 int opno ATTRIBUTE_UNUSED,
10870 int *total, bool speed ATTRIBUTE_UNUSED)
10871 {
10872 int code = GET_CODE (x);
10873 bool float_mode_p = FLOAT_MODE_P (mode);
10874
10875 switch (code)
10876 {
10877 case CONST_INT:
10878 if (SMALL_INT (x))
10879 *total = 0;
10880 else
10881 *total = 2;
10882 return true;
10883
10884 case CONST_WIDE_INT:
10885 *total = 0;
10886 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
10887 *total += 2;
10888 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
10889 *total += 2;
10890 return true;
10891
10892 case HIGH:
10893 *total = 2;
10894 return true;
10895
10896 case CONST:
10897 case LABEL_REF:
10898 case SYMBOL_REF:
10899 *total = 4;
10900 return true;
10901
10902 case CONST_DOUBLE:
10903 *total = 8;
10904 return true;
10905
10906 case MEM:
10907 /* If outer-code was a sign or zero extension, a cost
10908 of COSTS_N_INSNS (1) was already added in. This is
10909 why we are subtracting it back out. */
10910 if (outer_code == ZERO_EXTEND)
10911 {
10912 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10913 }
10914 else if (outer_code == SIGN_EXTEND)
10915 {
10916 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10917 }
10918 else if (float_mode_p)
10919 {
10920 *total = sparc_costs->float_load;
10921 }
10922 else
10923 {
10924 *total = sparc_costs->int_load;
10925 }
10926
10927 return true;
10928
10929 case PLUS:
10930 case MINUS:
10931 if (float_mode_p)
10932 *total = sparc_costs->float_plusminus;
10933 else
10934 *total = COSTS_N_INSNS (1);
10935 return false;
10936
10937 case FMA:
10938 {
10939 rtx sub;
10940
10941 gcc_assert (float_mode_p);
10942 *total = sparc_costs->float_mul;
10943
10944 sub = XEXP (x, 0);
10945 if (GET_CODE (sub) == NEG)
10946 sub = XEXP (sub, 0);
10947 *total += rtx_cost (sub, mode, FMA, 0, speed);
10948
10949 sub = XEXP (x, 2);
10950 if (GET_CODE (sub) == NEG)
10951 sub = XEXP (sub, 0);
10952 *total += rtx_cost (sub, mode, FMA, 2, speed);
10953 return true;
10954 }
10955
10956 case MULT:
10957 if (float_mode_p)
10958 *total = sparc_costs->float_mul;
10959 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
10960 *total = COSTS_N_INSNS (25);
10961 else
10962 {
10963 int bit_cost;
10964
10965 bit_cost = 0;
10966 if (sparc_costs->int_mul_bit_factor)
10967 {
10968 int nbits;
10969
10970 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10971 {
10972 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
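/* Count the set bits with Kernighan's trick: each iteration clears
   the lowest set bit, e.g. 0x36 -> 0x34 -> 0x30 -> 0x20 -> 0,
   giving nbits == 4. */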
10973 for (nbits = 0; value != 0; value &= value - 1)
10974 nbits++;
10975 }
10976 else
10977 nbits = 7;
10978
10979 if (nbits < 3)
10980 nbits = 3;
10981 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10982 bit_cost = COSTS_N_INSNS (bit_cost);
10983 }
10984
10985 if (mode == DImode || !TARGET_HARD_MUL)
10986 *total = sparc_costs->int_mulX + bit_cost;
10987 else
10988 *total = sparc_costs->int_mul + bit_cost;
10989 }
10990 return false;
10991
10992 case ASHIFT:
10993 case ASHIFTRT:
10994 case LSHIFTRT:
10995 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10996 return false;
10997
10998 case DIV:
10999 case UDIV:
11000 case MOD:
11001 case UMOD:
11002 if (float_mode_p)
11003 {
11004 if (mode == DFmode)
11005 *total = sparc_costs->float_div_df;
11006 else
11007 *total = sparc_costs->float_div_sf;
11008 }
11009 else
11010 {
11011 if (mode == DImode)
11012 *total = sparc_costs->int_divX;
11013 else
11014 *total = sparc_costs->int_div;
11015 }
11016 return false;
11017
11018 case NEG:
11019 if (! float_mode_p)
11020 {
11021 *total = COSTS_N_INSNS (1);
11022 return false;
11023 }
11024 /* FALLTHRU */
11025
11026 case ABS:
11027 case FLOAT:
11028 case UNSIGNED_FLOAT:
11029 case FIX:
11030 case UNSIGNED_FIX:
11031 case FLOAT_EXTEND:
11032 case FLOAT_TRUNCATE:
11033 *total = sparc_costs->float_move;
11034 return false;
11035
11036 case SQRT:
11037 if (mode == DFmode)
11038 *total = sparc_costs->float_sqrt_df;
11039 else
11040 *total = sparc_costs->float_sqrt_sf;
11041 return false;
11042
11043 case COMPARE:
11044 if (float_mode_p)
11045 *total = sparc_costs->float_cmp;
11046 else
11047 *total = COSTS_N_INSNS (1);
11048 return false;
11049
11050 case IF_THEN_ELSE:
11051 if (float_mode_p)
11052 *total = sparc_costs->float_cmove;
11053 else
11054 *total = sparc_costs->int_cmove;
11055 return false;
11056
11057 case IOR:
11058 /* Handle the NAND vector patterns. */
11059 if (sparc_vector_mode_supported_p (mode)
11060 && GET_CODE (XEXP (x, 0)) == NOT
11061 && GET_CODE (XEXP (x, 1)) == NOT)
11062 {
11063 *total = COSTS_N_INSNS (1);
11064 return true;
11065 }
11066 else
11067 return false;
11068
11069 default:
11070 return false;
11071 }
11072 }
11073
11074 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11075
11076 static inline bool
11077 general_or_i64_p (reg_class_t rclass)
11078 {
11079 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11080 }
11081
11082 /* Implement TARGET_REGISTER_MOVE_COST. */
11083
11084 static int
11085 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11086 reg_class_t from, reg_class_t to)
11087 {
11088 bool need_memory = false;
11089
11090 if (from == FPCC_REGS || to == FPCC_REGS)
11091 need_memory = true;
11092 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11093 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11094 {
11095 if (TARGET_VIS3)
11096 {
11097 int size = GET_MODE_SIZE (mode);
11098 if (size == 8 || size == 4)
11099 {
11100 if (! TARGET_ARCH32 || size == 4)
11101 return 4;
11102 else
11103 return 6;
11104 }
11105 }
11106 need_memory = true;
11107 }
11108
11109 if (need_memory)
11110 {
11111 if (sparc_cpu == PROCESSOR_ULTRASPARC
11112 || sparc_cpu == PROCESSOR_ULTRASPARC3
11113 || sparc_cpu == PROCESSOR_NIAGARA
11114 || sparc_cpu == PROCESSOR_NIAGARA2
11115 || sparc_cpu == PROCESSOR_NIAGARA3
11116 || sparc_cpu == PROCESSOR_NIAGARA4)
11117 return 12;
11118
11119 return 6;
11120 }
11121
11122 return 2;
11123 }
11124
11125 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11126 This is achieved by means of a manual dynamic stack space allocation in
11127 the current frame. We make the assumption that SEQ doesn't contain any
11128 function calls, with the possible exception of calls to the GOT helper. */
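/* Schematically, on a 32-bit target (where the stack bias is zero) the
   emitted sequence is roughly:

	sub	%sp, SIZE, %sp
	st	REG, [%sp+OFFSET]
	st	REG2, [%sp+OFFSET+4]	! only if REG2 is nonzero
	<SEQ>
	ld	[%sp+OFFSET+4], REG2	! only if REG2 is nonzero
	ld	[%sp+OFFSET], REG
	add	%sp, SIZE, %sp

   with OFFSET = 16*UNITS_PER_WORD, so that the register save area at
   the base of the new frame stays untouched. */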
11129
11130 static void
11131 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11132 {
11133 /* We must preserve the lowest 16 words for the register save area. */
11134 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11135 /* We really need only 2 words of fresh stack space. */
11136 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11137
11138 rtx slot
11139 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11140 SPARC_STACK_BIAS + offset));
11141
11142 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11143 emit_insn (gen_rtx_SET (slot, reg));
11144 if (reg2)
11145 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11146 reg2));
11147 emit_insn (seq);
11148 if (reg2)
11149 emit_insn (gen_rtx_SET (reg2,
11150 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11151 emit_insn (gen_rtx_SET (reg, slot));
11152 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11153 }
11154
11155 /* Output the assembler code for a thunk function. THUNK_DECL is the
11156 declaration for the thunk function itself, FUNCTION is the decl for
11157 the target function. DELTA is an immediate constant offset to be
11158 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11159 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
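/* In rough pseudo-code, the generated thunk performs (with word-sized
   loads):

	this += DELTA;
	if (VCALL_OFFSET)
	  this += *(*this + VCALL_OFFSET);
	tail call FUNCTION;
*/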
11160
11161 static void
11162 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11163 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11164 tree function)
11165 {
11166 rtx this_rtx, funexp;
11167 rtx_insn *insn;
11168 unsigned int int_arg_first;
11169
11170 reload_completed = 1;
11171 epilogue_completed = 1;
11172
11173 emit_note (NOTE_INSN_PROLOGUE_END);
11174
11175 if (TARGET_FLAT)
11176 {
11177 sparc_leaf_function_p = 1;
11178
11179 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11180 }
11181 else if (flag_delayed_branch)
11182 {
11183 /* We will emit a regular sibcall below, so we need to instruct
11184 output_sibcall that we are in a leaf function. */
11185 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11186
11187 /* This will cause final.c to invoke leaf_renumber_regs so we
11188 must behave as if we were in a not-yet-leafified function. */
11189 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11190 }
11191 else
11192 {
11193 /* We will emit the sibcall manually below, so we will need to
11194 manually spill non-leaf registers. */
11195 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11196
11197 /* We really are in a leaf function. */
11198 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11199 }
11200
11201 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11202 returns a structure, the structure return pointer is there instead. */
11203 if (TARGET_ARCH64
11204 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11205 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11206 else
11207 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11208
11209 /* Add DELTA. When possible use a plain add, otherwise load it into
11210 a register first. */
11211 if (delta)
11212 {
11213 rtx delta_rtx = GEN_INT (delta);
11214
11215 if (! SPARC_SIMM13_P (delta))
11216 {
11217 rtx scratch = gen_rtx_REG (Pmode, 1);
11218 emit_move_insn (scratch, delta_rtx);
11219 delta_rtx = scratch;
11220 }
11221
11222 /* THIS_RTX += DELTA. */
11223 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11224 }
11225
11226 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11227 if (vcall_offset)
11228 {
11229 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11230 rtx scratch = gen_rtx_REG (Pmode, 1);
11231
11232 gcc_assert (vcall_offset < 0);
11233
11234 /* SCRATCH = *THIS_RTX. */
11235 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11236
11237 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11238 may not have any available scratch register at this point. */
11239 if (SPARC_SIMM13_P (vcall_offset))
11240 ;
11241 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11242 else if (! fixed_regs[5]
11243 /* The below sequence is made up of at least 2 insns,
11244 while the default method may need only one. */
11245 && vcall_offset < -8192)
11246 {
11247 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11248 emit_move_insn (scratch2, vcall_offset_rtx);
11249 vcall_offset_rtx = scratch2;
11250 }
11251 else
11252 {
11253 rtx increment = GEN_INT (-4096);
11254
11255 /* VCALL_OFFSET is a negative number whose typical range can be
11256 estimated as -32768..0 in 32-bit mode. In almost all cases
11257 it is therefore cheaper to emit multiple add insns than
11258 spilling and loading the constant into a register (at least
11259 6 insns). */
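/* For instance, with VCALL_OFFSET == -10000 the loop emits two
   "add SCRATCH, -4096, SCRATCH" insns, leaving -1808, which fits
   in a SIMM13 for the memory reference below. */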
11260 while (! SPARC_SIMM13_P (vcall_offset))
11261 {
11262 emit_insn (gen_add2_insn (scratch, increment));
11263 vcall_offset += 4096;
11264 }
11265 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11266 }
11267
11268 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11269 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11270 gen_rtx_PLUS (Pmode,
11271 scratch,
11272 vcall_offset_rtx)));
11273
11274 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11275 emit_insn (gen_add2_insn (this_rtx, scratch));
11276 }
11277
11278 /* Generate a tail call to the target function. */
11279 if (! TREE_USED (function))
11280 {
11281 assemble_external (function);
11282 TREE_USED (function) = 1;
11283 }
11284 funexp = XEXP (DECL_RTL (function), 0);
11285
11286 if (flag_delayed_branch)
11287 {
11288 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11289 insn = emit_call_insn (gen_sibcall (funexp));
11290 SIBLING_CALL_P (insn) = 1;
11291 }
11292 else
11293 {
11294 /* The hoops we have to jump through in order to generate a sibcall
11295 without using delay slots... */
11296 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11297
11298 if (flag_pic)
11299 {
11300 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11301 start_sequence ();
11302 load_got_register (); /* clobbers %o7 */
11303 scratch = sparc_legitimize_pic_address (funexp, scratch);
11304 seq = get_insns ();
11305 end_sequence ();
11306 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11307 }
11308 else if (TARGET_ARCH32)
11309 {
11310 emit_insn (gen_rtx_SET (scratch,
11311 gen_rtx_HIGH (SImode, funexp)));
11312 emit_insn (gen_rtx_SET (scratch,
11313 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11314 }
11315 else /* TARGET_ARCH64 */
11316 {
11317 switch (sparc_cmodel)
11318 {
11319 case CM_MEDLOW:
11320 case CM_MEDMID:
11321 /* The destination can serve as a temporary. */
11322 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11323 break;
11324
11325 case CM_MEDANY:
11326 case CM_EMBMEDANY:
11327 /* The destination cannot serve as a temporary. */
11328 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11329 start_sequence ();
11330 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11331 seq = get_insns ();
11332 end_sequence ();
11333 emit_and_preserve (seq, spill_reg, 0);
11334 break;
11335
11336 default:
11337 gcc_unreachable ();
11338 }
11339 }
11340
11341 emit_jump_insn (gen_indirect_jump (scratch));
11342 }
11343
11344 emit_barrier ();
11345
11346 /* Run just enough of rest_of_compilation to get the insns emitted.
11347 There's not really enough bulk here to make other passes such as
11348 instruction scheduling worthwhile. Note that use_thunk calls
11349 assemble_start_function and assemble_end_function. */
11350 insn = get_insns ();
11351 shorten_branches (insn);
11352 final_start_function (insn, file, 1);
11353 final (insn, file, 1);
11354 final_end_function ();
11355
11356 reload_completed = 0;
11357 epilogue_completed = 0;
11358 }
11359
11360 /* Return true if sparc_output_mi_thunk would be able to output the
11361 assembler code for the thunk function specified by the arguments
11362 it is passed, and false otherwise. */
11363 static bool
11364 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11365 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11366 HOST_WIDE_INT vcall_offset,
11367 const_tree function ATTRIBUTE_UNUSED)
11368 {
11369 /* Bound the loop used in the default method above. */
11370 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11371 }
11372
11373 /* How to allocate a 'struct machine_function'. */
11374
11375 static struct machine_function *
11376 sparc_init_machine_status (void)
11377 {
11378 return ggc_cleared_alloc<machine_function> ();
11379 }
11380
11381 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11382 We need to emit DTP-relative relocations. */
11383
11384 static void
11385 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11386 {
11387 switch (size)
11388 {
11389 case 4:
11390 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11391 break;
11392 case 8:
11393 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11394 break;
11395 default:
11396 gcc_unreachable ();
11397 }
11398 output_addr_const (file, x);
11399 fputs (")", file);
11400 }
11401
11402 /* Do whatever processing is required at the end of a file. */
11403
11404 static void
11405 sparc_file_end (void)
11406 {
11407 /* If we need to emit the special GOT helper function, do so now. */
11408 if (got_helper_rtx)
11409 {
11410 const char *name = XSTR (got_helper_rtx, 0);
11411 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11412 #ifdef DWARF2_UNWIND_INFO
11413 bool do_cfi;
11414 #endif
11415
11416 if (USE_HIDDEN_LINKONCE)
11417 {
11418 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11419 get_identifier (name),
11420 build_function_type_list (void_type_node,
11421 NULL_TREE));
11422 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11423 NULL_TREE, void_type_node);
11424 TREE_PUBLIC (decl) = 1;
11425 TREE_STATIC (decl) = 1;
11426 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11427 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11428 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11429 resolve_unique_section (decl, 0, flag_function_sections);
11430 allocate_struct_function (decl, true);
11431 cfun->is_thunk = 1;
11432 current_function_decl = decl;
11433 init_varasm_status ();
11434 assemble_start_function (decl, name);
11435 }
11436 else
11437 {
11438 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11439 switch_to_section (text_section);
11440 if (align > 0)
11441 ASM_OUTPUT_ALIGN (asm_out_file, align);
11442 ASM_OUTPUT_LABEL (asm_out_file, name);
11443 }
11444
11445 #ifdef DWARF2_UNWIND_INFO
11446 do_cfi = dwarf2out_do_cfi_asm ();
11447 if (do_cfi)
11448 fprintf (asm_out_file, "\t.cfi_startproc\n");
11449 #endif
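/* The helper body returns with "jmp %o7+8" and adds %o7 into the GOT
   register either in the delay slot or, when delayed branches are
   disabled, just before the jump. */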
11450 if (flag_delayed_branch)
11451 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11452 reg_name, reg_name);
11453 else
11454 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11455 reg_name, reg_name);
11456 #ifdef DWARF2_UNWIND_INFO
11457 if (do_cfi)
11458 fprintf (asm_out_file, "\t.cfi_endproc\n");
11459 #endif
11460 }
11461
11462 if (NEED_INDICATE_EXEC_STACK)
11463 file_end_indicate_exec_stack ();
11464
11465 #ifdef TARGET_SOLARIS
11466 solaris_file_end ();
11467 #endif
11468 }
11469
11470 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11471 /* Implement TARGET_MANGLE_TYPE. */
11472
11473 static const char *
11474 sparc_mangle_type (const_tree type)
11475 {
11476 if (!TARGET_64BIT
11477 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11478 && TARGET_LONG_DOUBLE_128)
11479 return "g";
11480
11481 /* For all other types, use normal C++ mangling. */
11482 return NULL;
11483 }
11484 #endif
11485
11486 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11487 and BEFORE_AFTER arguments are of the form X_Y: they are two-bit masks
11488 where bit 0 indicates that X is true, and bit 1 indicates that Y is true. */
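/* As an illustration, a SEQ_CST compare-and-swap requests the pairs
   (load_store == 3, before_after == 1) before and (3, 2) after the
   operation. Under TSO both requests are entirely implied by the
   memory model, so no membar is emitted; under RMO each becomes a
   full "membar #LoadLoad|#StoreLoad|#LoadStore|#StoreStore". */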
11489
11490 void
11491 sparc_emit_membar_for_model (enum memmodel model,
11492 int load_store, int before_after)
11493 {
11494 /* Bits for the MEMBAR mmask field. */
11495 const int LoadLoad = 1;
11496 const int StoreLoad = 2;
11497 const int LoadStore = 4;
11498 const int StoreStore = 8;
11499
11500 int mm = 0, implied = 0;
11501
11502 switch (sparc_memory_model)
11503 {
11504 case SMM_SC:
11505 /* Sequential Consistency. All memory transactions are immediately
11506 visible in sequential execution order. No barriers needed. */
11507 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11508 break;
11509
11510 case SMM_TSO:
11511 /* Total Store Ordering: all memory transactions with store semantics
11512 are followed by an implied StoreStore. */
11513 implied |= StoreStore;
11514
11515 /* If we're not looking for a raw barrier (before+after), then atomic
11516 operations get the benefit of being both load and store. */
11517 if (load_store == 3 && before_after == 1)
11518 implied |= StoreLoad;
11519 /* FALLTHRU */
11520
11521 case SMM_PSO:
11522 /* Partial Store Ordering: all memory transactions with load semantics
11523 are followed by an implied LoadLoad | LoadStore. */
11524 implied |= LoadLoad | LoadStore;
11525
11526 /* If we're not looking for a raw barrier (before+after), then atomic
11527 operations get the benefit of being both load and store. */
11528 if (load_store == 3 && before_after == 2)
11529 implied |= StoreLoad | StoreStore;
11530 /* FALLTHRU */
11531
11532 case SMM_RMO:
11533 /* Relaxed Memory Ordering: no implicit bits. */
11534 break;
11535
11536 default:
11537 gcc_unreachable ();
11538 }
11539
11540 if (before_after & 1)
11541 {
11542 if (is_mm_release (model) || is_mm_acq_rel (model)
11543 || is_mm_seq_cst (model))
11544 {
11545 if (load_store & 1)
11546 mm |= LoadLoad | StoreLoad;
11547 if (load_store & 2)
11548 mm |= LoadStore | StoreStore;
11549 }
11550 }
11551 if (before_after & 2)
11552 {
11553 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11554 || is_mm_seq_cst (model))
11555 {
11556 if (load_store & 1)
11557 mm |= LoadLoad | LoadStore;
11558 if (load_store & 2)
11559 mm |= StoreLoad | StoreStore;
11560 }
11561 }
11562
11563 /* Remove the bits implied by the system memory model. */
11564 mm &= ~implied;
11565
11566 /* For raw barriers (before+after), always emit a barrier.
11567 This will become a compile-time barrier if needed. */
11568 if (mm || before_after == 3)
11569 emit_insn (gen_membar (GEN_INT (mm)));
11570 }
11571
11572 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing a
11573 32-bit compare-and-swap on the word containing the byte or half-word. */
11574
11575 static void
11576 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11577 rtx oldval, rtx newval)
11578 {
11579 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11580 rtx addr = gen_reg_rtx (Pmode);
11581 rtx off = gen_reg_rtx (SImode);
11582 rtx oldv = gen_reg_rtx (SImode);
11583 rtx newv = gen_reg_rtx (SImode);
11584 rtx oldvalue = gen_reg_rtx (SImode);
11585 rtx newvalue = gen_reg_rtx (SImode);
11586 rtx res = gen_reg_rtx (SImode);
11587 rtx resv = gen_reg_rtx (SImode);
11588 rtx memsi, val, mask, cc;
11589
11590 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11591
11592 if (Pmode != SImode)
11593 addr1 = gen_lowpart (SImode, addr1);
11594 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11595
11596 memsi = gen_rtx_MEM (SImode, addr);
11597 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11598 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11599
11600 val = copy_to_reg (memsi);
11601
11602 emit_insn (gen_rtx_SET (off,
11603 gen_rtx_XOR (SImode, off,
11604 GEN_INT (GET_MODE (mem) == QImode
11605 ? 3 : 2))));
11606
11607 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
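/* OFF is now the bit offset of the sub-word value within the SImode
   word, adjusted for big-endian order. E.g. a QImode access at byte
   offset 1 gives off = (1 ^ 3) * 8 == 16, so the 0xff mask shifted
   below selects bits 23:16 -- the second byte of the word. */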
11608
11609 if (GET_MODE (mem) == QImode)
11610 mask = force_reg (SImode, GEN_INT (0xff));
11611 else
11612 mask = force_reg (SImode, GEN_INT (0xffff));
11613
11614 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11615
11616 emit_insn (gen_rtx_SET (val,
11617 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11618 val)));
11619
11620 oldval = gen_lowpart (SImode, oldval);
11621 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11622
11623 newval = gen_lowpart_common (SImode, newval);
11624 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11625
11626 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11627
11628 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11629
11630 rtx_code_label *end_label = gen_label_rtx ();
11631 rtx_code_label *loop_label = gen_label_rtx ();
11632 emit_label (loop_label);
11633
11634 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11635
11636 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11637
11638 emit_move_insn (bool_result, const1_rtx);
11639
11640 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11641
11642 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11643
11644 emit_insn (gen_rtx_SET (resv,
11645 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11646 res)));
11647
11648 emit_move_insn (bool_result, const0_rtx);
11649
11650 cc = gen_compare_reg_1 (NE, resv, val);
11651 emit_insn (gen_rtx_SET (val, resv));
11652
11653 /* Use cbranchcc4 to separate the compare and branch! */
11654 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11655 cc, const0_rtx, loop_label));
11656
11657 emit_label (end_label);
11658
11659 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11660
11661 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11662
11663 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11664 }
11665
11666 /* Expand code to perform a compare-and-swap. */
11667
11668 void
11669 sparc_expand_compare_and_swap (rtx operands[])
11670 {
11671 rtx bval, retval, mem, oldval, newval;
11672 machine_mode mode;
11673 enum memmodel model;
11674
11675 bval = operands[0];
11676 retval = operands[1];
11677 mem = operands[2];
11678 oldval = operands[3];
11679 newval = operands[4];
11680 model = (enum memmodel) INTVAL (operands[6]);
11681 mode = GET_MODE (mem);
11682
11683 sparc_emit_membar_for_model (model, 3, 1);
11684
11685 if (reg_overlap_mentioned_p (retval, oldval))
11686 oldval = copy_to_reg (oldval);
11687
11688 if (mode == QImode || mode == HImode)
11689 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11690 else
11691 {
11692 rtx (*gen) (rtx, rtx, rtx, rtx);
11693 rtx x;
11694
11695 if (mode == SImode)
11696 gen = gen_atomic_compare_and_swapsi_1;
11697 else
11698 gen = gen_atomic_compare_and_swapdi_1;
11699 emit_insn (gen (retval, mem, oldval, newval));
11700
11701 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11702 if (x != bval)
11703 convert_move (bval, x, 1);
11704 }
11705
11706 sparc_emit_membar_for_model (model, 3, 2);
11707 }
11708
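/* Expand SEL, a vector of field indices for a vector permutation, into
   the byte-granular selector expected by the VIS2 BSHUFFLE insn and
   load it into the bmask field of the GSR; the integer result of the
   bmask insn itself is discarded into %g0. */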
11709 void
11710 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11711 {
11712 rtx t_1, t_2, t_3;
11713
11714 sel = gen_lowpart (DImode, sel);
11715 switch (vmode)
11716 {
11717 case V2SImode:
11718 /* inp = xxxxxxxAxxxxxxxB */
11719 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11720 NULL_RTX, 1, OPTAB_DIRECT);
11721 /* t_1 = ....xxxxxxxAxxx. */
11722 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11723 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11724 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11725 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11726 /* sel = .......B */
11727 /* t_1 = ...A.... */
11728 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11729 /* sel = ...A...B */
11730 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11731 /* sel = AAAABBBB * 4 */
11732 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11733 /* sel = { A*4, A*4+1, A*4+2, ... } */
11734 break;
11735
11736 case V4HImode:
11737 /* inp = xxxAxxxBxxxCxxxD */
11738 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11739 NULL_RTX, 1, OPTAB_DIRECT);
11740 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11741 NULL_RTX, 1, OPTAB_DIRECT);
11742 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11743 NULL_RTX, 1, OPTAB_DIRECT);
11744 /* t_1 = ..xxxAxxxBxxxCxx */
11745 /* t_2 = ....xxxAxxxBxxxC */
11746 /* t_3 = ......xxxAxxxBxx */
11747 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11748 GEN_INT (0x07),
11749 NULL_RTX, 1, OPTAB_DIRECT);
11750 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11751 GEN_INT (0x0700),
11752 NULL_RTX, 1, OPTAB_DIRECT);
11753 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11754 GEN_INT (0x070000),
11755 NULL_RTX, 1, OPTAB_DIRECT);
11756 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11757 GEN_INT (0x07000000),
11758 NULL_RTX, 1, OPTAB_DIRECT);
11759 /* sel = .......D */
11760 /* t_1 = .....C.. */
11761 /* t_2 = ...B.... */
11762 /* t_3 = .A...... */
11763 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11764 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11765 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11766 /* sel = .A.B.C.D */
11767 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11768 /* sel = AABBCCDD * 2 */
11769 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11770 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
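/* For instance, the V4HImode selector A=1, B=2, C=3, D=0 produces
   the nibble pattern 0x23456701 after the bmask addition. */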
11771 break;
11772
11773 case V8QImode:
11774 /* input = xAxBxCxDxExFxGxH */
11775 sel = expand_simple_binop (DImode, AND, sel,
11776 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11777 | 0x0f0f0f0f),
11778 NULL_RTX, 1, OPTAB_DIRECT);
11779 /* sel = .A.B.C.D.E.F.G.H */
11780 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11781 NULL_RTX, 1, OPTAB_DIRECT);
11782 /* t_1 = ..A.B.C.D.E.F.G. */
11783 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11784 NULL_RTX, 1, OPTAB_DIRECT);
11785 /* sel = .AABBCCDDEEFFGGH */
11786 sel = expand_simple_binop (DImode, AND, sel,
11787 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11788 | 0xff00ff),
11789 NULL_RTX, 1, OPTAB_DIRECT);
11790 /* sel = ..AB..CD..EF..GH */
11791 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11792 NULL_RTX, 1, OPTAB_DIRECT);
11793 /* t_1 = ....AB..CD..EF.. */
11794 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11795 NULL_RTX, 1, OPTAB_DIRECT);
11796 /* sel = ..ABABCDCDEFEFGH */
11797 sel = expand_simple_binop (DImode, AND, sel,
11798 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11799 NULL_RTX, 1, OPTAB_DIRECT);
11800 /* sel = ....ABCD....EFGH */
11801 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11802 NULL_RTX, 1, OPTAB_DIRECT);
11803 /* t_1 = ........ABCD.... */
11804 sel = gen_lowpart (SImode, sel);
11805 t_1 = gen_lowpart (SImode, t_1);
11806 break;
11807
11808 default:
11809 gcc_unreachable ();
11810 }
11811
11812 /* Always perform the final addition/merge within the bmask insn. */
11813 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11814 }
11815
11816 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11817
11818 static bool
11819 sparc_frame_pointer_required (void)
11820 {
11821 /* If the stack pointer is dynamically modified in the function, it cannot
11822 serve as the frame pointer. */
11823 if (cfun->calls_alloca)
11824 return true;
11825
11826 /* If the function receives nonlocal gotos, it needs to save the frame
11827 pointer in the nonlocal_goto_save_area object. */
11828 if (cfun->has_nonlocal_label)
11829 return true;
11830
11831 /* In flat mode, that's it. */
11832 if (TARGET_FLAT)
11833 return false;
11834
11835 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11836 return !(crtl->is_leaf && only_leaf_regs_used ());
11837 }
11838
11839 /* The way this is structured, we can't eliminate SFP in favor of SP
11840 if the frame pointer is required: we want to use the SFP->HFP elimination
11841 in that case. But the test in update_eliminables doesn't know we are
11842 assuming below that we only do the former elimination. */
11843
11844 static bool
11845 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11846 {
11847 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11848 }
11849
11850 /* Return the hard frame pointer directly to bypass the stack bias. */
11851
11852 static rtx
11853 sparc_builtin_setjmp_frame_value (void)
11854 {
11855 return hard_frame_pointer_rtx;
11856 }
11857
11858 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11859 they won't be allocated. */
11860
11861 static void
11862 sparc_conditional_register_usage (void)
11863 {
11864 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11865 {
11866 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11867 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11868 }
11869 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
11870 then honor it. */
11871 if (TARGET_ARCH32 && fixed_regs[5])
11872 fixed_regs[5] = 1;
11873 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11874 fixed_regs[5] = 0;
11875 if (! TARGET_V9)
11876 {
11877 int regno;
11878 for (regno = SPARC_FIRST_V9_FP_REG;
11879 regno <= SPARC_LAST_V9_FP_REG;
11880 regno++)
11881 fixed_regs[regno] = 1;
11882 /* %fcc0 is used by v8 and v9. */
11883 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11884 regno <= SPARC_LAST_V9_FCC_REG;
11885 regno++)
11886 fixed_regs[regno] = 1;
11887 }
11888 if (! TARGET_FPU)
11889 {
11890 int regno;
11891 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11892 fixed_regs[regno] = 1;
11893 }
11894 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
11895 then honor it. Likewise with g3 and g4. */
11896 if (fixed_regs[2] == 2)
11897 fixed_regs[2] = ! TARGET_APP_REGS;
11898 if (fixed_regs[3] == 2)
11899 fixed_regs[3] = ! TARGET_APP_REGS;
11900 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11901 fixed_regs[4] = ! TARGET_APP_REGS;
11902 else if (TARGET_CM_EMBMEDANY)
11903 fixed_regs[4] = 1;
11904 else if (fixed_regs[4] == 2)
11905 fixed_regs[4] = 0;
11906 if (TARGET_FLAT)
11907 {
11908 int regno;
11909 /* Disable leaf functions. */
11910 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11911 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11912 leaf_reg_remap [regno] = regno;
11913 }
11914 if (TARGET_VIS)
11915 global_regs[SPARC_GSR_REG] = 1;
11916 }
11917
11918 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11919
11920 - We can't load constants into FP registers.
11921 - We can't load FP constants into integer registers when soft-float,
11922 because there is no soft-float pattern with an r/F constraint.
11923 - We can't load FP constants into integer registers for TFmode unless
11924 it is 0.0L, because there is no movtf pattern with an r/F constraint.
11925 - Try to reload integer constants (symbolic or otherwise) back into
11926 registers directly, rather than having them dumped to memory. */
11927
11928 static reg_class_t
11929 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11930 {
11931 machine_mode mode = GET_MODE (x);
11932 if (CONSTANT_P (x))
11933 {
11934 if (FP_REG_CLASS_P (rclass)
11935 || rclass == GENERAL_OR_FP_REGS
11936 || rclass == GENERAL_OR_EXTRA_FP_REGS
11937 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11938 || (mode == TFmode && ! const_zero_operand (x, mode)))
11939 return NO_REGS;
11940
11941 if (GET_MODE_CLASS (mode) == MODE_INT)
11942 return GENERAL_REGS;
11943
11944 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11945 {
11946 if (! FP_REG_CLASS_P (rclass)
11947 || !(const_zero_operand (x, mode)
11948 || const_all_ones_operand (x, mode)))
11949 return NO_REGS;
11950 }
11951 }
11952
11953 if (TARGET_VIS3
11954 && ! TARGET_ARCH64
11955 && (rclass == EXTRA_FP_REGS
11956 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11957 {
11958 int regno = true_regnum (x);
11959
11960 if (SPARC_INT_REG_P (regno))
11961 return (rclass == EXTRA_FP_REGS
11962 ? FP_REGS : GENERAL_OR_FP_REGS);
11963 }
11964
11965 return rclass;
11966 }
11967
11968 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11969 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
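/* For the general register/register case (first alternative), the
   emitted sequence is along the lines of:

	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	<OPCODE>	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0

   possibly preceded by srl insns zero-extending the low parts: the
   64-bit operands are reassembled from their 32-bit register pairs,
   multiplied, and the 64-bit product split back into a pair. */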
11970
11971 const char *
11972 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
11973 {
11974 char mulstr[32];
11975
11976 gcc_assert (! TARGET_ARCH64);
11977
11978 if (sparc_check_64 (operands[1], insn) <= 0)
11979 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11980 if (which_alternative == 1)
11981 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11982 if (GET_CODE (operands[2]) == CONST_INT)
11983 {
11984 if (which_alternative == 1)
11985 {
11986 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11987 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11988 output_asm_insn (mulstr, operands);
11989 return "srlx\t%L0, 32, %H0";
11990 }
11991 else
11992 {
11993 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11994 output_asm_insn ("or\t%L1, %3, %3", operands);
11995 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11996 output_asm_insn (mulstr, operands);
11997 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11998 return "mov\t%3, %L0";
11999 }
12000 }
12001 else if (rtx_equal_p (operands[1], operands[2]))
12002 {
12003 if (which_alternative == 1)
12004 {
12005 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12006 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12007 output_asm_insn (mulstr, operands);
12008 return "srlx\t%L0, 32, %H0";
12009 }
12010 else
12011 {
12012 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12013 output_asm_insn ("or\t%L1, %3, %3", operands);
12014 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12015 output_asm_insn (mulstr, operands);
12016 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12017 return "mov\t%3, %L0";
12018 }
12019 }
12020 if (sparc_check_64 (operands[2], insn) <= 0)
12021 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12022 if (which_alternative == 1)
12023 {
12024 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12025 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12026 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12027 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12028 output_asm_insn (mulstr, operands);
12029 return "srlx\t%L0, 32, %H0";
12030 }
12031 else
12032 {
12033 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12034 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12035 output_asm_insn ("or\t%L1, %3, %3", operands);
12036 output_asm_insn ("or\t%L2, %4, %4", operands);
12037 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12038 output_asm_insn (mulstr, operands);
12039 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12040 return "mov\t%3, %L0";
12041 }
12042 }
12043
12044 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12045 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12046 and INNER_MODE are the modes describing TARGET. */
12047
12048 static void
12049 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12050 machine_mode inner_mode)
12051 {
12052 rtx t1, final_insn, sel;
12053 int bmask;
12054
12055 t1 = gen_reg_rtx (mode);
12056
12057 elt = convert_modes (SImode, inner_mode, elt, true);
12058 emit_move_insn (gen_lowpart (SImode, t1), elt);
12059
12060 switch (mode)
12061 {
12062 case V2SImode:
12063 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12064 bmask = 0x45674567;
12065 break;
12066 case V4HImode:
12067 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12068 bmask = 0x67676767;
12069 break;
12070 case V8QImode:
12071 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12072 bmask = 0x77777777;
12073 break;
12074 default:
12075 gcc_unreachable ();
12076 }
12077
12078 sel = force_reg (SImode, GEN_INT (bmask));
12079 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12080 emit_insn (final_insn);
12081 }
12082
12083 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12084 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12085
12086 static void
12087 vector_init_fpmerge (rtx target, rtx elt)
12088 {
12089 rtx t1, t2, t2_low, t3, t3_low;
12090
12091 t1 = gen_reg_rtx (V4QImode);
12092 elt = convert_modes (SImode, QImode, elt, true);
12093 emit_move_insn (gen_lowpart (SImode, t1), elt);
12094
12095 t2 = gen_reg_rtx (V8QImode);
12096 t2_low = gen_lowpart (V4QImode, t2);
12097 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12098
12099 t3 = gen_reg_rtx (V8QImode);
12100 t3_low = gen_lowpart (V4QImode, t3);
12101 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12102
12103 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12104 }
12105
12106 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12107 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12108
12109 static void
12110 vector_init_faligndata (rtx target, rtx elt)
12111 {
12112 rtx t1 = gen_reg_rtx (V4HImode);
12113 int i;
12114
12115 elt = convert_modes (SImode, HImode, elt, true);
12116 emit_move_insn (gen_lowpart (SImode, t1), elt);
12117
12118 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12119 force_reg (SImode, GEN_INT (6)),
12120 const0_rtx));
12121
12122 for (i = 0; i < 4; i++)
12123 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12124 }
12125
12126 /* Emit code to initialize TARGET to values for individual fields VALS. */
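/* For example, a V4HImode constructor whose four elements are the same
   non-constant value takes the all_same path below: with VIS2 a single
   BSHUFFLE (plus a bmask setup) replicates the element; without VIS2,
   V8QImode uses FPMERGE and V4HImode a sequence of FALIGNDATA insns.
   Initializers mixing several variable elements fall back to
   assembling the vector through a stack temporary. */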
12127
12128 void
12129 sparc_expand_vector_init (rtx target, rtx vals)
12130 {
12131 const machine_mode mode = GET_MODE (target);
12132 const machine_mode inner_mode = GET_MODE_INNER (mode);
12133 const int n_elts = GET_MODE_NUNITS (mode);
12134 int i, n_var = 0;
12135 bool all_same;
12136 rtx mem;
12137
12138 all_same = true;
12139 for (i = 0; i < n_elts; i++)
12140 {
12141 rtx x = XVECEXP (vals, 0, i);
12142 if (!CONSTANT_P (x))
12143 n_var++;
12144
12145 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12146 all_same = false;
12147 }
12148
12149 if (n_var == 0)
12150 {
12151 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12152 return;
12153 }
12154
12155 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12156 {
12157 if (GET_MODE_SIZE (inner_mode) == 4)
12158 {
12159 emit_move_insn (gen_lowpart (SImode, target),
12160 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12161 return;
12162 }
12163 else if (GET_MODE_SIZE (inner_mode) == 8)
12164 {
12165 emit_move_insn (gen_lowpart (DImode, target),
12166 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12167 return;
12168 }
12169 }
12170 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12171 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12172 {
12173 emit_move_insn (gen_highpart (word_mode, target),
12174 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12175 emit_move_insn (gen_lowpart (word_mode, target),
12176 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12177 return;
12178 }
12179
12180 if (all_same && GET_MODE_SIZE (mode) == 8)
12181 {
12182 if (TARGET_VIS2)
12183 {
12184 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12185 return;
12186 }
12187 if (mode == V8QImode)
12188 {
12189 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12190 return;
12191 }
12192 if (mode == V4HImode)
12193 {
12194 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12195 return;
12196 }
12197 }
12198
12199 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12200 for (i = 0; i < n_elts; i++)
12201 emit_move_insn (adjust_address_nv (mem, inner_mode,
12202 i * GET_MODE_SIZE (inner_mode)),
12203 XVECEXP (vals, 0, i));
12204 emit_move_insn (target, mem);
12205 }
12206
12207 /* Implement TARGET_SECONDARY_RELOAD. */
12208
12209 static reg_class_t
12210 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12211 machine_mode mode, secondary_reload_info *sri)
12212 {
12213 enum reg_class rclass = (enum reg_class) rclass_i;
12214
12215 sri->icode = CODE_FOR_nothing;
12216 sri->extra_cost = 0;
12217
12218 /* We need a temporary when loading/storing a HImode/QImode value
12219 between memory and the FPU registers. This can happen when combine puts
12220 a paradoxical subreg in a float/fix conversion insn. */
12221 if (FP_REG_CLASS_P (rclass)
12222 && (mode == HImode || mode == QImode)
12223 && (GET_CODE (x) == MEM
12224 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12225 && true_regnum (x) == -1)))
12226 return GENERAL_REGS;
12227
12228 /* On 32-bit we need a temporary when loading/storing a DFmode value
12229 between unaligned memory and the upper FPU registers. */
12230 if (TARGET_ARCH32
12231 && rclass == EXTRA_FP_REGS
12232 && mode == DFmode
12233 && GET_CODE (x) == MEM
12234 && ! mem_min_alignment (x, 8))
12235 return FP_REGS;
12236
12237 if (((TARGET_CM_MEDANY
12238 && symbolic_operand (x, mode))
12239 || (TARGET_CM_EMBMEDANY
12240 && text_segment_operand (x, mode)))
12241 && ! flag_pic)
12242 {
12243 if (in_p)
12244 sri->icode = direct_optab_handler (reload_in_optab, mode);
12245 else
12246 sri->icode = direct_optab_handler (reload_out_optab, mode);
12247 return NO_REGS;
12248 }
12249
12250 if (TARGET_VIS3 && TARGET_ARCH32)
12251 {
12252 int regno = true_regnum (x);
12253
12254 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12255 to move 8-byte values in 4-byte pieces. This only works via
12256 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12257 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12258 an FP_REGS intermediate move. */
12259 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12260 || ((general_or_i64_p (rclass)
12261 || rclass == GENERAL_OR_FP_REGS)
12262 && SPARC_FP_REG_P (regno)))
12263 {
12264 sri->extra_cost = 2;
12265 return FP_REGS;
12266 }
12267 }
12268
12269 return NO_REGS;
12270 }
12271
12272 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12273 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12274
12275 bool
12276 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12277 {
12278 enum rtx_code rc = GET_CODE (operands[1]);
12279 machine_mode cmp_mode;
12280 rtx cc_reg, dst, cmp;
12281
12282 cmp = operands[1];
12283 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12284 return false;
12285
12286 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12287 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12288
12289 cmp_mode = GET_MODE (XEXP (cmp, 0));
12290 rc = GET_CODE (cmp);
12291
12292 dst = operands[0];
12293 if (! rtx_equal_p (operands[2], dst)
12294 && ! rtx_equal_p (operands[3], dst))
12295 {
12296 if (reg_overlap_mentioned_p (dst, cmp))
12297 dst = gen_reg_rtx (mode);
12298
12299 emit_move_insn (dst, operands[3]);
12300 }
12301 else if (operands[2] == dst)
12302 {
12303 operands[2] = operands[3];
12304
12305 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12306 rc = reverse_condition_maybe_unordered (rc);
12307 else
12308 rc = reverse_condition (rc);
12309 }
12310
12311 if (XEXP (cmp, 1) == const0_rtx
12312 && GET_CODE (XEXP (cmp, 0)) == REG
12313 && cmp_mode == DImode
12314 && v9_regcmp_p (rc))
12315 cc_reg = XEXP (cmp, 0);
12316 else
12317 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12318
12319 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12320
12321 emit_insn (gen_rtx_SET (dst,
12322 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12323
12324 if (dst != operands[0])
12325 emit_move_insn (operands[0], dst);
12326
12327 return true;
12328 }
12329
12330 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12331 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12332 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12333 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12334 code to be used for the condition mask. */
12335
12336 void
12337 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12338 {
12339 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12340 enum rtx_code code = GET_CODE (operands[3]);
12341
12342 mask = gen_reg_rtx (Pmode);
12343 cop0 = operands[4];
12344 cop1 = operands[5];
12345 if (code == LT || code == GE)
12346 {
12347 rtx t;
12348
12349 code = swap_condition (code);
12350 t = cop0; cop0 = cop1; cop1 = t;
12351 }
12352
12353 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12354
12355 fcmp = gen_rtx_UNSPEC (Pmode,
12356 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12357 fcode);
12358
12359 cmask = gen_rtx_UNSPEC (DImode,
12360 gen_rtvec (2, mask, gsr),
12361 ccode);
12362
12363 bshuf = gen_rtx_UNSPEC (mode,
12364 gen_rtvec (3, operands[1], operands[2], gsr),
12365 UNSPEC_BSHUFFLE);
12366
12367 emit_insn (gen_rtx_SET (mask, fcmp));
12368 emit_insn (gen_rtx_SET (gsr, cmask));
12369
12370 emit_insn (gen_rtx_SET (operands[0], bshuf));
12371 }
12372
12373 /* On sparc, any mode which naturally allocates into the float
12374 registers should return 4 here. */
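/* Consequently, on 64-bit DImode has a natural size of 8, while DFmode
   and the 8-byte vector modes have a natural size of 4. */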
12375
12376 unsigned int
12377 sparc_regmode_natural_size (machine_mode mode)
12378 {
12379 int size = UNITS_PER_WORD;
12380
12381 if (TARGET_ARCH64)
12382 {
12383 enum mode_class mclass = GET_MODE_CLASS (mode);
12384
12385 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12386 size = 4;
12387 }
12388
12389 return size;
12390 }
12391
12392 /* Return TRUE if it is a good idea to tie two pseudo registers
12393 when one has mode MODE1 and one has mode MODE2.
12394 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12395 for any hard reg, then this must be FALSE for correct output.
12396
12397 For V9 we have to deal with the fact that only the lower 32 floating
12398 point registers are 32-bit addressable. */
12399
12400 bool
12401 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12402 {
12403 enum mode_class mclass1, mclass2;
12404 unsigned short size1, size2;
12405
12406 if (mode1 == mode2)
12407 return true;
12408
12409 mclass1 = GET_MODE_CLASS (mode1);
12410 mclass2 = GET_MODE_CLASS (mode2);
12411 if (mclass1 != mclass2)
12412 return false;
12413
12414 if (! TARGET_V9)
12415 return true;
12416
12417 /* Classes are the same and we are V9 so we have to deal with upper
12418 vs. lower floating point registers. If one of the modes is a
12419 4-byte mode, and the other is not, we have to mark them as not
12420 tieable because only the lower 32 floating point registers are
12421 addressable 32 bits at a time.
12422
12423 We can't just test explicitly for SFmode, otherwise we won't
12424 cover the vector mode cases properly. */
12425
12426 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12427 return true;
12428
12429 size1 = GET_MODE_SIZE (mode1);
12430 size2 = GET_MODE_SIZE (mode2);
12431 if ((size1 > 4 && size2 == 4)
12432 || (size2 > 4 && size1 == 4))
12433 return false;
12434
12435 return true;
12436 }
12437
12438 /* Implement TARGET_CSTORE_MODE. */
12439
12440 static machine_mode
12441 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12442 {
12443 return (TARGET_ARCH64 ? DImode : SImode);
12444 }
12445
12446 /* Return the compound expression made of T1 and T2. */
12447
12448 static inline tree
12449 compound_expr (tree t1, tree t2)
12450 {
12451 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12452 }
12453
12454 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12455
12456 static void
12457 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12458 {
12459 if (!TARGET_FPU)
12460 return;
12461
12462 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12463 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
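/* In the %fsr, the accrued exception field (aexc) occupies bits 9:5 and
   the trap enable mask (TEM) bits 27:23; each is 5 bits wide, one bit
   per IEEE exception. */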
12464
12465 /* We generate the equivalent of feholdexcept (&fenv_var):
12466
12467 unsigned int fenv_var;
12468 __builtin_store_fsr (&fenv_var);
12469
12470 unsigned int tmp1_var;
12471 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12472
12473 __builtin_load_fsr (&tmp1_var); */
12474
12475 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12476 TREE_ADDRESSABLE (fenv_var) = 1;
12477 tree fenv_addr = build_fold_addr_expr (fenv_var);
12478 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12479 tree hold_stfsr
12480 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12481 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12482
12483 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12484 TREE_ADDRESSABLE (tmp1_var) = 1;
12485 tree masked_fenv_var
12486 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12487 build_int_cst (unsigned_type_node,
12488 ~(accrued_exception_mask | trap_enable_mask)));
12489 tree hold_mask
12490 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12491 NULL_TREE, NULL_TREE);
12492
12493 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12494 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12495 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12496
12497 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12498
12499 /* We reload the value of tmp1_var to clear the exceptions:
12500
12501 __builtin_load_fsr (&tmp1_var); */
12502
12503 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12504
12505 /* We generate the equivalent of feupdateenv (&fenv_var):
12506
12507 unsigned int tmp2_var;
12508 __builtin_store_fsr (&tmp2_var);
12509
12510 __builtin_load_fsr (&fenv_var);
12511
12512 if (SPARC_LOW_FE_EXCEPT_VALUES)
12513 tmp2_var >>= 5;
12514 __atomic_feraiseexcept ((int) tmp2_var); */
12515
12516 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12517 TREE_ADDRESSABLE (tmp2_var) = 1;
12518 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12519 tree update_stfsr
12520 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12521 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12522
12523 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12524
12525 tree atomic_feraiseexcept
12526 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12527 tree update_call
12528 = build_call_expr (atomic_feraiseexcept, 1,
12529 fold_convert (integer_type_node, tmp2_var));
12530
12531 if (SPARC_LOW_FE_EXCEPT_VALUES)
12532 {
12533 tree shifted_tmp2_var
12534 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12535 build_int_cst (unsigned_type_node, 5));
12536 tree update_shift
12537 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12538 update_call = compound_expr (update_shift, update_call);
12539 }
12540
12541 *update
12542 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12543 }
12544
12545 #include "gt-sparc.h"