/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2016 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;
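
  /* Worked example (illustrative): with int_mul = COSTS_N_INSNS (4) and
     int_mul_bit_factor = 2, as in ultrasparc_costs below, a multiply by a
     constant whose highest set bit is 11 is costed at
     COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. 4 units above the base cost.  */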

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
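
/* sparc_costs is repointed at the table matching the selected CPU in
   sparc_option_override below and is read by the rtx cost hooks
   (e.g. sparc_rtx_costs).  */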

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   anything branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
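
/* Reading the table: entry 24 (%i0) maps to 8 (%o0), so the incoming
   registers of a leaf function are rewritten onto the out registers;
   entries of -1 have no remapping.  */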

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
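
/* E.g. mem_ref ((zero_extend:SI (mem:HI ...))) gives the inner MEM and
   mem_ref ((mem:SI ...)) gives the MEM itself; anything else yields
   NULL_RTX.  */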

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */
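
/* For instance, with -mfix-at697f the pass turns (sketch; registers
   illustrative):

     ld    [%o0], %f3        ! single-word load into odd FP register %f3
     faddd %f2, %f4, %f2     ! FPOPd reading/writing the enclosing %f2

   into the same sequence with a nop emitted before the faddd.  */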

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		     ld [address], %fx+1
		     FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		     ld [address], %fx+1
		     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		 ld [address], %fx+1
		 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf(stderr, "]\n");
}
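
/* With -mdebug=options, dump_target_flags produces lines of the form
   (hex value and flag set purely illustrative):

     Initial target_flags: (00000120) [ APP_REGS FPU ]  */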

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_VIS4|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~MASK_VIS4
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
		      | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4
	  || sparc_cpu == PROCESSOR_NIAGARA7))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
     can run at the same time.  More important, it is the threshold
     defining when additional prefetches will be dropped by the
     hardware.

     The UltraSPARC-III features a documented prefetch queue with a
     size of 8.  Additional prefetches issued in the cpu are
     dropped.

     Niagara processors are different.  In these processors prefetches
     are handled much like regular loads.  The L1 miss buffer is 32
     entries, but prefetches start getting affected when 30 entries
     become occupied.  That occupation could be a mix of regular loads
     and prefetches though.  And that buffer is shared by all threads.
     Once the threshold is reached, if the core is running a single
     thread the prefetch will retry.  If more than one thread is
     running, the prefetch will be dropped.

     All this makes it very difficult to determine how many
     simultaneous prefetches can be issued simultaneously, even in a
     single-threaded program.  Experimental results show that setting
     this parameter to 32 works well when the number of threads is not
     high.  */
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
				    ? 32 : 3))),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
     params.def), so no maybe_set_param_value is needed.

     The Oracle SPARC Architecture (previously the UltraSPARC
     Architecture) specification states that when a PREFETCH[A]
     instruction is executed an implementation-specific amount of data
     is prefetched, and that it is at least 64 bytes long (aligned to
     at least 64 bytes).

     However, this is not correct.  The M7 (and implementations prior
     to that) does not guarantee a 64B prefetch into a cache if the
     line size is smaller.  A single cache line is all that is ever
     prefetched.  So for the M7, where the L1D$ has 32B lines and the
     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
     L2 and L3, but only 32B are brought into the L1D$.  (Assuming it
     is a read_n prefetch, which is the only type which allocates to
     the L1.)  */

  /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
     Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
     Niagara processors feature an L1D$ of 16KB.  */
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4
			   || sparc_cpu == PROCESSOR_NIAGARA7)
			  ? 16 : 64),
			 global_options.x_param_values,
			 global_options_set.x_param_values);


1681 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1682 that 512 is the default in params.def. */
1683 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1684 (sparc_cpu == PROCESSOR_NIAGARA4
1685 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1686 ? 256 : 512)),
1687 global_options.x_param_values,
1688 global_options_set.x_param_values);
1689
1690
1691 /* Disable save slot sharing for call-clobbered registers by default.
1692 The IRA sharing algorithm works on single registers only and this
1693 pessimizes for double floating-point registers. */
1694 if (!global_options_set.x_flag_ira_share_save_slots)
1695 flag_ira_share_save_slots = 0;
1696
1697 /* We register a machine-specific pass to work around errata, if any.
1698 The pass must be scheduled as late as possible so that we have the
1699 (essentially) final form of the insn stream to work on.
1700 Registering the pass must be done at startup. It's convenient to
1701 do it here. */
1702 opt_pass *errata_pass = make_pass_work_around_errata (g);
1703 struct register_pass_info insert_pass_work_around_errata =
1704 {
1705 errata_pass, /* pass */
1706 "dbr", /* reference_pass_name */
1707 1, /* ref_pass_instance_number */
1708 PASS_POS_INSERT_AFTER /* pos_op */
1709 };
1710 register_pass (&insert_pass_work_around_errata);
1711 }
1712 \f
1713 /* Miscellaneous utilities. */
1714
1715 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1716 or branch on register contents instructions. */
1717
1718 int
1719 v9_regcmp_p (enum rtx_code code)
1720 {
1721 return (code == EQ || code == NE || code == GE || code == LT
1722 || code == LE || code == GT);
1723 }
1724
1725 /* Nonzero if OP is a floating point constant which can
1726 be loaded into an integer register using a single
1727 sethi instruction. */
1728
1729 int
1730 fp_sethi_p (rtx op)
1731 {
1732 if (GET_CODE (op) == CONST_DOUBLE)
1733 {
1734 long i;
1735
1736 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1737 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1738 }
1739
1740 return 0;
1741 }
1742
1743 /* Nonzero if OP is a floating point constant which can
1744 be loaded into an integer register using a single
1745 mov instruction. */
1746
1747 int
1748 fp_mov_p (rtx op)
1749 {
1750 if (GET_CODE (op) == CONST_DOUBLE)
1751 {
1752 long i;
1753
1754 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1755 return SPARC_SIMM13_P (i);
1756 }
1757
1758 return 0;
1759 }
1760
1761 /* Nonzero if OP is a floating point constant which can
1762 be loaded into an integer register using a high/losum
1763 instruction sequence. */
1764
1765 int
1766 fp_high_losum_p (rtx op)
1767 {
1768 /* The constraints calling this should only be in
1769 SFmode move insns, so any constant which cannot
1770 be moved using a single insn will do. */
1771 if (GET_CODE (op) == CONST_DOUBLE)
1772 {
1773 long i;
1774
1775 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1776 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1777 }
1778
1779 return 0;
1780 }
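/* Illustrative examples for the three predicates above (a sketch;
   the bit patterns follow from IEEE single precision and are not
   taken from the sources): +0.0f has pattern 0x00000000, which
   satisfies SPARC_SIMM13_P, so fp_mov_p is true and a single mov
   suffices; 1.0f has pattern 0x3f800000, whose low 10 bits are
   clear, so fp_sethi_p is true and a single sethi suffices; 1.1f
   has pattern 0x3f8ccccd, which needs both parts, so
   fp_high_losum_p is true and a sethi/or pair is emitted. */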
1781
1782 /* Return true if the address of LABEL can be loaded by means of the
1783 mov{si,di}_pic_label_ref patterns in PIC mode. */
1784
1785 static bool
1786 can_use_mov_pic_label_ref (rtx label)
1787 {
1788 /* VxWorks does not impose a fixed gap between segments; the run-time
1789 gap can be different from the object-file gap. We therefore can't
1790 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1791 are absolutely sure that X is in the same segment as the GOT.
1792 Unfortunately, the flexibility of linker scripts means that we
1793 can't be sure of that in general, so assume that GOT-relative
1794 accesses are never valid on VxWorks. */
1795 if (TARGET_VXWORKS_RTP)
1796 return false;
1797
1798 /* Similarly, if the label is non-local, it might end up being placed
1799 in a different section than the current one, and mov_pic_label_ref
1800 requires the label and the code to be in the same section. */
1801 if (LABEL_REF_NONLOCAL_P (label))
1802 return false;
1803
1804 /* Finally, if we are reordering basic blocks and partitioning into hot
1805 and cold sections, this might happen for any label. */
1806 if (flag_reorder_blocks_and_partition)
1807 return false;
1808
1809 return true;
1810 }
1811
1812 /* Expand a move instruction. Return true if all work is done. */
1813
1814 bool
1815 sparc_expand_move (machine_mode mode, rtx *operands)
1816 {
1817 /* Handle sets of MEM first. */
1818 if (GET_CODE (operands[0]) == MEM)
1819 {
1820 /* 0 is a register (or a pair of registers) on SPARC. */
1821 if (register_or_zero_operand (operands[1], mode))
1822 return false;
1823
1824 if (!reload_in_progress)
1825 {
1826 operands[0] = validize_mem (operands[0]);
1827 operands[1] = force_reg (mode, operands[1]);
1828 }
1829 }
1830
1831 /* Fixup TLS cases. */
1832 if (TARGET_HAVE_TLS
1833 && CONSTANT_P (operands[1])
1834 && sparc_tls_referenced_p (operands [1]))
1835 {
1836 operands[1] = sparc_legitimize_tls_address (operands[1]);
1837 return false;
1838 }
1839
1840 /* Fixup PIC cases. */
1841 if (flag_pic && CONSTANT_P (operands[1]))
1842 {
1843 if (pic_address_needs_scratch (operands[1]))
1844 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1845
1846 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1847 if (GET_CODE (operands[1]) == LABEL_REF
1848 && can_use_mov_pic_label_ref (operands[1]))
1849 {
1850 if (mode == SImode)
1851 {
1852 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1853 return true;
1854 }
1855
1856 if (mode == DImode)
1857 {
1858 gcc_assert (TARGET_ARCH64);
1859 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1860 return true;
1861 }
1862 }
1863
1864 if (symbolic_operand (operands[1], mode))
1865 {
1866 operands[1]
1867 = sparc_legitimize_pic_address (operands[1],
1868 reload_in_progress
1869 ? operands[0] : NULL_RTX);
1870 return false;
1871 }
1872 }
1873
1874 /* If we are trying to toss an integer constant into FP registers,
1875 or loading a FP or vector constant, force it into memory. */
1876 if (CONSTANT_P (operands[1])
1877 && REG_P (operands[0])
1878 && (SPARC_FP_REG_P (REGNO (operands[0]))
1879 || SCALAR_FLOAT_MODE_P (mode)
1880 || VECTOR_MODE_P (mode)))
1881 {
1882 /* emit_group_store will send such bogosity to us when it is
1883 not storing directly into memory. So fix this up to avoid
1884 crashes in output_constant_pool. */
1885 if (operands [1] == const0_rtx)
1886 operands[1] = CONST0_RTX (mode);
1887
1888 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1889 always other regs. */
1890 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1891 && (const_zero_operand (operands[1], mode)
1892 || const_all_ones_operand (operands[1], mode)))
1893 return false;
1894
1895 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1896 /* We are able to build any SF constant in integer registers
1897 with at most 2 instructions. */
1898 && (mode == SFmode
1899 /* And any DF constant in integer registers if needed. */
1900 || (mode == DFmode
1901 && ! can_create_pseudo_p ())))
1902 return false;
1903
1904 operands[1] = force_const_mem (mode, operands[1]);
1905 if (!reload_in_progress)
1906 operands[1] = validize_mem (operands[1]);
1907 return false;
1908 }
1909
1910 /* Accept non-constants and valid constants unmodified. */
1911 if (!CONSTANT_P (operands[1])
1912 || GET_CODE (operands[1]) == HIGH
1913 || input_operand (operands[1], mode))
1914 return false;
1915
1916 switch (mode)
1917 {
1918 case QImode:
1919 /* All QImode constants require only one insn, so proceed. */
1920 break;
1921
1922 case HImode:
1923 case SImode:
1924 sparc_emit_set_const32 (operands[0], operands[1]);
1925 return true;
1926
1927 case DImode:
1928 /* input_operand should have filtered out 32-bit mode. */
1929 sparc_emit_set_const64 (operands[0], operands[1]);
1930 return true;
1931
1932 case TImode:
1933 {
1934 rtx high, low;
1935 /* TImode isn't available in 32-bit mode. */
1936 split_double (operands[1], &high, &low);
1937 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1938 high));
1939 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1940 low));
1941 }
1942 return true;
1943
1944 default:
1945 gcc_unreachable ();
1946 }
1947
1948 return false;
1949 }
1950
1951 /* Load OP1, a 32-bit constant, into OP0, a register.
1952 We know it can't be done in one insn when we get
1953 here; the move expander guarantees this. */
1954
1955 static void
1956 sparc_emit_set_const32 (rtx op0, rtx op1)
1957 {
1958 machine_mode mode = GET_MODE (op0);
1959 rtx temp = op0;
1960
1961 if (can_create_pseudo_p ())
1962 temp = gen_reg_rtx (mode);
1963
1964 if (GET_CODE (op1) == CONST_INT)
1965 {
1966 gcc_assert (!small_int_operand (op1, mode)
1967 && !const_high_operand (op1, mode));
1968
1969 /* Emit them as real moves instead of a HIGH/LO_SUM,
1970 this way CSE can see everything and reuse intermediate
1971 values if it wants. */
1972 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1973 & ~(HOST_WIDE_INT) 0x3ff)));
1974
1975 emit_insn (gen_rtx_SET (op0,
1976 gen_rtx_IOR (mode, temp,
1977 GEN_INT (INTVAL (op1) & 0x3ff))));
1978 }
1979 else
1980 {
1981 /* A symbol, emit in the traditional way. */
1982 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1983 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1984 }
1985 }
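/* For instance (an illustrative example, not from the sources),
   loading the CONST_INT 0x12345678 through the branch above emits

	sethi	%hi(0x12345400), %temp	! temp = 0x12345678 & ~0x3ff
	or	%temp, 0x278, %op0	! op0 = 0x12345678

   as two explicit moves rather than a HIGH/LO_SUM pair, so CSE can
   see and reuse the intermediate value. */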
1986
1987 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1988 If TEMP is nonzero, we are forbidden to use any other scratch
1989 registers. Otherwise, we are allowed to generate them as needed.
1990
1991 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1992 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1993
1994 void
1995 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1996 {
1997 rtx temp1, temp2, temp3, temp4, temp5;
1998 rtx ti_temp = 0;
1999
2000 if (temp && GET_MODE (temp) == TImode)
2001 {
2002 ti_temp = temp;
2003 temp = gen_rtx_REG (DImode, REGNO (temp));
2004 }
2005
2006 /* SPARC-V9 code-model support. */
2007 switch (sparc_cmodel)
2008 {
2009 case CM_MEDLOW:
2010 /* The range spanned by all instructions in the object is less
2011 than 2^31 bytes (2GB) and the distance from any instruction
2012 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2013 than 2^31 bytes (2GB).
2014
2015 The executable must be in the low 4TB of the virtual address
2016 space.
2017
2018 sethi %hi(symbol), %temp1
2019 or %temp1, %lo(symbol), %reg */
2020 if (temp)
2021 temp1 = temp; /* op0 is allowed. */
2022 else
2023 temp1 = gen_reg_rtx (DImode);
2024
2025 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2026 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2027 break;
2028
2029 case CM_MEDMID:
2030 /* The range spanned by all instructions in the object is less
2031 than 2^31 bytes (2GB) and the distance from any instruction
2032 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2033 than 2^31 bytes (2GB).
2034
2035 The executable must be in the low 16TB of the virtual address
2036 space.
2037
2038 sethi %h44(symbol), %temp1
2039 or %temp1, %m44(symbol), %temp2
2040 sllx %temp2, 12, %temp3
2041 or %temp3, %l44(symbol), %reg */
2042 if (temp)
2043 {
2044 temp1 = op0;
2045 temp2 = op0;
2046 temp3 = temp; /* op0 is allowed. */
2047 }
2048 else
2049 {
2050 temp1 = gen_reg_rtx (DImode);
2051 temp2 = gen_reg_rtx (DImode);
2052 temp3 = gen_reg_rtx (DImode);
2053 }
2054
2055 emit_insn (gen_seth44 (temp1, op1));
2056 emit_insn (gen_setm44 (temp2, temp1, op1));
2057 emit_insn (gen_rtx_SET (temp3,
2058 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2059 emit_insn (gen_setl44 (op0, temp3, op1));
2060 break;
2061
2062 case CM_MEDANY:
2063 /* The range spanned by all instructions in the object is less
2064 than 2^31 bytes (2GB) and the distance from any instruction
2065 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2066 than 2^31 bytes (2GB).
2067
2068 The executable can be placed anywhere in the virtual address
2069 space.
2070
2071 sethi %hh(symbol), %temp1
2072 sethi %lm(symbol), %temp2
2073 or %temp1, %hm(symbol), %temp3
2074 sllx %temp3, 32, %temp4
2075 or %temp4, %temp2, %temp5
2076 or %temp5, %lo(symbol), %reg */
2077 if (temp)
2078 {
2079 /* It is possible that one of the registers we got for operands[2]
2080 might coincide with that of operands[0] (which is why we made
2081 it TImode). Pick the other one to use as our scratch. */
2082 if (rtx_equal_p (temp, op0))
2083 {
2084 gcc_assert (ti_temp);
2085 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2086 }
2087 temp1 = op0;
2088 temp2 = temp; /* op0 is _not_ allowed, see above. */
2089 temp3 = op0;
2090 temp4 = op0;
2091 temp5 = op0;
2092 }
2093 else
2094 {
2095 temp1 = gen_reg_rtx (DImode);
2096 temp2 = gen_reg_rtx (DImode);
2097 temp3 = gen_reg_rtx (DImode);
2098 temp4 = gen_reg_rtx (DImode);
2099 temp5 = gen_reg_rtx (DImode);
2100 }
2101
2102 emit_insn (gen_sethh (temp1, op1));
2103 emit_insn (gen_setlm (temp2, op1));
2104 emit_insn (gen_sethm (temp3, temp1, op1));
2105 emit_insn (gen_rtx_SET (temp4,
2106 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2107 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2108 emit_insn (gen_setlo (op0, temp5, op1));
2109 break;
2110
2111 case CM_EMBMEDANY:
2112 /* Old old old backwards-compatibility cruft here.
2113 Essentially it is MEDLOW with a fixed 64-bit
2114 virtual base added to all data segment addresses.
2115 Text-segment stuff is computed like MEDANY, we can't
2116 reuse the code above because the relocation knobs
2117 look different.
2118
2119 Data segment: sethi %hi(symbol), %temp1
2120 add %temp1, EMBMEDANY_BASE_REG, %temp2
2121 or %temp2, %lo(symbol), %reg */
2122 if (data_segment_operand (op1, GET_MODE (op1)))
2123 {
2124 if (temp)
2125 {
2126 temp1 = temp; /* op0 is allowed. */
2127 temp2 = op0;
2128 }
2129 else
2130 {
2131 temp1 = gen_reg_rtx (DImode);
2132 temp2 = gen_reg_rtx (DImode);
2133 }
2134
2135 emit_insn (gen_embmedany_sethi (temp1, op1));
2136 emit_insn (gen_embmedany_brsum (temp2, temp1));
2137 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2138 }
2139
2140 /* Text segment: sethi %uhi(symbol), %temp1
2141 sethi %hi(symbol), %temp2
2142 or %temp1, %ulo(symbol), %temp3
2143 sllx %temp3, 32, %temp4
2144 or %temp4, %temp2, %temp5
2145 or %temp5, %lo(symbol), %reg */
2146 else
2147 {
2148 if (temp)
2149 {
2150 /* It is possible that one of the registers we got for operands[2]
2151 might coincide with that of operands[0] (which is why we made
2152 it TImode). Pick the other one to use as our scratch. */
2153 if (rtx_equal_p (temp, op0))
2154 {
2155 gcc_assert (ti_temp);
2156 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2157 }
2158 temp1 = op0;
2159 temp2 = temp; /* op0 is _not_ allowed, see above. */
2160 temp3 = op0;
2161 temp4 = op0;
2162 temp5 = op0;
2163 }
2164 else
2165 {
2166 temp1 = gen_reg_rtx (DImode);
2167 temp2 = gen_reg_rtx (DImode);
2168 temp3 = gen_reg_rtx (DImode);
2169 temp4 = gen_reg_rtx (DImode);
2170 temp5 = gen_reg_rtx (DImode);
2171 }
2172
2173 emit_insn (gen_embmedany_textuhi (temp1, op1));
2174 emit_insn (gen_embmedany_texthi (temp2, op1));
2175 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2176 emit_insn (gen_rtx_SET (temp4,
2177 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2178 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2179 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2180 }
2181 break;
2182
2183 default:
2184 gcc_unreachable ();
2185 }
2186 }
2187
2188 /* These avoid problems when cross-compiling. If we do not
2189 go through all this hair, then the optimizer will see
2190 invalid REG_EQUAL notes or in some cases none at all. */
2191 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2192 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2193 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2194 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2195
2196 /* The optimizer is not to assume anything about exactly
2197 which bits are set for a HIGH; they are unspecified.
2198 Unfortunately this leads to many missed optimizations
2199 during CSE. We mask out the non-HIGH bits so that the
2200 result matches a plain movdi, to alleviate this problem. */
2201 static rtx
2202 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2203 {
2204 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2205 }
2206
2207 static rtx
2208 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2209 {
2210 return gen_rtx_SET (dest, GEN_INT (val));
2211 }
2212
2213 static rtx
2214 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2215 {
2216 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2217 }
2218
2219 static rtx
2220 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2221 {
2222 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2223 }
2224
2225 /* Worker routines for 64-bit constant formation on arch64.
2226 A key goal in these emission routines is to create as
2227 many temp REGs as possible. This makes it possible for
2228 half-built constants to be reused later on, when similar
2229 values turn out to be required.
2230 Without doing this, the optimizer cannot see such
2231 opportunities. */
2232
2233 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2234 unsigned HOST_WIDE_INT, int);
2235
2236 static void
2237 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2238 unsigned HOST_WIDE_INT low_bits, int is_neg)
2239 {
2240 unsigned HOST_WIDE_INT high_bits;
2241
2242 if (is_neg)
2243 high_bits = (~low_bits) & 0xffffffff;
2244 else
2245 high_bits = low_bits;
2246
2247 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2248 if (!is_neg)
2249 {
2250 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2251 }
2252 else
2253 {
2254 /* If we are XOR'ing with -1, then we should emit a one's complement
2255 instead. This way the combiner will notice logical operations
2256 such as ANDN later on and substitute. */
2257 if ((low_bits & 0x3ff) == 0x3ff)
2258 {
2259 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2260 }
2261 else
2262 {
2263 emit_insn (gen_rtx_SET (op0,
2264 gen_safe_XOR64 (temp,
2265 (-(HOST_WIDE_INT)0x400
2266 | (low_bits & 0x3ff)))));
2267 }
2268 }
2269 }
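/* A worked example of the is_neg case above (illustrative only):
   loading 0xffffffff89abcdf0 gives low_bits = 0x89abcdf0, so
   high_bits = ~low_bits & 0xffffffff = 0x7654320f, and the
   sequence is

	sethi	%hi(0x76543000), %temp	! temp = 0x76543000
	xor	%temp, -0x210, %reg	! -0x210 = -0x400 | 0x1f0

   where the xor both flips the upper 32 bits to all-ones and fixes
   up the low 10 bits, yielding 0xffffffff89abcdf0. */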
2270
2271 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2272 unsigned HOST_WIDE_INT, int);
2273
2274 static void
2275 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2276 unsigned HOST_WIDE_INT high_bits,
2277 unsigned HOST_WIDE_INT low_immediate,
2278 int shift_count)
2279 {
2280 rtx temp2 = op0;
2281
2282 if ((high_bits & 0xfffffc00) != 0)
2283 {
2284 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2285 if ((high_bits & ~0xfffffc00) != 0)
2286 emit_insn (gen_rtx_SET (op0,
2287 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2288 else
2289 temp2 = temp;
2290 }
2291 else
2292 {
2293 emit_insn (gen_safe_SET64 (temp, high_bits));
2294 temp2 = temp;
2295 }
2296
2297 /* Now shift it up into place. */
2298 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2299 GEN_INT (shift_count))));
2300
2301 /* If there is a low immediate part piece, finish up by
2302 putting that in as well. */
2303 if (low_immediate != 0)
2304 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2305 }
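/* For instance (an illustrative call, not from the sources),
   sparc_emit_set_const64_quick2 (op0, temp, 0x12345678, 0, 32)
   emits

	sethi	%hi(0x12345400), %temp
	or	%temp, 0x278, %op0	! op0 = 0x12345678
	sllx	%op0, 32, %op0		! op0 = 0x1234567800000000

   with a trailing or emitted only if low_immediate were nonzero. */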
2306
2307 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2308 unsigned HOST_WIDE_INT);
2309
2310 /* Full 64-bit constant decomposition. Even though this is the
2311 'worst' case, we still optimize a few things away. */
2312 static void
2313 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2314 unsigned HOST_WIDE_INT high_bits,
2315 unsigned HOST_WIDE_INT low_bits)
2316 {
2317 rtx sub_temp = op0;
2318
2319 if (can_create_pseudo_p ())
2320 sub_temp = gen_reg_rtx (DImode);
2321
2322 if ((high_bits & 0xfffffc00) != 0)
2323 {
2324 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2325 if ((high_bits & ~0xfffffc00) != 0)
2326 emit_insn (gen_rtx_SET (sub_temp,
2327 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2328 else
2329 sub_temp = temp;
2330 }
2331 else
2332 {
2333 emit_insn (gen_safe_SET64 (temp, high_bits));
2334 sub_temp = temp;
2335 }
2336
2337 if (can_create_pseudo_p ())
2338 {
2339 rtx temp2 = gen_reg_rtx (DImode);
2340 rtx temp3 = gen_reg_rtx (DImode);
2341 rtx temp4 = gen_reg_rtx (DImode);
2342
2343 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2344 GEN_INT (32))));
2345
2346 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2347 if ((low_bits & ~0xfffffc00) != 0)
2348 {
2349 emit_insn (gen_rtx_SET (temp3,
2350 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2351 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2352 }
2353 else
2354 {
2355 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2356 }
2357 }
2358 else
2359 {
2360 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2361 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2362 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2363 int to_shift = 12;
2364
2365 /* We are in the middle of reload, so this is really
2366 painful. However, we do still make an attempt to
2367 avoid emitting truly stupid code. */
2368 if (low1 != const0_rtx)
2369 {
2370 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2371 GEN_INT (to_shift))));
2372 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2373 sub_temp = op0;
2374 to_shift = 12;
2375 }
2376 else
2377 {
2378 to_shift += 12;
2379 }
2380 if (low2 != const0_rtx)
2381 {
2382 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2383 GEN_INT (to_shift))));
2384 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2385 sub_temp = op0;
2386 to_shift = 8;
2387 }
2388 else
2389 {
2390 to_shift += 8;
2391 }
2392 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2393 GEN_INT (to_shift))));
2394 if (low3 != const0_rtx)
2395 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2396 /* phew... */
2397 }
2398 }
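/* As a hedged illustration of the full decomposition when pseudos
   are available: loading 0x123456789abcdef0 builds the high word
   with sethi/or, shifts it into place, builds the low word with
   sethi/or, and adds the two halves:

	sethi	%hi(0x12345400), %temp	! high word
	or	%temp, 0x278, %sub_temp
	sllx	%sub_temp, 32, %temp4
	sethi	%hi(0x9abcdc00), %temp2	! low word
	or	%temp2, 0x2f0, %temp3
	add	%temp4, %temp3, %op0

   for a total of six instructions. */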
2399
2400 /* Analyze a 64-bit constant for certain properties. */
2401 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2402 unsigned HOST_WIDE_INT,
2403 int *, int *, int *);
2404
2405 static void
2406 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2407 unsigned HOST_WIDE_INT low_bits,
2408 int *hbsp, int *lbsp, int *abbasp)
2409 {
2410 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2411 int i;
2412
2413 lowest_bit_set = highest_bit_set = -1;
2414 i = 0;
2415 do
2416 {
2417 if ((lowest_bit_set == -1)
2418 && ((low_bits >> i) & 1))
2419 lowest_bit_set = i;
2420 if ((highest_bit_set == -1)
2421 && ((high_bits >> (32 - i - 1)) & 1))
2422 highest_bit_set = (64 - i - 1);
2423 }
2424 while (++i < 32
2425 && ((highest_bit_set == -1)
2426 || (lowest_bit_set == -1)));
2427 if (i == 32)
2428 {
2429 i = 0;
2430 do
2431 {
2432 if ((lowest_bit_set == -1)
2433 && ((high_bits >> i) & 1))
2434 lowest_bit_set = i + 32;
2435 if ((highest_bit_set == -1)
2436 && ((low_bits >> (32 - i - 1)) & 1))
2437 highest_bit_set = 32 - i - 1;
2438 }
2439 while (++i < 32
2440 && ((highest_bit_set == -1)
2441 || (lowest_bit_set == -1)));
2442 }
2443 /* If there are no bits set, this should have gone out
2444 as one instruction! */
2445 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2446 all_bits_between_are_set = 1;
2447 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2448 {
2449 if (i < 32)
2450 {
2451 if ((low_bits & (1 << i)) != 0)
2452 continue;
2453 }
2454 else
2455 {
2456 if ((high_bits & (1 << (i - 32))) != 0)
2457 continue;
2458 }
2459 all_bits_between_are_set = 0;
2460 break;
2461 }
2462 *hbsp = highest_bit_set;
2463 *lbsp = lowest_bit_set;
2464 *abbasp = all_bits_between_are_set;
2465 }
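/* For example (illustrative), analyzing 0x000000000003fc00, i.e.
   high_bits == 0 and low_bits == 0x0003fc00, yields
   lowest_bit_set == 10, highest_bit_set == 17 and
   all_bits_between_are_set == 1, since bits 10 through 17 form a
   contiguous run. */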
2466
2467 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2468
2469 static int
2470 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2471 unsigned HOST_WIDE_INT low_bits)
2472 {
2473 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2474
2475 if (high_bits == 0
2476 || high_bits == 0xffffffff)
2477 return 1;
2478
2479 analyze_64bit_constant (high_bits, low_bits,
2480 &highest_bit_set, &lowest_bit_set,
2481 &all_bits_between_are_set);
2482
2483 if ((highest_bit_set == 63
2484 || lowest_bit_set == 0)
2485 && all_bits_between_are_set != 0)
2486 return 1;
2487
2488 if ((highest_bit_set - lowest_bit_set) < 21)
2489 return 1;
2490
2491 return 0;
2492 }
2493
2494 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2495 unsigned HOST_WIDE_INT,
2496 int, int);
2497
2498 static unsigned HOST_WIDE_INT
2499 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2500 unsigned HOST_WIDE_INT low_bits,
2501 int lowest_bit_set, int shift)
2502 {
2503 HOST_WIDE_INT hi, lo;
2504
2505 if (lowest_bit_set < 32)
2506 {
2507 lo = (low_bits >> lowest_bit_set) << shift;
2508 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2509 }
2510 else
2511 {
2512 lo = 0;
2513 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2514 }
2515 gcc_assert (! (hi & lo));
2516 return (hi | lo);
2517 }
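/* Continuing the example above (illustrative): with high_bits == 0,
   low_bits == 0x0003fc00, lowest_bit_set == 10 and shift == 0, the
   function returns 0xff, i.e. the run of set bits moved down to
   start at bit 0; with shift == 10 the run would instead be placed
   to start at bit 10. */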
2518
2519 /* Here we are sure to be arch64 and this is an integer constant
2520 being loaded into a register. Emit the most efficient
2521 insn sequence possible. Detection of all the 1-insn cases
2522 has been done already. */
2523 static void
2524 sparc_emit_set_const64 (rtx op0, rtx op1)
2525 {
2526 unsigned HOST_WIDE_INT high_bits, low_bits;
2527 int lowest_bit_set, highest_bit_set;
2528 int all_bits_between_are_set;
2529 rtx temp = 0;
2530
2531 /* Sanity check that we know what we are working with. */
2532 gcc_assert (TARGET_ARCH64
2533 && (GET_CODE (op0) == SUBREG
2534 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2535
2536 if (! can_create_pseudo_p ())
2537 temp = op0;
2538
2539 if (GET_CODE (op1) != CONST_INT)
2540 {
2541 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2542 return;
2543 }
2544
2545 if (! temp)
2546 temp = gen_reg_rtx (DImode);
2547
2548 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2549 low_bits = (INTVAL (op1) & 0xffffffff);
2550
2551 /* low_bits bits 0 --> 31
2552 high_bits bits 32 --> 63 */
2553
2554 analyze_64bit_constant (high_bits, low_bits,
2555 &highest_bit_set, &lowest_bit_set,
2556 &all_bits_between_are_set);
2557
2558 /* First try for a 2-insn sequence. */
2559
2560 /* These situations are preferred because the optimizer can
2561 * do more things with them:
2562 * 1) mov -1, %reg
2563 * sllx %reg, shift, %reg
2564 * 2) mov -1, %reg
2565 * srlx %reg, shift, %reg
2566 * 3) mov some_small_const, %reg
2567 * sllx %reg, shift, %reg
2568 */
2569 if (((highest_bit_set == 63
2570 || lowest_bit_set == 0)
2571 && all_bits_between_are_set != 0)
2572 || ((highest_bit_set - lowest_bit_set) < 12))
2573 {
2574 HOST_WIDE_INT the_const = -1;
2575 int shift = lowest_bit_set;
2576
2577 if ((highest_bit_set != 63
2578 && lowest_bit_set != 0)
2579 || all_bits_between_are_set == 0)
2580 {
2581 the_const =
2582 create_simple_focus_bits (high_bits, low_bits,
2583 lowest_bit_set, 0);
2584 }
2585 else if (lowest_bit_set == 0)
2586 shift = -(63 - highest_bit_set);
2587
2588 gcc_assert (SPARC_SIMM13_P (the_const));
2589 gcc_assert (shift != 0);
2590
2591 emit_insn (gen_safe_SET64 (temp, the_const));
2592 if (shift > 0)
2593 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2594 GEN_INT (shift))));
2595 else if (shift < 0)
2596 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2597 GEN_INT (-shift))));
2598 return;
2599 }
2600
2601 /* Now a range of 22 or fewer bits set somewhere.
2602 * 1) sethi %hi(focus_bits), %reg
2603 * sllx %reg, shift, %reg
2604 * 2) sethi %hi(focus_bits), %reg
2605 * srlx %reg, shift, %reg
2606 */
2607 if ((highest_bit_set - lowest_bit_set) < 21)
2608 {
2609 unsigned HOST_WIDE_INT focus_bits =
2610 create_simple_focus_bits (high_bits, low_bits,
2611 lowest_bit_set, 10);
2612
2613 gcc_assert (SPARC_SETHI_P (focus_bits));
2614 gcc_assert (lowest_bit_set != 10);
2615
2616 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2617
2618 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2619 if (lowest_bit_set < 10)
2620 emit_insn (gen_rtx_SET (op0,
2621 gen_rtx_LSHIFTRT (DImode, temp,
2622 GEN_INT (10 - lowest_bit_set))));
2623 else if (lowest_bit_set > 10)
2624 emit_insn (gen_rtx_SET (op0,
2625 gen_rtx_ASHIFT (DImode, temp,
2626 GEN_INT (lowest_bit_set - 10))));
2627 return;
2628 }
2629
2630 /* 1) sethi %hi(low_bits), %reg
2631 * or %reg, %lo(low_bits), %reg
2632 * 2) sethi %hi(~low_bits), %reg
2633 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2634 */
2635 if (high_bits == 0
2636 || high_bits == 0xffffffff)
2637 {
2638 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2639 (high_bits == 0xffffffff));
2640 return;
2641 }
2642
2643 /* Now, try 3-insn sequences. */
2644
2645 /* 1) sethi %hi(high_bits), %reg
2646 * or %reg, %lo(high_bits), %reg
2647 * sllx %reg, 32, %reg
2648 */
2649 if (low_bits == 0)
2650 {
2651 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2652 return;
2653 }
2654
2655 /* We may be able to do something quick
2656 when the constant is negated, so try that. */
2657 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2658 (~low_bits) & 0xfffffc00))
2659 {
2660 /* NOTE: The trailing bits get XOR'd so we need the
2661 non-negated bits, not the negated ones. */
2662 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2663
2664 if ((((~high_bits) & 0xffffffff) == 0
2665 && ((~low_bits) & 0x80000000) == 0)
2666 || (((~high_bits) & 0xffffffff) == 0xffffffff
2667 && ((~low_bits) & 0x80000000) != 0))
2668 {
2669 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2670
2671 if ((SPARC_SETHI_P (fast_int)
2672 && (~high_bits & 0xffffffff) == 0)
2673 || SPARC_SIMM13_P (fast_int))
2674 emit_insn (gen_safe_SET64 (temp, fast_int));
2675 else
2676 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2677 }
2678 else
2679 {
2680 rtx negated_const;
2681 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2682 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2683 sparc_emit_set_const64 (temp, negated_const);
2684 }
2685
2686 /* If we are XOR'ing with -1, then we should emit a one's complement
2687 instead. This way the combiner will notice logical operations
2688 such as ANDN later on and substitute. */
2689 if (trailing_bits == 0x3ff)
2690 {
2691 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2692 }
2693 else
2694 {
2695 emit_insn (gen_rtx_SET (op0,
2696 gen_safe_XOR64 (temp,
2697 (-0x400 | trailing_bits))));
2698 }
2699 return;
2700 }
2701
2702 /* 1) sethi %hi(xxx), %reg
2703 * or %reg, %lo(xxx), %reg
2704 * sllx %reg, yyy, %reg
2705 *
2706 * ??? This is just a generalized version of the low_bits==0
2707 * thing above, FIXME...
2708 */
2709 if ((highest_bit_set - lowest_bit_set) < 32)
2710 {
2711 unsigned HOST_WIDE_INT focus_bits =
2712 create_simple_focus_bits (high_bits, low_bits,
2713 lowest_bit_set, 0);
2714
2715 /* We can't get here in this state. */
2716 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2717
2718 /* So what we know is that the set bits straddle the
2719 middle of the 64-bit word. */
2720 sparc_emit_set_const64_quick2 (op0, temp,
2721 focus_bits, 0,
2722 lowest_bit_set);
2723 return;
2724 }
2725
2726 /* 1) sethi %hi(high_bits), %reg
2727 * or %reg, %lo(high_bits), %reg
2728 * sllx %reg, 32, %reg
2729 * or %reg, low_bits, %reg
2730 */
2731 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2732 {
2733 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2734 return;
2735 }
2736
2737 /* The easiest way when all else fails, is full decomposition. */
2738 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2739 }
2740
2741 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2742 return the mode to be used for the comparison. For floating-point,
2743 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2744 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2745 processing is needed. */
2746
2747 machine_mode
2748 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2749 {
2750 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2751 {
2752 switch (op)
2753 {
2754 case EQ:
2755 case NE:
2756 case UNORDERED:
2757 case ORDERED:
2758 case UNLT:
2759 case UNLE:
2760 case UNGT:
2761 case UNGE:
2762 case UNEQ:
2763 case LTGT:
2764 return CCFPmode;
2765
2766 case LT:
2767 case LE:
2768 case GT:
2769 case GE:
2770 return CCFPEmode;
2771
2772 default:
2773 gcc_unreachable ();
2774 }
2775 }
2776 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2777 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2778 {
2779 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2780 return CCX_NOOVmode;
2781 else
2782 return CC_NOOVmode;
2783 }
2784 else
2785 {
2786 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2787 return CCXmode;
2788 else
2789 return CCmode;
2790 }
2791 }
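/* A few illustrative picks (not exhaustive): comparing the result
   of (plus:SI (reg) (reg)) against zero yields CC_NOOVmode,
   meaning the overflow bit is not to be relied upon; a plain
   DImode register comparison on TARGET_ARCH64 yields CCXmode; an
   SFmode comparison yields CCFPmode for EQ/NE and the unordered
   codes, but CCFPEmode for LT/LE/GT/GE, which must signal on
   NaNs. */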
2792
2793 /* Emit the compare insn and return the CC reg for a CODE comparison
2794 with operands X and Y. */
2795
2796 static rtx
2797 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2798 {
2799 machine_mode mode;
2800 rtx cc_reg;
2801
2802 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2803 return x;
2804
2805 mode = SELECT_CC_MODE (code, x, y);
2806
2807 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2808 fcc regs (cse can't tell they're really call clobbered regs and will
2809 remove a duplicate comparison even if there is an intervening function
2810 call - it will then try to reload the cc reg via an int reg which is why
2811 we need the movcc patterns). It is possible to provide the movcc
2812 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2813 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2814 to tell cse that CCFPE mode registers (even pseudos) are call
2815 clobbered. */
2816
2817 /* ??? This is an experiment. Rather than making changes to cse which may
2818 or may not be easy/clean, we do our own cse. This is possible because
2819 we will generate hard registers. Cse knows they're call clobbered (it
2820 doesn't know the same thing about pseudos). If we guess wrong, no big
2821 deal, but if we win, great! */
2822
2823 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2824 #if 1 /* experiment */
2825 {
2826 int reg;
2827 /* We cycle through the registers to ensure they're all exercised. */
2828 static int next_fcc_reg = 0;
2829 /* Previous x,y for each fcc reg. */
2830 static rtx prev_args[4][2];
2831
2832 /* Scan prev_args for x,y. */
2833 for (reg = 0; reg < 4; reg++)
2834 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2835 break;
2836 if (reg == 4)
2837 {
2838 reg = next_fcc_reg;
2839 prev_args[reg][0] = x;
2840 prev_args[reg][1] = y;
2841 next_fcc_reg = (next_fcc_reg + 1) & 3;
2842 }
2843 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2844 }
2845 #else
2846 cc_reg = gen_reg_rtx (mode);
2847 #endif /* ! experiment */
2848 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2849 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2850 else
2851 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2852
2853 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2854 will only result in an unrecognizable insn so no point in asserting. */
2855 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2856
2857 return cc_reg;
2858 }
2859
2860
2861 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2862
2863 rtx
2864 gen_compare_reg (rtx cmp)
2865 {
2866 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2867 }
2868
2869 /* This function is used for v9 only.
2870 DEST is the target of the Scc insn.
2871 CODE is the code for an Scc's comparison.
2872 X and Y are the values we compare.
2873
2874 This function is needed to turn
2875
2876 (set (reg:SI 110)
2877 (gt (reg:CCX 100 %icc)
2878 (const_int 0)))
2879 into
2880 (set (reg:SI 110)
2881 (gt:DI (reg:CCX 100 %icc)
2882 (const_int 0)))
2883
2884 I.e., the instruction recognizer needs to see the mode of the comparison to
2885 find the right instruction. We could use "gt:DI" right in the
2886 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2887
2888 static int
2889 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2890 {
2891 if (! TARGET_ARCH64
2892 && (GET_MODE (x) == DImode
2893 || GET_MODE (dest) == DImode))
2894 return 0;
2895
2896 /* Try to use the movrCC insns. */
2897 if (TARGET_ARCH64
2898 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2899 && y == const0_rtx
2900 && v9_regcmp_p (compare_code))
2901 {
2902 rtx op0 = x;
2903 rtx temp;
2904
2905 /* Special case for op0 != 0. This can be done with one instruction if
2906 dest == x. */
2907
2908 if (compare_code == NE
2909 && GET_MODE (dest) == DImode
2910 && rtx_equal_p (op0, dest))
2911 {
2912 emit_insn (gen_rtx_SET (dest,
2913 gen_rtx_IF_THEN_ELSE (DImode,
2914 gen_rtx_fmt_ee (compare_code, DImode,
2915 op0, const0_rtx),
2916 const1_rtx,
2917 dest)));
2918 return 1;
2919 }
2920
2921 if (reg_overlap_mentioned_p (dest, op0))
2922 {
2923 /* Handle the case where dest == x.
2924 We "early clobber" the result. */
2925 op0 = gen_reg_rtx (GET_MODE (x));
2926 emit_move_insn (op0, x);
2927 }
2928
2929 emit_insn (gen_rtx_SET (dest, const0_rtx));
2930 if (GET_MODE (op0) != DImode)
2931 {
2932 temp = gen_reg_rtx (DImode);
2933 convert_move (temp, op0, 0);
2934 }
2935 else
2936 temp = op0;
2937 emit_insn (gen_rtx_SET (dest,
2938 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2939 gen_rtx_fmt_ee (compare_code, DImode,
2940 temp, const0_rtx),
2941 const1_rtx,
2942 dest)));
2943 return 1;
2944 }
2945 else
2946 {
2947 x = gen_compare_reg_1 (compare_code, x, y);
2948 y = const0_rtx;
2949
2950 gcc_assert (GET_MODE (x) != CC_NOOVmode
2951 && GET_MODE (x) != CCX_NOOVmode);
2952
2953 emit_insn (gen_rtx_SET (dest, const0_rtx));
2954 emit_insn (gen_rtx_SET (dest,
2955 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2956 gen_rtx_fmt_ee (compare_code,
2957 GET_MODE (x), x, y),
2958 const1_rtx, dest)));
2959 return 1;
2960 }
2961 }
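/* For the "op0 != 0 with dest == op0" special case above, the
   emitted RTL corresponds to a single v9 register-conditional
   move, roughly (an illustrative sketch)

	movrnz	%dest, 1, %dest	! dest = (dest != 0) ? 1 : dest

   which leaves dest at 0 when it was already 0 and sets it to 1
   otherwise. */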
2962
2963
2964 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2965 without jumps using the addx/subx instructions. */
2966
2967 bool
2968 emit_scc_insn (rtx operands[])
2969 {
2970 rtx tem;
2971 rtx x;
2972 rtx y;
2973 enum rtx_code code;
2974
2975 /* The quad-word fp compare library routines all return nonzero to indicate
2976 true, which is different from the equivalent libgcc routines, so we must
2977 handle them specially here. */
2978 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2979 {
2980 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2981 GET_CODE (operands[1]));
2982 operands[2] = XEXP (operands[1], 0);
2983 operands[3] = XEXP (operands[1], 1);
2984 }
2985
2986 code = GET_CODE (operands[1]);
2987 x = operands[2];
2988 y = operands[3];
2989
2990 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2991 more applications). The exception to this is "reg != 0" which can
2992 be done in one instruction on v9 (so we do it). */
2993 if (code == EQ)
2994 {
2995 if (GET_MODE (x) == SImode)
2996 {
2997 rtx pat;
2998 if (TARGET_ARCH64)
2999 pat = gen_seqsidi_special (operands[0], x, y);
3000 else
3001 pat = gen_seqsisi_special (operands[0], x, y);
3002 emit_insn (pat);
3003 return true;
3004 }
3005 else if (GET_MODE (x) == DImode)
3006 {
3007 rtx pat = gen_seqdi_special (operands[0], x, y);
3008 emit_insn (pat);
3009 return true;
3010 }
3011 }
3012
3013 if (code == NE)
3014 {
3015 if (GET_MODE (x) == SImode)
3016 {
3017 rtx pat;
3018 if (TARGET_ARCH64)
3019 pat = gen_snesidi_special (operands[0], x, y);
3020 else
3021 pat = gen_snesisi_special (operands[0], x, y);
3022 emit_insn (pat);
3023 return true;
3024 }
3025 else if (GET_MODE (x) == DImode)
3026 {
3027 rtx pat;
3028 if (TARGET_VIS3)
3029 pat = gen_snedi_special_vis3 (operands[0], x, y);
3030 else
3031 pat = gen_snedi_special (operands[0], x, y);
3032 emit_insn (pat);
3033 return true;
3034 }
3035 }
3036
3037 if (TARGET_V9
3038 && TARGET_ARCH64
3039 && GET_MODE (x) == DImode
3040 && !(TARGET_VIS3
3041 && (code == GTU || code == LTU))
3042 && gen_v9_scc (operands[0], code, x, y))
3043 return true;
3044
3045 /* We can do LTU and GEU using the addx/subx instructions too. And
3046 for GTU/LEU, if both operands are registers, swap them and fall
3047 back to the easy case. */
3048 if (code == GTU || code == LEU)
3049 {
3050 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3051 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3052 {
3053 tem = x;
3054 x = y;
3055 y = tem;
3056 code = swap_condition (code);
3057 }
3058 }
3059
3060 if (code == LTU
3061 || (!TARGET_VIS3 && code == GEU))
3062 {
3063 emit_insn (gen_rtx_SET (operands[0],
3064 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3065 gen_compare_reg_1 (code, x, y),
3066 const0_rtx)));
3067 return true;
3068 }
3069
3070 /* All the possibilities to use addx/subx-based sequences have been
3071 exhausted, so try for a 3-instruction sequence using v9 conditional
3072 moves. */
3073 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3074 return true;
3075
3076 /* Nope, do branches. */
3077 return false;
3078 }
3079
3080 /* Emit a conditional jump insn for the v9 architecture using comparison code
3081 CODE and jump target LABEL.
3082 This function exists to take advantage of the v9 brxx insns. */
3083
3084 static void
3085 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3086 {
3087 emit_jump_insn (gen_rtx_SET (pc_rtx,
3088 gen_rtx_IF_THEN_ELSE (VOIDmode,
3089 gen_rtx_fmt_ee (code, GET_MODE (op0),
3090 op0, const0_rtx),
3091 gen_rtx_LABEL_REF (VOIDmode, label),
3092 pc_rtx)));
3093 }
3094
3095 /* Emit a conditional jump insn for the UA2011 architecture using
3096 comparison code CODE and jump target LABEL. This function exists
3097 to take advantage of the UA2011 Compare and Branch insns. */
3098
3099 static void
3100 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3101 {
3102 rtx if_then_else;
3103
3104 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3105 gen_rtx_fmt_ee(code, GET_MODE(op0),
3106 op0, op1),
3107 gen_rtx_LABEL_REF (VOIDmode, label),
3108 pc_rtx);
3109
3110 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3111 }
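/* The resulting insn matches the cbcond patterns in sparc.md and
   assembles to a fused compare-and-branch, e.g. (illustrative)
   "cwbne %o1, 5, .Llabel" for a 32-bit not-equal test against the
   5-bit signed immediate 5. */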
3112
3113 void
3114 emit_conditional_branch_insn (rtx operands[])
3115 {
3116 /* The quad-word fp compare library routines all return nonzero to indicate
3117 true, which is different from the equivalent libgcc routines, so we must
3118 handle them specially here. */
3119 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3120 {
3121 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3122 GET_CODE (operands[0]));
3123 operands[1] = XEXP (operands[0], 0);
3124 operands[2] = XEXP (operands[0], 1);
3125 }
3126
3127 /* If we can tell early on that the comparison is against a constant
3128 that won't fit in the 5-bit signed immediate field of a cbcond,
3129 use one of the other v9 conditional branch sequences. */
3130 if (TARGET_CBCOND
3131 && GET_CODE (operands[1]) == REG
3132 && (GET_MODE (operands[1]) == SImode
3133 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3134 && (GET_CODE (operands[2]) != CONST_INT
3135 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3136 {
3137 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3138 return;
3139 }
3140
3141 if (TARGET_ARCH64 && operands[2] == const0_rtx
3142 && GET_CODE (operands[1]) == REG
3143 && GET_MODE (operands[1]) == DImode)
3144 {
3145 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3146 return;
3147 }
3148
3149 operands[1] = gen_compare_reg (operands[0]);
3150 operands[2] = const0_rtx;
3151 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3152 operands[1], operands[2]);
3153 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3154 operands[3]));
3155 }
3156
3157
3158 /* Generate a DFmode part of a hard TFmode register.
3159 REG is the TFmode hard register, LOW is 1 for the
3160 low 64 bits of the register and 0 otherwise.
3161 */
3162 rtx
3163 gen_df_reg (rtx reg, int low)
3164 {
3165 int regno = REGNO (reg);
3166
3167 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3168 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3169 return gen_rtx_REG (DFmode, regno);
3170 }
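/* For instance (illustrative), with big-endian words a TFmode
   value in %f0 occupies %f0-%f3; gen_df_reg with LOW == 0 returns
   the DFmode register %f0 (the high half) and with LOW == 1
   returns %f2 (the low half, two FP registers further on). */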
3171 \f
3172 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3173 Unlike normal calls, TFmode operands are passed by reference. It is
3174 assumed that no more than 3 operands are required. */
3175
3176 static void
3177 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3178 {
3179 rtx ret_slot = NULL, arg[3], func_sym;
3180 int i;
3181
3182 /* We only expect to be called for conversions, unary, and binary ops. */
3183 gcc_assert (nargs == 2 || nargs == 3);
3184
3185 for (i = 0; i < nargs; ++i)
3186 {
3187 rtx this_arg = operands[i];
3188 rtx this_slot;
3189
3190 /* TFmode arguments and return values are passed by reference. */
3191 if (GET_MODE (this_arg) == TFmode)
3192 {
3193 int force_stack_temp;
3194
3195 force_stack_temp = 0;
3196 if (TARGET_BUGGY_QP_LIB && i == 0)
3197 force_stack_temp = 1;
3198
3199 if (GET_CODE (this_arg) == MEM
3200 && ! force_stack_temp)
3201 {
3202 tree expr = MEM_EXPR (this_arg);
3203 if (expr)
3204 mark_addressable (expr);
3205 this_arg = XEXP (this_arg, 0);
3206 }
3207 else if (CONSTANT_P (this_arg)
3208 && ! force_stack_temp)
3209 {
3210 this_slot = force_const_mem (TFmode, this_arg);
3211 this_arg = XEXP (this_slot, 0);
3212 }
3213 else
3214 {
3215 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3216
3217 /* Operand 0 is the return value. We'll copy it out later. */
3218 if (i > 0)
3219 emit_move_insn (this_slot, this_arg);
3220 else
3221 ret_slot = this_slot;
3222
3223 this_arg = XEXP (this_slot, 0);
3224 }
3225 }
3226
3227 arg[i] = this_arg;
3228 }
3229
3230 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3231
3232 if (GET_MODE (operands[0]) == TFmode)
3233 {
3234 if (nargs == 2)
3235 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3236 arg[0], GET_MODE (arg[0]),
3237 arg[1], GET_MODE (arg[1]));
3238 else
3239 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3240 arg[0], GET_MODE (arg[0]),
3241 arg[1], GET_MODE (arg[1]),
3242 arg[2], GET_MODE (arg[2]));
3243
3244 if (ret_slot)
3245 emit_move_insn (operands[0], ret_slot);
3246 }
3247 else
3248 {
3249 rtx ret;
3250
3251 gcc_assert (nargs == 2);
3252
3253 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3254 GET_MODE (operands[0]), 1,
3255 arg[1], GET_MODE (arg[1]));
3256
3257 if (ret != operands[0])
3258 emit_move_insn (operands[0], ret);
3259 }
3260 }
3261
3262 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3263
3264 static void
3265 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3266 {
3267 const char *func;
3268
3269 switch (code)
3270 {
3271 case PLUS:
3272 func = "_Qp_add";
3273 break;
3274 case MINUS:
3275 func = "_Qp_sub";
3276 break;
3277 case MULT:
3278 func = "_Qp_mul";
3279 break;
3280 case DIV:
3281 func = "_Qp_div";
3282 break;
3283 default:
3284 gcc_unreachable ();
3285 }
3286
3287 emit_soft_tfmode_libcall (func, 3, operands);
3288 }
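/* As a hedged sketch of the net effect: a TFmode addition compiled
   without hard quad support ends up as a call in which all three
   TFmode operands are passed by reference, along the lines of

	_Qp_add (&result, &x, &y);

   with emit_soft_tfmode_libcall arranging the stack slots and the
   address arguments. */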
3289
3290 static void
3291 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3292 {
3293 const char *func;
3294
3295 gcc_assert (code == SQRT);
3296 func = "_Qp_sqrt";
3297
3298 emit_soft_tfmode_libcall (func, 2, operands);
3299 }
3300
3301 static void
3302 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3303 {
3304 const char *func;
3305
3306 switch (code)
3307 {
3308 case FLOAT_EXTEND:
3309 switch (GET_MODE (operands[1]))
3310 {
3311 case SFmode:
3312 func = "_Qp_stoq";
3313 break;
3314 case DFmode:
3315 func = "_Qp_dtoq";
3316 break;
3317 default:
3318 gcc_unreachable ();
3319 }
3320 break;
3321
3322 case FLOAT_TRUNCATE:
3323 switch (GET_MODE (operands[0]))
3324 {
3325 case SFmode:
3326 func = "_Qp_qtos";
3327 break;
3328 case DFmode:
3329 func = "_Qp_qtod";
3330 break;
3331 default:
3332 gcc_unreachable ();
3333 }
3334 break;
3335
3336 case FLOAT:
3337 switch (GET_MODE (operands[1]))
3338 {
3339 case SImode:
3340 func = "_Qp_itoq";
3341 if (TARGET_ARCH64)
3342 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3343 break;
3344 case DImode:
3345 func = "_Qp_xtoq";
3346 break;
3347 default:
3348 gcc_unreachable ();
3349 }
3350 break;
3351
3352 case UNSIGNED_FLOAT:
3353 switch (GET_MODE (operands[1]))
3354 {
3355 case SImode:
3356 func = "_Qp_uitoq";
3357 if (TARGET_ARCH64)
3358 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3359 break;
3360 case DImode:
3361 func = "_Qp_uxtoq";
3362 break;
3363 default:
3364 gcc_unreachable ();
3365 }
3366 break;
3367
3368 case FIX:
3369 switch (GET_MODE (operands[0]))
3370 {
3371 case SImode:
3372 func = "_Qp_qtoi";
3373 break;
3374 case DImode:
3375 func = "_Qp_qtox";
3376 break;
3377 default:
3378 gcc_unreachable ();
3379 }
3380 break;
3381
3382 case UNSIGNED_FIX:
3383 switch (GET_MODE (operands[0]))
3384 {
3385 case SImode:
3386 func = "_Qp_qtoui";
3387 break;
3388 case DImode:
3389 func = "_Qp_qtoux";
3390 break;
3391 default:
3392 gcc_unreachable ();
3393 }
3394 break;
3395
3396 default:
3397 gcc_unreachable ();
3398 }
3399
3400 emit_soft_tfmode_libcall (func, 2, operands);
3401 }
3402
3403 /* Expand a hard-float TFmode operation. All arguments must be in
3404 registers. */
3405
3406 static void
3407 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3408 {
3409 rtx op, dest;
3410
3411 if (GET_RTX_CLASS (code) == RTX_UNARY)
3412 {
3413 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3414 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3415 }
3416 else
3417 {
3418 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3419 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3420 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3421 operands[1], operands[2]);
3422 }
3423
3424 if (register_operand (operands[0], VOIDmode))
3425 dest = operands[0];
3426 else
3427 dest = gen_reg_rtx (GET_MODE (operands[0]));
3428
3429 emit_insn (gen_rtx_SET (dest, op));
3430
3431 if (dest != operands[0])
3432 emit_move_insn (operands[0], dest);
3433 }
3434
3435 void
3436 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3437 {
3438 if (TARGET_HARD_QUAD)
3439 emit_hard_tfmode_operation (code, operands);
3440 else
3441 emit_soft_tfmode_binop (code, operands);
3442 }
3443
3444 void
3445 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3446 {
3447 if (TARGET_HARD_QUAD)
3448 emit_hard_tfmode_operation (code, operands);
3449 else
3450 emit_soft_tfmode_unop (code, operands);
3451 }
3452
3453 void
3454 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3455 {
3456 if (TARGET_HARD_QUAD)
3457 emit_hard_tfmode_operation (code, operands);
3458 else
3459 emit_soft_tfmode_cvt (code, operands);
3460 }
3461 \f
3462 /* Return nonzero if a branch/jump/call instruction will be emitting
3463 a nop into its delay slot. */
3464
3465 int
3466 empty_delay_slot (rtx_insn *insn)
3467 {
3468 rtx seq;
3469
3470 /* If no previous instruction (should not happen), return true. */
3471 if (PREV_INSN (insn) == NULL)
3472 return 1;
3473
3474 seq = NEXT_INSN (PREV_INSN (insn));
3475 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3476 return 0;
3477
3478 return 1;
3479 }
3480
3481 /* Return nonzero if we should emit a nop after a cbcond instruction.
3482 The cbcond instruction does not have a delay slot; however, there is
3483 a severe performance penalty if a control transfer appears right
3484 after a cbcond. Therefore we emit a nop when we detect this
3485 situation. */
3486
3487 int
3488 emit_cbcond_nop (rtx insn)
3489 {
3490 rtx next = next_active_insn (insn);
3491
3492 if (!next)
3493 return 1;
3494
3495 if (NONJUMP_INSN_P (next)
3496 && GET_CODE (PATTERN (next)) == SEQUENCE)
3497 next = XVECEXP (PATTERN (next), 0, 0);
3498 else if (CALL_P (next)
3499 && GET_CODE (PATTERN (next)) == PARALLEL)
3500 {
3501 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3502
3503 if (GET_CODE (delay) == RETURN)
3504 {
3505 /* It's a sibling call. Do not emit the nop if we're going
3506 to emit something other than the jump itself as the first
3507 instruction of the sibcall sequence. */
3508 if (sparc_leaf_function_p || TARGET_FLAT)
3509 return 0;
3510 }
3511 }
3512
3513 if (NONJUMP_INSN_P (next))
3514 return 0;
3515
3516 return 1;
3517 }
3518
3519 /* Return nonzero if TRIAL can go into the call delay slot. */
3520
3521 int
3522 eligible_for_call_delay (rtx_insn *trial)
3523 {
3524 rtx pat;
3525
3526 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3527 return 0;
3528
3529 /* Binutils allows
3530 call __tls_get_addr, %tgd_call (foo)
3531 add %l7, %o0, %o0, %tgd_add (foo)
3532 while Sun as/ld does not. */
3533 if (TARGET_GNU_TLS || !TARGET_TLS)
3534 return 1;
3535
3536 pat = PATTERN (trial);
3537
3538 /* We must reject tgd_add{32|64}, i.e.
3539 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3540 and tldm_add{32|64}, i.e.
3541 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3542 for Sun as/ld. */
3543 if (GET_CODE (pat) == SET
3544 && GET_CODE (SET_SRC (pat)) == PLUS)
3545 {
3546 rtx unspec = XEXP (SET_SRC (pat), 1);
3547
3548 if (GET_CODE (unspec) == UNSPEC
3549 && (XINT (unspec, 1) == UNSPEC_TLSGD
3550 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3551 return 0;
3552 }
3553
3554 return 1;
3555 }
3556
3557 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3558 instruction. RETURN_P is true if the v9 variant 'return' is to be
3559 considered in the test too.
3560
3561 TRIAL must be a SET whose destination is a REG appropriate for the
3562 'restore' instruction or, if RETURN_P is true, for the 'return'
3563 instruction. */
3564
3565 static int
3566 eligible_for_restore_insn (rtx trial, bool return_p)
3567 {
3568 rtx pat = PATTERN (trial);
3569 rtx src = SET_SRC (pat);
3570 bool src_is_freg = false;
3571 rtx src_reg;
3572
3573 /* Since we now can do moves between float and integer registers when
3574 VIS3 is enabled, we have to catch this case. We can allow such
3575 moves when doing a 'return', however. */
3576 src_reg = src;
3577 if (GET_CODE (src_reg) == SUBREG)
3578 src_reg = SUBREG_REG (src_reg);
3579 if (GET_CODE (src_reg) == REG
3580 && SPARC_FP_REG_P (REGNO (src_reg)))
3581 src_is_freg = true;
3582
3583 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3584 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3585 && arith_operand (src, GET_MODE (src))
3586 && ! src_is_freg)
3587 {
3588 if (TARGET_ARCH64)
3589 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3590 else
3591 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3592 }
3593
3594 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3595 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3596 && arith_double_operand (src, GET_MODE (src))
3597 && ! src_is_freg)
3598 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3599
3600 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3601 else if (! TARGET_FPU && register_operand (src, SFmode))
3602 return 1;
3603
3604 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3605 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3606 return 1;
3607
3608 /* If we have the 'return' instruction, anything that does not use
3609 local or output registers and can go into a delay slot wins. */
3610 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3611 return 1;
3612
3613 /* The 'restore src1,src2,dest' pattern for SImode. */
3614 else if (GET_CODE (src) == PLUS
3615 && register_operand (XEXP (src, 0), SImode)
3616 && arith_operand (XEXP (src, 1), SImode))
3617 return 1;
3618
3619 /* The 'restore src1,src2,dest' pattern for DImode. */
3620 else if (GET_CODE (src) == PLUS
3621 && register_operand (XEXP (src, 0), DImode)
3622 && arith_double_operand (XEXP (src, 1), DImode))
3623 return 1;
3624
3625 /* The 'restore src1,%lo(src2),dest' pattern. */
3626 else if (GET_CODE (src) == LO_SUM
3627 && ! TARGET_CM_MEDMID
3628 && ((register_operand (XEXP (src, 0), SImode)
3629 && immediate_operand (XEXP (src, 1), SImode))
3630 || (TARGET_ARCH64
3631 && register_operand (XEXP (src, 0), DImode)
3632 && immediate_operand (XEXP (src, 1), DImode))))
3633 return 1;
3634
3635 /* The 'restore src,src,dest' pattern, since x << 1 == x + x. */
3636 else if (GET_CODE (src) == ASHIFT
3637 && (register_operand (XEXP (src, 0), SImode)
3638 || register_operand (XEXP (src, 0), DImode))
3639 && XEXP (src, 1) == const1_rtx)
3640 return 1;
3641
3642 return 0;
3643 }
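/* For example (illustrative), a delay-slot candidate such as
   (set (reg:SI %i0) (reg:SI %o1)) satisfies the first pattern
   above and can be folded with the epilogue into the single
   instruction "restore %o1, %g0, %o0", the destination being
   named in the caller's register window. */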
3644
3645 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3646
3647 int
3648 eligible_for_return_delay (rtx_insn *trial)
3649 {
3650 int regno;
3651 rtx pat;
3652
3653 /* If the function uses __builtin_eh_return, the eh_return machinery
3654 occupies the delay slot. */
3655 if (crtl->calls_eh_return)
3656 return 0;
3657
3658 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3659 return 0;
3660
3661 /* In the case of a leaf or flat function, anything can go into the slot. */
3662 if (sparc_leaf_function_p || TARGET_FLAT)
3663 return 1;
3664
3665 if (!NONJUMP_INSN_P (trial))
3666 return 0;
3667
3668 pat = PATTERN (trial);
3669 if (GET_CODE (pat) == PARALLEL)
3670 {
3671 int i;
3672
3673 if (! TARGET_V9)
3674 return 0;
3675 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3676 {
3677 rtx expr = XVECEXP (pat, 0, i);
3678 if (GET_CODE (expr) != SET)
3679 return 0;
3680 if (GET_CODE (SET_DEST (expr)) != REG)
3681 return 0;
3682 regno = REGNO (SET_DEST (expr));
3683 if (regno >= 8 && regno < 24)
3684 return 0;
3685 }
3686 return !epilogue_renumber (&pat, 1);
3687 }
3688
3689 if (GET_CODE (pat) != SET)
3690 return 0;
3691
3692 if (GET_CODE (SET_DEST (pat)) != REG)
3693 return 0;
3694
3695 regno = REGNO (SET_DEST (pat));
3696
3697 /* Otherwise, only operations which can be done in tandem with
3698 a `restore' or `return' insn can go into the delay slot. */
3699 if (regno >= 8 && regno < 24)
3700 return 0;
3701
3702 /* If this instruction sets up a floating-point register and we have a
3703 'return' instruction, it can probably go in. But 'restore' will not
3704 work with FP_REGS. */
3705 if (! SPARC_INT_REG_P (regno))
3706 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3707
3708 return eligible_for_restore_insn (trial, true);
3709 }
3710
3711 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3712
3713 int
3714 eligible_for_sibcall_delay (rtx_insn *trial)
3715 {
3716 rtx pat;
3717
3718 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3719 return 0;
3720
3721 if (!NONJUMP_INSN_P (trial))
3722 return 0;
3723
3724 pat = PATTERN (trial);
3725
3726 if (sparc_leaf_function_p || TARGET_FLAT)
3727 {
3728 /* If the tail call is done using the call instruction,
3729 we have to restore %o7 in the delay slot. */
3730 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3731 return 0;
3732
3733 /* %g1 is used to build the function address. */
3734 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3735 return 0;
3736
3737 return 1;
3738 }
3739
3740 if (GET_CODE (pat) != SET)
3741 return 0;
3742
3743 /* Otherwise, only operations which can be done in tandem with
3744 a `restore' insn can go into the delay slot. */
3745 if (GET_CODE (SET_DEST (pat)) != REG
3746 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3747 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3748 return 0;
3749
3750 /* If it mentions %o7, it can't go in, because the sibcall will clobber
3751 it in most cases. */
3752 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3753 return 0;
3754
3755 return eligible_for_restore_insn (trial, false);
3756 }
3757 \f
3758 /* Determine if it's legal to put X into the constant pool. This
3759 is not possible if X contains the address of a symbol that is
3760 not constant (TLS) or not known at final link time (PIC). */
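
/* For instance (illustrative), under -fpic the address of a global
   variable cannot be spilled to the constant pool, since its value is
   only fixed at run time, whereas plain integer and FP constants can.  */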
3761
3762 static bool
3763 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3764 {
3765 switch (GET_CODE (x))
3766 {
3767 case CONST_INT:
3768 case CONST_WIDE_INT:
3769 case CONST_DOUBLE:
3770 case CONST_VECTOR:
3771 /* Accept all non-symbolic constants. */
3772 return false;
3773
3774 case LABEL_REF:
3775 /* Labels are OK iff we are non-PIC. */
3776 return flag_pic != 0;
3777
3778 case SYMBOL_REF:
3779 /* 'Naked' TLS symbol references are never OK;
3780 non-TLS symbols are OK iff we are non-PIC. */
3781 if (SYMBOL_REF_TLS_MODEL (x))
3782 return true;
3783 else
3784 return flag_pic != 0;
3785
3786 case CONST:
3787 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3788 case PLUS:
3789 case MINUS:
3790 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3791 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3792 case UNSPEC:
3793 return true;
3794 default:
3795 gcc_unreachable ();
3796 }
3797 }
3798 \f
3799 /* Global Offset Table support. */
3800 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3801 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3802
3803 /* Return the SYMBOL_REF for the Global Offset Table. */
3804
3805 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3806
3807 static rtx
3808 sparc_got (void)
3809 {
3810 if (!sparc_got_symbol)
3811 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3812
3813 return sparc_got_symbol;
3814 }
3815
3816 /* Ensure that we are not using patterns that are not OK with PIC. */
3817
3818 int
3819 check_pic (int i)
3820 {
3821 rtx op;
3822
3823 switch (flag_pic)
3824 {
3825 case 1:
3826 op = recog_data.operand[i];
3827 gcc_assert (GET_CODE (op) != SYMBOL_REF
3828 && (GET_CODE (op) != CONST
3829 || (GET_CODE (XEXP (op, 0)) == MINUS
3830 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3831 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
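      /* Fall through.  */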
3832 case 2:
3833 default:
3834 return 1;
3835 }
3836 }
3837
3838 /* Return true if X is an address which needs a temporary register when
3839 reloaded while generating PIC code. */
3840
3841 int
3842 pic_address_needs_scratch (rtx x)
3843 {
3844 /* An address that is symbolic plus a non-SMALL_INT constant needs a temp reg. */
3845 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3846 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3847 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3848 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3849 return 1;
3850
3851 return 0;
3852 }
3853
3854 /* Determine if a given RTX is a valid constant. We already know this
3855 satisfies CONSTANT_P. */
3856
3857 static bool
3858 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3859 {
3860 switch (GET_CODE (x))
3861 {
3862 case CONST:
3863 case SYMBOL_REF:
3864 if (sparc_tls_referenced_p (x))
3865 return false;
3866 break;
3867
3868 case CONST_DOUBLE:
3869 /* Floating-point constants are generally not OK. The only exceptions
3870 are 0.0 and all-ones, when VIS is available. */
3871 if (TARGET_VIS
3872 && SCALAR_FLOAT_MODE_P (mode)
3873 && (const_zero_operand (x, mode)
3874 || const_all_ones_operand (x, mode)))
3875 return true;
3876
3877 return false;
3878
3879 case CONST_VECTOR:
3880 /* Vector constants are generally not OK. The only exceptions are
3881 all-zeros and all-ones, when VIS is available. */
3882 if (TARGET_VIS
3883 && (const_zero_operand (x, mode)
3884 || const_all_ones_operand (x, mode)))
3885 return true;
3886
3887 return false;
3888
3889 default:
3890 break;
3891 }
3892
3893 return true;
3894 }
3895
3896 /* Determine if a given RTX is a valid constant address. */
3897
3898 bool
3899 constant_address_p (rtx x)
3900 {
3901 switch (GET_CODE (x))
3902 {
3903 case LABEL_REF:
3904 case CONST_INT:
3905 case HIGH:
3906 return true;
3907
3908 case CONST:
3909 if (flag_pic && pic_address_needs_scratch (x))
3910 return false;
3911 return sparc_legitimate_constant_p (Pmode, x);
3912
3913 case SYMBOL_REF:
3914 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3915
3916 default:
3917 return false;
3918 }
3919 }
3920
3921 /* Nonzero if the constant value X is a legitimate general operand
3922 when generating PIC code. It is given that flag_pic is on and
3923 that X satisfies CONSTANT_P. */
3924
3925 bool
3926 legitimate_pic_operand_p (rtx x)
3927 {
3928 if (pic_address_needs_scratch (x))
3929 return false;
3930 if (sparc_tls_referenced_p (x))
3931 return false;
3932 return true;
3933 }
3934
3935 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3936 (CONST_INT_P (X) \
3937 && INTVAL (X) >= -0x1000 \
3938 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3939
3940 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3941 (CONST_INT_P (X) \
3942 && INTVAL (X) >= -0x1000 \
3943 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
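
/* Both checks keep the offset within the signed 13-bit immediate field of
   the SPARC load/store instructions, shrunk by the access size so that the
   last word accessed still lies in range.  The OLO10 variant additionally
   leaves room for a %lo() addend of up to 0x3ff, presumably why its bound
   drops to 0xc00 (= 0x1000 - 0x400).  */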
3944
3945 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3946
3947 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3948 ordinarily. This changes a bit when generating PIC. */
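
/* For instance (illustrative), 'ld [%l1 + %l2], %o0' and 'ld [%fp - 8], %o0'
   use legitimate addresses, while an arbitrary 32-bit absolute address must
   first be split into a sethi/%lo pair.  */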
3949
3950 static bool
3951 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3952 {
3953 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3954
3955 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3956 rs1 = addr;
3957 else if (GET_CODE (addr) == PLUS)
3958 {
3959 rs1 = XEXP (addr, 0);
3960 rs2 = XEXP (addr, 1);
3961
3962 /* Canonicalize. REG comes first, if there are no regs,
3963 LO_SUM comes first. */
3964 if (!REG_P (rs1)
3965 && GET_CODE (rs1) != SUBREG
3966 && (REG_P (rs2)
3967 || GET_CODE (rs2) == SUBREG
3968 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3969 {
3970 rs1 = XEXP (addr, 1);
3971 rs2 = XEXP (addr, 0);
3972 }
3973
3974 if ((flag_pic == 1
3975 && rs1 == pic_offset_table_rtx
3976 && !REG_P (rs2)
3977 && GET_CODE (rs2) != SUBREG
3978 && GET_CODE (rs2) != LO_SUM
3979 && GET_CODE (rs2) != MEM
3980 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3981 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3982 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3983 || ((REG_P (rs1)
3984 || GET_CODE (rs1) == SUBREG)
3985 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3986 {
3987 imm1 = rs2;
3988 rs2 = NULL;
3989 }
3990 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3991 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3992 {
3993 /* We prohibit REG + REG for TFmode when there are no quad move insns
3994 and we consequently need to split. We do this because REG+REG
3995 is not an offsettable address. If we get the situation in reload
3996 where source and destination of a movtf pattern are both MEMs with
3997 REG+REG address, then only one of them gets converted to an
3998 offsettable address. */
3999 if (mode == TFmode
4000 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4001 return 0;
4002
4003 /* Likewise for TImode, but in all cases. */
4004 if (mode == TImode)
4005 return 0;
4006
4007 /* We prohibit REG + REG on ARCH32 if not optimizing for
4008 DFmode/DImode because then mem_min_alignment is likely to be zero
4009 after reload and the forced split would lack a matching splitter
4010 pattern. */
4011 if (TARGET_ARCH32 && !optimize
4012 && (mode == DFmode || mode == DImode))
4013 return 0;
4014 }
4015 else if (USE_AS_OFFSETABLE_LO10
4016 && GET_CODE (rs1) == LO_SUM
4017 && TARGET_ARCH64
4018 && ! TARGET_CM_MEDMID
4019 && RTX_OK_FOR_OLO10_P (rs2, mode))
4020 {
4021 rs2 = NULL;
4022 imm1 = XEXP (rs1, 1);
4023 rs1 = XEXP (rs1, 0);
4024 if (!CONSTANT_P (imm1)
4025 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4026 return 0;
4027 }
4028 }
4029 else if (GET_CODE (addr) == LO_SUM)
4030 {
4031 rs1 = XEXP (addr, 0);
4032 imm1 = XEXP (addr, 1);
4033
4034 if (!CONSTANT_P (imm1)
4035 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4036 return 0;
4037
4038 /* We can't allow TFmode in 32-bit mode, because an offset greater
4039 than the alignment (8) may cause the LO_SUM to overflow. */
4040 if (mode == TFmode && TARGET_ARCH32)
4041 return 0;
4042 }
4043 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4044 return 1;
4045 else
4046 return 0;
4047
4048 if (GET_CODE (rs1) == SUBREG)
4049 rs1 = SUBREG_REG (rs1);
4050 if (!REG_P (rs1))
4051 return 0;
4052
4053 if (rs2)
4054 {
4055 if (GET_CODE (rs2) == SUBREG)
4056 rs2 = SUBREG_REG (rs2);
4057 if (!REG_P (rs2))
4058 return 0;
4059 }
4060
4061 if (strict)
4062 {
4063 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4064 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4065 return 0;
4066 }
4067 else
4068 {
4069 if ((! SPARC_INT_REG_P (REGNO (rs1))
4070 && REGNO (rs1) != FRAME_POINTER_REGNUM
4071 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4072 || (rs2
4073 && (! SPARC_INT_REG_P (REGNO (rs2))
4074 && REGNO (rs2) != FRAME_POINTER_REGNUM
4075 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4076 return 0;
4077 }
4078 return 1;
4079 }
4080
4081 /* Return the SYMBOL_REF for the tls_get_addr function. */
4082
4083 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4084
4085 static rtx
4086 sparc_tls_get_addr (void)
4087 {
4088 if (!sparc_tls_symbol)
4089 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4090
4091 return sparc_tls_symbol;
4092 }
4093
4094 /* Return the Global Offset Table to be used in TLS mode. */
4095
4096 static rtx
4097 sparc_tls_got (void)
4098 {
4099 /* In PIC mode, this is just the PIC offset table. */
4100 if (flag_pic)
4101 {
4102 crtl->uses_pic_offset_table = 1;
4103 return pic_offset_table_rtx;
4104 }
4105
4106 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4107 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4108 if (TARGET_SUN_TLS && TARGET_ARCH32)
4109 {
4110 load_got_register ();
4111 return global_offset_table_rtx;
4112 }
4113
4114 /* In all other cases, we load a new pseudo with the GOT symbol. */
4115 return copy_to_reg (sparc_got ());
4116 }
4117
4118 /* Return true if X contains a thread-local symbol. */
4119
4120 static bool
4121 sparc_tls_referenced_p (rtx x)
4122 {
4123 if (!TARGET_HAVE_TLS)
4124 return false;
4125
4126 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4127 x = XEXP (XEXP (x, 0), 0);
4128
4129 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4130 return true;
4131
4132 /* That's all we handle in sparc_legitimize_tls_address for now. */
4133 return false;
4134 }
4135
4136 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4137 this (thread-local) address. */
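
/* For the global-dynamic model on 32-bit, for example, the emitted
   sequence is roughly (illustrative; see the tgd_* patterns in sparc.md):

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)

   with the address of SYM returned in %o0.  */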
4138
4139 static rtx
4140 sparc_legitimize_tls_address (rtx addr)
4141 {
4142 rtx temp1, temp2, temp3, ret, o0, got;
4143 rtx_insn *insn;
4144
4145 gcc_assert (can_create_pseudo_p ());
4146
4147 if (GET_CODE (addr) == SYMBOL_REF)
4148 switch (SYMBOL_REF_TLS_MODEL (addr))
4149 {
4150 case TLS_MODEL_GLOBAL_DYNAMIC:
4151 start_sequence ();
4152 temp1 = gen_reg_rtx (SImode);
4153 temp2 = gen_reg_rtx (SImode);
4154 ret = gen_reg_rtx (Pmode);
4155 o0 = gen_rtx_REG (Pmode, 8);
4156 got = sparc_tls_got ();
4157 emit_insn (gen_tgd_hi22 (temp1, addr));
4158 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4159 if (TARGET_ARCH32)
4160 {
4161 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4162 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4163 addr, const1_rtx));
4164 }
4165 else
4166 {
4167 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4168 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4169 addr, const1_rtx));
4170 }
4171 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4172 insn = get_insns ();
4173 end_sequence ();
4174 emit_libcall_block (insn, ret, o0, addr);
4175 break;
4176
4177 case TLS_MODEL_LOCAL_DYNAMIC:
4178 start_sequence ();
4179 temp1 = gen_reg_rtx (SImode);
4180 temp2 = gen_reg_rtx (SImode);
4181 temp3 = gen_reg_rtx (Pmode);
4182 ret = gen_reg_rtx (Pmode);
4183 o0 = gen_rtx_REG (Pmode, 8);
4184 got = sparc_tls_got ();
4185 emit_insn (gen_tldm_hi22 (temp1));
4186 emit_insn (gen_tldm_lo10 (temp2, temp1));
4187 if (TARGET_ARCH32)
4188 {
4189 emit_insn (gen_tldm_add32 (o0, got, temp2));
4190 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4191 const1_rtx));
4192 }
4193 else
4194 {
4195 emit_insn (gen_tldm_add64 (o0, got, temp2));
4196 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4197 const1_rtx));
4198 }
4199 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4200 insn = get_insns ();
4201 end_sequence ();
4202 emit_libcall_block (insn, temp3, o0,
4203 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4204 UNSPEC_TLSLD_BASE));
4205 temp1 = gen_reg_rtx (SImode);
4206 temp2 = gen_reg_rtx (SImode);
4207 emit_insn (gen_tldo_hix22 (temp1, addr));
4208 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4209 if (TARGET_ARCH32)
4210 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4211 else
4212 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4213 break;
4214
4215 case TLS_MODEL_INITIAL_EXEC:
4216 temp1 = gen_reg_rtx (SImode);
4217 temp2 = gen_reg_rtx (SImode);
4218 temp3 = gen_reg_rtx (Pmode);
4219 got = sparc_tls_got ();
4220 emit_insn (gen_tie_hi22 (temp1, addr));
4221 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4222 if (TARGET_ARCH32)
4223 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4224 else
4225 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4226 if (TARGET_SUN_TLS)
4227 {
4228 ret = gen_reg_rtx (Pmode);
4229 if (TARGET_ARCH32)
4230 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4231 temp3, addr));
4232 else
4233 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4234 temp3, addr));
4235 }
4236 else
4237 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4238 break;
4239
4240 case TLS_MODEL_LOCAL_EXEC:
4241 temp1 = gen_reg_rtx (Pmode);
4242 temp2 = gen_reg_rtx (Pmode);
4243 if (TARGET_ARCH32)
4244 {
4245 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4246 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4247 }
4248 else
4249 {
4250 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4251 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4252 }
4253 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4254 break;
4255
4256 default:
4257 gcc_unreachable ();
4258 }
4259
4260 else if (GET_CODE (addr) == CONST)
4261 {
4262 rtx base, offset;
4263
4264 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4265
4266 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4267 offset = XEXP (XEXP (addr, 0), 1);
4268
4269 base = force_operand (base, NULL_RTX);
4270 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4271 offset = force_reg (Pmode, offset);
4272 ret = gen_rtx_PLUS (Pmode, base, offset);
4273 }
4274
4275 else
4276 gcc_unreachable (); /* for now ... */
4277
4278 return ret;
4279 }
4280
4281 /* Legitimize PIC addresses. If the address is already position-independent,
4282 we return ORIG. Newly generated position-independent addresses go into a
4283 reg. This is REG if nonzero, otherwise we allocate register(s) as
4284 necessary. */
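
/* With -fPIC (flag_pic == 2), the address of a global symbol is thus
   fetched from the GOT by a sequence that is conceptually (illustrative;
   register names arbitrary):

	sethi	%hi(slot), %tmp		! high part of the GOT slot offset
	or	%tmp, %lo(slot), %tmp	! low part
	ld	[%l7 + %tmp], %reg	! load the address from the GOT

   the pic_gotdata_op patterns tagging the insns with relocations that
   allow the linker to relax the GOT load.  */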
4285
4286 static rtx
4287 sparc_legitimize_pic_address (rtx orig, rtx reg)
4288 {
4289 bool gotdata_op = false;
4290
4291 if (GET_CODE (orig) == SYMBOL_REF
4292 /* See the comment in sparc_expand_move. */
4293 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4294 {
4295 rtx pic_ref, address;
4296 rtx_insn *insn;
4297
4298 if (reg == 0)
4299 {
4300 gcc_assert (can_create_pseudo_p ());
4301 reg = gen_reg_rtx (Pmode);
4302 }
4303
4304 if (flag_pic == 2)
4305 {
4306 /* If not during reload, allocate another temp reg here for loading
4307 in the address, so that these instructions can be optimized
4308 properly. */
4309 rtx temp_reg = (! can_create_pseudo_p ()
4310 ? reg : gen_reg_rtx (Pmode));
4311
4312 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4313 won't get confused into thinking that these two instructions
4314 are loading in the true address of the symbol. If in the
4315 future a PIC rtx exists, that should be used instead. */
4316 if (TARGET_ARCH64)
4317 {
4318 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4319 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4320 }
4321 else
4322 {
4323 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4324 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4325 }
4326 address = temp_reg;
4327 gotdata_op = true;
4328 }
4329 else
4330 address = orig;
4331
4332 crtl->uses_pic_offset_table = 1;
4333 if (gotdata_op)
4334 {
4335 if (TARGET_ARCH64)
4336 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4337 pic_offset_table_rtx,
4338 address, orig));
4339 else
4340 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4341 pic_offset_table_rtx,
4342 address, orig));
4343 }
4344 else
4345 {
4346 pic_ref
4347 = gen_const_mem (Pmode,
4348 gen_rtx_PLUS (Pmode,
4349 pic_offset_table_rtx, address));
4350 insn = emit_move_insn (reg, pic_ref);
4351 }
4352
4353 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4354 by the loop pass. */
4355 set_unique_reg_note (insn, REG_EQUAL, orig);
4356 return reg;
4357 }
4358 else if (GET_CODE (orig) == CONST)
4359 {
4360 rtx base, offset;
4361
4362 if (GET_CODE (XEXP (orig, 0)) == PLUS
4363 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4364 return orig;
4365
4366 if (reg == 0)
4367 {
4368 gcc_assert (can_create_pseudo_p ());
4369 reg = gen_reg_rtx (Pmode);
4370 }
4371
4372 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4373 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4374 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4375 base == reg ? NULL_RTX : reg);
4376
4377 if (GET_CODE (offset) == CONST_INT)
4378 {
4379 if (SMALL_INT (offset))
4380 return plus_constant (Pmode, base, INTVAL (offset));
4381 else if (can_create_pseudo_p ())
4382 offset = force_reg (Pmode, offset);
4383 else
4384 /* If we reach here, then something is seriously wrong. */
4385 gcc_unreachable ();
4386 }
4387 return gen_rtx_PLUS (Pmode, base, offset);
4388 }
4389 else if (GET_CODE (orig) == LABEL_REF)
4390 /* ??? We ought to be checking that the register is live instead, in case
4391 it is eliminated. */
4392 crtl->uses_pic_offset_table = 1;
4393
4394 return orig;
4395 }
4396
4397 /* Try machine-dependent ways of modifying an illegitimate address X
4398 to be legitimate. If we find one, return the new, valid address.
4399
4400 OLDX is the address as it was before break_out_memory_refs was called.
4401 In some cases it is useful to look at this to decide what needs to be done.
4402
4403 MODE is the mode of the operand pointed to by X.
4404
4405 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4406
4407 static rtx
4408 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4409 machine_mode mode)
4410 {
4411 rtx orig_x = x;
4412
4413 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4414 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4415 force_operand (XEXP (x, 0), NULL_RTX));
4416 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4417 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4418 force_operand (XEXP (x, 1), NULL_RTX));
4419 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4420 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4421 XEXP (x, 1));
4422 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4423 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4424 force_operand (XEXP (x, 1), NULL_RTX));
4425
4426 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4427 return x;
4428
4429 if (sparc_tls_referenced_p (x))
4430 x = sparc_legitimize_tls_address (x);
4431 else if (flag_pic)
4432 x = sparc_legitimize_pic_address (x, NULL_RTX);
4433 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4434 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4435 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4436 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4437 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4438 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4439 else if (GET_CODE (x) == SYMBOL_REF
4440 || GET_CODE (x) == CONST
4441 || GET_CODE (x) == LABEL_REF)
4442 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4443
4444 return x;
4445 }
4446
4447 /* Delegitimize an address that was legitimized by the above function. */
4448
4449 static rtx
4450 sparc_delegitimize_address (rtx x)
4451 {
4452 x = delegitimize_mem_from_attrs (x);
4453
4454 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4455 switch (XINT (XEXP (x, 1), 1))
4456 {
4457 case UNSPEC_MOVE_PIC:
4458 case UNSPEC_TLSLE:
4459 x = XVECEXP (XEXP (x, 1), 0, 0);
4460 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4461 break;
4462 default:
4463 break;
4464 }
4465
4466 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4467 if (GET_CODE (x) == MINUS
4468 && REG_P (XEXP (x, 0))
4469 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4470 && GET_CODE (XEXP (x, 1)) == LO_SUM
4471 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4472 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4473 {
4474 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4475 gcc_assert (GET_CODE (x) == LABEL_REF);
4476 }
4477
4478 return x;
4479 }
4480
4481 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4482 replace the input X, or the original X if no replacement is called for.
4483 The output parameter *WIN is 1 if the calling macro should goto WIN,
4484 0 if it should not.
4485
4486 For SPARC, we wish to handle addresses by splitting them into
4487 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4488 This cuts the number of extra insns by one.
4489
4490 Do nothing when generating PIC code and the address is a symbolic
4491 operand or requires a scratch register. */
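
/* For example (illustrative), reloading the address of a static object
   then yields

	sethi	%hi(sym), %g1
	ld	[%g1 + %lo(sym)], %o0

   with the LO_SUM kept in the memory reference instead of being
   materialized by a separate add.  */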
4492
4493 rtx
4494 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4495 int opnum, int type,
4496 int ind_levels ATTRIBUTE_UNUSED, int *win)
4497 {
4498 /* Decompose SImode constants into HIGH+LO_SUM. */
4499 if (CONSTANT_P (x)
4500 && (mode != TFmode || TARGET_ARCH64)
4501 && GET_MODE (x) == SImode
4502 && GET_CODE (x) != LO_SUM
4503 && GET_CODE (x) != HIGH
4504 && sparc_cmodel <= CM_MEDLOW
4505 && !(flag_pic
4506 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4507 {
4508 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4509 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4510 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4511 opnum, (enum reload_type)type);
4512 *win = 1;
4513 return x;
4514 }
4515
4516 /* We have to recognize what we have already generated above. */
4517 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4518 {
4519 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4520 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4521 opnum, (enum reload_type)type);
4522 *win = 1;
4523 return x;
4524 }
4525
4526 *win = 0;
4527 return x;
4528 }
4529
4530 /* Return true if ADDR (a legitimate address expression)
4531 has an effect that depends on the machine mode it is used for.
4532
4533 In PIC mode,
4534
4535 (mem:HI [%l7+a])
4536
4537 is not equivalent to
4538
4539 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4540
4541 because [%l7+a+1] is interpreted as the address of (a+1). */
4542
4543
4544 static bool
4545 sparc_mode_dependent_address_p (const_rtx addr,
4546 addr_space_t as ATTRIBUTE_UNUSED)
4547 {
4548 if (flag_pic && GET_CODE (addr) == PLUS)
4549 {
4550 rtx op0 = XEXP (addr, 0);
4551 rtx op1 = XEXP (addr, 1);
4552 if (op0 == pic_offset_table_rtx
4553 && symbolic_operand (op1, VOIDmode))
4554 return true;
4555 }
4556
4557 return false;
4558 }
4559
4560 #ifdef HAVE_GAS_HIDDEN
4561 # define USE_HIDDEN_LINKONCE 1
4562 #else
4563 # define USE_HIDDEN_LINKONCE 0
4564 #endif
4565
4566 static void
4567 get_pc_thunk_name (char name[32], unsigned int regno)
4568 {
4569 const char *reg_name = reg_names[regno];
4570
4571 /* Skip the leading '%' as that cannot be used in a
4572 symbol name. */
4573 reg_name += 1;
4574
4575 if (USE_HIDDEN_LINKONCE)
4576 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4577 else
4578 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4579 }
4580
4581 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4582
4583 static rtx
4584 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4585 {
4586 int orig_flag_pic = flag_pic;
4587 rtx insn;
4588
4589 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4590 flag_pic = 0;
4591 if (TARGET_ARCH64)
4592 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4593 else
4594 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4595 flag_pic = orig_flag_pic;
4596
4597 return insn;
4598 }
4599
4600 /* Emit code to load the GOT register. */
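
/* In PIC mode this emits, roughly (illustrative; the thunk itself is
   just "retl; add %o7, %l7, %l7"):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7  */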
4601
4602 void
4603 load_got_register (void)
4604 {
4605 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4606 if (!global_offset_table_rtx)
4607 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4608
4609 if (TARGET_VXWORKS_RTP)
4610 emit_insn (gen_vxworks_load_got ());
4611 else
4612 {
4613 /* The GOT symbol is subject to a PC-relative relocation so we need a
4614 helper function to add the PC value and thus get the final value. */
4615 if (!got_helper_rtx)
4616 {
4617 char name[32];
4618 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4619 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4620 }
4621
4622 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4623 got_helper_rtx,
4624 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4625 }
4626
4627 /* Need to emit this whether or not we obey regdecls,
4628 since setjmp/longjmp can cause life info to screw up.
4629 ??? In the case where we don't obey regdecls, this is not sufficient
4630 since we may not fall out the bottom. */
4631 emit_use (global_offset_table_rtx);
4632 }
4633
4634 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4635 address of the call target. */
4636
4637 void
4638 sparc_emit_call_insn (rtx pat, rtx addr)
4639 {
4640 rtx_insn *insn;
4641
4642 insn = emit_call_insn (pat);
4643
4644 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4645 if (TARGET_VXWORKS_RTP
4646 && flag_pic
4647 && GET_CODE (addr) == SYMBOL_REF
4648 && (SYMBOL_REF_DECL (addr)
4649 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4650 : !SYMBOL_REF_LOCAL_P (addr)))
4651 {
4652 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4653 crtl->uses_pic_offset_table = 1;
4654 }
4655 }
4656 \f
4657 /* Return 1 if RTX is a MEM which is known to be aligned to at
4658 least a DESIRED byte boundary. */
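
/* For example, mem_min_alignment (mem, 8) can be used to ask whether MEM
   is known to be double-word aligned, e.g. before emitting ldd/std.  */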
4659
4660 int
4661 mem_min_alignment (rtx mem, int desired)
4662 {
4663 rtx addr, base, offset;
4664
4665 /* If it's not a MEM we can't accept it. */
4666 if (GET_CODE (mem) != MEM)
4667 return 0;
4668
4669 /* Obviously... */
4670 if (!TARGET_UNALIGNED_DOUBLES
4671 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4672 return 1;
4673
4674 /* ??? The rest of the function predates MEM_ALIGN so
4675 there is probably a bit of redundancy. */
4676 addr = XEXP (mem, 0);
4677 base = offset = NULL_RTX;
4678 if (GET_CODE (addr) == PLUS)
4679 {
4680 if (GET_CODE (XEXP (addr, 0)) == REG)
4681 {
4682 base = XEXP (addr, 0);
4683
4684 /* What we are saying here is that if the base
4685 REG is properly aligned, the compiler will ensure
4686 that any REG-based index computed from it is
4687 properly aligned as well. */
4688 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4689 offset = XEXP (addr, 1);
4690 else
4691 offset = const0_rtx;
4692 }
4693 }
4694 else if (GET_CODE (addr) == REG)
4695 {
4696 base = addr;
4697 offset = const0_rtx;
4698 }
4699
4700 if (base != NULL_RTX)
4701 {
4702 int regno = REGNO (base);
4703
4704 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4705 {
4706 /* Check if the compiler has recorded some information
4707 about the alignment of the base REG. If reload has
4708 completed, we already matched with proper alignments.
4709 If not running global_alloc, reload might give us an
4710 unaligned pointer to the local stack, though. */
4711 if (((cfun != 0
4712 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4713 || (optimize && reload_completed))
4714 && (INTVAL (offset) & (desired - 1)) == 0)
4715 return 1;
4716 }
4717 else
4718 {
4719 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4720 return 1;
4721 }
4722 }
4723 else if (! TARGET_UNALIGNED_DOUBLES
4724 || CONSTANT_P (addr)
4725 || GET_CODE (addr) == LO_SUM)
4726 {
4727 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4728 is true, in which case we can only assume that an access is aligned if
4729 it is to a constant address, or the address involves a LO_SUM. */
4730 return 1;
4731 }
4732
4733 /* An obviously unaligned address. */
4734 return 0;
4735 }
4736
4737 \f
4738 /* Vectors to keep interesting information about registers where it can easily
4739 be got. We used to use the actual mode value as the bit number, but there
4740 are more than 32 modes now. Instead we use two tables: one indexed by
4741 hard register number, and one indexed by mode. */
4742
4743 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4744 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4745 mapped into one sparc_mode_class mode. */
4746
4747 enum sparc_mode_class {
4748 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4749 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4750 CC_MODE, CCFP_MODE
4751 };
4752
4753 /* Modes for single-word and smaller quantities. */
4754 #define S_MODES \
4755 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4756
4757 /* Modes for double-word and smaller quantities. */
4758 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4759
4760 /* Modes for quad-word and smaller quantities. */
4761 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4762
4763 /* Modes for 8-word and smaller quantities. */
4764 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4765
4766 /* Modes for single-float quantities. */
4767 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4768
4769 /* Modes for double-float and smaller quantities. */
4770 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4771
4772 /* Modes for quad-float and smaller quantities. */
4773 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4774
4775 /* Modes for quad-float pairs and smaller quantities. */
4776 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4777
4778 /* Modes for double-float only quantities. */
4779 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4780
4781 /* Modes for quad-float and double-float only quantities. */
4782 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4783
4784 /* Modes for quad-float pairs and double-float only quantities. */
4785 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4786
4787 /* Modes for condition codes. */
4788 #define CC_MODES (1 << (int) CC_MODE)
4789 #define CCFP_MODES (1 << (int) CCFP_MODE)
4790
4791 /* Value is 1 if register/mode pair is acceptable on sparc.
4792
4793 The funny mixture of D and T modes is because integer operations
4794 do not specially operate on tetra quantities, so non-quad-aligned
4795 registers can hold quadword quantities (except %o4 and %i4 because
4796 they cross fixed registers).
4797
4798 ??? Note that, despite the settings, non-double-aligned parameter
4799 registers can hold double-word quantities in 32-bit mode. */
4800
4801 /* This points to either the 32 bit or the 64 bit version. */
4802 const int *hard_regno_mode_classes;
4803
4804 static const int hard_32bit_mode_classes[] = {
4805 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4806 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4807 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4808 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4809
4810 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4811 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4812 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4813 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4814
4815 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4816 and none can hold SFmode/SImode values. */
4817 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4818 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4819 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4820 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4821
4822 /* %fcc[0123] */
4823 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4824
4825 /* %icc, %sfp, %gsr */
4826 CC_MODES, 0, D_MODES
4827 };
4828
4829 static const int hard_64bit_mode_classes[] = {
4830 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4831 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4832 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4833 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4834
4835 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4836 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4837 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4838 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4839
4840 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4841 and none can hold SFmode/SImode values. */
4842 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4843 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4844 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4845 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4846
4847 /* %fcc[0123] */
4848 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4849
4850 /* %icc, %sfp, %gsr */
4851 CC_MODES, 0, D_MODES
4852 };
4853
4854 int sparc_mode_class [NUM_MACHINE_MODES];
4855
4856 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4857
4858 static void
4859 sparc_init_modes (void)
4860 {
4861 int i;
4862
4863 for (i = 0; i < NUM_MACHINE_MODES; i++)
4864 {
4865 machine_mode m = (machine_mode) i;
4866 unsigned int size = GET_MODE_SIZE (m);
4867
4868 switch (GET_MODE_CLASS (m))
4869 {
4870 case MODE_INT:
4871 case MODE_PARTIAL_INT:
4872 case MODE_COMPLEX_INT:
4873 if (size < 4)
4874 sparc_mode_class[i] = 1 << (int) H_MODE;
4875 else if (size == 4)
4876 sparc_mode_class[i] = 1 << (int) S_MODE;
4877 else if (size == 8)
4878 sparc_mode_class[i] = 1 << (int) D_MODE;
4879 else if (size == 16)
4880 sparc_mode_class[i] = 1 << (int) T_MODE;
4881 else if (size == 32)
4882 sparc_mode_class[i] = 1 << (int) O_MODE;
4883 else
4884 sparc_mode_class[i] = 0;
4885 break;
4886 case MODE_VECTOR_INT:
4887 if (size == 4)
4888 sparc_mode_class[i] = 1 << (int) SF_MODE;
4889 else if (size == 8)
4890 sparc_mode_class[i] = 1 << (int) DF_MODE;
4891 else
4892 sparc_mode_class[i] = 0;
4893 break;
4894 case MODE_FLOAT:
4895 case MODE_COMPLEX_FLOAT:
4896 if (size == 4)
4897 sparc_mode_class[i] = 1 << (int) SF_MODE;
4898 else if (size == 8)
4899 sparc_mode_class[i] = 1 << (int) DF_MODE;
4900 else if (size == 16)
4901 sparc_mode_class[i] = 1 << (int) TF_MODE;
4902 else if (size == 32)
4903 sparc_mode_class[i] = 1 << (int) OF_MODE;
4904 else
4905 sparc_mode_class[i] = 0;
4906 break;
4907 case MODE_CC:
4908 if (m == CCFPmode || m == CCFPEmode)
4909 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4910 else
4911 sparc_mode_class[i] = 1 << (int) CC_MODE;
4912 break;
4913 default:
4914 sparc_mode_class[i] = 0;
4915 break;
4916 }
4917 }
4918
4919 if (TARGET_ARCH64)
4920 hard_regno_mode_classes = hard_64bit_mode_classes;
4921 else
4922 hard_regno_mode_classes = hard_32bit_mode_classes;
4923
4924 /* Initialize the array used by REGNO_REG_CLASS. */
4925 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4926 {
4927 if (i < 16 && TARGET_V8PLUS)
4928 sparc_regno_reg_class[i] = I64_REGS;
4929 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4930 sparc_regno_reg_class[i] = GENERAL_REGS;
4931 else if (i < 64)
4932 sparc_regno_reg_class[i] = FP_REGS;
4933 else if (i < 96)
4934 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4935 else if (i < 100)
4936 sparc_regno_reg_class[i] = FPCC_REGS;
4937 else
4938 sparc_regno_reg_class[i] = NO_REGS;
4939 }
4940 }
4941 \f
4942 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4943
4944 static inline bool
4945 save_global_or_fp_reg_p (unsigned int regno,
4946 int leaf_function ATTRIBUTE_UNUSED)
4947 {
4948 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4949 }
4950
4951 /* Return whether the return address register (%i7) is needed. */
4952
4953 static inline bool
4954 return_addr_reg_needed_p (int leaf_function)
4955 {
4956 /* If it is live, for example because of __builtin_return_address (0). */
4957 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4958 return true;
4959
4960 /* Otherwise, it is needed as a save register if %o7 is clobbered. */
4961 if (!leaf_function
4962 /* Loading the GOT register clobbers %o7. */
4963 || crtl->uses_pic_offset_table
4964 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4965 return true;
4966
4967 return false;
4968 }
4969
4970 /* Return whether REGNO, a local or in register, must be saved/restored. */
4971
4972 static bool
4973 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4974 {
4975 /* General case: call-saved registers live at some point. */
4976 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4977 return true;
4978
4979 /* Frame pointer register (%fp) if needed. */
4980 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4981 return true;
4982
4983 /* Return address register (%i7) if needed. */
4984 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4985 return true;
4986
4987 /* GOT register (%l7) if needed. */
4988 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4989 return true;
4990
4991 /* If the function accesses prior frames, the frame pointer and the return
4992 address of the previous frame must be saved on the stack. */
4993 if (crtl->accesses_prior_frames
4994 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4995 return true;
4996
4997 return false;
4998 }
4999
5000 /* Compute the frame size required by the function. This function is called
5001 during the reload pass and also by sparc_expand_prologue. */
5002
5003 HOST_WIDE_INT
5004 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5005 {
5006 HOST_WIDE_INT frame_size, apparent_frame_size;
5007 int args_size, n_global_fp_regs = 0;
5008 bool save_local_in_regs_p = false;
5009 unsigned int i;
5010
5011 /* If the function allocates dynamic stack space, the dynamic offset is
5012 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5013 if (leaf_function && !cfun->calls_alloca)
5014 args_size = 0;
5015 else
5016 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5017
5018 /* Calculate space needed for global registers. */
5019 if (TARGET_ARCH64)
5020 {
5021 for (i = 0; i < 8; i++)
5022 if (save_global_or_fp_reg_p (i, 0))
5023 n_global_fp_regs += 2;
5024 }
5025 else
5026 {
5027 for (i = 0; i < 8; i += 2)
5028 if (save_global_or_fp_reg_p (i, 0)
5029 || save_global_or_fp_reg_p (i + 1, 0))
5030 n_global_fp_regs += 2;
5031 }
5032
5033 /* In the flat window model, find out which local and in registers need to
5034 be saved. We don't reserve space in the current frame for them as they
5035 will be spilled into the register window save area of the caller's frame.
5036 However, as soon as we use this register window save area, we must create
5037 that of the current frame to make it the live one. */
5038 if (TARGET_FLAT)
5039 for (i = 16; i < 32; i++)
5040 if (save_local_or_in_reg_p (i, leaf_function))
5041 {
5042 save_local_in_regs_p = true;
5043 break;
5044 }
5045
5046 /* Calculate space needed for FP registers. */
5047 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5048 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5049 n_global_fp_regs += 2;
5050
5051 if (size == 0
5052 && n_global_fp_regs == 0
5053 && args_size == 0
5054 && !save_local_in_regs_p)
5055 frame_size = apparent_frame_size = 0;
5056 else
5057 {
5058 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5059 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5060 apparent_frame_size += n_global_fp_regs * 4;
5061
5062 /* We need to add the size of the outgoing argument area. */
5063 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5064
5065 /* And that of the register window save area. */
5066 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5067
5068 /* Finally, bump to the appropriate alignment. */
5069 frame_size = SPARC_STACK_ALIGN (frame_size);
5070 }
5071
5072 /* Set up values for use in prologue and epilogue. */
5073 sparc_frame_size = frame_size;
5074 sparc_apparent_frame_size = apparent_frame_size;
5075 sparc_n_global_fp_regs = n_global_fp_regs;
5076 sparc_save_local_in_regs_p = save_local_in_regs_p;
5077
5078 return frame_size;
5079 }
5080
5081 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5082
5083 int
5084 sparc_initial_elimination_offset (int to)
5085 {
5086 int offset;
5087
5088 if (to == STACK_POINTER_REGNUM)
5089 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5090 else
5091 offset = 0;
5092
5093 offset += SPARC_STACK_BIAS;
5094 return offset;
5095 }
5096
5097 /* Output any necessary .register pseudo-ops. */
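
/* For example, a 64-bit function that clobbers %g2 and %g7 gets

	.register	%g2, #scratch
	.register	%g7, #ignore

   emitted into the assembly output.  */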
5098
5099 void
5100 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5101 {
5102 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5103 int i;
5104
5105 if (TARGET_ARCH32)
5106 return;
5107
5108 /* Check if %g[2367] were used without
5109 .register being printed for them already. */
5110 for (i = 2; i < 8; i++)
5111 {
5112 if (df_regs_ever_live_p (i)
5113 && ! sparc_hard_reg_printed [i])
5114 {
5115 sparc_hard_reg_printed [i] = 1;
5116 /* %g7 is used as the TLS base register; use #ignore
5117 for it instead of #scratch. */
5118 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5119 i == 7 ? "ignore" : "scratch");
5120 }
5121 if (i == 3) i = 5;
5122 }
5123 #endif
5124 }
5125
5126 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5127
5128 #if PROBE_INTERVAL > 4096
5129 #error Cannot use indexed addressing mode for stack probing
5130 #endif
5131
5132 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5133 inclusive. These are offsets from the current stack pointer.
5134
5135 Note that we don't use the REG+REG addressing mode for the probes because
5136 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5137 so the advantages of having a single code path win here. */
5138
5139 static void
5140 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5141 {
5142 rtx g1 = gen_rtx_REG (Pmode, 1);
5143
5144 /* See if we have a constant small number of probes to generate. If so,
5145 that's the easy case. */
5146 if (size <= PROBE_INTERVAL)
5147 {
5148 emit_move_insn (g1, GEN_INT (first));
5149 emit_insn (gen_rtx_SET (g1,
5150 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5151 emit_stack_probe (plus_constant (Pmode, g1, -size));
5152 }
5153
5154 /* The run-time loop is made up of 9 insns in the generic case while the
5155 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5156 else if (size <= 4 * PROBE_INTERVAL)
5157 {
5158 HOST_WIDE_INT i;
5159
5160 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5161 emit_insn (gen_rtx_SET (g1,
5162 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5163 emit_stack_probe (g1);
5164
5165 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5166 it exceeds SIZE. If only two probes are needed, this will not
5167 generate any code. Then probe at FIRST + SIZE. */
5168 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5169 {
5170 emit_insn (gen_rtx_SET (g1,
5171 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5172 emit_stack_probe (g1);
5173 }
5174
5175 emit_stack_probe (plus_constant (Pmode, g1,
5176 (i - PROBE_INTERVAL) - size));
5177 }
5178
5179 /* Otherwise, do the same as above, but in a loop. Note that we must be
5180 extra careful with variables wrapping around because we might be at
5181 the very top (or the very bottom) of the address space and we have
5182 to be able to handle this case properly; in particular, we use an
5183 equality test for the loop condition. */
5184 else
5185 {
5186 HOST_WIDE_INT rounded_size;
5187 rtx g4 = gen_rtx_REG (Pmode, 4);
5188
5189 emit_move_insn (g1, GEN_INT (first));
5190
5191
5192 /* Step 1: round SIZE to the previous multiple of the interval. */
5193
5194 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5195 emit_move_insn (g4, GEN_INT (rounded_size));
5196
5197
5198 /* Step 2: compute initial and final value of the loop counter. */
5199
5200 /* TEST_ADDR = SP + FIRST. */
5201 emit_insn (gen_rtx_SET (g1,
5202 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5203
5204 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5205 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5206
5207
5208 /* Step 3: the loop
5209
5210 while (TEST_ADDR != LAST_ADDR)
5211 {
5212 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5213 probe at TEST_ADDR
5214 }
5215
5216 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5217 until it is equal to ROUNDED_SIZE. */
5218
5219 if (TARGET_ARCH64)
5220 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5221 else
5222 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5223
5224
5225 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5226 that SIZE is equal to ROUNDED_SIZE. */
5227
5228 if (size != rounded_size)
5229 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5230 }
5231
5232 /* Make sure nothing is scheduled before we are done. */
5233 emit_insn (gen_blockage ());
5234 }
5235
5236 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5237 absolute addresses. */
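
/* With the default 4096-byte probe interval and no stack bias (32-bit),
   the emitted loop is thus (illustrative):

   .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne	.LPSRL0
	 st	%g0, [%g1+0]  */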
5238
5239 const char *
5240 output_probe_stack_range (rtx reg1, rtx reg2)
5241 {
5242 static int labelno = 0;
5243 char loop_lab[32];
5244 rtx xops[2];
5245
5246 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5247
5248 /* Loop. */
5249 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5250
5251 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5252 xops[0] = reg1;
5253 xops[1] = GEN_INT (-PROBE_INTERVAL);
5254 output_asm_insn ("add\t%0, %1, %0", xops);
5255
5256 /* Test if TEST_ADDR == LAST_ADDR. */
5257 xops[1] = reg2;
5258 output_asm_insn ("cmp\t%0, %1", xops);
5259
5260 /* Probe at TEST_ADDR and branch. */
5261 if (TARGET_ARCH64)
5262 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5263 else
5264 fputs ("\tbne\t", asm_out_file);
5265 assemble_name_raw (asm_out_file, loop_lab);
5266 fputc ('\n', asm_out_file);
5267 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5268 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5269
5270 return "";
5271 }
5272
5273 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5274 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5275 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5276 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5277 the action to be performed if it returns false. Return the new offset. */
5278
5279 typedef bool (*sorr_pred_t) (unsigned int, int);
5280 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5281
5282 static int
5283 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5284 int offset, int leaf_function, sorr_pred_t save_p,
5285 sorr_act_t action_true, sorr_act_t action_false)
5286 {
5287 unsigned int i;
5288 rtx mem;
5289 rtx_insn *insn;
5290
5291 if (TARGET_ARCH64 && high <= 32)
5292 {
5293 int fp_offset = -1;
5294
5295 for (i = low; i < high; i++)
5296 {
5297 if (save_p (i, leaf_function))
5298 {
5299 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5300 base, offset));
5301 if (action_true == SORR_SAVE)
5302 {
5303 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5304 RTX_FRAME_RELATED_P (insn) = 1;
5305 }
5306 else /* action_true == SORR_RESTORE */
5307 {
5308 /* The frame pointer must be restored last since its old
5309 value may be used as base address for the frame. This
5310 is problematic in 64-bit mode only because of the lack
5311 of double-word load instruction. */
5312 if (i == HARD_FRAME_POINTER_REGNUM)
5313 fp_offset = offset;
5314 else
5315 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5316 }
5317 offset += 8;
5318 }
5319 else if (action_false == SORR_ADVANCE)
5320 offset += 8;
5321 }
5322
5323 if (fp_offset >= 0)
5324 {
5325 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5326 emit_move_insn (hard_frame_pointer_rtx, mem);
5327 }
5328 }
5329 else
5330 {
5331 for (i = low; i < high; i += 2)
5332 {
5333 bool reg0 = save_p (i, leaf_function);
5334 bool reg1 = save_p (i + 1, leaf_function);
5335 machine_mode mode;
5336 int regno;
5337
5338 if (reg0 && reg1)
5339 {
5340 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5341 regno = i;
5342 }
5343 else if (reg0)
5344 {
5345 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5346 regno = i;
5347 }
5348 else if (reg1)
5349 {
5350 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5351 regno = i + 1;
5352 offset += 4;
5353 }
5354 else
5355 {
5356 if (action_false == SORR_ADVANCE)
5357 offset += 8;
5358 continue;
5359 }
5360
5361 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5362 if (action_true == SORR_SAVE)
5363 {
5364 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5365 RTX_FRAME_RELATED_P (insn) = 1;
5366 if (mode == DImode)
5367 {
5368 rtx set1, set2;
5369 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5370 offset));
5371 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5372 RTX_FRAME_RELATED_P (set1) = 1;
5373 mem
5374 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5375 offset + 4));
5376 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5377 RTX_FRAME_RELATED_P (set2) = 1;
5378 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5379 gen_rtx_PARALLEL (VOIDmode,
5380 gen_rtvec (2, set1, set2)));
5381 }
5382 }
5383 else /* action_true == SORR_RESTORE */
5384 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5385
5386 /* Bump and round down to double word
5387 in case we already bumped by 4. */
5388 offset = ROUND_DOWN (offset + 8, 8);
5389 }
5390 }
5391
5392 return offset;
5393 }
5394
5395 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5396
5397 static rtx
5398 emit_adjust_base_to_offset (rtx base, int offset)
5399 {
5400 /* ??? This might be optimized a little as %g1 might already have a
5401 value close enough that a single add insn will do. */
5402 /* ??? Although, all of this is probably only a temporary fix because
5403 if %g1 can hold a function result, then sparc_expand_epilogue will
5404 lose (the result will be clobbered). */
5405 rtx new_base = gen_rtx_REG (Pmode, 1);
5406 emit_move_insn (new_base, GEN_INT (offset));
5407 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5408 return new_base;
5409 }
5410
5411 /* Emit code to save/restore call-saved global and FP registers. */
5412
5413 static void
5414 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5415 {
5416 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5417 {
5418 base = emit_adjust_base_to_offset (base, offset);
5419 offset = 0;
5420 }
5421
5422 offset
5423 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5424 save_global_or_fp_reg_p, action, SORR_NONE);
5425 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5426 save_global_or_fp_reg_p, action, SORR_NONE);
5427 }
5428
5429 /* Emit code to save/restore call-saved local and in registers. */
5430
5431 static void
5432 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5433 {
5434 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5435 {
5436 base = emit_adjust_base_to_offset (base, offset);
5437 offset = 0;
5438 }
5439
5440 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5441 save_local_or_in_reg_p, action, SORR_ADVANCE);
5442 }
5443
5444 /* Emit a window_save insn. */
5445
5446 static rtx_insn *
5447 emit_window_save (rtx increment)
5448 {
5449 rtx_insn *insn = emit_insn (gen_window_save (increment));
5450 RTX_FRAME_RELATED_P (insn) = 1;
5451
5452 /* The incoming return address (%o7) is saved in %i7. */
5453 add_reg_note (insn, REG_CFA_REGISTER,
5454 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5455 gen_rtx_REG (Pmode,
5456 INCOMING_RETURN_ADDR_REGNUM)));
5457
5458 /* The window save event. */
5459 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5460
5461 /* The CFA is %fp, the hard frame pointer. */
5462 add_reg_note (insn, REG_CFA_DEF_CFA,
5463 plus_constant (Pmode, hard_frame_pointer_rtx,
5464 INCOMING_FRAME_SP_OFFSET));
5465
5466 return insn;
5467 }
5468
5469 /* Generate an increment for the stack pointer. */
5470
5471 static rtx
5472 gen_stack_pointer_inc (rtx increment)
5473 {
5474 return gen_rtx_SET (stack_pointer_rtx,
5475 gen_rtx_PLUS (Pmode,
5476 stack_pointer_rtx,
5477 increment));
5478 }
5479
5480 /* Expand the function prologue. The prologue is responsible for reserving
5481 storage for the frame, saving the call-saved registers and loading the
5482 GOT register if needed. */
5483
5484 void
5485 sparc_expand_prologue (void)
5486 {
5487 HOST_WIDE_INT size;
5488 rtx_insn *insn;
5489
5490 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5491 on the final value of the flag means deferring the prologue/epilogue
5492 expansion until just before the second scheduling pass, which is too
5493 late to emit multiple epilogues or return insns.
5494
5495 Of course we are making the assumption that the value of the flag
5496 will not change between now and its final value. Of the three parts
5497 of the formula, only the last one can reasonably vary. Let's take a
5498      closer look, assuming that the first two are true (otherwise the
5499      value of the last one doesn't matter).
5500
5501 If only_leaf_regs_used returns false, the global predicate will also
5502 be false so the actual frame size calculated below will be positive.
5503 As a consequence, the save_register_window insn will be emitted in
5504 the instruction stream; now this insn explicitly references %fp
5505 which is not a leaf register so only_leaf_regs_used will always
5506 return false subsequently.
5507
5508 If only_leaf_regs_used returns true, we hope that the subsequent
5509 optimization passes won't cause non-leaf registers to pop up. For
5510 example, the regrename pass has special provisions to not rename to
5511 non-leaf registers in a leaf function. */
5512 sparc_leaf_function_p
5513 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5514
5515 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5516
5517 if (flag_stack_usage_info)
5518 current_function_static_stack_size = size;
5519
5520 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5521 {
5522 if (crtl->is_leaf && !cfun->calls_alloca)
5523 {
5524 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5525 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5526 size - STACK_CHECK_PROTECT);
5527 }
5528 else if (size > 0)
5529 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5530 }
5531
5532 if (size == 0)
5533 ; /* do nothing. */
5534 else if (sparc_leaf_function_p)
5535 {
5536 rtx size_int_rtx = GEN_INT (-size);
5537
5538 if (size <= 4096)
5539 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5540 else if (size <= 8192)
5541 {
5542 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5543 RTX_FRAME_RELATED_P (insn) = 1;
5544
5545 /* %sp is still the CFA register. */
5546 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5547 }
5548 else
5549 {
5550 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5551 emit_move_insn (size_rtx, size_int_rtx);
5552 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5553 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5554 gen_stack_pointer_inc (size_int_rtx));
5555 }
5556
5557 RTX_FRAME_RELATED_P (insn) = 1;
5558 }
5559 else
5560 {
5561 rtx size_int_rtx = GEN_INT (-size);
5562
5563 if (size <= 4096)
5564 emit_window_save (size_int_rtx);
5565 else if (size <= 8192)
5566 {
5567 emit_window_save (GEN_INT (-4096));
5568
5569 /* %sp is not the CFA register anymore. */
5570 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5571
5572 /* Make sure no %fp-based store is issued until after the frame is
5573 established. The offset between the frame pointer and the stack
5574 pointer is calculated relative to the value of the stack pointer
5575 at the end of the function prologue, and moving instructions that
5576 access the stack via the frame pointer between the instructions
5577 that decrement the stack pointer could result in accessing the
5578 register window save area, which is volatile. */
5579 emit_insn (gen_frame_blockage ());
5580 }
5581 else
5582 {
5583 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5584 emit_move_insn (size_rtx, size_int_rtx);
5585 emit_window_save (size_rtx);
5586 }
5587 }
5588
5589 if (sparc_leaf_function_p)
5590 {
5591 sparc_frame_base_reg = stack_pointer_rtx;
5592 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5593 }
5594 else
5595 {
5596 sparc_frame_base_reg = hard_frame_pointer_rtx;
5597 sparc_frame_base_offset = SPARC_STACK_BIAS;
5598 }
5599
5600 if (sparc_n_global_fp_regs > 0)
5601 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5602 sparc_frame_base_offset
5603 - sparc_apparent_frame_size,
5604 SORR_SAVE);
5605
5606 /* Load the GOT register if needed. */
5607 if (crtl->uses_pic_offset_table)
5608 load_got_register ();
5609
5610 /* Advertise that the data calculated just above are now valid. */
5611 sparc_prologue_data_valid_p = true;
5612 }
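
/* As an illustrative sketch (assuming no register saves and no GOT load),
   the code above emits for a leaf function with a 96-byte frame just

	add	%sp, -96, %sp

   whereas the same function compiled as a non-leaf one gets

	save	%sp, -96, %sp

   i.e. a register window save combined with the stack adjustment.  */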
5613
5614 /* Expand the function prologue for the flat register window model.  The
5615    prologue is responsible for reserving storage for the frame, saving the
5616    call-saved registers and loading the GOT register if needed.  */
5617
5618 void
5619 sparc_flat_expand_prologue (void)
5620 {
5621 HOST_WIDE_INT size;
5622 rtx_insn *insn;
5623
5624 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5625
5626 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5627
5628 if (flag_stack_usage_info)
5629 current_function_static_stack_size = size;
5630
5631 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5632 {
5633 if (crtl->is_leaf && !cfun->calls_alloca)
5634 {
5635 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5636 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5637 size - STACK_CHECK_PROTECT);
5638 }
5639 else if (size > 0)
5640 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5641 }
5642
5643 if (sparc_save_local_in_regs_p)
5644 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5645 SORR_SAVE);
5646
5647 if (size == 0)
5648 ; /* do nothing. */
5649 else
5650 {
5651 rtx size_int_rtx, size_rtx;
5652
5653 size_rtx = size_int_rtx = GEN_INT (-size);
5654
5655 /* We establish the frame (i.e. decrement the stack pointer) first, even
5656 if we use a frame pointer, because we cannot clobber any call-saved
5657 registers, including the frame pointer, if we haven't created a new
5658 register save area, for the sake of compatibility with the ABI. */
5659 if (size <= 4096)
5660 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5661 else if (size <= 8192 && !frame_pointer_needed)
5662 {
5663 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5664 RTX_FRAME_RELATED_P (insn) = 1;
5665 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5666 }
5667 else
5668 {
5669 size_rtx = gen_rtx_REG (Pmode, 1);
5670 emit_move_insn (size_rtx, size_int_rtx);
5671 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5672 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5673 gen_stack_pointer_inc (size_int_rtx));
5674 }
5675 RTX_FRAME_RELATED_P (insn) = 1;
5676
5677 /* Ensure nothing is scheduled until after the frame is established. */
5678 emit_insn (gen_blockage ());
5679
5680 if (frame_pointer_needed)
5681 {
5682 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5683 gen_rtx_MINUS (Pmode,
5684 stack_pointer_rtx,
5685 size_rtx)));
5686 RTX_FRAME_RELATED_P (insn) = 1;
5687
5688 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5689 gen_rtx_SET (hard_frame_pointer_rtx,
5690 plus_constant (Pmode, stack_pointer_rtx,
5691 size)));
5692 }
5693
5694 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5695 {
5696 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5697 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5698
5699 insn = emit_move_insn (i7, o7);
5700 RTX_FRAME_RELATED_P (insn) = 1;
5701
5702 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5703
5704 /* Prevent this instruction from ever being considered dead,
5705 even if this function has no epilogue. */
5706 emit_use (i7);
5707 }
5708 }
5709
5710 if (frame_pointer_needed)
5711 {
5712 sparc_frame_base_reg = hard_frame_pointer_rtx;
5713 sparc_frame_base_offset = SPARC_STACK_BIAS;
5714 }
5715 else
5716 {
5717 sparc_frame_base_reg = stack_pointer_rtx;
5718 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5719 }
5720
5721 if (sparc_n_global_fp_regs > 0)
5722 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5723 sparc_frame_base_offset
5724 - sparc_apparent_frame_size,
5725 SORR_SAVE);
5726
5727 /* Load the GOT register if needed. */
5728 if (crtl->uses_pic_offset_table)
5729 load_got_register ();
5730
5731 /* Advertise that the data calculated just above are now valid. */
5732 sparc_prologue_data_valid_p = true;
5733 }
5734
5735 /* This function generates the assembly code for function entry, which boils
5736 down to emitting the necessary .register directives. */
5737
5738 static void
5739 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5740 {
5741 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5742 if (!TARGET_FLAT)
5743 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5744
5745 sparc_output_scratch_registers (file);
5746 }
5747
5748 /* Expand the function epilogue, either normal or part of a sibcall.
5749 We emit all the instructions except the return or the call. */
5750
5751 void
5752 sparc_expand_epilogue (bool for_eh)
5753 {
5754 HOST_WIDE_INT size = sparc_frame_size;
5755
5756 if (sparc_n_global_fp_regs > 0)
5757 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5758 sparc_frame_base_offset
5759 - sparc_apparent_frame_size,
5760 SORR_RESTORE);
5761
5762 if (size == 0 || for_eh)
5763 ; /* do nothing. */
5764 else if (sparc_leaf_function_p)
5765 {
5766 if (size <= 4096)
5767 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5768 else if (size <= 8192)
5769 {
5770 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5771 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5772 }
5773 else
5774 {
5775 rtx reg = gen_rtx_REG (Pmode, 1);
5776 emit_move_insn (reg, GEN_INT (size));
5777 emit_insn (gen_stack_pointer_inc (reg));
5778 }
5779 }
5780 }
5781
5782 /* Expand the function epilogue, either normal or part of a sibcall.
5783 We emit all the instructions except the return or the call. */
5784
5785 void
5786 sparc_flat_expand_epilogue (bool for_eh)
5787 {
5788 HOST_WIDE_INT size = sparc_frame_size;
5789
5790 if (sparc_n_global_fp_regs > 0)
5791 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5792 sparc_frame_base_offset
5793 - sparc_apparent_frame_size,
5794 SORR_RESTORE);
5795
5796 /* If we have a frame pointer, we'll need both to restore it before the
5797      frame is destroyed and to use its current value in destroying the frame.
5798 Since we don't have an atomic way to do that in the flat window model,
5799 we save the current value into a temporary register (%g1). */
5800 if (frame_pointer_needed && !for_eh)
5801 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5802
5803 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5804 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5805 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5806
5807 if (sparc_save_local_in_regs_p)
5808 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5809 sparc_frame_base_offset,
5810 SORR_RESTORE);
5811
5812 if (size == 0 || for_eh)
5813 ; /* do nothing. */
5814 else if (frame_pointer_needed)
5815 {
5816 /* Make sure the frame is destroyed after everything else is done. */
5817 emit_insn (gen_blockage ());
5818
5819 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5820 }
5821 else
5822 {
5823 /* Likewise. */
5824 emit_insn (gen_blockage ());
5825
5826 if (size <= 4096)
5827 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5828 else if (size <= 8192)
5829 {
5830 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5831 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5832 }
5833 else
5834 {
5835 rtx reg = gen_rtx_REG (Pmode, 1);
5836 emit_move_insn (reg, GEN_INT (size));
5837 emit_insn (gen_stack_pointer_inc (reg));
5838 }
5839 }
5840 }
5841
5842 /* Return true if it is appropriate to emit `return' instructions in the
5843 body of a function. */
5844
5845 bool
5846 sparc_can_use_return_insn_p (void)
5847 {
5848 return sparc_prologue_data_valid_p
5849 && sparc_n_global_fp_regs == 0
5850 	 && (TARGET_FLAT
5851 	     ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5852 	     : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5853 }
5854
5855 /* This function generates the assembly code for function exit. */
5856
5857 static void
5858 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5859 {
5860 /* If the last two instructions of a function are "call foo; dslot;"
5861 the return address might point to the first instruction in the next
5862 function and we have to output a dummy nop for the sake of sane
5863 backtraces in such cases. This is pointless for sibling calls since
5864 the return address is explicitly adjusted. */
5865
5866 rtx insn, last_real_insn;
5867
5868 insn = get_last_insn ();
5869
5870 last_real_insn = prev_real_insn (insn);
5871 if (last_real_insn
5872 && NONJUMP_INSN_P (last_real_insn)
5873 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5874 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5875
5876 if (last_real_insn
5877 && CALL_P (last_real_insn)
5878 && !SIBLING_CALL_P (last_real_insn))
5879 fputs("\tnop\n", file);
5880
5881 sparc_output_deferred_case_vectors ();
5882 }
5883
5884 /* Output a 'restore' instruction. */
5885
5886 static void
5887 output_restore (rtx pat)
5888 {
5889 rtx operands[3];
5890
5891 if (! pat)
5892 {
5893 fputs ("\t restore\n", asm_out_file);
5894 return;
5895 }
5896
5897 gcc_assert (GET_CODE (pat) == SET);
5898
5899 operands[0] = SET_DEST (pat);
5900 pat = SET_SRC (pat);
5901
5902 switch (GET_CODE (pat))
5903 {
5904 case PLUS:
5905 operands[1] = XEXP (pat, 0);
5906 operands[2] = XEXP (pat, 1);
5907 output_asm_insn (" restore %r1, %2, %Y0", operands);
5908 break;
5909 case LO_SUM:
5910 operands[1] = XEXP (pat, 0);
5911 operands[2] = XEXP (pat, 1);
5912 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5913 break;
5914 case ASHIFT:
5915 operands[1] = XEXP (pat, 0);
5916 gcc_assert (XEXP (pat, 1) == const1_rtx);
5917 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5918 break;
5919 default:
5920 operands[1] = pat;
5921 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5922 break;
5923 }
5924 }
5925
5926 /* Output a return. */
5927
5928 const char *
5929 output_return (rtx_insn *insn)
5930 {
5931 if (crtl->calls_eh_return)
5932 {
5933 /* If the function uses __builtin_eh_return, the eh_return
5934 machinery occupies the delay slot. */
5935 gcc_assert (!final_sequence);
5936
5937 if (flag_delayed_branch)
5938 {
5939 if (!TARGET_FLAT && TARGET_V9)
5940 fputs ("\treturn\t%i7+8\n", asm_out_file);
5941 else
5942 {
5943 if (!TARGET_FLAT)
5944 fputs ("\trestore\n", asm_out_file);
5945
5946 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5947 }
5948
5949 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5950 }
5951 else
5952 {
5953 if (!TARGET_FLAT)
5954 fputs ("\trestore\n", asm_out_file);
5955
5956 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5957 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5958 }
5959 }
5960 else if (sparc_leaf_function_p || TARGET_FLAT)
5961 {
5962 /* This is a leaf or flat function so we don't have to bother restoring
5963 the register window, which frees us from dealing with the convoluted
5964 semantics of restore/return. We simply output the jump to the
5965 return address and the insn in the delay slot (if any). */
5966
5967 return "jmp\t%%o7+%)%#";
5968 }
5969 else
5970 {
5971 /* This is a regular function so we have to restore the register window.
5972 We may have a pending insn for the delay slot, which will be either
5973 combined with the 'restore' instruction or put in the delay slot of
5974 the 'return' instruction. */
5975
5976 if (final_sequence)
5977 {
5978 rtx delay, pat;
5979
5980 delay = NEXT_INSN (insn);
5981 gcc_assert (delay);
5982
5983 pat = PATTERN (delay);
5984
5985 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5986 {
5987 epilogue_renumber (&pat, 0);
5988 return "return\t%%i7+%)%#";
5989 }
5990 else
5991 {
5992 output_asm_insn ("jmp\t%%i7+%)", NULL);
5993 output_restore (pat);
5994 PATTERN (delay) = gen_blockage ();
5995 INSN_CODE (delay) = -1;
5996 }
5997 }
5998 else
5999 {
6000 /* The delay slot is empty. */
6001 if (TARGET_V9)
6002 return "return\t%%i7+%)\n\t nop";
6003 else if (flag_delayed_branch)
6004 return "jmp\t%%i7+%)\n\t restore";
6005 else
6006 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6007 }
6008 }
6009
6010 return "";
6011 }
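
/* By way of example, for a regular V9 function whose delay slot is empty
   the code above returns "return %i7+8\n\t nop", i.e. the two instructions

	return	%i7+8
	 nop

   with %) rendered as the usual return-address offset of 8.  */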
6012
6013 /* Output a sibling call. */
6014
6015 const char *
6016 output_sibcall (rtx_insn *insn, rtx call_operand)
6017 {
6018 rtx operands[1];
6019
6020 gcc_assert (flag_delayed_branch);
6021
6022 operands[0] = call_operand;
6023
6024 if (sparc_leaf_function_p || TARGET_FLAT)
6025 {
6026 /* This is a leaf or flat function so we don't have to bother restoring
6027 the register window. We simply output the jump to the function and
6028 the insn in the delay slot (if any). */
6029
6030 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6031
6032 if (final_sequence)
6033 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6034 operands);
6035 else
6036 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6037 	   it into a branch if possible.  */
6038 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6039 operands);
6040 }
6041 else
6042 {
6043 /* This is a regular function so we have to restore the register window.
6044 We may have a pending insn for the delay slot, which will be combined
6045 with the 'restore' instruction. */
6046
6047 output_asm_insn ("call\t%a0, 0", operands);
6048
6049 if (final_sequence)
6050 {
6051 rtx_insn *delay = NEXT_INSN (insn);
6052 gcc_assert (delay);
6053
6054 output_restore (PATTERN (delay));
6055
6056 PATTERN (delay) = gen_blockage ();
6057 INSN_CODE (delay) = -1;
6058 }
6059 else
6060 output_restore (NULL_RTX);
6061 }
6062
6063 return "";
6064 }
6065 \f
6066 /* Functions for handling argument passing.
6067
6068 For 32-bit, the first 6 args are normally in registers and the rest are
6069 pushed. Any arg that starts within the first 6 words is at least
6070    partially passed in a register unless its data type forbids it.
6071
6072 For 64-bit, the argument registers are laid out as an array of 16 elements
6073 and arguments are added sequentially. The first 6 int args and up to the
6074 first 16 fp args (depending on size) are passed in regs.
6075
6076 Slot Stack Integral Float Float in structure Double Long Double
6077 ---- ----- -------- ----- ------------------ ------ -----------
6078 15 [SP+248] %f31 %f30,%f31 %d30
6079 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6080 13 [SP+232] %f27 %f26,%f27 %d26
6081 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6082 11 [SP+216] %f23 %f22,%f23 %d22
6083 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6084 9 [SP+200] %f19 %f18,%f19 %d18
6085 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6086 7 [SP+184] %f15 %f14,%f15 %d14
6087 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6088 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6089 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6090 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6091 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6092 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6093 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6094
6095    Here SP = %sp with -mno-stack-bias, and %sp + stack_bias otherwise.
6096
6097 Integral arguments are always passed as 64-bit quantities appropriately
6098 extended.
6099
6100 Passing of floating point values is handled as follows.
6101 If a prototype is in scope:
6102 If the value is in a named argument (i.e. not a stdarg function or a
6103 value not part of the `...') then the value is passed in the appropriate
6104 fp reg.
6105 If the value is part of the `...' and is passed in one of the first 6
6106 slots then the value is passed in the appropriate int reg.
6107 If the value is part of the `...' and is not passed in one of the first 6
6108 slots then the value is passed in memory.
6109 If a prototype is not in scope:
6110 If the value is one of the first 6 arguments the value is passed in the
6111 appropriate integer reg and the appropriate fp reg.
6112 If the value is not one of the first 6 arguments the value is passed in
6113 the appropriate fp reg and in memory.
6114
6115
6116 Summary of the calling conventions implemented by GCC on the SPARC:
6117
6118 32-bit ABI:
6119 size argument return value
6120
6121 small integer <4 int. reg. int. reg.
6122 word 4 int. reg. int. reg.
6123 double word 8 int. reg. int. reg.
6124
6125 _Complex small integer <8 int. reg. int. reg.
6126 _Complex word 8 int. reg. int. reg.
6127 _Complex double word 16 memory int. reg.
6128
6129 vector integer <=8 int. reg. FP reg.
6130 vector integer >8 memory memory
6131
6132 float 4 int. reg. FP reg.
6133 double 8 int. reg. FP reg.
6134 long double 16 memory memory
6135
6136 _Complex float 8 memory FP reg.
6137 _Complex double 16 memory FP reg.
6138 _Complex long double 32 memory FP reg.
6139
6140 vector float any memory memory
6141
6142 aggregate any memory memory
6143
6144
6145
6146 64-bit ABI:
6147 size argument return value
6148
6149 small integer <8 int. reg. int. reg.
6150 word 8 int. reg. int. reg.
6151 double word 16 int. reg. int. reg.
6152
6153 _Complex small integer <16 int. reg. int. reg.
6154 _Complex word 16 int. reg. int. reg.
6155 _Complex double word 32 memory int. reg.
6156
6157 vector integer <=16 FP reg. FP reg.
6158 vector integer 16<s<=32 memory FP reg.
6159 vector integer >32 memory memory
6160
6161 float 4 FP reg. FP reg.
6162 double 8 FP reg. FP reg.
6163 long double 16 FP reg. FP reg.
6164
6165 _Complex float 8 FP reg. FP reg.
6166 _Complex double 16 FP reg. FP reg.
6167 _Complex long double 32 memory FP reg.
6168
6169 vector float <=16 FP reg. FP reg.
6170 vector float 16<s<=32 memory FP reg.
6171 vector float >32 memory memory
6172
6173 aggregate <=16 reg. reg.
6174 aggregate 16<s<=32 memory reg.
6175 aggregate >32 memory memory
6176
6177
6178
6179 Note #1: complex floating-point types follow the extended SPARC ABIs as
6180 implemented by the Sun compiler.
6181
6182 Note #2: integral vector types follow the scalar floating-point types
6183 conventions to match what is implemented by the Sun VIS SDK.
6184
6185 Note #3: floating-point vector types follow the aggregate types
6186 conventions. */
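
/* To make the 64-bit rules above concrete, consider (an illustrative
   sketch) the prototyped declaration

	void f (double x, int i);

   Here x is passed in %d0 (slot 0) and i in %o1 (slot 1), per the slot
   table above; structures are decomposed field by field, as implemented
   by function_arg_record_value below.  */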
6187
6188
6189 /* Maximum number of int regs for args. */
6190 #define SPARC_INT_ARG_MAX 6
6191 /* Maximum number of fp regs for args. */
6192 #define SPARC_FP_ARG_MAX 16
6193 /* Number of words (partially) occupied for a given size in units. */
6194 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
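/* E.g. with UNITS_PER_WORD == 8 (64-bit), CEIL_NWORDS (12) == 2: a 12-byte
   object partially occupies two argument words.  */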
6195
6196 /* Handle the INIT_CUMULATIVE_ARGS macro.
6197 Initialize a variable CUM of type CUMULATIVE_ARGS
6198 for a call to a function whose data type is FNTYPE.
6199 For a library call, FNTYPE is 0. */
6200
6201 void
6202 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6203 {
6204 cum->words = 0;
6205 cum->prototype_p = fntype && prototype_p (fntype);
6206 cum->libcall_p = !fntype;
6207 }
6208
6209 /* Handle promotion of pointer and integer arguments. */
6210
6211 static machine_mode
6212 sparc_promote_function_mode (const_tree type, machine_mode mode,
6213 int *punsignedp, const_tree, int)
6214 {
6215 if (type && POINTER_TYPE_P (type))
6216 {
6217 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6218 return Pmode;
6219 }
6220
6221 /* Integral arguments are passed as full words, as per the ABI. */
6222 if (GET_MODE_CLASS (mode) == MODE_INT
6223 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6224 return word_mode;
6225
6226 return mode;
6227 }
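
/* E.g. a 'short' argument is promoted to word_mode here and a pointer is
   extended to Pmode, so callees always receive full-word quantities.  */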
6228
6229 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6230
6231 static bool
6232 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6233 {
6234   return TARGET_ARCH64;
6235 }
6236
6237 /* Traverse the record TYPE recursively and call FUNC on its fields.
6238 NAMED is true if this is for a named parameter. DATA is passed
6239 to FUNC for each field. OFFSET is the starting position and
6240 PACKED is true if we are inside a packed record. */
6241
6242 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6243 static void
6244 traverse_record_type (const_tree type, bool named, T *data,
6245 HOST_WIDE_INT offset = 0, bool packed = false)
6246 {
6247 /* The ABI obviously doesn't specify how packed structures are passed.
6248 These are passed in integer regs if possible, otherwise memory. */
6249 if (!packed)
6250 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6251 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6252 {
6253 packed = true;
6254 break;
6255 }
6256
6257 /* Walk the real fields, but skip those with no size or a zero size.
6258 ??? Fields with variable offset are handled as having zero offset. */
6259 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6260 if (TREE_CODE (field) == FIELD_DECL)
6261 {
6262 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6263 continue;
6264
6265 HOST_WIDE_INT bitpos = offset;
6266 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6267 bitpos += int_bit_position (field);
6268
6269 tree field_type = TREE_TYPE (field);
6270 if (TREE_CODE (field_type) == RECORD_TYPE)
6271 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6272 packed);
6273 else
6274 {
6275 const bool fp_type
6276 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6277 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6278 data);
6279 }
6280 }
6281 }
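
/* E.g. for struct { struct { float f; } s; int i; } the walk recurses into
   's' and then invokes Func once on 'f' (an FP-eligible field at bit 0,
   provided the parameter is named and the FPU is enabled) and once on 'i'
   (an integer field at bit 32).  */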
6282
6283 /* Handle recursive register classification for structure layout.  */
6284
6285 typedef struct
6286 {
6287 bool int_regs; /* true if field eligible to int registers. */
6288 bool fp_regs; /* true if field eligible to FP registers. */
6289 bool fp_regs_in_first_word; /* true if such field in first word. */
6290 } classify_data_t;
6291
6292 /* A subroutine of function_arg_slotno. Classify the field. */
6293
6294 inline void
6295 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6296 classify_data_t *data)
6297 {
6298 if (fp)
6299 {
6300 data->fp_regs = true;
6301 if (bitpos < BITS_PER_WORD)
6302 data->fp_regs_in_first_word = true;
6303 }
6304 else
6305 data->int_regs = true;
6306 }
6307
6308 /* Compute the slot number to pass an argument in.
6309 Return the slot number or -1 if passing on the stack.
6310
6311 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6312 the preceding args and about the function being called.
6313 MODE is the argument's machine mode.
6314 TYPE is the data type of the argument (as a tree).
6315 This is null for libcalls where that information may
6316 not be available.
6317 NAMED is nonzero if this argument is a named parameter
6318 (otherwise it is an extra parameter matching an ellipsis).
6319 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6320 *PREGNO records the register number to use if scalar type.
6321 *PPADDING records the amount of padding needed in words. */
6322
6323 static int
6324 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6325 const_tree type, bool named, bool incoming,
6326 int *pregno, int *ppadding)
6327 {
6328 int regbase = (incoming
6329 ? SPARC_INCOMING_INT_ARG_FIRST
6330 : SPARC_OUTGOING_INT_ARG_FIRST);
6331 int slotno = cum->words;
6332 enum mode_class mclass;
6333 int regno;
6334
6335 *ppadding = 0;
6336
6337 if (type && TREE_ADDRESSABLE (type))
6338 return -1;
6339
6340 if (TARGET_ARCH32
6341 && mode == BLKmode
6342 && type
6343 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6344 return -1;
6345
6346 /* For SPARC64, objects requiring 16-byte alignment get it. */
6347 if (TARGET_ARCH64
6348 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6349 && (slotno & 1) != 0)
6350 slotno++, *ppadding = 1;
6351
6352 mclass = GET_MODE_CLASS (mode);
6353 if (type && TREE_CODE (type) == VECTOR_TYPE)
6354 {
6355 /* Vector types deserve special treatment because they are
6356 polymorphic wrt their mode, depending upon whether VIS
6357 instructions are enabled. */
6358 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6359 {
6360 /* The SPARC port defines no floating-point vector modes. */
6361 gcc_assert (mode == BLKmode);
6362 }
6363 else
6364 {
6365 /* Integral vector types should either have a vector
6366 mode or an integral mode, because we are guaranteed
6367 by pass_by_reference that their size is not greater
6368 than 16 bytes and TImode is 16-byte wide. */
6369 gcc_assert (mode != BLKmode);
6370
6371 /* Vector integers are handled like floats according to
6372 the Sun VIS SDK. */
6373 mclass = MODE_FLOAT;
6374 }
6375 }
6376
6377 switch (mclass)
6378 {
6379 case MODE_FLOAT:
6380 case MODE_COMPLEX_FLOAT:
6381 case MODE_VECTOR_INT:
6382 if (TARGET_ARCH64 && TARGET_FPU && named)
6383 {
6384 /* If all arg slots are filled, then must pass on stack. */
6385 if (slotno >= SPARC_FP_ARG_MAX)
6386 return -1;
6387
6388 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6389 /* Arguments filling only one single FP register are
6390 right-justified in the outer double FP register. */
6391 if (GET_MODE_SIZE (mode) <= 4)
6392 regno++;
6393 break;
6394 }
6395 /* fallthrough */
6396
6397 case MODE_INT:
6398 case MODE_COMPLEX_INT:
6399 /* If all arg slots are filled, then must pass on stack. */
6400 if (slotno >= SPARC_INT_ARG_MAX)
6401 return -1;
6402
6403 regno = regbase + slotno;
6404 break;
6405
6406 case MODE_RANDOM:
6407 if (mode == VOIDmode)
6408 /* MODE is VOIDmode when generating the actual call. */
6409 return -1;
6410
6411 gcc_assert (mode == BLKmode);
6412
6413 if (TARGET_ARCH32
6414 || !type
6415 || (TREE_CODE (type) != RECORD_TYPE
6416 && TREE_CODE (type) != VECTOR_TYPE))
6417 {
6418 /* If all arg slots are filled, then must pass on stack. */
6419 if (slotno >= SPARC_INT_ARG_MAX)
6420 return -1;
6421
6422 regno = regbase + slotno;
6423 }
6424 else /* TARGET_ARCH64 && type */
6425 {
6426 /* If all arg slots are filled, then must pass on stack. */
6427 if (slotno >= SPARC_FP_ARG_MAX)
6428 return -1;
6429
6430 if (TREE_CODE (type) == RECORD_TYPE)
6431 {
6432 classify_data_t data = { false, false, false };
6433 traverse_record_type<classify_data_t, classify_registers>
6434 (type, named, &data);
6435
6436 /* If all slots are filled except for the last one, but there
6437 is no FP field in the first word, then must pass on stack. */
6438 if (data.fp_regs
6439 && !data.fp_regs_in_first_word
6440 && slotno >= SPARC_FP_ARG_MAX - 1)
6441 return -1;
6442
6443 /* If there are only int args and all int slots are filled,
6444 then must pass on stack. */
6445 if (!data.fp_regs
6446 && data.int_regs
6447 && slotno >= SPARC_INT_ARG_MAX)
6448 return -1;
6449 }
6450
6451 /* PREGNO isn't set since both int and FP regs can be used. */
6452 return slotno;
6453 }
6454 break;
6455
6456 default :
6457 gcc_unreachable ();
6458 }
6459
6460 *pregno = regno;
6461 return slotno;
6462 }
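
/* For instance, under TARGET_ARCH64 with TARGET_FPU, a named 'double'
   landing in slot 2 yields regno == SPARC_FP_ARG_FIRST + 4, i.e. %d4,
   matching the slot table in the big comment above.  */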
6463
6464 /* Handle recursive register counting/assignment for structure layout.  */
6465
6466 typedef struct
6467 {
6468 int slotno; /* slot number of the argument. */
6469 int regbase; /* regno of the base register. */
6470 int intoffset; /* offset of the first pending integer field. */
6471 int nregs; /* number of words passed in registers. */
6472 bool stack; /* true if part of the argument is on the stack. */
6473 rtx ret; /* return expression being built. */
6474 } assign_data_t;
6475
6476 /* A subroutine of function_arg_record_value. Compute the number of integer
6477 registers to be assigned between PARMS->intoffset and BITPOS. Return
6478 true if at least one integer register is assigned or false otherwise. */
6479
6480 static bool
6481 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6482 {
6483 if (data->intoffset < 0)
6484 return false;
6485
6486 const int intoffset = data->intoffset;
6487 data->intoffset = -1;
6488
6489 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6490 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6491 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6492 int nregs = (endbit - startbit) / BITS_PER_WORD;
6493
6494 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6495 {
6496 nregs = SPARC_INT_ARG_MAX - this_slotno;
6497
6498 /* We need to pass this field (partly) on the stack. */
6499       data->stack = true;
6500 }
6501
6502 if (nregs <= 0)
6503 return false;
6504
6505 *pnregs = nregs;
6506 return true;
6507 }
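
/* E.g. with PARMS->intoffset == 0 and BITPOS == 128 on a 64-bit target,
   the pending span covers bits [0, 128) and *PNREGS is set to 2, provided
   that both corresponding slots are still available.  */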
6508
6509 /* A subroutine of function_arg_record_value. Compute the number and the mode
6510 of the FP registers to be assigned for FIELD. Return true if at least one
6511 FP register is assigned or false otherwise. */
6512
6513 static bool
6514 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6515 assign_data_t *data,
6516 int *pnregs, machine_mode *pmode)
6517 {
6518 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6519 machine_mode mode = DECL_MODE (field);
6520 int nregs, nslots;
6521
6522 /* Slots are counted as words while regs are counted as having the size of
6523 the (inner) mode. */
6524 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6525 {
6526 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6527 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6528 }
6529 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6530 {
6531 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6532 nregs = 2;
6533 }
6534 else
6535 nregs = 1;
6536
6537 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6538
6539 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6540 {
6541 nslots = SPARC_FP_ARG_MAX - this_slotno;
6542 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6543
6544 /* We need to pass this field (partly) on the stack. */
6545       data->stack = true;
6546
6547 if (nregs <= 0)
6548 return false;
6549 }
6550
6551 *pnregs = nregs;
6552 *pmode = mode;
6553 return true;
6554 }
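
/* E.g. a '_Complex float' field gives mode == SFmode and nregs == 2, which
   amounts to a single 8-byte slot on a 64-bit target.  */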
6555
6556 /* A subroutine of function_arg_record_value. Count the number of registers
6557 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6558
6559 inline void
6560 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6561 assign_data_t *data)
6562 {
6563 if (fp)
6564 {
6565 int nregs;
6566 machine_mode mode;
6567
6568 if (compute_int_layout (bitpos, data, &nregs))
6569 data->nregs += nregs;
6570
6571 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6572 data->nregs += nregs;
6573 }
6574 else
6575 {
6576 if (data->intoffset < 0)
6577 data->intoffset = bitpos;
6578 }
6579 }
6580
6581 /* A subroutine of function_arg_record_value. Assign the bits of the
6582 structure between PARMS->intoffset and BITPOS to integer registers. */
6583
6584 static void
6585 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6586 {
6587 int intoffset = data->intoffset;
6588 machine_mode mode;
6589 int nregs;
6590
6591 if (!compute_int_layout (bitpos, data, &nregs))
6592 return;
6593
6594 /* If this is the trailing part of a word, only load that much into
6595 the register. Otherwise load the whole register. Note that in
6596      the latter case we may pick up unwanted bits.  It's not a problem
6597      at the moment, but we may wish to revisit this.  */
6598 if (intoffset % BITS_PER_WORD != 0)
6599 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6600 MODE_INT);
6601 else
6602 mode = word_mode;
6603
6604 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6605 unsigned int regno = data->regbase + this_slotno;
6606 intoffset /= BITS_PER_UNIT;
6607
6608 do
6609 {
6610 rtx reg = gen_rtx_REG (mode, regno);
6611 XVECEXP (data->ret, 0, data->stack + data->nregs)
6612 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6613 data->nregs += 1;
6614 mode = word_mode;
6615 regno += 1;
6616 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6617 }
6618 while (--nregs > 0);
6619 }
6620
6621 /* A subroutine of function_arg_record_value. Assign FIELD at position
6622 BITPOS to FP registers. */
6623
6624 static void
6625 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6626 assign_data_t *data)
6627 {
6628 int nregs;
6629 machine_mode mode;
6630
6631 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6632 return;
6633
6634 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6635 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6636 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6637 regno++;
6638 int pos = bitpos / BITS_PER_UNIT;
6639
6640 do
6641 {
6642 rtx reg = gen_rtx_REG (mode, regno);
6643 XVECEXP (data->ret, 0, data->stack + data->nregs)
6644 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6645 data->nregs += 1;
6646 regno += GET_MODE_SIZE (mode) / 4;
6647 pos += GET_MODE_SIZE (mode);
6648 }
6649 while (--nregs > 0);
6650 }
6651
6652 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6653 the structure between PARMS->intoffset and BITPOS to registers. */
6654
6655 inline void
6656 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6657 assign_data_t *data)
6658 {
6659 if (fp)
6660 {
6661 assign_int_registers (bitpos, data);
6662
6663 assign_fp_registers (field, bitpos, data);
6664 }
6665 else
6666 {
6667 if (data->intoffset < 0)
6668 data->intoffset = bitpos;
6669 }
6670 }
6671
6672 /* Used by function_arg and sparc_function_value_1 to implement the complex
6673 conventions of the 64-bit ABI for passing and returning structures.
6674 Return an expression valid as a return value for the FUNCTION_ARG
6675 and TARGET_FUNCTION_VALUE.
6676
6677 TYPE is the data type of the argument (as a tree).
6678 This is null for libcalls where that information may
6679 not be available.
6680 MODE is the argument's machine mode.
6681 SLOTNO is the index number of the argument's slot in the parameter array.
6682 NAMED is true if this argument is a named parameter
6683 (otherwise it is an extra parameter matching an ellipsis).
6684 REGBASE is the regno of the base register for the parameter array. */
6685
6686 static rtx
6687 function_arg_record_value (const_tree type, machine_mode mode,
6688 int slotno, bool named, int regbase)
6689 {
6690 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6691 assign_data_t data;
6692 int nregs;
6693
6694 data.slotno = slotno;
6695 data.regbase = regbase;
6696
6697 /* Count how many registers we need. */
6698 data.nregs = 0;
6699 data.intoffset = 0;
6700 data.stack = false;
6701 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6702
6703 /* Take into account pending integer fields. */
6704 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6705 data.nregs += nregs;
6706
6707 /* Allocate the vector and handle some annoying special cases. */
6708 nregs = data.nregs;
6709
6710 if (nregs == 0)
6711 {
6712 /* ??? Empty structure has no value? Duh? */
6713 if (typesize <= 0)
6714 {
6715 /* Though there's nothing really to store, return a word register
6716 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6717 	     leads to breakage because there are zero bytes to load.  */
6719 return gen_rtx_REG (mode, regbase);
6720 }
6721
6722 /* ??? C++ has structures with no fields, and yet a size. Give up
6723 for now and pass everything back in integer registers. */
6724 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6725 if (nregs + slotno > SPARC_INT_ARG_MAX)
6726 nregs = SPARC_INT_ARG_MAX - slotno;
6727 }
6728
6729 gcc_assert (nregs > 0);
6730
6731 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6732
6733 /* If at least one field must be passed on the stack, generate
6734 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6735 also be passed on the stack. We can't do much better because the
6736 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6737 of structures for which the fields passed exclusively in registers
6738 are not at the beginning of the structure. */
6739 if (data.stack)
6740 XVECEXP (data.ret, 0, 0)
6741 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6742
6743 /* Assign the registers. */
6744 data.nregs = 0;
6745 data.intoffset = 0;
6746 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6747
6748 /* Assign pending integer fields. */
6749 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6750
6751 gcc_assert (data.nregs == nregs);
6752
6753 return data.ret;
6754 }
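
/* A worked example (a sketch for the 64-bit ABI with the FPU enabled):
   for the first named argument of a prototyped function, declared as

	struct pair { double d; long l; };

   the traversal classifies 'd' as an FP field in slot 0 and 'l' as an
   integer field in slot 1, so the function returns roughly

	(parallel [(expr_list (reg:DF %f0) (const_int 0))
		   (expr_list (reg:DI %o1) (const_int 8))])

   i.e. the double goes in %d0 and the long in the second integer slot.  */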
6755
6756 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6757 of the 64-bit ABI for passing and returning unions.
6758 Return an expression valid as a return value for the FUNCTION_ARG
6759 and TARGET_FUNCTION_VALUE.
6760
6761 SIZE is the size in bytes of the union.
6762 MODE is the argument's machine mode.
6763 REGNO is the hard register the union will be passed in. */
6764
6765 static rtx
6766 function_arg_union_value (int size, machine_mode mode, int slotno,
6767 int regno)
6768 {
6769 int nwords = CEIL_NWORDS (size), i;
6770 rtx regs;
6771
6772 /* See comment in previous function for empty structures. */
6773 if (nwords == 0)
6774 return gen_rtx_REG (mode, regno);
6775
6776 if (slotno == SPARC_INT_ARG_MAX - 1)
6777 nwords = 1;
6778
6779 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6780
6781 for (i = 0; i < nwords; i++)
6782 {
6783 /* Unions are passed left-justified. */
6784 XVECEXP (regs, 0, i)
6785 = gen_rtx_EXPR_LIST (VOIDmode,
6786 gen_rtx_REG (word_mode, regno),
6787 GEN_INT (UNITS_PER_WORD * i));
6788 regno++;
6789 }
6790
6791 return regs;
6792 }
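
/* E.g. a 12-byte union starting in slot 0 on a 64-bit target gives
   nwords == 2, hence a (parallel ...) spanning two consecutive word
   registers with the data left-justified, as noted in the loop above.  */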
6793
6794 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6795 for passing and returning BLKmode vectors.
6796 Return an expression valid as a return value for the FUNCTION_ARG
6797 and TARGET_FUNCTION_VALUE.
6798
6799 SIZE is the size in bytes of the vector.
6800 REGNO is the FP hard register the vector will be passed in. */
6801
6802 static rtx
6803 function_arg_vector_value (int size, int regno)
6804 {
6805 const int nregs = MAX (1, size / 8);
6806 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6807
6808 if (size < 8)
6809 XVECEXP (regs, 0, 0)
6810 = gen_rtx_EXPR_LIST (VOIDmode,
6811 gen_rtx_REG (SImode, regno),
6812 const0_rtx);
6813 else
6814 for (int i = 0; i < nregs; i++)
6815 XVECEXP (regs, 0, i)
6816 = gen_rtx_EXPR_LIST (VOIDmode,
6817 gen_rtx_REG (DImode, regno + 2*i),
6818 GEN_INT (i*8));
6819
6820 return regs;
6821 }
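
/* E.g. an 8-byte BLKmode vector is described by a single (reg:DI ...) at
   offset 0, while a 4-byte one gets a single (reg:SI ...), per the code
   above.  */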
6822
6823 /* Determine where to put an argument to a function.
6824 Value is zero to push the argument on the stack,
6825 or a hard register in which to store the argument.
6826
6827 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6828 the preceding args and about the function being called.
6829 MODE is the argument's machine mode.
6830 TYPE is the data type of the argument (as a tree).
6831 This is null for libcalls where that information may
6832 not be available.
6833 NAMED is true if this argument is a named parameter
6834 (otherwise it is an extra parameter matching an ellipsis).
6835 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6836 TARGET_FUNCTION_INCOMING_ARG. */
6837
6838 static rtx
6839 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6840 const_tree type, bool named, bool incoming)
6841 {
6842 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6843
6844 int regbase = (incoming
6845 ? SPARC_INCOMING_INT_ARG_FIRST
6846 : SPARC_OUTGOING_INT_ARG_FIRST);
6847 int slotno, regno, padding;
6848 enum mode_class mclass = GET_MODE_CLASS (mode);
6849
6850 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6851 &regno, &padding);
6852 if (slotno == -1)
6853 return 0;
6854
6855 /* Vector types deserve special treatment because they are polymorphic wrt
6856 their mode, depending upon whether VIS instructions are enabled. */
6857 if (type && TREE_CODE (type) == VECTOR_TYPE)
6858 {
6859 HOST_WIDE_INT size = int_size_in_bytes (type);
6860 gcc_assert ((TARGET_ARCH32 && size <= 8)
6861 || (TARGET_ARCH64 && size <= 16));
6862
6863 if (mode == BLKmode)
6864 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6865
6866 mclass = MODE_FLOAT;
6867 }
6868
6869 if (TARGET_ARCH32)
6870 return gen_rtx_REG (mode, regno);
6871
6872 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6873 and are promoted to registers if possible. */
6874 if (type && TREE_CODE (type) == RECORD_TYPE)
6875 {
6876 HOST_WIDE_INT size = int_size_in_bytes (type);
6877 gcc_assert (size <= 16);
6878
6879 return function_arg_record_value (type, mode, slotno, named, regbase);
6880 }
6881
6882 /* Unions up to 16 bytes in size are passed in integer registers. */
6883 else if (type && TREE_CODE (type) == UNION_TYPE)
6884 {
6885 HOST_WIDE_INT size = int_size_in_bytes (type);
6886 gcc_assert (size <= 16);
6887
6888 return function_arg_union_value (size, mode, slotno, regno);
6889 }
6890
6891 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6892 but also have the slot allocated for them.
6893 If no prototype is in scope fp values in register slots get passed
6894 in two places, either fp regs and int regs or fp regs and memory. */
6895 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6896 && SPARC_FP_REG_P (regno))
6897 {
6898 rtx reg = gen_rtx_REG (mode, regno);
6899 if (cum->prototype_p || cum->libcall_p)
6900 return reg;
6901 else
6902 {
6903 rtx v0, v1;
6904
6905 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6906 {
6907 int intreg;
6908
6909 	      /* On incoming, we don't need to know that the value
6910 		 is passed in %f0 and %i0; it confuses other parts,
6911 		 causing needless spillage even in the simplest cases.  */
6912 if (incoming)
6913 return reg;
6914
6915 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6916 + (regno - SPARC_FP_ARG_FIRST) / 2);
6917
6918 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6919 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6920 const0_rtx);
6921 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6922 }
6923 else
6924 {
6925 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6926 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6927 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6928 }
6929 }
6930 }
6931
6932 /* All other aggregate types are passed in an integer register in a mode
6933 corresponding to the size of the type. */
6934 else if (type && AGGREGATE_TYPE_P (type))
6935 {
6936 HOST_WIDE_INT size = int_size_in_bytes (type);
6937 gcc_assert (size <= 16);
6938
6939 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6940 }
6941
6942 return gen_rtx_REG (mode, regno);
6943 }
6944
6945 /* Handle the TARGET_FUNCTION_ARG target hook. */
6946
6947 static rtx
6948 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6949 const_tree type, bool named)
6950 {
6951 return sparc_function_arg_1 (cum, mode, type, named, false);
6952 }
6953
6954 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6955
6956 static rtx
6957 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6958 const_tree type, bool named)
6959 {
6960 return sparc_function_arg_1 (cum, mode, type, named, true);
6961 }
6962
6963 /* For sparc64, objects requiring 16-byte alignment are passed that way.  */
6964
6965 static unsigned int
6966 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6967 {
6968 return ((TARGET_ARCH64
6969 && (GET_MODE_ALIGNMENT (mode) == 128
6970 || (type && TYPE_ALIGN (type) == 128)))
6971 ? 128
6972 : PARM_BOUNDARY);
6973 }
6974
6975 /* For an arg passed partly in registers and partly in memory,
6976 this is the number of bytes of registers used.
6977 For args passed entirely in registers or entirely in memory, zero.
6978
6979 Any arg that starts in the first 6 regs but won't entirely fit in them
6980 needs partial registers on v8. On v9, structures with integer
6981 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6982 values that begin in the last fp reg [where "last fp reg" varies with the
6983 mode] will be split between that reg and memory. */
6984
6985 static int
6986 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
6987 tree type, bool named)
6988 {
6989 int slotno, regno, padding;
6990
6991   /* We pass false for incoming here; it doesn't matter.  */
6992 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6993 false, &regno, &padding);
6994
6995 if (slotno == -1)
6996 return 0;
6997
6998 if (TARGET_ARCH32)
6999 {
7000 if ((slotno + (mode == BLKmode
7001 ? CEIL_NWORDS (int_size_in_bytes (type))
7002 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7003 > SPARC_INT_ARG_MAX)
7004 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7005 }
7006 else
7007 {
7008 /* We are guaranteed by pass_by_reference that the size of the
7009 argument is not greater than 16 bytes, so we only need to return
7010 one word if the argument is partially passed in registers. */
7011
7012 if (type && AGGREGATE_TYPE_P (type))
7013 {
7014 int size = int_size_in_bytes (type);
7015
7016 if (size > UNITS_PER_WORD
7017 && (slotno == SPARC_INT_ARG_MAX - 1
7018 || slotno == SPARC_FP_ARG_MAX - 1))
7019 return UNITS_PER_WORD;
7020 }
7021 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7022 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7023 && ! (TARGET_FPU && named)))
7024 {
7025 /* The complex types are passed as packed types. */
7026 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7027 && slotno == SPARC_INT_ARG_MAX - 1)
7028 return UNITS_PER_WORD;
7029 }
7030 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7031 {
7032 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7033 > SPARC_FP_ARG_MAX)
7034 return UNITS_PER_WORD;
7035 }
7036 }
7037
7038 return 0;
7039 }
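
/* E.g. on a 32-bit target, a DImode argument landing in slot 5 needs
   slots 5 and 6, so the code above returns (6 - 5) * UNITS_PER_WORD == 4:
   half of the argument goes in %o5 and the other half on the stack.  */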
7040
7041 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7042 Specify whether to pass the argument by reference. */
7043
7044 static bool
7045 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7046 machine_mode mode, const_tree type,
7047 bool named ATTRIBUTE_UNUSED)
7048 {
7049 if (TARGET_ARCH32)
7050 /* Original SPARC 32-bit ABI says that structures and unions,
7051 and quad-precision floats are passed by reference. For Pascal,
7052 also pass arrays by reference. All other base types are passed
7053 in registers.
7054
7055 Extended ABI (as implemented by the Sun compiler) says that all
7056 complex floats are passed by reference. Pass complex integers
7057 in registers up to 8 bytes. More generally, enforce the 2-word
7058 cap for passing arguments in registers.
7059
7060 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7061 integers are passed like floats of the same size, that is in
7062 registers up to 8 bytes. Pass all vector floats by reference
7063 like structure and unions. */
7064 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7065 || mode == SCmode
7066 /* Catch CDImode, TFmode, DCmode and TCmode. */
7067 || GET_MODE_SIZE (mode) > 8
7068 || (type
7069 && TREE_CODE (type) == VECTOR_TYPE
7070 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7071 else
7072 /* Original SPARC 64-bit ABI says that structures and unions
7073 smaller than 16 bytes are passed in registers, as well as
7074 all other base types.
7075
7076 Extended ABI (as implemented by the Sun compiler) says that
7077 complex floats are passed in registers up to 16 bytes. Pass
7078 all complex integers in registers up to 16 bytes. More generally,
7079 enforce the 2-word cap for passing arguments in registers.
7080
7081 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7082 integers are passed like floats of the same size, that is in
7083 registers (up to 16 bytes). Pass all vector floats like structure
7084 and unions. */
7085 return ((type
7086 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7087 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7088 /* Catch CTImode and TCmode. */
7089 || GET_MODE_SIZE (mode) > 16);
7090 }
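
/* E.g. a 'long double' (TFmode, 16 bytes) is thus passed by reference on
   a 32-bit target (its size exceeds 8 bytes) but directly in FP registers
   on a 64-bit one.  */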
7091
7092 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7093 Update the data in CUM to advance over an argument
7094 of mode MODE and data type TYPE.
7095 TYPE is null for libcalls where that information may not be available. */
7096
7097 static void
7098 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7099 const_tree type, bool named)
7100 {
7101 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7102 int regno, padding;
7103
7104   /* We pass false for incoming here; it doesn't matter.  */
7105 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7106
7107 /* If argument requires leading padding, add it. */
7108 cum->words += padding;
7109
7110 if (TARGET_ARCH32)
7111 cum->words += (mode == BLKmode
7112 ? CEIL_NWORDS (int_size_in_bytes (type))
7113 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7114 else
7115 {
7116 if (type && AGGREGATE_TYPE_P (type))
7117 {
7118 int size = int_size_in_bytes (type);
7119
7120 if (size <= 8)
7121 ++cum->words;
7122 else if (size <= 16)
7123 cum->words += 2;
7124 else /* passed by reference */
7125 ++cum->words;
7126 }
7127 else
7128 cum->words += (mode == BLKmode
7129 ? CEIL_NWORDS (int_size_in_bytes (type))
7130 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7131 }
7132 }
7133
7134 /* Handle the FUNCTION_ARG_PADDING macro.
7135    For the 64-bit ABI, structs are always stored left-shifted in their
7136 argument slot. */
7137
7138 enum direction
7139 function_arg_padding (machine_mode mode, const_tree type)
7140 {
7141 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7142 return upward;
7143
7144 /* Fall back to the default. */
7145 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7146 }
7147
7148 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7149 Specify whether to return the return value in memory. */
7150
7151 static bool
7152 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7153 {
7154 if (TARGET_ARCH32)
7155 /* Original SPARC 32-bit ABI says that structures and unions,
7156 and quad-precision floats are returned in memory. All other
7157 base types are returned in registers.
7158
7159 Extended ABI (as implemented by the Sun compiler) says that
7160 all complex floats are returned in registers (8 FP registers
7161 at most for '_Complex long double'). Return all complex integers
7162 in registers (4 at most for '_Complex long long').
7163
7164 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7165 integers are returned like floats of the same size, that is in
7166 registers up to 8 bytes and in memory otherwise. Return all
7167 vector floats in memory like structure and unions; note that
7168 they always have BLKmode like the latter. */
7169 return (TYPE_MODE (type) == BLKmode
7170 || TYPE_MODE (type) == TFmode
7171 || (TREE_CODE (type) == VECTOR_TYPE
7172 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7173 else
7174 /* Original SPARC 64-bit ABI says that structures and unions
7175 smaller than 32 bytes are returned in registers, as well as
7176 all other base types.
7177
7178 Extended ABI (as implemented by the Sun compiler) says that all
7179 complex floats are returned in registers (8 FP registers at most
7180 for '_Complex long double'). Return all complex integers in
7181 registers (4 at most for '_Complex TItype').
7182
7183 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7184 integers are returned like floats of the same size, that is in
7185 registers. Return all vector floats like structure and unions;
7186 note that they always have BLKmode like the latter. */
7187 return (TYPE_MODE (type) == BLKmode
7188 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7189 }
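
/* E.g. on a 32-bit target a 'long double' (TFmode) result is returned in
   memory, whereas on a 64-bit target only BLKmode values larger than
   32 bytes are.  */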
7190
7191 /* Handle the TARGET_STRUCT_VALUE target hook.
7192 Return where to find the structure return value address. */
7193
7194 static rtx
7195 sparc_struct_value_rtx (tree fndecl, int incoming)
7196 {
7197 if (TARGET_ARCH64)
7198 return 0;
7199 else
7200 {
7201 rtx mem;
7202
7203 if (incoming)
7204 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7205 STRUCT_VALUE_OFFSET));
7206 else
7207 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7208 STRUCT_VALUE_OFFSET));
7209
7210 /* Only follow the SPARC ABI for fixed-size structure returns.
7211 	 Variable-size structure returns are handled per the normal
7212 	 procedures in GCC.  This is enabled by -mstd-struct-return.  */
7213 if (incoming == 2
7214 && sparc_std_struct_return
7215 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7216 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7217 {
7218 /* We must check and adjust the return address, as it is optional
7219 as to whether the return object is really provided. */
7220 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7221 rtx scratch = gen_reg_rtx (SImode);
7222 rtx_code_label *endlab = gen_label_rtx ();
7223
7224 /* Calculate the return object size. */
7225 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7226 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7227 /* Construct a temporary return value. */
7228 rtx temp_val
7229 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7230
7231 /* Implement SPARC 32-bit psABI callee return struct checking:
7232
7233 Fetch the instruction where we will return to and see if
7234 it's an unimp instruction (the most significant 10 bits
7235 will be zero). */
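	  /* Illustrative call-site shape assumed by this check
	     (hypothetical callee "foo" returning a 12-byte struct):

		call	foo
		 nop
		unimp	12	! low 12 bits == size of the struct

	     If the word at the return address + 8 matches the expected
	     size, we return 4 bytes further and skip the unimp insn;
	     otherwise we redirect the struct pointer to a local
	     temporary so that our store cannot clobber anything.  */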
7236 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7237 plus_constant (Pmode,
7238 ret_reg, 8)));
7239 /* Assume the size is valid and pre-adjust. */
7240 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7241 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7242 0, endlab);
7243 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7244 /* Write the address of the memory pointed to by temp_val into
7245 the memory pointed to by mem. */
7246 emit_move_insn (mem, XEXP (temp_val, 0));
7247 emit_label (endlab);
7248 }
7249
7250 return mem;
7251 }
7252 }
7253
7254 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7255 For v9, function return values are subject to the same rules as arguments,
7256 except that up to 32 bytes may be returned in registers. */
7257
7258 static rtx
7259 sparc_function_value_1 (const_tree type, machine_mode mode,
7260 bool outgoing)
7261 {
7262 /* Beware that the two values are swapped here wrt function_arg. */
7263 int regbase = (outgoing
7264 ? SPARC_INCOMING_INT_ARG_FIRST
7265 : SPARC_OUTGOING_INT_ARG_FIRST);
7266 enum mode_class mclass = GET_MODE_CLASS (mode);
7267 int regno;
7268
7269 /* Vector types deserve special treatment because they are polymorphic wrt
7270 their mode, depending upon whether VIS instructions are enabled. */
7271 if (type && TREE_CODE (type) == VECTOR_TYPE)
7272 {
7273 HOST_WIDE_INT size = int_size_in_bytes (type);
7274 gcc_assert ((TARGET_ARCH32 && size <= 8)
7275 || (TARGET_ARCH64 && size <= 32));
7276
7277 if (mode == BLKmode)
7278 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7279
7280 mclass = MODE_FLOAT;
7281 }
7282
7283 if (TARGET_ARCH64 && type)
7284 {
7285 /* Structures up to 32 bytes in size are returned in registers. */
7286 if (TREE_CODE (type) == RECORD_TYPE)
7287 {
7288 HOST_WIDE_INT size = int_size_in_bytes (type);
7289 gcc_assert (size <= 32);
7290
7291 return function_arg_record_value (type, mode, 0, 1, regbase);
7292 }
7293
7294 /* Unions up to 32 bytes in size are returned in integer registers. */
7295 else if (TREE_CODE (type) == UNION_TYPE)
7296 {
7297 HOST_WIDE_INT size = int_size_in_bytes (type);
7298 gcc_assert (size <= 32);
7299
7300 return function_arg_union_value (size, mode, 0, regbase);
7301 }
7302
7303 /* Objects that require it are returned in FP registers. */
7304 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7305 ;
7306
7307 /* All other aggregate types are returned in an integer register in a
7308 mode corresponding to the size of the type. */
7309 else if (AGGREGATE_TYPE_P (type))
7310 {
7311 /* Pick an integer mode matching the size of the whole
7312 aggregate; fall back to the union ABI below if no such mode exists. */
7313 HOST_WIDE_INT size = int_size_in_bytes (type);
7314 gcc_assert (size <= 32);
7315
7316 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7317
7318 /* ??? We probably should have made the same ABI change in
7319 3.4.0 as the one we made for unions. The latter was
7320 required by the SCD though, while the former is not
7321 specified, so we favored compatibility and efficiency.
7322
7323 Now we're stuck for aggregates larger than 16 bytes,
7324 because OImode vanished in the meantime. Let's not
7325 try to be unduly clever, and simply follow the ABI
7326 for unions in that case. */
7327 if (mode == BLKmode)
7328 return function_arg_union_value (size, mode, 0, regbase);
7329 else
7330 mclass = MODE_INT;
7331 }
7332
7333 /* We should only have pointer and integer types at this point. This
7334 must match sparc_promote_function_mode. */
7335 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7336 mode = word_mode;
7337 }
7338
7339 /* We should only have pointer and integer types at this point, except with
7340 -freg-struct-return. This must match sparc_promote_function_mode. */
7341 else if (TARGET_ARCH32
7342 && !(type && AGGREGATE_TYPE_P (type))
7343 && mclass == MODE_INT
7344 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7345 mode = word_mode;
7346
7347 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7348 regno = SPARC_FP_ARG_FIRST;
7349 else
7350 regno = regbase;
7351
7352 return gen_rtx_REG (mode, regno);
7353 }
7354
7355 /* Handle TARGET_FUNCTION_VALUE.
7356 On the SPARC, the value is found in the first "output" register, but the
7357 called function leaves it in the first "input" register. */
7358
7359 static rtx
7360 sparc_function_value (const_tree valtype,
7361 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7362 bool outgoing)
7363 {
7364 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7365 }
7366
7367 /* Handle TARGET_LIBCALL_VALUE. */
7368
7369 static rtx
7370 sparc_libcall_value (machine_mode mode,
7371 const_rtx fun ATTRIBUTE_UNUSED)
7372 {
7373 return sparc_function_value_1 (NULL_TREE, mode, false);
7374 }
7375
7376 /* Handle FUNCTION_VALUE_REGNO_P.
7377 On the SPARC, the first "output" reg is used for integer values, and the
7378 first floating point register is used for floating point values. */
7379
7380 static bool
7381 sparc_function_value_regno_p (const unsigned int regno)
7382 {
7383 return (regno == 8 /* %o0 */ || (TARGET_FPU && regno == 32 /* %f0 */));
7384 }
7385
7386 /* Do what is necessary for `va_start'. We look at the current function
7387 to determine if stdarg or varargs is used and return the address of
7388 the first unnamed parameter. */
7389
7390 static rtx
7391 sparc_builtin_saveregs (void)
7392 {
7393 int first_reg = crtl->args.info.words;
7394 rtx address;
7395 int regno;
7396
7397 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7398 emit_move_insn (gen_rtx_MEM (word_mode,
7399 gen_rtx_PLUS (Pmode,
7400 frame_pointer_rtx,
7401 GEN_INT (FIRST_PARM_OFFSET (0)
7402 + (UNITS_PER_WORD
7403 * regno)))),
7404 gen_rtx_REG (word_mode,
7405 SPARC_INCOMING_INT_ARG_FIRST + regno));
7406
7407 address = gen_rtx_PLUS (Pmode,
7408 frame_pointer_rtx,
7409 GEN_INT (FIRST_PARM_OFFSET (0)
7410 + UNITS_PER_WORD * first_reg));
7411
7412 return address;
7413 }
7414
7415 /* Implement `va_start' for stdarg. */
7416
7417 static void
7418 sparc_va_start (tree valist, rtx nextarg)
7419 {
7420 nextarg = expand_builtin_saveregs ();
7421 std_expand_builtin_va_start (valist, nextarg);
7422 }
7423
7424 /* Implement `va_arg' for stdarg. */
7425
7426 static tree
7427 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7428 gimple_seq *post_p)
7429 {
7430 HOST_WIDE_INT size, rsize, align;
7431 tree addr, incr;
7432 bool indirect;
7433 tree ptrtype = build_pointer_type (type);
7434
7435 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7436 {
7437 indirect = true;
7438 size = rsize = UNITS_PER_WORD;
7439 align = 0;
7440 }
7441 else
7442 {
7443 indirect = false;
7444 size = int_size_in_bytes (type);
7445 rsize = ROUND_UP (size, UNITS_PER_WORD);
7446 align = 0;
7447
7448 if (TARGET_ARCH64)
7449 {
7450 /* For SPARC64, objects requiring 16-byte alignment get it. */
7451 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7452 align = 2 * UNITS_PER_WORD;
7453
7454 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7455 are left-justified in their slots. */
7456 if (AGGREGATE_TYPE_P (type))
7457 {
7458 if (size == 0)
7459 size = rsize = UNITS_PER_WORD;
7460 else
7461 size = rsize;
7462 }
7463 }
7464 }
7465
7466 incr = valist;
7467 if (align)
7468 {
7469 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7470 incr = fold_convert (sizetype, incr);
7471 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7472 size_int (-align));
7473 incr = fold_convert (ptr_type_node, incr);
7474 }
7475
7476 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7477 addr = incr;
7478
7479 if (BYTES_BIG_ENDIAN && size < rsize)
7480 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7481
7482 if (indirect)
7483 {
7484 addr = fold_convert (build_pointer_type (ptrtype), addr);
7485 addr = build_va_arg_indirect_ref (addr);
7486 }
7487
7488 /* If the address isn't aligned properly for the type, we need a temporary.
7489 FIXME: This is inefficient; usually we can do this in registers. */
7490 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7491 {
7492 tree tmp = create_tmp_var (type, "va_arg_tmp");
7493 tree dest_addr = build_fold_addr_expr (tmp);
7494 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7495 3, dest_addr, addr, size_int (rsize));
7496 TREE_ADDRESSABLE (tmp) = 1;
7497 gimplify_and_add (copy, pre_p);
7498 addr = dest_addr;
7499 }
7500
7501 else
7502 addr = fold_convert (ptrtype, addr);
7503
7504 incr = fold_build_pointer_plus_hwi (incr, rsize);
7505 gimplify_assign (valist, incr, post_p);
7506
7507 return build_va_arg_indirect_ref (addr);
7508 }
7509 \f
7510 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7511 Specify whether the vector mode is supported by the hardware. */
7512
7513 static bool
7514 sparc_vector_mode_supported_p (machine_mode mode)
7515 {
7516 return TARGET_VIS && VECTOR_MODE_P (mode);
7517 }
7518 \f
7519 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7520
7521 static machine_mode
7522 sparc_preferred_simd_mode (machine_mode mode)
7523 {
7524 if (TARGET_VIS)
7525 switch (mode)
7526 {
7527 case SImode:
7528 return V2SImode;
7529 case HImode:
7530 return V4HImode;
7531 case QImode:
7532 return V8QImode;
7533
7534 default:;
7535 }
7536
7537 return word_mode;
7538 }
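/* For example, with VIS enabled a loop over "short" data is vectorized
   four elements at a time in V4HImode (one 8-byte VIS register);
   without VIS we return word_mode and the vectorizer finds no vector
   mode to use.  */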
7539 \f
7540 /* Return the string to output an unconditional branch to LABEL, which is
7541 the operand number of the label.
7542
7543 DEST is the destination insn (i.e. the label), INSN is the source. */
7544
7545 const char *
7546 output_ubranch (rtx dest, rtx_insn *insn)
7547 {
7548 static char string[64];
7549 bool v9_form = false;
7550 int delta;
7551 char *p;
7552
7553 /* Even if we are trying to use cbcond for this, evaluate
7554 whether we can use V9 branches as our backup plan. */
7555
7556 delta = 5000000;
7557 if (INSN_ADDRESSES_SET_P ())
7558 delta = (INSN_ADDRESSES (INSN_UID (dest))
7559 - INSN_ADDRESSES (INSN_UID (insn)));
7560
7561 /* Leave some instructions for "slop". */
7562 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7563 v9_form = true;
7564
7565 if (TARGET_CBCOND)
7566 {
7567 bool emit_nop = emit_cbcond_nop (insn);
7568 bool far = false;
7569 const char *rval;
7570
7571 if (delta < -500 || delta > 500)
7572 far = true;
7573
7574 if (far)
7575 {
7576 if (v9_form)
7577 rval = "ba,a,pt\t%%xcc, %l0";
7578 else
7579 rval = "b,a\t%l0";
7580 }
7581 else
7582 {
7583 if (emit_nop)
7584 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7585 else
7586 rval = "cwbe\t%%g0, %%g0, %l0";
7587 }
7588 return rval;
7589 }
7590
7591 if (v9_form)
7592 strcpy (string, "ba%*,pt\t%%xcc, ");
7593 else
7594 strcpy (string, "b%*\t");
7595
7596 p = strchr (string, '\0');
7597 *p++ = '%';
7598 *p++ = 'l';
7599 *p++ = '0';
7600 *p++ = '%';
7601 *p++ = '(';
7602 *p = '\0';
7603
7604 return string;
7605 }
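/* The string built here relies on the punctuation codes handled in
   sparc_print_operand: e.g. "b%*\t%l0%(" may come out as "b,a .LL5"
   (annulled, with an empty delay slot while optimizing) or as "b .LL5"
   followed by an explicit nop.  */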
7606
7607 /* Return the string to output a conditional branch to LABEL, which is
7608 the operand number of the label. OP is the conditional expression.
7609 XEXP (OP, 0) is assumed to be a condition code register (integer or
7610 floating point) and its mode specifies what kind of comparison we made.
7611
7612 DEST is the destination insn (i.e. the label), INSN is the source.
7613
7614 REVERSED is nonzero if we should reverse the sense of the comparison.
7615
7616 ANNUL is nonzero if we should generate an annulling branch. */
7617
7618 const char *
7619 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7620 rtx_insn *insn)
7621 {
7622 static char string[64];
7623 enum rtx_code code = GET_CODE (op);
7624 rtx cc_reg = XEXP (op, 0);
7625 machine_mode mode = GET_MODE (cc_reg);
7626 const char *labelno, *branch;
7627 int spaces = 8, far;
7628 char *p;
7629
7630 /* v9 branches are limited to +-1MB. If it is too far away,
7631 change
7632
7633 bne,pt %xcc, .LC30
7634
7635 to
7636
7637 be,pn %xcc, .+12
7638 nop
7639 ba .LC30
7640
7641 and
7642
7643 fbne,a,pn %fcc2, .LC29
7644
7645 to
7646
7647 fbe,pt %fcc2, .+16
7648 nop
7649 ba .LC29 */
7650
7651 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7652 if (reversed ^ far)
7653 {
7654 /* Reversal of FP compares needs care -- an ordered compare
7655 becomes an unordered compare and vice versa. */
7656 if (mode == CCFPmode || mode == CCFPEmode)
7657 code = reverse_condition_maybe_unordered (code);
7658 else
7659 code = reverse_condition (code);
7660 }
7661
7662 /* Start by writing the branch condition. */
7663 if (mode == CCFPmode || mode == CCFPEmode)
7664 {
7665 switch (code)
7666 {
7667 case NE:
7668 branch = "fbne";
7669 break;
7670 case EQ:
7671 branch = "fbe";
7672 break;
7673 case GE:
7674 branch = "fbge";
7675 break;
7676 case GT:
7677 branch = "fbg";
7678 break;
7679 case LE:
7680 branch = "fble";
7681 break;
7682 case LT:
7683 branch = "fbl";
7684 break;
7685 case UNORDERED:
7686 branch = "fbu";
7687 break;
7688 case ORDERED:
7689 branch = "fbo";
7690 break;
7691 case UNGT:
7692 branch = "fbug";
7693 break;
7694 case UNLT:
7695 branch = "fbul";
7696 break;
7697 case UNEQ:
7698 branch = "fbue";
7699 break;
7700 case UNGE:
7701 branch = "fbuge";
7702 break;
7703 case UNLE:
7704 branch = "fbule";
7705 break;
7706 case LTGT:
7707 branch = "fblg";
7708 break;
7709
7710 default:
7711 gcc_unreachable ();
7712 }
7713
7714 /* ??? !v9: FP branches cannot be preceded by another floating point
7715 insn. Because there is currently no concept of pre-delay slots,
7716 we can fix this only by always emitting a nop before a floating
7717 point branch. */
7718
7719 string[0] = '\0';
7720 if (! TARGET_V9)
7721 strcpy (string, "nop\n\t");
7722 strcat (string, branch);
7723 }
7724 else
7725 {
7726 switch (code)
7727 {
7728 case NE:
7729 branch = "bne";
7730 break;
7731 case EQ:
7732 branch = "be";
7733 break;
7734 case GE:
7735 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7736 branch = "bpos";
7737 else
7738 branch = "bge";
7739 break;
7740 case GT:
7741 branch = "bg";
7742 break;
7743 case LE:
7744 branch = "ble";
7745 break;
7746 case LT:
7747 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7748 branch = "bneg";
7749 else
7750 branch = "bl";
7751 break;
7752 case GEU:
7753 branch = "bgeu";
7754 break;
7755 case GTU:
7756 branch = "bgu";
7757 break;
7758 case LEU:
7759 branch = "bleu";
7760 break;
7761 case LTU:
7762 branch = "blu";
7763 break;
7764
7765 default:
7766 gcc_unreachable ();
7767 }
7768 strcpy (string, branch);
7769 }
7770 spaces -= strlen (branch);
7771 p = strchr (string, '\0');
7772
7773 /* Now add the annulling, the label, and a possible noop. */
7774 if (annul && ! far)
7775 {
7776 strcpy (p, ",a");
7777 p += 2;
7778 spaces -= 2;
7779 }
7780
7781 if (TARGET_V9)
7782 {
7783 rtx note;
7784 int v8 = 0;
7785
7786 if (! far && insn && INSN_ADDRESSES_SET_P ())
7787 {
7788 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7789 - INSN_ADDRESSES (INSN_UID (insn)));
7790 /* Leave some instructions for "slop". */
7791 if (delta < -260000 || delta >= 260000)
7792 v8 = 1;
7793 }
7794
7795 if (mode == CCFPmode || mode == CCFPEmode)
7796 {
7797 static char v9_fcc_labelno[] = "%%fccX, ";
7798 /* Set the char indicating the number of the fcc reg to use. */
7799 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7800 labelno = v9_fcc_labelno;
7801 if (v8)
7802 {
7803 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7804 labelno = "";
7805 }
7806 }
7807 else if (mode == CCXmode || mode == CCX_NOOVmode)
7808 {
7809 labelno = "%%xcc, ";
7810 gcc_assert (! v8);
7811 }
7812 else
7813 {
7814 labelno = "%%icc, ";
7815 if (v8)
7816 labelno = "";
7817 }
7818
7819 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7820 {
7821 strcpy (p,
7822 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7823 ? ",pt" : ",pn");
7824 p += 3;
7825 spaces -= 3;
7826 }
7827 }
7828 else
7829 labelno = "";
7830
7831 if (spaces > 0)
7832 *p++ = '\t';
7833 else
7834 *p++ = ' ';
7835 strcpy (p, labelno);
7836 p = strchr (p, '\0');
7837 if (far)
7838 {
7839 strcpy (p, ".+12\n\t nop\n\tb\t");
7840 /* Skip the next insn if requested or
7841 if we know that it will be a nop. */
7842 if (annul || ! final_sequence)
7843 p[3] = '6';
7844 p += 14;
7845 }
7846 *p++ = '%';
7847 *p++ = 'l';
7848 *p++ = label + '0';
7849 *p++ = '%';
7850 *p++ = '#';
7851 *p = '\0';
7852
7853 return string;
7854 }
7855
7856 /* Emit a library call comparison between floating point X and Y.
7857 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7858 Return the new operator to be used in the comparison sequence.
7859
7860 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7861 values as arguments instead of the TFmode registers themselves;
7862 that's why we cannot call emit_float_lib_cmp. */
7863
7864 rtx
7865 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7866 {
7867 const char *qpfunc;
7868 rtx slot0, slot1, result, tem, tem2, libfunc;
7869 machine_mode mode;
7870 enum rtx_code new_comparison;
7871
7872 switch (comparison)
7873 {
7874 case EQ:
7875 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7876 break;
7877
7878 case NE:
7879 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7880 break;
7881
7882 case GT:
7883 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7884 break;
7885
7886 case GE:
7887 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7888 break;
7889
7890 case LT:
7891 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7892 break;
7893
7894 case LE:
7895 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7896 break;
7897
7898 case ORDERED:
7899 case UNORDERED:
7900 case UNGT:
7901 case UNLT:
7902 case UNEQ:
7903 case UNGE:
7904 case UNLE:
7905 case LTGT:
7906 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7907 break;
7908
7909 default:
7910 gcc_unreachable ();
7911 }
7912
7913 if (TARGET_ARCH64)
7914 {
7915 if (MEM_P (x))
7916 {
7917 tree expr = MEM_EXPR (x);
7918 if (expr)
7919 mark_addressable (expr);
7920 slot0 = x;
7921 }
7922 else
7923 {
7924 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7925 emit_move_insn (slot0, x);
7926 }
7927
7928 if (MEM_P (y))
7929 {
7930 tree expr = MEM_EXPR (y);
7931 if (expr)
7932 mark_addressable (expr);
7933 slot1 = y;
7934 }
7935 else
7936 {
7937 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7938 emit_move_insn (slot1, y);
7939 }
7940
7941 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7942 emit_library_call (libfunc, LCT_NORMAL,
7943 DImode, 2,
7944 XEXP (slot0, 0), Pmode,
7945 XEXP (slot1, 0), Pmode);
7946 mode = DImode;
7947 }
7948 else
7949 {
7950 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7951 emit_library_call (libfunc, LCT_NORMAL,
7952 SImode, 2,
7953 x, TFmode, y, TFmode);
7954 mode = SImode;
7955 }
7956
7957
7958 /* Immediately move the result of the libcall into a pseudo
7959 register so reload doesn't clobber the value if it needs
7960 the return register for a spill reg. */
7961 result = gen_reg_rtx (mode);
7962 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7963
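  /* Decode the value returned by _Q_cmp/_Qp_cmp.  The assumed encoding
     (per the SPARC ABI software floating-point libcalls) is:
	0 equal, 1 less, 2 greater, 3 unordered.
     So e.g. UNLT tests the low bit, which is set for 1 and 3 (less or
     unordered), and UNEQ adds 1 and tests bit 1, which ends up clear
     exactly for results 0 and 3 (equal or unordered).  */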
7964 switch (comparison)
7965 {
7966 default:
7967 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7968 case ORDERED:
7969 case UNORDERED:
7970 new_comparison = (comparison == UNORDERED ? EQ : NE);
7971 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT (3));
7972 case UNGT:
7973 case UNGE:
7974 new_comparison = (comparison == UNGT ? GT : NE);
7975 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7976 case UNLE:
7977 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7978 case UNLT:
7979 tem = gen_reg_rtx (mode);
7980 if (TARGET_ARCH32)
7981 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7982 else
7983 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7984 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7985 case UNEQ:
7986 case LTGT:
7987 tem = gen_reg_rtx (mode);
7988 if (TARGET_ARCH32)
7989 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7990 else
7991 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7992 tem2 = gen_reg_rtx (mode);
7993 if (TARGET_ARCH32)
7994 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7995 else
7996 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7997 new_comparison = (comparison == UNEQ ? EQ : NE);
7998 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7999 }
8000
8001 gcc_unreachable ();
8002 }
8003
8004 /* Generate an unsigned DImode to FP conversion. This is the same code
8005 optabs would emit if we didn't have TFmode patterns. */
8006
8007 void
8008 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8009 {
8010 rtx i0, i1, f0, in, out;
8011
8012 out = operands[0];
8013 in = force_reg (DImode, operands[1]);
8014 rtx_code_label *neglab = gen_label_rtx ();
8015 rtx_code_label *donelab = gen_label_rtx ();
8016 i0 = gen_reg_rtx (DImode);
8017 i1 = gen_reg_rtx (DImode);
8018 f0 = gen_reg_rtx (mode);
8019
8020 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8021
8022 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8023 emit_jump_insn (gen_jump (donelab));
8024 emit_barrier ();
8025
8026 emit_label (neglab);
8027
8028 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8029 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8030 emit_insn (gen_iordi3 (i0, i0, i1));
8031 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8032 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8033
8034 emit_label (donelab);
8035 }
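/* The negative path above computes (in >> 1) | (in & 1), converts that
   to FP and doubles it with f0 + f0: folding the low bit into the
   halved value keeps the final result correctly rounded even though
   the shift drops a bit.  */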
8036
8037 /* Generate an FP to unsigned DImode conversion. This is the same code
8038 optabs would emit if we didn't have TFmode patterns. */
8039
8040 void
8041 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8042 {
8043 rtx i0, i1, f0, in, out, limit;
8044
8045 out = operands[0];
8046 in = force_reg (mode, operands[1]);
8047 rtx_code_label *neglab = gen_label_rtx ();
8048 rtx_code_label *donelab = gen_label_rtx ();
8049 i0 = gen_reg_rtx (DImode);
8050 i1 = gen_reg_rtx (DImode);
8051 limit = gen_reg_rtx (mode);
8052 f0 = gen_reg_rtx (mode);
8053
8054 emit_move_insn (limit,
8055 const_double_from_real_value (
8056 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8057 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8058
8059 emit_insn (gen_rtx_SET (out,
8060 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8061 emit_jump_insn (gen_jump (donelab));
8062 emit_barrier ();
8063
8064 emit_label (neglab);
8065
8066 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8067 emit_insn (gen_rtx_SET (i0,
8068 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8069 emit_insn (gen_movdi (i1, const1_rtx));
8070 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8071 emit_insn (gen_xordi3 (out, i0, i1));
8072
8073 emit_label (donelab);
8074 }
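/* Values of at least 2^63 take the neglab path: subtract 2^63 in FP,
   do a signed conversion, then XOR the sign bit (1 << 63) back into
   the integer result -- the usual unsigned-fix idiom.  */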
8075
8076 /* Return the string to output a compare and branch instruction to DEST.
8077 DEST is the destination insn (i.e. the label), INSN is the source,
8078 and OP is the conditional expression. */
8079
8080 const char *
8081 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8082 {
8083 machine_mode mode = GET_MODE (XEXP (op, 0));
8084 enum rtx_code code = GET_CODE (op);
8085 const char *cond_str, *tmpl;
8086 int far, emit_nop, len;
8087 static char string[64];
8088 char size_char;
8089
8090 /* Compare and Branch is limited to +-2KB. If it is too far away,
8091 change
8092
8093 cxbne X, Y, .LC30
8094
8095 to
8096
8097 cxbe X, Y, .+16
8098 nop
8099 ba,pt %xcc, .LC30
8100 nop */
8101
8102 len = get_attr_length (insn);
8103
8104 far = len == 4;
8105 emit_nop = len == 2;
8106
8107 if (far)
8108 code = reverse_condition (code);
8109
8110 size_char = ((mode == SImode) ? 'w' : 'x');
8111
8112 switch (code)
8113 {
8114 case NE:
8115 cond_str = "ne";
8116 break;
8117
8118 case EQ:
8119 cond_str = "e";
8120 break;
8121
8122 case GE:
8123 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8124 cond_str = "pos";
8125 else
8126 cond_str = "ge";
8127 break;
8128
8129 case GT:
8130 cond_str = "g";
8131 break;
8132
8133 case LE:
8134 cond_str = "le";
8135 break;
8136
8137 case LT:
8138 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8139 cond_str = "neg";
8140 else
8141 cond_str = "l";
8142 break;
8143
8144 case GEU:
8145 cond_str = "cc";
8146 break;
8147
8148 case GTU:
8149 cond_str = "gu";
8150 break;
8151
8152 case LEU:
8153 cond_str = "leu";
8154 break;
8155
8156 case LTU:
8157 cond_str = "cs";
8158 break;
8159
8160 default:
8161 gcc_unreachable ();
8162 }
8163
8164 if (far)
8165 {
8166 int veryfar = 1, delta;
8167
8168 if (INSN_ADDRESSES_SET_P ())
8169 {
8170 delta = (INSN_ADDRESSES (INSN_UID (dest))
8171 - INSN_ADDRESSES (INSN_UID (insn)));
8172 /* Leave some instructions for "slop". */
8173 if (delta >= -260000 && delta < 260000)
8174 veryfar = 0;
8175 }
8176
8177 if (veryfar)
8178 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8179 else
8180 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8181 }
8182 else
8183 {
8184 if (emit_nop)
8185 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8186 else
8187 tmpl = "c%cb%s\t%%1, %%2, %%3";
8188 }
8189
8190 snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8191
8192 return string;
8193 }
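/* For example, an in-range SImode NE comparison yields the template
   "cwbne\t%1, %2, %3", which final might render as
   "cwbne %o0, %o1, .LL42".  */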
8194
8195 /* Return the string to output a conditional branch to LABEL, testing
8196 register REG. LABEL is the operand number of the label; REG is the
8197 operand number of the reg. OP is the conditional expression. The mode
8198 of REG says what kind of comparison we made.
8199
8200 DEST is the destination insn (i.e. the label), INSN is the source.
8201
8202 REVERSED is nonzero if we should reverse the sense of the comparison.
8203
8204 ANNUL is nonzero if we should generate an annulling branch. */
8205
8206 const char *
8207 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8208 int annul, rtx_insn *insn)
8209 {
8210 static char string[64];
8211 enum rtx_code code = GET_CODE (op);
8212 machine_mode mode = GET_MODE (XEXP (op, 0));
8213 rtx note;
8214 int far;
8215 char *p;
8216
8217 /* Branches on a register are limited to +-128KB. If it is too far away,
8218 change
8219
8220 brnz,pt %g1, .LC30
8221
8222 to
8223
8224 brz,pn %g1, .+12
8225 nop
8226 ba,pt %xcc, .LC30
8227
8228 and
8229
8230 brgez,a,pn %o1, .LC29
8231
8232 to
8233
8234 brlz,pt %o1, .+16
8235 nop
8236 ba,pt %xcc, .LC29 */
8237
8238 far = get_attr_length (insn) >= 3;
8239
8240 /* These are integer branches on a register, so we can always just reverse the code. */
8241 if (reversed ^ far)
8242 code = reverse_condition (code);
8243
8244 /* Only 64 bit versions of these instructions exist. */
8245 gcc_assert (mode == DImode);
8246
8247 /* Start by writing the branch condition. */
8248
8249 switch (code)
8250 {
8251 case NE:
8252 strcpy (string, "brnz");
8253 break;
8254
8255 case EQ:
8256 strcpy (string, "brz");
8257 break;
8258
8259 case GE:
8260 strcpy (string, "brgez");
8261 break;
8262
8263 case LT:
8264 strcpy (string, "brlz");
8265 break;
8266
8267 case LE:
8268 strcpy (string, "brlez");
8269 break;
8270
8271 case GT:
8272 strcpy (string, "brgz");
8273 break;
8274
8275 default:
8276 gcc_unreachable ();
8277 }
8278
8279 p = strchr (string, '\0');
8280
8281 /* Now add the annulling, reg, label, and nop. */
8282 if (annul && ! far)
8283 {
8284 strcpy (p, ",a");
8285 p += 2;
8286 }
8287
8288 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8289 {
8290 strcpy (p,
8291 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8292 ? ",pt" : ",pn");
8293 p += 3;
8294 }
8295
8296 *p = p < string + 8 ? '\t' : ' ';
8297 p++;
8298 *p++ = '%';
8299 *p++ = '0' + reg;
8300 *p++ = ',';
8301 *p++ = ' ';
8302 if (far)
8303 {
8304 int veryfar = 1, delta;
8305
8306 if (INSN_ADDRESSES_SET_P ())
8307 {
8308 delta = (INSN_ADDRESSES (INSN_UID (dest))
8309 - INSN_ADDRESSES (INSN_UID (insn)));
8310 /* Leave some instructions for "slop". */
8311 if (delta >= -260000 && delta < 260000)
8312 veryfar = 0;
8313 }
8314
8315 strcpy (p, ".+12\n\t nop\n\t");
8316 /* Skip the next insn if requested or
8317 if we know that it will be a nop. */
8318 if (annul || ! final_sequence)
8319 p[3] = '6';
8320 p += 12;
8321 if (veryfar)
8322 {
8323 strcpy (p, "b\t");
8324 p += 2;
8325 }
8326 else
8327 {
8328 strcpy (p, "ba,pt\t%%xcc, ");
8329 p += 13;
8330 }
8331 }
8332 *p++ = '%';
8333 *p++ = 'l';
8334 *p++ = '0' + label;
8335 *p++ = '%';
8336 *p++ = '#';
8337 *p = '\0';
8338
8339 return string;
8340 }
8341
8342 /* Return 1 if any of the registers of the instruction are %l[0-7] or
8343 %o[0-7]. Such instructions cannot be used in the delay slot of a
8344 return insn on V9. If TEST is 0, also rename all %i[0-7] registers
8345 to their %o[0-7] counterparts. */
8346
8347 static int
8348 epilogue_renumber (register rtx *where, int test)
8349 {
8350 register const char *fmt;
8351 register int i;
8352 register enum rtx_code code;
8353
8354 if (*where == 0)
8355 return 0;
8356
8357 code = GET_CODE (*where);
8358
8359 switch (code)
8360 {
8361 case REG:
8362 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8363 return 1;
8364 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8365 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
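      /* Fall through: like the leaf codes below, there is nothing more
	 to check for the remaining registers.  */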
8366 case SCRATCH:
8367 case CC0:
8368 case PC:
8369 case CONST_INT:
8370 case CONST_WIDE_INT:
8371 case CONST_DOUBLE:
8372 return 0;
8373
8374 /* Do not replace the frame pointer with the stack pointer because
8375 it can cause the delayed instruction to load below the stack.
8376 This occurs when instructions like:
8377
8378 (set (reg/i:SI 24 %i0)
8379 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8380 (const_int -20 [0xffffffec])) 0))
8381
8382 are in the return delay slot. */
8383 case PLUS:
8384 if (GET_CODE (XEXP (*where, 0)) == REG
8385 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8386 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8387 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8388 return 1;
8389 break;
8390
8391 case MEM:
8392 if (SPARC_STACK_BIAS
8393 && GET_CODE (XEXP (*where, 0)) == REG
8394 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8395 return 1;
8396 break;
8397
8398 default:
8399 break;
8400 }
8401
8402 fmt = GET_RTX_FORMAT (code);
8403
8404 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8405 {
8406 if (fmt[i] == 'E')
8407 {
8408 register int j;
8409 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8410 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8411 return 1;
8412 }
8413 else if (fmt[i] == 'e'
8414 && epilogue_renumber (&(XEXP (*where, i)), test))
8415 return 1;
8416 }
8417 return 0;
8418 }
8419 \f
8420 /* Leaf functions and non-leaf functions have different needs. */
8421
8422 static const int
8423 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8424
8425 static const int
8426 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8427
8428 static const int *const reg_alloc_orders[] = {
8429 reg_leaf_alloc_order,
8430 reg_nonleaf_alloc_order};
8431
8432 void
8433 order_regs_for_local_alloc (void)
8434 {
8435 static int last_order_nonleaf = 1;
8436
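  /* Hard register 15 is %o7, which call instructions clobber with the
     return address, so its liveness is a cheap proxy for "this
     function is not a leaf".  */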
8437 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8438 {
8439 last_order_nonleaf = !last_order_nonleaf;
8440 memcpy ((char *) reg_alloc_order,
8441 (const char *) reg_alloc_orders[last_order_nonleaf],
8442 FIRST_PSEUDO_REGISTER * sizeof (int));
8443 }
8444 }
8445 \f
8446 /* Return 1 if REG and MEM are legitimate enough to allow the various
8447 mem<-->reg splits to be run. */
8448
8449 int
8450 sparc_splitdi_legitimate (rtx reg, rtx mem)
8451 {
8452 /* Punt if we are here by mistake. */
8453 gcc_assert (reload_completed);
8454
8455 /* We must have an offsettable memory reference. */
8456 if (! offsettable_memref_p (mem))
8457 return 0;
8458
8459 /* If we have legitimate args for ldd/std, we do not want
8460 the split to happen. */
8461 if ((REGNO (reg) % 2) == 0
8462 && mem_min_alignment (mem, 8))
8463 return 0;
8464
8465 /* Success. */
8466 return 1;
8467 }
8468
8469 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8470
8471 int
8472 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8473 {
8474 int regno1, regno2;
8475
8476 if (GET_CODE (reg1) == SUBREG)
8477 reg1 = SUBREG_REG (reg1);
8478 if (GET_CODE (reg1) != REG)
8479 return 0;
8480 regno1 = REGNO (reg1);
8481
8482 if (GET_CODE (reg2) == SUBREG)
8483 reg2 = SUBREG_REG (reg2);
8484 if (GET_CODE (reg2) != REG)
8485 return 0;
8486 regno2 = REGNO (reg2);
8487
8488 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8489 return 1;
8490
8491 if (TARGET_VIS3)
8492 {
8493 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8494 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8495 return 1;
8496 }
8497
8498 return 0;
8499 }
8500
8501 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8502 This makes them candidates for using ldd and std insns.
8503
8504 Note reg1 and reg2 *must* be hard registers. */
8505
8506 int
8507 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8508 {
8509 /* We might have been passed a SUBREG. */
8510 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8511 return 0;
8512
8513 if (REGNO (reg1) % 2 != 0)
8514 return 0;
8515
8516 /* Integer ldd is deprecated in SPARC V9.  */
8517 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8518 return 0;
8519
8520 return (REGNO (reg1) == REGNO (reg2) - 1);
8521 }
8522
8523 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8524 an ldd or std insn.
8525
8526 This can only happen when addr1 and addr2, the addresses in mem1
8527 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8528 addr1 must also be aligned on a 64-bit boundary.
8529
8530 Also, if dependent_reg_rtx is not null, it should not be used to
8531 compute the address for mem1, i.e. we cannot optimize a sequence
8532 like:
8533 ld [%o0], %o0
8534 ld [%o0 + 4], %o1
8535 to
8536 ldd [%o0], %o0
8537 nor:
8538 ld [%g3 + 4], %g3
8539 ld [%g3], %g2
8540 to
8541 ldd [%g3], %g2
8542
8543 But, note that the transformation from:
8544 ld [%g2 + 4], %g3
8545 ld [%g2], %g2
8546 to
8547 ldd [%g2], %g2
8548 is perfectly fine. Thus, the peephole2 patterns always pass us
8549 the destination register of the first load, never the second one.
8550
8551 For stores we don't have a similar problem, so dependent_reg_rtx is
8552 NULL_RTX. */
8553
8554 int
8555 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8556 {
8557 rtx addr1, addr2;
8558 unsigned int reg1;
8559 HOST_WIDE_INT offset1;
8560
8561 /* The mems cannot be volatile. */
8562 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8563 return 0;
8564
8565 /* MEM1 should be aligned on a 64-bit boundary. */
8566 if (MEM_ALIGN (mem1) < 64)
8567 return 0;
8568
8569 addr1 = XEXP (mem1, 0);
8570 addr2 = XEXP (mem2, 0);
8571
8572 /* Extract a register number and offset (if used) from the first addr. */
8573 if (GET_CODE (addr1) == PLUS)
8574 {
8575 /* If not a REG, return zero. */
8576 if (GET_CODE (XEXP (addr1, 0)) != REG)
8577 return 0;
8578 else
8579 {
8580 reg1 = REGNO (XEXP (addr1, 0));
8581 /* The offset must be constant! */
8582 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8583 return 0;
8584 offset1 = INTVAL (XEXP (addr1, 1));
8585 }
8586 }
8587 else if (GET_CODE (addr1) != REG)
8588 return 0;
8589 else
8590 {
8591 reg1 = REGNO (addr1);
8592 /* This was a simple (mem (reg)) expression. Offset is 0. */
8593 offset1 = 0;
8594 }
8595
8596 /* Make sure the second address has the form (plus (reg) (const_int)). */
8597 if (GET_CODE (addr2) != PLUS)
8598 return 0;
8599
8600 if (GET_CODE (XEXP (addr2, 0)) != REG
8601 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8602 return 0;
8603
8604 if (reg1 != REGNO (XEXP (addr2, 0)))
8605 return 0;
8606
8607 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8608 return 0;
8609
8610 /* The first offset must be evenly divisible by 8 to ensure the
8611 address is 64-bit aligned. */
8612 if (offset1 % 8 != 0)
8613 return 0;
8614
8615 /* The offset for the second addr must be 4 more than the first addr. */
8616 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8617 return 0;
8618
8619 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8620 instructions. */
8621 return 1;
8622 }
8623
8624 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8625
8626 rtx
8627 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8628 {
8629 rtx x = widen_memory_access (mem1, mode, 0);
8630 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8631 return x;
8632 }
8633
8634 /* Return 1 if reg is a pseudo, or is the first register in
8635 a hard register pair. This makes it suitable for use in
8636 ldd and std insns. */
8637
8638 int
8639 register_ok_for_ldd (rtx reg)
8640 {
8641 /* We might have been passed a SUBREG. */
8642 if (!REG_P (reg))
8643 return 0;
8644
8645 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8646 return (REGNO (reg) % 2 == 0);
8647
8648 return 1;
8649 }
8650
8651 /* Return 1 if OP, a MEM, has an address which is known to be
8652 aligned to an 8-byte boundary. */
8653
8654 int
8655 memory_ok_for_ldd (rtx op)
8656 {
8657 /* In 64-bit mode, we assume that the address is word-aligned. */
8658 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8659 return 0;
8660
8661 if (! can_create_pseudo_p ()
8662 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8663 return 0;
8664
8665 return 1;
8666 }
8667 \f
8668 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8669
8670 static bool
8671 sparc_print_operand_punct_valid_p (unsigned char code)
8672 {
8673 if (code == '#'
8674 || code == '*'
8675 || code == '('
8676 || code == ')'
8677 || code == '_'
8678 || code == '&')
8679 return true;
8680
8681 return false;
8682 }
8683
8684 /* Implement TARGET_PRINT_OPERAND.
8685 Print operand X (an rtx) in assembler syntax to file FILE.
8686 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8687 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8688
8689 static void
8690 sparc_print_operand (FILE *file, rtx x, int code)
8691 {
8692 switch (code)
8693 {
8694 case '#':
8695 /* Output an insn in a delay slot. */
8696 if (final_sequence)
8697 sparc_indent_opcode = 1;
8698 else
8699 fputs ("\n\t nop", file);
8700 return;
8701 case '*':
8702 /* Output an annul flag if there's nothing for the delay slot and we
8703 are optimizing. This is always used with '(' below.
8704 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8705 this is a dbx bug. So, we only do this when optimizing.
8706 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8707 Always emit a nop in case the next instruction is a branch. */
8708 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8709 fputs (",a", file);
8710 return;
8711 case '(':
8712 /* Output a 'nop' if there's nothing for the delay slot and we are
8713 not optimizing. This is always used with '*' above. */
8714 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8715 fputs ("\n\t nop", file);
8716 else if (final_sequence)
8717 sparc_indent_opcode = 1;
8718 return;
8719 case ')':
8720 /* Output the right displacement from the saved PC on function return.
8721 The caller may have placed an "unimp" insn immediately after the call
8722 so we have to account for it. This insn is used in the 32-bit ABI
8723 when calling a function that returns a non zero-sized structure. The
8724 64-bit ABI doesn't have it. Be careful to have this test be the same
8725 as that for the call. The exception is when sparc_std_struct_return
8726 is enabled, the psABI is followed exactly and the adjustment is made
8727 by the code in sparc_struct_value_rtx. The call emitted is the same
8728 when sparc_std_struct_return is enabled. */
8729 if (!TARGET_ARCH64
8730 && cfun->returns_struct
8731 && !sparc_std_struct_return
8732 && DECL_SIZE (DECL_RESULT (current_function_decl))
8733 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8734 == INTEGER_CST
8735 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8736 fputs ("12", file);
8737 else
8738 fputc ('8', file);
8739 return;
8740 case '_':
8741 /* Output the Embedded Medium/Anywhere code model base register. */
8742 fputs (EMBMEDANY_BASE_REG, file);
8743 return;
8744 case '&':
8745 /* Print some local dynamic TLS name. */
8746 if (const char *name = get_some_local_dynamic_name ())
8747 assemble_name (file, name);
8748 else
8749 output_operand_lossage ("'%%&' used without any "
8750 "local dynamic TLS references");
8751 return;
8752
8753 case 'Y':
8754 /* Adjust the operand to take into account a RESTORE operation. */
8755 if (GET_CODE (x) == CONST_INT)
8756 break;
8757 else if (GET_CODE (x) != REG)
8758 output_operand_lossage ("invalid %%Y operand");
8759 else if (REGNO (x) < 8)
8760 fputs (reg_names[REGNO (x)], file);
8761 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8762 fputs (reg_names[REGNO (x)-16], file);
8763 else
8764 output_operand_lossage ("invalid %%Y operand");
8765 return;
8766 case 'L':
8767 /* Print out the low order register name of a register pair. */
8768 if (WORDS_BIG_ENDIAN)
8769 fputs (reg_names[REGNO (x)+1], file);
8770 else
8771 fputs (reg_names[REGNO (x)], file);
8772 return;
8773 case 'H':
8774 /* Print out the high order register name of a register pair. */
8775 if (WORDS_BIG_ENDIAN)
8776 fputs (reg_names[REGNO (x)], file);
8777 else
8778 fputs (reg_names[REGNO (x)+1], file);
8779 return;
8780 case 'R':
8781 /* Print out the second register name of a register pair or quad.
8782 I.e., R (%o0) => %o1. */
8783 fputs (reg_names[REGNO (x)+1], file);
8784 return;
8785 case 'S':
8786 /* Print out the third register name of a register quad.
8787 I.e., S (%o0) => %o2. */
8788 fputs (reg_names[REGNO (x)+2], file);
8789 return;
8790 case 'T':
8791 /* Print out the fourth register name of a register quad.
8792 I.e., T (%o0) => %o3. */
8793 fputs (reg_names[REGNO (x)+3], file);
8794 return;
8795 case 'x':
8796 /* Print a condition code register. */
8797 if (REGNO (x) == SPARC_ICC_REG)
8798 {
8799 /* We don't handle CC[X]_NOOVmode because they're not supposed
8800 to occur here. */
8801 if (GET_MODE (x) == CCmode)
8802 fputs ("%icc", file);
8803 else if (GET_MODE (x) == CCXmode)
8804 fputs ("%xcc", file);
8805 else
8806 gcc_unreachable ();
8807 }
8808 else
8809 /* %fccN register */
8810 fputs (reg_names[REGNO (x)], file);
8811 return;
8812 case 'm':
8813 /* Print the operand's address only. */
8814 output_address (GET_MODE (x), XEXP (x, 0));
8815 return;
8816 case 'r':
8817 /* In this case we need a register. Use %g0 if the
8818 operand is const0_rtx. */
8819 if (x == const0_rtx
8820 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8821 {
8822 fputs ("%g0", file);
8823 return;
8824 }
8825 else
8826 break;
8827
8828 case 'A':
8829 switch (GET_CODE (x))
8830 {
8831 case IOR: fputs ("or", file); break;
8832 case AND: fputs ("and", file); break;
8833 case XOR: fputs ("xor", file); break;
8834 default: output_operand_lossage ("invalid %%A operand");
8835 }
8836 return;
8837
8838 case 'B':
8839 switch (GET_CODE (x))
8840 {
8841 case IOR: fputs ("orn", file); break;
8842 case AND: fputs ("andn", file); break;
8843 case XOR: fputs ("xnor", file); break;
8844 default: output_operand_lossage ("invalid %%B operand");
8845 }
8846 return;
8847
8848 /* This is used by the conditional move instructions. */
8849 case 'C':
8850 {
8851 enum rtx_code rc = GET_CODE (x);
8852
8853 switch (rc)
8854 {
8855 case NE: fputs ("ne", file); break;
8856 case EQ: fputs ("e", file); break;
8857 case GE: fputs ("ge", file); break;
8858 case GT: fputs ("g", file); break;
8859 case LE: fputs ("le", file); break;
8860 case LT: fputs ("l", file); break;
8861 case GEU: fputs ("geu", file); break;
8862 case GTU: fputs ("gu", file); break;
8863 case LEU: fputs ("leu", file); break;
8864 case LTU: fputs ("lu", file); break;
8865 case LTGT: fputs ("lg", file); break;
8866 case UNORDERED: fputs ("u", file); break;
8867 case ORDERED: fputs ("o", file); break;
8868 case UNLT: fputs ("ul", file); break;
8869 case UNLE: fputs ("ule", file); break;
8870 case UNGT: fputs ("ug", file); break;
8871 case UNGE: fputs ("uge", file); break;
8872 case UNEQ: fputs ("ue", file); break;
8873 default: output_operand_lossage ("invalid %%C operand");
8874 }
8875 return;
8876 }
8877
8878 /* These are used by the movr instruction pattern. */
8879 case 'D':
8880 {
8881 enum rtx_code rc = GET_CODE (x);
8882 switch (rc)
8883 {
8884 case NE: fputs ("ne", file); break;
8885 case EQ: fputs ("e", file); break;
8886 case GE: fputs ("gez", file); break;
8887 case LT: fputs ("lz", file); break;
8888 case LE: fputs ("lez", file); break;
8889 case GT: fputs ("gz", file); break;
8890 default: output_operand_lossage ("invalid %%D operand");
8891 }
8892 return;
8893 }
8894
8895 case 'b':
8896 {
8897 /* Print a sign-extended character. */
8898 int i = trunc_int_for_mode (INTVAL (x), QImode);
8899 fprintf (file, "%d", i);
8900 return;
8901 }
8902
8903 case 'f':
8904 /* Operand must be a MEM; write its address. */
8905 if (GET_CODE (x) != MEM)
8906 output_operand_lossage ("invalid %%f operand");
8907 output_address (GET_MODE (x), XEXP (x, 0));
8908 return;
8909
8910 case 's':
8911 {
8912 /* Print a sign-extended 32-bit value. */
8913 HOST_WIDE_INT i;
8914 if (GET_CODE(x) == CONST_INT)
8915 i = INTVAL (x);
8916 else
8917 {
8918 output_operand_lossage ("invalid %%s operand");
8919 return;
8920 }
8921 i = trunc_int_for_mode (i, SImode);
8922 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8923 return;
8924 }
8925
8926 case 0:
8927 /* Do nothing special. */
8928 break;
8929
8930 default:
8931 /* Undocumented flag. */
8932 output_operand_lossage ("invalid operand output code");
8933 }
8934
8935 if (GET_CODE (x) == REG)
8936 fputs (reg_names[REGNO (x)], file);
8937 else if (GET_CODE (x) == MEM)
8938 {
8939 fputc ('[', file);
8940 /* Poor Sun assembler doesn't understand absolute addressing. */
8941 if (CONSTANT_P (XEXP (x, 0)))
8942 fputs ("%g0+", file);
8943 output_address (GET_MODE (x), XEXP (x, 0));
8944 fputc (']', file);
8945 }
8946 else if (GET_CODE (x) == HIGH)
8947 {
8948 fputs ("%hi(", file);
8949 output_addr_const (file, XEXP (x, 0));
8950 fputc (')', file);
8951 }
8952 else if (GET_CODE (x) == LO_SUM)
8953 {
8954 sparc_print_operand (file, XEXP (x, 0), 0);
8955 if (TARGET_CM_MEDMID)
8956 fputs ("+%l44(", file);
8957 else
8958 fputs ("+%lo(", file);
8959 output_addr_const (file, XEXP (x, 1));
8960 fputc (')', file);
8961 }
8962 else if (GET_CODE (x) == CONST_DOUBLE)
8963 output_operand_lossage ("floating-point constant not a valid immediate operand");
8964 else
8965 output_addr_const (file, x);
8966 }
8967
8968 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8969
8970 static void
8971 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
8972 {
8973 register rtx base, index = 0;
8974 int offset = 0;
8975 register rtx addr = x;
8976
8977 if (REG_P (addr))
8978 fputs (reg_names[REGNO (addr)], file);
8979 else if (GET_CODE (addr) == PLUS)
8980 {
8981 if (CONST_INT_P (XEXP (addr, 0)))
8982 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8983 else if (CONST_INT_P (XEXP (addr, 1)))
8984 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8985 else
8986 base = XEXP (addr, 0), index = XEXP (addr, 1);
8987 if (GET_CODE (base) == LO_SUM)
8988 {
8989 gcc_assert (USE_AS_OFFSETABLE_LO10
8990 && TARGET_ARCH64
8991 && ! TARGET_CM_MEDMID);
8992 output_operand (XEXP (base, 0), 0);
8993 fputs ("+%lo(", file);
8994 output_address (VOIDmode, XEXP (base, 1));
8995 fprintf (file, ")+%d", offset);
8996 }
8997 else
8998 {
8999 fputs (reg_names[REGNO (base)], file);
9000 if (index == 0)
9001 fprintf (file, "%+d", offset);
9002 else if (REG_P (index))
9003 fprintf (file, "+%s", reg_names[REGNO (index)]);
9004 else if (GET_CODE (index) == SYMBOL_REF
9005 || GET_CODE (index) == LABEL_REF
9006 || GET_CODE (index) == CONST)
9007 fputc ('+', file), output_addr_const (file, index);
9008 else gcc_unreachable ();
9009 }
9010 }
9011 else if (GET_CODE (addr) == MINUS
9012 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9013 {
9014 output_addr_const (file, XEXP (addr, 0));
9015 fputs ("-(", file);
9016 output_addr_const (file, XEXP (addr, 1));
9017 fputs ("-.)", file);
9018 }
9019 else if (GET_CODE (addr) == LO_SUM)
9020 {
9021 output_operand (XEXP (addr, 0), 0);
9022 if (TARGET_CM_MEDMID)
9023 fputs ("+%l44(", file);
9024 else
9025 fputs ("+%lo(", file);
9026 output_address (VOIDmode, XEXP (addr, 1));
9027 fputc (')', file);
9028 }
9029 else if (flag_pic
9030 && GET_CODE (addr) == CONST
9031 && GET_CODE (XEXP (addr, 0)) == MINUS
9032 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9033 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9034 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9035 {
9036 addr = XEXP (addr, 0);
9037 output_addr_const (file, XEXP (addr, 0));
9038 /* Group the args of the second CONST in parentheses. */
9039 fputs ("-(", file);
9040 /* Skip past the second CONST--it does nothing for us. */
9041 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9042 /* Close the parenthesis. */
9043 fputc (')', file);
9044 }
9045 else
9046 {
9047 output_addr_const (file, addr);
9048 }
9049 }
9050 \f
9051 /* Target hook for assembling integer objects. The sparc version has
9052 special handling for aligned DI-mode objects. */
9053
9054 static bool
9055 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9056 {
9057 /* ??? We only output .xword's for symbols and only then in environments
9058 where the assembler can handle them. */
9059 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9060 {
9061 if (TARGET_V9)
9062 {
9063 assemble_integer_with_op ("\t.xword\t", x);
9064 return true;
9065 }
9066 else
9067 {
9068 assemble_aligned_integer (4, const0_rtx);
9069 assemble_aligned_integer (4, x);
9070 return true;
9071 }
9072 }
9073 return default_assemble_integer (x, size, aligned_p);
9074 }
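/* For example, an aligned 8-byte reference to symbol "foo" comes out
   as "\t.xword\tfoo" on V9; the 32-bit fallback above emits a zero
   upper word and then "\t.word\tfoo", which is equivalent because
   pointers are only 32 bits wide there.  */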
9075 \f
9076 /* Return the value of a code used in the .proc pseudo-op that says
9077 what kind of result this function returns. For non-C types, we pick
9078 the closest C type. */
9079
9080 #ifndef SHORT_TYPE_SIZE
9081 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9082 #endif
9083
9084 #ifndef INT_TYPE_SIZE
9085 #define INT_TYPE_SIZE BITS_PER_WORD
9086 #endif
9087
9088 #ifndef LONG_TYPE_SIZE
9089 #define LONG_TYPE_SIZE BITS_PER_WORD
9090 #endif
9091
9092 #ifndef LONG_LONG_TYPE_SIZE
9093 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9094 #endif
9095
9096 #ifndef FLOAT_TYPE_SIZE
9097 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9098 #endif
9099
9100 #ifndef DOUBLE_TYPE_SIZE
9101 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9102 #endif
9103
9104 #ifndef LONG_DOUBLE_TYPE_SIZE
9105 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9106 #endif
9107
9108 unsigned long
9109 sparc_type_code (register tree type)
9110 {
9111 register unsigned long qualifiers = 0;
9112 register unsigned shift;
9113
9114 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9115 setting more, since some assemblers will give an error for this. Also,
9116 we must be careful to avoid shifts of 32 bits or more to avoid getting
9117 unpredictable results. */
9118
9119 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9120 {
9121 switch (TREE_CODE (type))
9122 {
9123 case ERROR_MARK:
9124 return qualifiers;
9125
9126 case ARRAY_TYPE:
9127 qualifiers |= (3 << shift);
9128 break;
9129
9130 case FUNCTION_TYPE:
9131 case METHOD_TYPE:
9132 qualifiers |= (2 << shift);
9133 break;
9134
9135 case POINTER_TYPE:
9136 case REFERENCE_TYPE:
9137 case OFFSET_TYPE:
9138 qualifiers |= (1 << shift);
9139 break;
9140
9141 case RECORD_TYPE:
9142 return (qualifiers | 8);
9143
9144 case UNION_TYPE:
9145 case QUAL_UNION_TYPE:
9146 return (qualifiers | 9);
9147
9148 case ENUMERAL_TYPE:
9149 return (qualifiers | 10);
9150
9151 case VOID_TYPE:
9152 return (qualifiers | 16);
9153
9154 case INTEGER_TYPE:
9155 /* If this is a range type, consider it to be the underlying
9156 type. */
9157 if (TREE_TYPE (type) != 0)
9158 break;
9159
9160 /* Carefully distinguish all the standard types of C,
9161 without messing up if the language is not C. We do this by
9162 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9163 look at both the names and the above fields, but that's redundant.
9164 Any type whose size is between two C types will be considered
9165 to be the wider of the two types. Also, we do not have a
9166 special code to use for "long long", so anything wider than
9167 long is treated the same. Note that we can't distinguish
9168 between "int" and "long" in this code if they are the same
9169 size, but that's fine, since neither can the assembler. */
9170
9171 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9172 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9173
9174 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9175 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9176
9177 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9178 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9179
9180 else
9181 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9182
9183 case REAL_TYPE:
9184 /* If this is a range type, consider it to be the underlying
9185 type. */
9186 if (TREE_TYPE (type) != 0)
9187 break;
9188
9189 /* Carefully distinguish all the standard types of C,
9190 without messing up if the language is not C. */
9191
9192 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9193 return (qualifiers | 6);
9194
9195 else
9196 return (qualifiers | 7);
9197
9198 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9199 /* ??? We need to distinguish between double and float complex types,
9200 but I don't know how yet because I can't reach this code from
9201 existing front-ends. */
9202 return (qualifiers | 7); /* Who knows? */
9203
9204 case VECTOR_TYPE:
9205 case BOOLEAN_TYPE: /* Boolean truth value type. */
9206 case LANG_TYPE:
9207 case NULLPTR_TYPE:
9208 return qualifiers;
9209
9210 default:
9211 gcc_unreachable (); /* Not a type! */
9212 }
9213 }
9214
9215 return qualifiers;
9216 }
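/* For example, "unsigned short *" encodes as (1 << 6) | 13 == 0x4d:
   one POINTER_TYPE qualifier in the first two-bit slot starting at
   bit 6, then the base code 13 for an unsigned short-sized integer.  */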
9217 \f
9218 /* Nested function support. */
9219
9220 /* Emit RTL insns to initialize the variable parts of a trampoline.
9221 FNADDR is an RTX for the address of the function's pure code.
9222 CXT is an RTX for the static chain value for the function.
9223
9224 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9225 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9226 (to store insns). This is a bit excessive. Perhaps a different
9227 mechanism would be better here.
9228
9229 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9230
9231 static void
9232 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9233 {
9234 /* SPARC 32-bit trampoline:
9235
9236 sethi %hi(fn), %g1
9237 sethi %hi(static), %g2
9238 jmp %g1+%lo(fn)
9239 or %g2, %lo(static), %g2
9240
9241 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9242 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9243 */
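  /* For reference, the constants OR'ed in below are those four insns
     with zeroed immediate fields (decoded from the SPARC instruction
     formats): 0x03000000 "sethi %hi(0), %g1", 0x05000000
     "sethi %hi(0), %g2", 0x81c06000 "jmpl %g1+0, %g0" and
     0x8410a000 "or %g2, 0, %g2".  */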
9244
9245 emit_move_insn
9246 (adjust_address (m_tramp, SImode, 0),
9247 expand_binop (SImode, ior_optab,
9248 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9249 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9250 NULL_RTX, 1, OPTAB_DIRECT));
9251
9252 emit_move_insn
9253 (adjust_address (m_tramp, SImode, 4),
9254 expand_binop (SImode, ior_optab,
9255 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9256 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9257 NULL_RTX, 1, OPTAB_DIRECT));
9258
9259 emit_move_insn
9260 (adjust_address (m_tramp, SImode, 8),
9261 expand_binop (SImode, ior_optab,
9262 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9263 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9264 NULL_RTX, 1, OPTAB_DIRECT));
9265
9266 emit_move_insn
9267 (adjust_address (m_tramp, SImode, 12),
9268 expand_binop (SImode, ior_optab,
9269 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9270 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9271 NULL_RTX, 1, OPTAB_DIRECT));
9272
9273 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9274 aligned on a 16-byte boundary so one flush clears it all. */
9275 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9276 if (sparc_cpu != PROCESSOR_ULTRASPARC
9277 && sparc_cpu != PROCESSOR_ULTRASPARC3
9278 && sparc_cpu != PROCESSOR_NIAGARA
9279 && sparc_cpu != PROCESSOR_NIAGARA2
9280 && sparc_cpu != PROCESSOR_NIAGARA3
9281 && sparc_cpu != PROCESSOR_NIAGARA4
9282 && sparc_cpu != PROCESSOR_NIAGARA7)
9283 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9284
9285 /* Call __enable_execute_stack after writing onto the stack to make sure
9286 the stack address is accessible. */
9287 #ifdef HAVE_ENABLE_EXECUTE_STACK
9288 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9289 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9290 #endif
9291
9292 }
9293
9294 /* The 64-bit version is simpler because it makes more sense to load the
9295 values as "immediate" data out of the trampoline. It's also easier since
9296 we can read the PC without clobbering a register. */
9297
9298 static void
9299 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9300 {
9301 /* SPARC 64-bit trampoline:
9302
9303 rd %pc, %g1
9304 ldx [%g1+24], %g5
9305 jmp %g5
9306 ldx [%g1+16], %g5
9307 +16 bytes data
9308 */
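/* The four instruction words stored below decode as follows (an
   illustrative decoding, assuming the standard SPARC V9 encodings):

     0x83414000  rd  %pc, %g1          read the PC
     0xca586018  ldx [%g1+24], %g5     load FNADDR
     0x81c14000  jmp %g5
     0xca586010  ldx [%g1+16], %g5     load CXT, in the delay slot  */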
9309
9310 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9311 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9312 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9313 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9314 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9315 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9316 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9317 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9318 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9319 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9320 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9321
9322 if (sparc_cpu != PROCESSOR_ULTRASPARC
9323 && sparc_cpu != PROCESSOR_ULTRASPARC3
9324 && sparc_cpu != PROCESSOR_NIAGARA
9325 && sparc_cpu != PROCESSOR_NIAGARA2
9326 && sparc_cpu != PROCESSOR_NIAGARA3
9327 && sparc_cpu != PROCESSOR_NIAGARA4
9328 && sparc_cpu != PROCESSOR_NIAGARA7)
9329 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9330
9331 /* Call __enable_execute_stack after writing onto the stack to make sure
9332 the stack address is accessible. */
9333 #ifdef HAVE_ENABLE_EXECUTE_STACK
9334 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9335 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9336 #endif
9337 }
9338
9339 /* Worker for TARGET_TRAMPOLINE_INIT. */
9340
9341 static void
9342 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9343 {
9344 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9345 cxt = force_reg (Pmode, cxt);
9346 if (TARGET_ARCH64)
9347 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9348 else
9349 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9350 }
9351 \f
9352 /* Adjust the cost of a scheduling dependency.  Return the new cost of
9353 a dependency LINK of INSN on DEP_INSN.  COST is the current cost.  */
9354
9355 static int
9356 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9357 {
9358 enum attr_type insn_type;
9359
9360 if (recog_memoized (insn) < 0)
9361 return cost;
9362
9363 insn_type = get_attr_type (insn);
9364
9365 if (REG_NOTE_KIND (link) == 0)
9366 {
9367 /* Data dependency; DEP_INSN writes a register that INSN reads some
9368 cycles later. */
9369
9370 /* If a load, then the dependence must be on the memory address;
9371 add an extra "cycle".  Note that the cost could be two cycles
9372 if the reg was written late in an instruction group; we cannot
9373 tell here.  */
9374 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9375 return cost + 3;
9376
9377 /* Get the delay only if the address of the store is the dependence. */
9378 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9379 {
9380 rtx pat = PATTERN (insn);
9381 rtx dep_pat = PATTERN (dep_insn);
9382
9383 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9384 return cost; /* This should not happen! */
9385
9386 /* The dependency between the two instructions was on the data that
9387 is being stored. Assume that this implies that the address of the
9388 store is not dependent. */
9389 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9390 return cost;
9391
9392 return cost + 3; /* An approximation. */
9393 }
9394
9395 /* A shift instruction cannot receive its data from an instruction
9396 in the same cycle; add a one cycle penalty. */
9397 if (insn_type == TYPE_SHIFT)
9398 return cost + 3; /* Split before cascade into shift. */
9399 }
9400 else
9401 {
9402 /* Anti- or output-dependency; DEP_INSN reads/writes a register that
9403 INSN writes some cycles later.  */
9404
9405 /* These are only significant for the fpu unit; writing an fp reg before
9406 the fpu has finished with it stalls the processor.  */
9407
9408 /* Reusing an integer register causes no problems. */
9409 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9410 return 0;
9411 }
9412
9413 return cost;
9414 }
9415
9416 static int
9417 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9418 {
9419 enum attr_type insn_type, dep_type;
9420 rtx pat = PATTERN (insn);
9421 rtx dep_pat = PATTERN (dep_insn);
9422
9423 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9424 return cost;
9425
9426 insn_type = get_attr_type (insn);
9427 dep_type = get_attr_type (dep_insn);
9428
9429 switch (REG_NOTE_KIND (link))
9430 {
9431 case 0:
9432 /* Data dependency; DEP_INSN writes a register that INSN reads some
9433 cycles later. */
9434
9435 switch (insn_type)
9436 {
9437 case TYPE_STORE:
9438 case TYPE_FPSTORE:
9439 /* Get the delay iff the address of the store is the dependence. */
9440 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9441 return cost;
9442
9443 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9444 return cost;
9445 return cost + 3;
9446
9447 case TYPE_LOAD:
9448 case TYPE_SLOAD:
9449 case TYPE_FPLOAD:
9450 /* If a load, then the dependence must be on the memory address.  If
9451 the addresses aren't equal, then it might be a false dependency.  */
9452 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9453 {
9454 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9455 || GET_CODE (SET_DEST (dep_pat)) != MEM
9456 || GET_CODE (SET_SRC (pat)) != MEM
9457 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9458 XEXP (SET_SRC (pat), 0)))
9459 return cost + 2;
9460
9461 return cost + 8;
9462 }
9463 break;
9464
9465 case TYPE_BRANCH:
9466 /* Compare to branch latency is 0. There is no benefit from
9467 separating compare and branch. */
9468 if (dep_type == TYPE_COMPARE)
9469 return 0;
9470 /* Floating point compare to branch latency is less than
9471 compare to conditional move. */
9472 if (dep_type == TYPE_FPCMP)
9473 return cost - 1;
9474 break;
9475 default:
9476 break;
9477 }
9478 break;
9479
9480 case REG_DEP_ANTI:
9481 /* Anti-dependencies only penalize the fpu unit. */
9482 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9483 return 0;
9484 break;
9485
9486 default:
9487 break;
9488 }
9489
9490 return cost;
9491 }
9492
9493 static int
9494 sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9495 {
9496 switch (sparc_cpu)
9497 {
9498 case PROCESSOR_SUPERSPARC:
9499 cost = supersparc_adjust_cost (insn, link, dep, cost);
9500 break;
9501 case PROCESSOR_HYPERSPARC:
9502 case PROCESSOR_SPARCLITE86X:
9503 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9504 break;
9505 default:
9506 break;
9507 }
9508 return cost;
9509 }
9510
9511 static void
9512 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9513 int sched_verbose ATTRIBUTE_UNUSED,
9514 int max_ready ATTRIBUTE_UNUSED)
9515 {}
9516
9517 static int
9518 sparc_use_sched_lookahead (void)
9519 {
9520 if (sparc_cpu == PROCESSOR_NIAGARA
9521 || sparc_cpu == PROCESSOR_NIAGARA2
9522 || sparc_cpu == PROCESSOR_NIAGARA3)
9523 return 0;
9524 if (sparc_cpu == PROCESSOR_NIAGARA4
9525 || sparc_cpu == PROCESSOR_NIAGARA7)
9526 return 2;
9527 if (sparc_cpu == PROCESSOR_ULTRASPARC
9528 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9529 return 4;
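/* (1 << sparc_cpu) tests sparc_cpu for membership in the set encoded by
   the mask below, i.e. SuperSPARC, HyperSPARC or SPARClite86x.  */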
9530 if ((1 << sparc_cpu) &
9531 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9532 (1 << PROCESSOR_SPARCLITE86X)))
9533 return 3;
9534 return 0;
9535 }
9536
9537 static int
9538 sparc_issue_rate (void)
9539 {
9540 switch (sparc_cpu)
9541 {
9542 case PROCESSOR_NIAGARA:
9543 case PROCESSOR_NIAGARA2:
9544 case PROCESSOR_NIAGARA3:
9545 default:
9546 return 1;
9547 case PROCESSOR_NIAGARA4:
9548 case PROCESSOR_NIAGARA7:
9549 case PROCESSOR_V9:
9550 /* Assume V9 processors are capable of at least dual-issue. */
9551 return 2;
9552 case PROCESSOR_SUPERSPARC:
9553 return 3;
9554 case PROCESSOR_HYPERSPARC:
9555 case PROCESSOR_SPARCLITE86X:
9556 return 2;
9557 case PROCESSOR_ULTRASPARC:
9558 case PROCESSOR_ULTRASPARC3:
9559 return 4;
9560 }
9561 }
9562
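/* Subroutine of sparc_check_64 below.  Return 1 if the SET in INSN is
   known to zero-extend its result to 64 bits, -1 if it is known to
   sign-extend it, and 0 if nothing can be inferred.  */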
9563 static int
9564 set_extends (rtx_insn *insn)
9565 {
9566 register rtx pat = PATTERN (insn);
9567
9568 switch (GET_CODE (SET_SRC (pat)))
9569 {
9570 /* Load and some shift instructions zero extend.  */
9571 case MEM:
9572 case ZERO_EXTEND:
9573 /* sethi clears the high bits.  */
9574 case HIGH:
9575 /* LO_SUM is used with sethi.  sethi clears the high
9576 bits and the values used with lo_sum are positive.  */
9577 case LO_SUM:
9578 /* Store flag stores 0 or 1.  */
9579 case LT: case LTU:
9580 case GT: case GTU:
9581 case LE: case LEU:
9582 case GE: case GEU:
9583 case EQ:
9584 case NE:
9585 return 1;
9586 case AND:
9587 {
9588 rtx op0 = XEXP (SET_SRC (pat), 0);
9589 rtx op1 = XEXP (SET_SRC (pat), 1);
9590 if (GET_CODE (op1) == CONST_INT)
9591 return INTVAL (op1) >= 0;
9592 if (GET_CODE (op0) != REG)
9593 return 0;
9594 if (sparc_check_64 (op0, insn) == 1)
9595 return 1;
9596 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9597 }
9598 case IOR:
9599 case XOR:
9600 {
9601 rtx op0 = XEXP (SET_SRC (pat), 0);
9602 rtx op1 = XEXP (SET_SRC (pat), 1);
9603 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9604 return 0;
9605 if (GET_CODE (op1) == CONST_INT)
9606 return INTVAL (op1) >= 0;
9607 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9608 }
9609 case LSHIFTRT:
9610 return GET_MODE (SET_SRC (pat)) == SImode;
9611 /* Positive integers leave the high bits zero. */
9612 case CONST_INT:
9613 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9614 case ASHIFTRT:
9615 case SIGN_EXTEND:
9616 return - (GET_MODE (SET_SRC (pat)) == SImode);
9617 case REG:
9618 return sparc_check_64 (SET_SRC (pat), insn);
9619 default:
9620 return 0;
9621 }
9622 }
9623
9624 /* We _ought_ to have only one kind per function, but... */
9625 static GTY(()) rtx sparc_addr_diff_list;
9626 static GTY(()) rtx sparc_addr_list;
9627
9628 void
9629 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9630 {
9631 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9632 if (diff)
9633 sparc_addr_diff_list
9634 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9635 else
9636 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9637 }
9638
9639 static void
9640 sparc_output_addr_vec (rtx vec)
9641 {
9642 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9643 int idx, vlen = XVECLEN (body, 0);
9644
9645 #ifdef ASM_OUTPUT_ADDR_VEC_START
9646 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9647 #endif
9648
9649 #ifdef ASM_OUTPUT_CASE_LABEL
9650 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9651 NEXT_INSN (lab));
9652 #else
9653 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9654 #endif
9655
9656 for (idx = 0; idx < vlen; idx++)
9657 {
9658 ASM_OUTPUT_ADDR_VEC_ELT
9659 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9660 }
9661
9662 #ifdef ASM_OUTPUT_ADDR_VEC_END
9663 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9664 #endif
9665 }
9666
9667 static void
9668 sparc_output_addr_diff_vec (rtx vec)
9669 {
9670 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9671 rtx base = XEXP (XEXP (body, 0), 0);
9672 int idx, vlen = XVECLEN (body, 1);
9673
9674 #ifdef ASM_OUTPUT_ADDR_VEC_START
9675 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9676 #endif
9677
9678 #ifdef ASM_OUTPUT_CASE_LABEL
9679 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9680 NEXT_INSN (lab));
9681 #else
9682 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9683 #endif
9684
9685 for (idx = 0; idx < vlen; idx++)
9686 {
9687 ASM_OUTPUT_ADDR_DIFF_ELT
9688 (asm_out_file,
9689 body,
9690 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9691 CODE_LABEL_NUMBER (base));
9692 }
9693
9694 #ifdef ASM_OUTPUT_ADDR_VEC_END
9695 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9696 #endif
9697 }
9698
9699 static void
9700 sparc_output_deferred_case_vectors (void)
9701 {
9702 rtx t;
9703 int align;
9704
9705 if (sparc_addr_list == NULL_RTX
9706 && sparc_addr_diff_list == NULL_RTX)
9707 return;
9708
9709 /* Align to cache line in the function's code section. */
9710 switch_to_section (current_function_section ());
9711
9712 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9713 if (align > 0)
9714 ASM_OUTPUT_ALIGN (asm_out_file, align);
9715
9716 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9717 sparc_output_addr_vec (XEXP (t, 0));
9718 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9719 sparc_output_addr_diff_vec (XEXP (t, 0));
9720
9721 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9722 }
9723
9724 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9725 unknown. Return 1 if the high bits are zero, -1 if the register is
9726 sign extended. */
9727 int
9728 sparc_check_64 (rtx x, rtx_insn *insn)
9729 {
9730 /* If a register is set only once it is safe to ignore insns this
9731 code does not know how to handle. The loop will either recognize
9732 the single set and return the correct value or fail to recognize
9733 it and return 0. */
9734 int set_once = 0;
9735 rtx y = x;
9736
9737 gcc_assert (GET_CODE (x) == REG);
9738
9739 if (GET_MODE (x) == DImode)
9740 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9741
9742 if (flag_expensive_optimizations
9743 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9744 set_once = 1;
9745
9746 if (insn == 0)
9747 {
9748 if (set_once)
9749 insn = get_last_insn_anywhere ();
9750 else
9751 return 0;
9752 }
9753
9754 while ((insn = PREV_INSN (insn)))
9755 {
9756 switch (GET_CODE (insn))
9757 {
9758 case JUMP_INSN:
9759 case NOTE:
9760 break;
9761 case CODE_LABEL:
9762 case CALL_INSN:
9763 default:
9764 if (! set_once)
9765 return 0;
9766 break;
9767 case INSN:
9768 {
9769 rtx pat = PATTERN (insn);
9770 if (GET_CODE (pat) != SET)
9771 return 0;
9772 if (rtx_equal_p (x, SET_DEST (pat)))
9773 return set_extends (insn);
9774 if (y && rtx_equal_p (y, SET_DEST (pat)))
9775 return set_extends (insn);
9776 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9777 return 0;
9778 }
9779 }
9780 }
9781 return 0;
9782 }
9783
9784 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9785 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
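/* In V8+ mode a 64-bit operand may be split across a register pair, with
   the %H and %L output modifiers printing the high and low words.  The
   sequence emitted below first assembles the two words into a single
   64-bit register (sllx/srl/or), performs the shift there, and finally
   splits the result back into a pair with srlx.  */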
9786
9787 const char *
9788 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9789 {
9790 static char asm_code[60];
9791
9792 /* The scratch register is only required when the destination
9793 register is not a 64-bit global or out register. */
9794 if (which_alternative != 2)
9795 operands[3] = operands[0];
9796
9797 /* We can only shift by constants <= 63. */
9798 if (GET_CODE (operands[2]) == CONST_INT)
9799 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9800
9801 if (GET_CODE (operands[1]) == CONST_INT)
9802 {
9803 output_asm_insn ("mov\t%1, %3", operands);
9804 }
9805 else
9806 {
9807 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9808 if (sparc_check_64 (operands[1], insn) <= 0)
9809 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9810 output_asm_insn ("or\t%L1, %3, %3", operands);
9811 }
9812
9813 strcpy (asm_code, opcode);
9814
9815 if (which_alternative != 2)
9816 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9817 else
9818 return
9819 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9820 }
9821 \f
9822 /* Output rtl to increment the profiler label LABELNO
9823 for profiling a function entry. */
9824
9825 void
9826 sparc_profile_hook (int labelno)
9827 {
9828 char buf[32];
9829 rtx lab, fun;
9830
9831 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9832 if (NO_PROFILE_COUNTERS)
9833 {
9834 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9835 }
9836 else
9837 {
9838 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9839 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9840 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9841 }
9842 }
9843 \f
9844 #ifdef TARGET_SOLARIS
9845 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
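/* For example (illustrative), a writable data section "foo" is emitted
   by the code below as:

     .section "foo",#alloc,#write,#progbits  */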
9846
9847 static void
9848 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9849 tree decl ATTRIBUTE_UNUSED)
9850 {
9851 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9852 {
9853 solaris_elf_asm_comdat_section (name, flags, decl);
9854 return;
9855 }
9856
9857 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9858
9859 if (!(flags & SECTION_DEBUG))
9860 fputs (",#alloc", asm_out_file);
9861 if (flags & SECTION_WRITE)
9862 fputs (",#write", asm_out_file);
9863 if (flags & SECTION_TLS)
9864 fputs (",#tls", asm_out_file);
9865 if (flags & SECTION_CODE)
9866 fputs (",#execinstr", asm_out_file);
9867
9868 if (flags & SECTION_NOTYPE)
9869 ;
9870 else if (flags & SECTION_BSS)
9871 fputs (",#nobits", asm_out_file);
9872 else
9873 fputs (",#progbits", asm_out_file);
9874
9875 fputc ('\n', asm_out_file);
9876 }
9877 #endif /* TARGET_SOLARIS */
9878
9879 /* We do not allow indirect calls to be optimized into sibling calls.
9880
9881 We cannot use sibling calls when delayed branches are disabled
9882 because they will likely require the call delay slot to be filled.
9883
9884 Also, on SPARC 32-bit we cannot emit a sibling call when the
9885 current function returns a structure. This is because the "unimp
9886 after call" convention would cause the callee to return to the
9887 wrong place. The generic code already disallows cases where the
9888 function being called returns a structure.
9889
9890 It may seem strange how this last case could occur. Usually there
9891 is code after the call which jumps to epilogue code which dumps the
9892 return value into the struct return area. That ought to invalidate
9893 the sibling call, right?  Well, in the C++ case we can end up passing
9894 the pointer to the struct return area to a constructor (which returns
9895 void) and then nothing else happens. Such a sibling call would look
9896 valid without the added check here.
9897
9898 VxWorks PIC PLT entries require the global pointer to be initialized
9899 on entry. We therefore can't emit sibling calls to them. */
9900 static bool
9901 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9902 {
9903 return (decl
9904 && flag_delayed_branch
9905 && (TARGET_ARCH64 || ! cfun->returns_struct)
9906 && !(TARGET_VXWORKS_RTP
9907 && flag_pic
9908 && !targetm.binds_local_p (decl)));
9909 }
9910 \f
9911 /* libfunc renaming. */
9912
9913 static void
9914 sparc_init_libfuncs (void)
9915 {
9916 if (TARGET_ARCH32)
9917 {
9918 /* Use the subroutines that Sun's library provides for integer
9919 multiply and divide. The `*' prevents an underscore from
9920 being prepended by the compiler. .umul is a little faster
9921 than .mul. */
9922 set_optab_libfunc (smul_optab, SImode, "*.umul");
9923 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9924 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9925 set_optab_libfunc (smod_optab, SImode, "*.rem");
9926 set_optab_libfunc (umod_optab, SImode, "*.urem");
9927
9928 /* TFmode arithmetic.  These names are part of the SPARC 32-bit ABI.  */
9929 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9930 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9931 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9932 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9933 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9934
9935 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9936 is because with soft-float, the SFmode and DFmode sqrt
9937 instructions will be absent, and the compiler will notice and
9938 try to use the TFmode sqrt instruction for calls to the
9939 builtin function sqrt, but this fails. */
9940 if (TARGET_FPU)
9941 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9942
9943 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9944 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9945 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9946 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9947 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9948 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9949
9950 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9951 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9952 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9953 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9954
9955 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9956 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9957 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9958 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9959
9960 if (DITF_CONVERSION_LIBFUNCS)
9961 {
9962 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9963 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9964 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9965 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9966 }
9967
9968 if (SUN_CONVERSION_LIBFUNCS)
9969 {
9970 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9971 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9972 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9973 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9974 }
9975 }
9976 if (TARGET_ARCH64)
9977 {
9978 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9979 do not exist in the library. Make sure the compiler does not
9980 emit calls to them by accident. (It should always use the
9981 hardware instructions.) */
9982 set_optab_libfunc (smul_optab, SImode, 0);
9983 set_optab_libfunc (sdiv_optab, SImode, 0);
9984 set_optab_libfunc (udiv_optab, SImode, 0);
9985 set_optab_libfunc (smod_optab, SImode, 0);
9986 set_optab_libfunc (umod_optab, SImode, 0);
9987
9988 if (SUN_INTEGER_MULTIPLY_64)
9989 {
9990 set_optab_libfunc (smul_optab, DImode, "__mul64");
9991 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9992 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9993 set_optab_libfunc (smod_optab, DImode, "__rem64");
9994 set_optab_libfunc (umod_optab, DImode, "__urem64");
9995 }
9996
9997 if (SUN_CONVERSION_LIBFUNCS)
9998 {
9999 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10000 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10001 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10002 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10003 }
10004 }
10005 }
10006 \f
10007 /* SPARC builtins. */
10008 enum sparc_builtins
10009 {
10010 /* FPU builtins. */
10011 SPARC_BUILTIN_LDFSR,
10012 SPARC_BUILTIN_STFSR,
10013
10014 /* VIS 1.0 builtins. */
10015 SPARC_BUILTIN_FPACK16,
10016 SPARC_BUILTIN_FPACK32,
10017 SPARC_BUILTIN_FPACKFIX,
10018 SPARC_BUILTIN_FEXPAND,
10019 SPARC_BUILTIN_FPMERGE,
10020 SPARC_BUILTIN_FMUL8X16,
10021 SPARC_BUILTIN_FMUL8X16AU,
10022 SPARC_BUILTIN_FMUL8X16AL,
10023 SPARC_BUILTIN_FMUL8SUX16,
10024 SPARC_BUILTIN_FMUL8ULX16,
10025 SPARC_BUILTIN_FMULD8SUX16,
10026 SPARC_BUILTIN_FMULD8ULX16,
10027 SPARC_BUILTIN_FALIGNDATAV4HI,
10028 SPARC_BUILTIN_FALIGNDATAV8QI,
10029 SPARC_BUILTIN_FALIGNDATAV2SI,
10030 SPARC_BUILTIN_FALIGNDATADI,
10031 SPARC_BUILTIN_WRGSR,
10032 SPARC_BUILTIN_RDGSR,
10033 SPARC_BUILTIN_ALIGNADDR,
10034 SPARC_BUILTIN_ALIGNADDRL,
10035 SPARC_BUILTIN_PDIST,
10036 SPARC_BUILTIN_EDGE8,
10037 SPARC_BUILTIN_EDGE8L,
10038 SPARC_BUILTIN_EDGE16,
10039 SPARC_BUILTIN_EDGE16L,
10040 SPARC_BUILTIN_EDGE32,
10041 SPARC_BUILTIN_EDGE32L,
10042 SPARC_BUILTIN_FCMPLE16,
10043 SPARC_BUILTIN_FCMPLE32,
10044 SPARC_BUILTIN_FCMPNE16,
10045 SPARC_BUILTIN_FCMPNE32,
10046 SPARC_BUILTIN_FCMPGT16,
10047 SPARC_BUILTIN_FCMPGT32,
10048 SPARC_BUILTIN_FCMPEQ16,
10049 SPARC_BUILTIN_FCMPEQ32,
10050 SPARC_BUILTIN_FPADD16,
10051 SPARC_BUILTIN_FPADD16S,
10052 SPARC_BUILTIN_FPADD32,
10053 SPARC_BUILTIN_FPADD32S,
10054 SPARC_BUILTIN_FPSUB16,
10055 SPARC_BUILTIN_FPSUB16S,
10056 SPARC_BUILTIN_FPSUB32,
10057 SPARC_BUILTIN_FPSUB32S,
10058 SPARC_BUILTIN_ARRAY8,
10059 SPARC_BUILTIN_ARRAY16,
10060 SPARC_BUILTIN_ARRAY32,
10061
10062 /* VIS 2.0 builtins. */
10063 SPARC_BUILTIN_EDGE8N,
10064 SPARC_BUILTIN_EDGE8LN,
10065 SPARC_BUILTIN_EDGE16N,
10066 SPARC_BUILTIN_EDGE16LN,
10067 SPARC_BUILTIN_EDGE32N,
10068 SPARC_BUILTIN_EDGE32LN,
10069 SPARC_BUILTIN_BMASK,
10070 SPARC_BUILTIN_BSHUFFLEV4HI,
10071 SPARC_BUILTIN_BSHUFFLEV8QI,
10072 SPARC_BUILTIN_BSHUFFLEV2SI,
10073 SPARC_BUILTIN_BSHUFFLEDI,
10074
10075 /* VIS 3.0 builtins. */
10076 SPARC_BUILTIN_CMASK8,
10077 SPARC_BUILTIN_CMASK16,
10078 SPARC_BUILTIN_CMASK32,
10079 SPARC_BUILTIN_FCHKSM16,
10080 SPARC_BUILTIN_FSLL16,
10081 SPARC_BUILTIN_FSLAS16,
10082 SPARC_BUILTIN_FSRL16,
10083 SPARC_BUILTIN_FSRA16,
10084 SPARC_BUILTIN_FSLL32,
10085 SPARC_BUILTIN_FSLAS32,
10086 SPARC_BUILTIN_FSRL32,
10087 SPARC_BUILTIN_FSRA32,
10088 SPARC_BUILTIN_PDISTN,
10089 SPARC_BUILTIN_FMEAN16,
10090 SPARC_BUILTIN_FPADD64,
10091 SPARC_BUILTIN_FPSUB64,
10092 SPARC_BUILTIN_FPADDS16,
10093 SPARC_BUILTIN_FPADDS16S,
10094 SPARC_BUILTIN_FPSUBS16,
10095 SPARC_BUILTIN_FPSUBS16S,
10096 SPARC_BUILTIN_FPADDS32,
10097 SPARC_BUILTIN_FPADDS32S,
10098 SPARC_BUILTIN_FPSUBS32,
10099 SPARC_BUILTIN_FPSUBS32S,
10100 SPARC_BUILTIN_FUCMPLE8,
10101 SPARC_BUILTIN_FUCMPNE8,
10102 SPARC_BUILTIN_FUCMPGT8,
10103 SPARC_BUILTIN_FUCMPEQ8,
10104 SPARC_BUILTIN_FHADDS,
10105 SPARC_BUILTIN_FHADDD,
10106 SPARC_BUILTIN_FHSUBS,
10107 SPARC_BUILTIN_FHSUBD,
10108 SPARC_BUILTIN_FNHADDS,
10109 SPARC_BUILTIN_FNHADDD,
10110 SPARC_BUILTIN_UMULXHI,
10111 SPARC_BUILTIN_XMULX,
10112 SPARC_BUILTIN_XMULXHI,
10113
10114 /* VIS 4.0 builtins. */
10115 SPARC_BUILTIN_FPADD8,
10116 SPARC_BUILTIN_FPADDS8,
10117 SPARC_BUILTIN_FPADDUS8,
10118 SPARC_BUILTIN_FPADDUS16,
10119 SPARC_BUILTIN_FPCMPLE8,
10120 SPARC_BUILTIN_FPCMPGT8,
10121 SPARC_BUILTIN_FPCMPULE16,
10122 SPARC_BUILTIN_FPCMPUGT16,
10123 SPARC_BUILTIN_FPCMPULE32,
10124 SPARC_BUILTIN_FPCMPUGT32,
10125 SPARC_BUILTIN_FPMAX8,
10126 SPARC_BUILTIN_FPMAX16,
10127 SPARC_BUILTIN_FPMAX32,
10128 SPARC_BUILTIN_FPMAXU8,
10129 SPARC_BUILTIN_FPMAXU16,
10130 SPARC_BUILTIN_FPMAXU32,
10131 SPARC_BUILTIN_FPMIN8,
10132 SPARC_BUILTIN_FPMIN16,
10133 SPARC_BUILTIN_FPMIN32,
10134 SPARC_BUILTIN_FPMINU8,
10135 SPARC_BUILTIN_FPMINU16,
10136 SPARC_BUILTIN_FPMINU32,
10137 SPARC_BUILTIN_FPSUB8,
10138 SPARC_BUILTIN_FPSUBS8,
10139 SPARC_BUILTIN_FPSUBUS8,
10140 SPARC_BUILTIN_FPSUBUS16,
10141
10142 SPARC_BUILTIN_MAX
10143 };
10144
10145 static GTY(()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10146 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10147
10148 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10149 function decl or NULL_TREE if the builtin was not added. */
10150
10151 static tree
10152 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10153 tree type)
10154 {
10155 tree t
10156 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10157
10158 if (t)
10159 {
10160 sparc_builtins[code] = t;
10161 sparc_builtins_icode[code] = icode;
10162 }
10163
10164 return t;
10165 }
10166
10167 /* Likewise, but also marks the function as "const". */
10168
10169 static tree
10170 def_builtin_const (const char *name, enum insn_code icode,
10171 enum sparc_builtins code, tree type)
10172 {
10173 tree t = def_builtin (name, icode, code, type);
10174
10175 if (t)
10176 TREE_READONLY (t) = 1;
10177
10178 return t;
10179 }
10180
10181 /* Implement the TARGET_INIT_BUILTINS target hook.
10182 Create builtin functions for special SPARC instructions. */
10183
10184 static void
10185 sparc_init_builtins (void)
10186 {
10187 if (TARGET_FPU)
10188 sparc_fpu_init_builtins ();
10189
10190 if (TARGET_VIS)
10191 sparc_vis_init_builtins ();
10192 }
10193
10194 /* Create builtin functions for FPU instructions. */
10195
10196 static void
10197 sparc_fpu_init_builtins (void)
10198 {
10199 tree ftype
10200 = build_function_type_list (void_type_node,
10201 build_pointer_type (unsigned_type_node), 0);
10202 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10203 SPARC_BUILTIN_LDFSR, ftype);
10204 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10205 SPARC_BUILTIN_STFSR, ftype);
10206 }
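/* For reference, a typical use of these builtins from user code might
   look like this (illustrative only; the mask value is hypothetical):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   -- copy the FSR to memory
     fsr |= 0x0f800000;            -- tweak some bits
     __builtin_load_fsr (&fsr);    -- write it back  */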
10207
10208 /* Create builtin functions for VIS instructions. */
10209
10210 static void
10211 sparc_vis_init_builtins (void)
10212 {
10213 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10214 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10215 tree v4hi = build_vector_type (intHI_type_node, 4);
10216 tree v2hi = build_vector_type (intHI_type_node, 2);
10217 tree v2si = build_vector_type (intSI_type_node, 2);
10218 tree v1si = build_vector_type (intSI_type_node, 1);
10219
10220 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10221 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10222 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10223 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10224 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10225 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10226 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10227 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10228 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10229 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10230 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10231 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10232 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10233 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10234 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10235 v8qi, v8qi,
10236 intDI_type_node, 0);
10237 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10238 v8qi, v8qi, 0);
10239 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10240 v8qi, v8qi, 0);
10241 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10242 intDI_type_node,
10243 intDI_type_node, 0);
10244 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10245 intSI_type_node,
10246 intSI_type_node, 0);
10247 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10248 ptr_type_node,
10249 intSI_type_node, 0);
10250 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10251 ptr_type_node,
10252 intDI_type_node, 0);
10253 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10254 ptr_type_node,
10255 ptr_type_node, 0);
10256 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10257 ptr_type_node,
10258 ptr_type_node, 0);
10259 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10260 v4hi, v4hi, 0);
10261 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10262 v2si, v2si, 0);
10263 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10264 v4hi, v4hi, 0);
10265 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10266 v2si, v2si, 0);
10267 tree void_ftype_di = build_function_type_list (void_type_node,
10268 intDI_type_node, 0);
10269 tree di_ftype_void = build_function_type_list (intDI_type_node,
10270 void_type_node, 0);
10271 tree void_ftype_si = build_function_type_list (void_type_node,
10272 intSI_type_node, 0);
10273 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10274 float_type_node,
10275 float_type_node, 0);
10276 tree df_ftype_df_df = build_function_type_list (double_type_node,
10277 double_type_node,
10278 double_type_node, 0);
10279
10280 /* Packing and expanding vectors. */
10281 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10282 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10283 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10284 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10285 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10286 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10287 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10288 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10289 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10290 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10291
10292 /* Multiplications. */
10293 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10294 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10295 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10296 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10297 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10298 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10299 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10300 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10301 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10302 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10303 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10304 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10305 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10306 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10307
10308 /* Data aligning. */
10309 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10310 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10311 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10312 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10313 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10314 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10315 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10316 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10317
10318 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10319 SPARC_BUILTIN_WRGSR, void_ftype_di);
10320 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10321 SPARC_BUILTIN_RDGSR, di_ftype_void);
10322
10323 if (TARGET_ARCH64)
10324 {
10325 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10326 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10327 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10328 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10329 }
10330 else
10331 {
10332 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10333 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10334 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10335 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10336 }
10337
10338 /* Pixel distance. */
10339 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10340 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10341
10342 /* Edge handling. */
10343 if (TARGET_ARCH64)
10344 {
10345 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10346 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10347 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10348 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10349 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10350 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10351 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10352 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10353 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10354 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10355 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10356 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10357 }
10358 else
10359 {
10360 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10361 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10362 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10363 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10364 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10365 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10366 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10367 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10368 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10369 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10370 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10371 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10372 }
10373
10374 /* Pixel compare. */
10375 if (TARGET_ARCH64)
10376 {
10377 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10378 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10379 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10380 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10381 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10382 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10383 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10384 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10385 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10386 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10387 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10388 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10389 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10390 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10391 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10392 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10393 }
10394 else
10395 {
10396 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10397 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10398 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10399 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10400 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10401 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10402 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10403 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10404 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10405 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10406 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10407 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10408 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10409 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10410 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10411 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10412 }
10413
10414 /* Addition and subtraction. */
10415 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10416 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10417 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10418 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10419 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10420 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10421 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10422 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10423 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10424 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10425 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10426 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10427 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10428 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10429 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10430 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
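/* For reference, user code reaches these builtins through the GCC vector
   extensions, e.g. (illustrative only):

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi sum = __builtin_vis_fpadd16 (a, b);  */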
10431
10432 /* Three-dimensional array addressing. */
10433 if (TARGET_ARCH64)
10434 {
10435 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10436 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10437 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10438 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10439 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10440 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10441 }
10442 else
10443 {
10444 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10445 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10446 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10447 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10448 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10449 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10450 }
10451
10452 if (TARGET_VIS2)
10453 {
10454 /* Edge handling. */
10455 if (TARGET_ARCH64)
10456 {
10457 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10458 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10459 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10460 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10461 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10462 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10463 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10464 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10465 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10466 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10467 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10468 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10469 }
10470 else
10471 {
10472 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10473 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10474 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10475 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10476 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10477 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10478 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10479 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10480 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10481 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10482 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10483 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10484 }
10485
10486 /* Byte mask and shuffle. */
10487 if (TARGET_ARCH64)
10488 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10489 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10490 else
10491 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10492 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10493 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10494 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10495 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10496 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10497 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10498 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10499 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10500 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10501 }
10502
10503 if (TARGET_VIS3)
10504 {
10505 if (TARGET_ARCH64)
10506 {
10507 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10508 SPARC_BUILTIN_CMASK8, void_ftype_di);
10509 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10510 SPARC_BUILTIN_CMASK16, void_ftype_di);
10511 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10512 SPARC_BUILTIN_CMASK32, void_ftype_di);
10513 }
10514 else
10515 {
10516 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10517 SPARC_BUILTIN_CMASK8, void_ftype_si);
10518 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10519 SPARC_BUILTIN_CMASK16, void_ftype_si);
10520 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10521 SPARC_BUILTIN_CMASK32, void_ftype_si);
10522 }
10523
10524 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10525 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10526
10527 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10528 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10529 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10530 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10531 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10532 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10533 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10534 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10535 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10536 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10537 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10538 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10539 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10540 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10541 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10542 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10543
10544 if (TARGET_ARCH64)
10545 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10546 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10547 else
10548 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10549 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10550
10551 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10552 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10553 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10554 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10555 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10556 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10557
10558 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10559 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10560 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10561 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10562 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10563 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10564 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10565 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10566 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10567 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10568 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10569 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10570 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10571 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10572 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10573 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10574
10575 if (TARGET_ARCH64)
10576 {
10577 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10578 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10579 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10580 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10581 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10582 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10583 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10584 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10585 }
10586 else
10587 {
10588 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10589 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10590 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10591 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10592 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10593 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10594 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10595 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10596 }
10597
10598 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10599 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10600 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10601 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10602 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10603 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10604 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10605 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10606 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10607 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10608 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10609 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10610
10611 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10612 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10613 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10614 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10615 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10616 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10617 }
10618
10619 if (TARGET_VIS4)
10620 {
10621 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10622 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10623 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10624 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10625 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10626 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10627 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10628 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10629
10631 if (TARGET_ARCH64)
10632 {
10633 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10634 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10635 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10636 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10637 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10638 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10639 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10640 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10641 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10642 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10643 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10644 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10645 }
10646 else
10647 {
10648 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10649 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10650 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10651 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10652 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10653 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10654 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10655 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10656 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10657 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10658 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10659 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10660 }
10661
10662 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10663 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10664 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10665 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10666 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10667 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10668 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10669 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10670 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10671 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10672 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10673 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10674 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10675 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10676 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10677 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10678 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10679 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10680 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10681 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10682 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10683 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10684 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10685 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10686 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10687 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10688 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10689 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10690 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10691 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10692 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10693 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10694 }
10695 }
10696
10697 /* Implement TARGET_BUILTIN_DECL hook. */
10698
10699 static tree
10700 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10701 {
10702 if (code >= SPARC_BUILTIN_MAX)
10703 return error_mark_node;
10704
10705 return sparc_builtins[code];
10706 }
10707
10708 /* Implement TARGET_EXPAND_BUILTIN hook.  */
10709
10710 static rtx
10711 sparc_expand_builtin (tree exp, rtx target,
10712 rtx subtarget ATTRIBUTE_UNUSED,
10713 machine_mode tmode ATTRIBUTE_UNUSED,
10714 int ignore ATTRIBUTE_UNUSED)
10715 {
10716 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10717 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10718 enum insn_code icode = sparc_builtins_icode[code];
10719 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10720 call_expr_arg_iterator iter;
10721 int arg_count = 0;
10722 rtx pat, op[4];
10723 tree arg;
10724
10725 if (nonvoid)
10726 {
10727 machine_mode tmode = insn_data[icode].operand[0].mode;
10728 if (!target
10729 || GET_MODE (target) != tmode
10730 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10731 op[0] = gen_reg_rtx (tmode);
10732 else
10733 op[0] = target;
10734 }
10735
10736 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10737 {
10738 const struct insn_operand_data *insn_op;
10739 int idx;
10740
10741 if (arg == error_mark_node)
10742 return NULL_RTX;
10743
10744 arg_count++;
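/* Operand 0 of the insn is the result when the builtin returns a value,
   so call argument N maps to insn operand N in that case and to operand
   N-1 for void builtins; the subtraction below encodes this.  */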
10745 idx = arg_count - !nonvoid;
10746 insn_op = &insn_data[icode].operand[idx];
10747 op[arg_count] = expand_normal (arg);
10748
10749 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10750 {
10751 if (!address_operand (op[arg_count], SImode))
10752 {
10753 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10754 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10755 }
10756 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10757 }
10758
10759 else if (insn_op->mode == V1DImode
10760 && GET_MODE (op[arg_count]) == DImode)
10761 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10762
10763 else if (insn_op->mode == V1SImode
10764 && GET_MODE (op[arg_count]) == SImode)
10765 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10766
10767 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10768 insn_op->mode))
10769 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10770 }
10771
10772 switch (arg_count)
10773 {
10774 case 0:
10775 pat = GEN_FCN (icode) (op[0]);
10776 break;
10777 case 1:
10778 if (nonvoid)
10779 pat = GEN_FCN (icode) (op[0], op[1]);
10780 else
10781 pat = GEN_FCN (icode) (op[1]);
10782 break;
10783 case 2:
10784 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10785 break;
10786 case 3:
10787 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10788 break;
10789 default:
10790 gcc_unreachable ();
10791 }
10792
10793 if (!pat)
10794 return NULL_RTX;
10795
10796 emit_insn (pat);
10797
10798 return (nonvoid ? op[0] : const0_rtx);
10799 }
10800
10801 /* Return the upper 16 bits of the 8x16 multiplication. */
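/* That is, the 24-bit product is divided by 256 with rounding to nearest:
   e.g. (illustrative) e8 = 250, e16 = 3 yields (750 + 128) / 256 = 3.  */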
10802
10803 static int
10804 sparc_vis_mul8x16 (int e8, int e16)
10805 {
10806 return (e8 * e16 + 128) / 256;
10807 }
10808
10809 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10810 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10811
10812 static void
10813 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10814 tree inner_type, tree cst0, tree cst1)
10815 {
10816 unsigned i, num = VECTOR_CST_NELTS (cst0);
10817 int scale;
10818
10819 switch (fncode)
10820 {
10821 case SPARC_BUILTIN_FMUL8X16:
10822 for (i = 0; i < num; ++i)
10823 {
10824 int val
10825 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10826 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10827 n_elts[i] = build_int_cst (inner_type, val);
10828 }
10829 break;
10830
10831 case SPARC_BUILTIN_FMUL8X16AU:
10832 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10833
10834 for (i = 0; i < num; ++i)
10835 {
10836 int val
10837 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10838 scale);
10839 n_elts[i] = build_int_cst (inner_type, val);
10840 }
10841 break;
10842
10843 case SPARC_BUILTIN_FMUL8X16AL:
10844 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10845
10846 for (i = 0; i < num; ++i)
10847 {
10848 int val
10849 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10850 scale);
10851 n_elts[i] = build_int_cst (inner_type, val);
10852 }
10853 break;
10854
10855 default:
10856 gcc_unreachable ();
10857 }
10858 }
10859
10860 /* Implement TARGET_FOLD_BUILTIN hook.
10861
10862 Fold builtin functions for SPARC intrinsics.  If IGNORE is true, the
10863 result of the function call is ignored. NULL_TREE is returned if the
10864 function could not be folded. */
10865
10866 static tree
10867 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10868 tree *args, bool ignore)
10869 {
10870 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10871 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10872 tree arg0, arg1, arg2;
10873
10874 if (ignore)
10875 switch (code)
10876 {
10877 case SPARC_BUILTIN_LDFSR:
10878 case SPARC_BUILTIN_STFSR:
10879 case SPARC_BUILTIN_ALIGNADDR:
10880 case SPARC_BUILTIN_WRGSR:
10881 case SPARC_BUILTIN_BMASK:
10882 case SPARC_BUILTIN_CMASK8:
10883 case SPARC_BUILTIN_CMASK16:
10884 case SPARC_BUILTIN_CMASK32:
10885 break;
10886
10887 default:
10888 return build_zero_cst (rtype);
10889 }
10890
10891 switch (code)
10892 {
10893 case SPARC_BUILTIN_FEXPAND:
10894 arg0 = args[0];
10895 STRIP_NOPS (arg0);
10896
10897 if (TREE_CODE (arg0) == VECTOR_CST)
10898 {
10899 tree inner_type = TREE_TYPE (rtype);
10900 tree *n_elts;
10901 unsigned i;
10902
10903 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10904 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10905 n_elts[i] = build_int_cst (inner_type,
10906 TREE_INT_CST_LOW
10907 (VECTOR_CST_ELT (arg0, i)) << 4);
10908 return build_vector (rtype, n_elts);
10909 }
10910 break;
10911
10912 case SPARC_BUILTIN_FMUL8X16:
10913 case SPARC_BUILTIN_FMUL8X16AU:
10914 case SPARC_BUILTIN_FMUL8X16AL:
10915 arg0 = args[0];
10916 arg1 = args[1];
10917 STRIP_NOPS (arg0);
10918 STRIP_NOPS (arg1);
10919
10920 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10921 {
10922 tree inner_type = TREE_TYPE (rtype);
10923 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10924 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10925 return build_vector (rtype, n_elts);
10926 }
10927 break;
10928
10929 case SPARC_BUILTIN_FPMERGE:
10930 arg0 = args[0];
10931 arg1 = args[1];
10932 STRIP_NOPS (arg0);
10933 STRIP_NOPS (arg1);
10934
10935 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10936 {
10937 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10938 unsigned i;
10939 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10940 {
10941 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10942 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10943 }
10944
10945 return build_vector (rtype, n_elts);
10946 }
10947 break;
10948
10949 case SPARC_BUILTIN_PDIST:
10950 case SPARC_BUILTIN_PDISTN:
10951 arg0 = args[0];
10952 arg1 = args[1];
10953 STRIP_NOPS (arg0);
10954 STRIP_NOPS (arg1);
10955 if (code == SPARC_BUILTIN_PDIST)
10956 {
10957 arg2 = args[2];
10958 STRIP_NOPS (arg2);
10959 }
10960 else
10961 arg2 = integer_zero_node;
10962
10963 if (TREE_CODE (arg0) == VECTOR_CST
10964 && TREE_CODE (arg1) == VECTOR_CST
10965 && TREE_CODE (arg2) == INTEGER_CST)
10966 {
10967 bool overflow = false;
10968 widest_int result = wi::to_widest (arg2);
10969 widest_int tmp;
10970 unsigned i;
10971
10972 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10973 {
10974 tree e0 = VECTOR_CST_ELT (arg0, i);
10975 tree e1 = VECTOR_CST_ELT (arg1, i);
10976
10977 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10978
10979 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10980 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10981 if (wi::neg_p (tmp))
10982 tmp = wi::neg (tmp, &neg2_ovf);
10983 else
10984 neg2_ovf = false;
10985 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10986 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10987 }
10988
10989 gcc_assert (!overflow);
10990
10991 return wide_int_to_tree (rtype, result);
10992 }
10993
10994 default:
10995 break;
10996 }
10997
10998 return NULL_TREE;
10999 }
11000 \f
11001 /* ??? This duplicates information provided to the compiler by the
11002 ??? scheduler description. Some day, teach genautomata to output
11003 ??? the latencies and then CSE will just use that. */
11004
11005 static bool
11006 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11007 int opno ATTRIBUTE_UNUSED,
11008 int *total, bool speed ATTRIBUTE_UNUSED)
11009 {
11010 int code = GET_CODE (x);
11011 bool float_mode_p = FLOAT_MODE_P (mode);
11012
11013 switch (code)
11014 {
11015 case CONST_INT:
11016 if (SMALL_INT (x))
11017 *total = 0;
11018 else
11019 *total = 2;
11020 return true;
11021
11022 case CONST_WIDE_INT:
11023 *total = 0;
11024 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11025 *total += 2;
11026 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11027 *total += 2;
11028 return true;
11029
11030 case HIGH:
11031 *total = 2;
11032 return true;
11033
11034 case CONST:
11035 case LABEL_REF:
11036 case SYMBOL_REF:
11037 *total = 4;
11038 return true;
11039
11040 case CONST_DOUBLE:
11041 *total = 8;
11042 return true;
11043
11044 case MEM:
11045 /* If outer-code was a sign or zero extension, a cost
11046 of COSTS_N_INSNS (1) was already added in. This is
11047 why we are subtracting it back out. */
11048 if (outer_code == ZERO_EXTEND)
11049 {
11050 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11051 }
11052 else if (outer_code == SIGN_EXTEND)
11053 {
11054 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11055 }
11056 else if (float_mode_p)
11057 {
11058 *total = sparc_costs->float_load;
11059 }
11060 else
11061 {
11062 *total = sparc_costs->int_load;
11063 }
11064
11065 return true;
11066
11067 case PLUS:
11068 case MINUS:
11069 if (float_mode_p)
11070 *total = sparc_costs->float_plusminus;
11071 else
11072 *total = COSTS_N_INSNS (1);
11073 return false;
11074
11075 case FMA:
11076 {
11077 rtx sub;
11078
11079 gcc_assert (float_mode_p);
11080 *total = sparc_costs->float_mul;
11081
11082 sub = XEXP (x, 0);
11083 if (GET_CODE (sub) == NEG)
11084 sub = XEXP (sub, 0);
11085 *total += rtx_cost (sub, mode, FMA, 0, speed);
11086
11087 sub = XEXP (x, 2);
11088 if (GET_CODE (sub) == NEG)
11089 sub = XEXP (sub, 0);
11090 *total += rtx_cost (sub, mode, FMA, 2, speed);
11091 return true;
11092 }
11093
11094 case MULT:
11095 if (float_mode_p)
11096 *total = sparc_costs->float_mul;
11097 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11098 *total = COSTS_N_INSNS (25);
11099 else
11100 {
11101 int bit_cost;
11102
11103 bit_cost = 0;
11104 if (sparc_costs->int_mul_bit_factor)
11105 {
11106 int nbits;
11107
11108 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11109 {
11110 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11111 for (nbits = 0; value != 0; value &= value - 1)
11112 nbits++;
11113 }
11114 else
11115 nbits = 7;
11116
11117 if (nbits < 3)
11118 nbits = 3;
11119 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11120 bit_cost = COSTS_N_INSNS (bit_cost);
11121 }
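/* A sketch of the count above: the "value &= value - 1" loop is
   Kernighan's population count, clearing the lowest set bit per
   iteration; e.g. a constant multiplier of 0x90 (two bits set)
   gives nbits == 2, which is then clamped to the minimum of 3.  */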
11122
11123 if (mode == DImode || !TARGET_HARD_MUL)
11124 *total = sparc_costs->int_mulX + bit_cost;
11125 else
11126 *total = sparc_costs->int_mul + bit_cost;
11127 }
11128 return false;
11129
11130 case ASHIFT:
11131 case ASHIFTRT:
11132 case LSHIFTRT:
11133 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11134 return false;
11135
11136 case DIV:
11137 case UDIV:
11138 case MOD:
11139 case UMOD:
11140 if (float_mode_p)
11141 {
11142 if (mode == DFmode)
11143 *total = sparc_costs->float_div_df;
11144 else
11145 *total = sparc_costs->float_div_sf;
11146 }
11147 else
11148 {
11149 if (mode == DImode)
11150 *total = sparc_costs->int_divX;
11151 else
11152 *total = sparc_costs->int_div;
11153 }
11154 return false;
11155
11156 case NEG:
11157 if (! float_mode_p)
11158 {
11159 *total = COSTS_N_INSNS (1);
11160 return false;
11161 }
11162 /* FALLTHRU */
11163
11164 case ABS:
11165 case FLOAT:
11166 case UNSIGNED_FLOAT:
11167 case FIX:
11168 case UNSIGNED_FIX:
11169 case FLOAT_EXTEND:
11170 case FLOAT_TRUNCATE:
11171 *total = sparc_costs->float_move;
11172 return false;
11173
11174 case SQRT:
11175 if (mode == DFmode)
11176 *total = sparc_costs->float_sqrt_df;
11177 else
11178 *total = sparc_costs->float_sqrt_sf;
11179 return false;
11180
11181 case COMPARE:
11182 if (float_mode_p)
11183 *total = sparc_costs->float_cmp;
11184 else
11185 *total = COSTS_N_INSNS (1);
11186 return false;
11187
11188 case IF_THEN_ELSE:
11189 if (float_mode_p)
11190 *total = sparc_costs->float_cmove;
11191 else
11192 *total = sparc_costs->int_cmove;
11193 return false;
11194
11195 case IOR:
11196 /* Handle the NAND vector patterns. */
11197 if (sparc_vector_mode_supported_p (mode)
11198 && GET_CODE (XEXP (x, 0)) == NOT
11199 && GET_CODE (XEXP (x, 1)) == NOT)
11200 {
11201 *total = COSTS_N_INSNS (1);
11202 return true;
11203 }
11204 else
11205 return false;
11206
11207 default:
11208 return false;
11209 }
11210 }
11211
11212 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11213
11214 static inline bool
11215 general_or_i64_p (reg_class_t rclass)
11216 {
11217 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11218 }
11219
11220 /* Implement TARGET_REGISTER_MOVE_COST. */
11221
11222 static int
11223 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11224 reg_class_t from, reg_class_t to)
11225 {
11226 bool need_memory = false;
11227
11228 if (from == FPCC_REGS || to == FPCC_REGS)
11229 need_memory = true;
11230 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11231 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11232 {
11233 if (TARGET_VIS3)
11234 {
11235 int size = GET_MODE_SIZE (mode);
11236 if (size == 8 || size == 4)
11237 {
11238 if (! TARGET_ARCH32 || size == 4)
11239 return 4;
11240 else
11241 return 6;
11242 }
11243 }
11244 need_memory = true;
11245 }
11246
11247 if (need_memory)
11248 {
11249 if (sparc_cpu == PROCESSOR_ULTRASPARC
11250 || sparc_cpu == PROCESSOR_ULTRASPARC3
11251 || sparc_cpu == PROCESSOR_NIAGARA
11252 || sparc_cpu == PROCESSOR_NIAGARA2
11253 || sparc_cpu == PROCESSOR_NIAGARA3
11254 || sparc_cpu == PROCESSOR_NIAGARA4
11255 || sparc_cpu == PROCESSOR_NIAGARA7)
11256 return 12;
11257
11258 return 6;
11259 }
11260
11261 return 2;
11262 }
11263
11264 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11265 This is achieved by means of a manual dynamic stack space allocation in
11266 the current frame. We make the assumption that SEQ doesn't contain any
11267 function calls, with the possible exception of calls to the GOT helper. */
11268
11269 static void
11270 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11271 {
11272 /* We must preserve the lowest 16 words for the register save area. */
11273 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11274 /* We really need only 2 words of fresh stack space. */
11275 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11276
11277 rtx slot
11278 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11279 SPARC_STACK_BIAS + offset));
11280
11281 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11282 emit_insn (gen_rtx_SET (slot, reg));
11283 if (reg2)
11284 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11285 reg2));
11286 emit_insn (seq);
11287 if (reg2)
11288 emit_insn (gen_rtx_SET (reg2,
11289 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11290 emit_insn (gen_rtx_SET (reg, slot));
11291 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11292 }
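
/* Roughly, and assuming REG2 is nonzero, the sequence emitted above is:

     stack_pointer -= size
     [sp + bias + 16*UNITS_PER_WORD] = reg
     [sp + bias + 17*UNITS_PER_WORD] = reg2
     <SEQ>
     reg2 = [sp + bias + 17*UNITS_PER_WORD]
     reg  = [sp + bias + 16*UNITS_PER_WORD]
     stack_pointer += size  */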
11293
11294 /* Output the assembler code for a thunk function. THUNK_DECL is the
11295 declaration for the thunk function itself, FUNCTION is the decl for
11296 the target function. DELTA is an immediate constant offset to be
11297 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11298 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11299
11300 static void
11301 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11302 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11303 tree function)
11304 {
11305 rtx this_rtx, funexp;
11306 rtx_insn *insn;
11307 unsigned int int_arg_first;
11308
11309 reload_completed = 1;
11310 epilogue_completed = 1;
11311
11312 emit_note (NOTE_INSN_PROLOGUE_END);
11313
11314 if (TARGET_FLAT)
11315 {
11316 sparc_leaf_function_p = 1;
11317
11318 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11319 }
11320 else if (flag_delayed_branch)
11321 {
11322 /* We will emit a regular sibcall below, so we need to instruct
11323 output_sibcall that we are in a leaf function. */
11324 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11325
11326 /* This will cause final.c to invoke leaf_renumber_regs so we
11327 must behave as if we were in a not-yet-leafified function. */
11328 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11329 }
11330 else
11331 {
11332 /* We will emit the sibcall manually below, so we will need to
11333 manually spill non-leaf registers. */
11334 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11335
11336 /* We really are in a leaf function. */
11337 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11338 }
11339
11340 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11341 returns a structure, the structure return pointer is there instead. */
11342 if (TARGET_ARCH64
11343 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11344 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11345 else
11346 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11347
11348 /* Add DELTA. When possible use a plain add, otherwise load it into
11349 a register first. */
11350 if (delta)
11351 {
11352 rtx delta_rtx = GEN_INT (delta);
11353
11354 if (! SPARC_SIMM13_P (delta))
11355 {
11356 rtx scratch = gen_rtx_REG (Pmode, 1);
11357 emit_move_insn (scratch, delta_rtx);
11358 delta_rtx = scratch;
11359 }
11360
11361 /* THIS_RTX += DELTA. */
11362 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11363 }
11364
11365 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11366 if (vcall_offset)
11367 {
11368 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11369 rtx scratch = gen_rtx_REG (Pmode, 1);
11370
11371 gcc_assert (vcall_offset < 0);
11372
11373 /* SCRATCH = *THIS_RTX. */
11374 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11375
11376 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11377 may not have any available scratch register at this point. */
11378 if (SPARC_SIMM13_P (vcall_offset))
11379 ;
11380 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11381 else if (! fixed_regs[5]
11382 /* The below sequence is made up of at least 2 insns,
11383 while the default method may need only one. */
11384 && vcall_offset < -8192)
11385 {
11386 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11387 emit_move_insn (scratch2, vcall_offset_rtx);
11388 vcall_offset_rtx = scratch2;
11389 }
11390 else
11391 {
11392 rtx increment = GEN_INT (-4096);
11393
11394 /* VCALL_OFFSET is a negative number whose typical range can be
11395 estimated as -32768..0 in 32-bit mode. In almost all cases
11396 it is therefore cheaper to emit multiple add insns than
11397 spilling and loading the constant into a register (at least
11398 6 insns). */
11399 while (! SPARC_SIMM13_P (vcall_offset))
11400 {
11401 emit_insn (gen_add2_insn (scratch, increment));
11402 vcall_offset += 4096;
11403 }
11404 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11405 }
11406
11407 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11408 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11409 gen_rtx_PLUS (Pmode,
11410 scratch,
11411 vcall_offset_rtx)));
11412
11413 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11414 emit_insn (gen_add2_insn (this_rtx, scratch));
11415 }
11416
11417 /* Generate a tail call to the target function. */
11418 if (! TREE_USED (function))
11419 {
11420 assemble_external (function);
11421 TREE_USED (function) = 1;
11422 }
11423 funexp = XEXP (DECL_RTL (function), 0);
11424
11425 if (flag_delayed_branch)
11426 {
11427 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11428 insn = emit_call_insn (gen_sibcall (funexp));
11429 SIBLING_CALL_P (insn) = 1;
11430 }
11431 else
11432 {
11433 /* The hoops we have to jump through in order to generate a sibcall
11434 without using delay slots... */
11435 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11436
11437 if (flag_pic)
11438 {
11439 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11440 start_sequence ();
11441 load_got_register (); /* clobbers %o7 */
11442 scratch = sparc_legitimize_pic_address (funexp, scratch);
11443 seq = get_insns ();
11444 end_sequence ();
11445 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11446 }
11447 else if (TARGET_ARCH32)
11448 {
11449 emit_insn (gen_rtx_SET (scratch,
11450 gen_rtx_HIGH (SImode, funexp)));
11451 emit_insn (gen_rtx_SET (scratch,
11452 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11453 }
11454 else /* TARGET_ARCH64 */
11455 {
11456 switch (sparc_cmodel)
11457 {
11458 case CM_MEDLOW:
11459 case CM_MEDMID:
11460 /* The destination can serve as a temporary. */
11461 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11462 break;
11463
11464 case CM_MEDANY:
11465 case CM_EMBMEDANY:
11466 /* The destination cannot serve as a temporary. */
11467 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11468 start_sequence ();
11469 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11470 seq = get_insns ();
11471 end_sequence ();
11472 emit_and_preserve (seq, spill_reg, 0);
11473 break;
11474
11475 default:
11476 gcc_unreachable ();
11477 }
11478 }
11479
11480 emit_jump_insn (gen_indirect_jump (scratch));
11481 }
11482
11483 emit_barrier ();
11484
11485 /* Run just enough of rest_of_compilation to get the insns emitted.
11486 There's not really enough bulk here to make other passes such as
11487 instruction scheduling worth while. Note that use_thunk calls
11488 assemble_start_function and assemble_end_function. */
11489 insn = get_insns ();
11490 shorten_branches (insn);
11491 final_start_function (insn, file, 1);
11492 final (insn, file, 1);
11493 final_end_function ();
11494
11495 reload_completed = 0;
11496 epilogue_completed = 0;
11497 }
11498
11499 /* Return true if sparc_output_mi_thunk would be able to output the
11500 assembler code for the thunk function specified by the arguments
11501 it is passed, and false otherwise. */
11502 static bool
11503 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11504 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11505 HOST_WIDE_INT vcall_offset,
11506 const_tree function ATTRIBUTE_UNUSED)
11507 {
11508 /* Bound the loop used in the default method above. */
11509 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11510 }
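
/* With VCALL_OFFSET >= -32768, the add-4096 loop in
   sparc_output_mi_thunk needs at most (32768 - 4096) / 4096 = 7
   iterations before the offset fits in a SIMM13, which is what the
   bound above guarantees when %g5 is not available as a scratch.  */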
11511
11512 /* How to allocate a 'struct machine_function'. */
11513
11514 static struct machine_function *
11515 sparc_init_machine_status (void)
11516 {
11517 return ggc_cleared_alloc<machine_function> ();
11518 }
11519
11520 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11521 We need to emit DTP-relative relocations. */
11522
11523 static void
11524 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11525 {
11526 switch (size)
11527 {
11528 case 4:
11529 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11530 break;
11531 case 8:
11532 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11533 break;
11534 default:
11535 gcc_unreachable ();
11536 }
11537 output_addr_const (file, x);
11538 fputs (")", file);
11539 }
11540
11541 /* Do whatever processing is required at the end of a file. */
11542
11543 static void
11544 sparc_file_end (void)
11545 {
11546 /* If we need to emit the special GOT helper function, do so now. */
11547 if (got_helper_rtx)
11548 {
11549 const char *name = XSTR (got_helper_rtx, 0);
11550 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11551 #ifdef DWARF2_UNWIND_INFO
11552 bool do_cfi;
11553 #endif
11554
11555 if (USE_HIDDEN_LINKONCE)
11556 {
11557 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11558 get_identifier (name),
11559 build_function_type_list (void_type_node,
11560 NULL_TREE));
11561 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11562 NULL_TREE, void_type_node);
11563 TREE_PUBLIC (decl) = 1;
11564 TREE_STATIC (decl) = 1;
11565 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11566 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11567 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11568 resolve_unique_section (decl, 0, flag_function_sections);
11569 allocate_struct_function (decl, true);
11570 cfun->is_thunk = 1;
11571 current_function_decl = decl;
11572 init_varasm_status ();
11573 assemble_start_function (decl, name);
11574 }
11575 else
11576 {
11577 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11578 switch_to_section (text_section);
11579 if (align > 0)
11580 ASM_OUTPUT_ALIGN (asm_out_file, align);
11581 ASM_OUTPUT_LABEL (asm_out_file, name);
11582 }
11583
11584 #ifdef DWARF2_UNWIND_INFO
11585 do_cfi = dwarf2out_do_cfi_asm ();
11586 if (do_cfi)
11587 fprintf (asm_out_file, "\t.cfi_startproc\n");
11588 #endif
11589 if (flag_delayed_branch)
11590 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11591 reg_name, reg_name);
11592 else
11593 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11594 reg_name, reg_name);
11595 #ifdef DWARF2_UNWIND_INFO
11596 if (do_cfi)
11597 fprintf (asm_out_file, "\t.cfi_endproc\n");
11598 #endif
11599 }
11600
11601 if (NEED_INDICATE_EXEC_STACK)
11602 file_end_indicate_exec_stack ();
11603
11604 #ifdef TARGET_SOLARIS
11605 solaris_file_end ();
11606 #endif
11607 }
11608
11609 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11610 /* Implement TARGET_MANGLE_TYPE. */
11611
11612 static const char *
11613 sparc_mangle_type (const_tree type)
11614 {
11615 if (!TARGET_64BIT
11616 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11617 && TARGET_LONG_DOUBLE_128)
11618 return "g";
11619
11620 /* For all other types, use normal C++ mangling. */
11621 return NULL;
11622 }
11623 #endif
11624
11625 /* Expand a membar instruction for various use cases.  Both the LOAD_STORE
11626 and BEFORE_AFTER arguments are of the form X_Y: they are two-bit masks
11627 where bit 0 indicates that X is true and bit 1 indicates that Y is true. */
11628
11629 void
11630 sparc_emit_membar_for_model (enum memmodel model,
11631 int load_store, int before_after)
11632 {
11633 /* Bits for the MEMBAR mmask field. */
11634 const int LoadLoad = 1;
11635 const int StoreLoad = 2;
11636 const int LoadStore = 4;
11637 const int StoreStore = 8;
11638
11639 int mm = 0, implied = 0;
11640
11641 switch (sparc_memory_model)
11642 {
11643 case SMM_SC:
11644 /* Sequential Consistency. All memory transactions are immediately
11645 visible in sequential execution order. No barriers needed. */
11646 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11647 break;
11648
11649 case SMM_TSO:
11650 /* Total Store Ordering: all memory transactions with store semantics
11651 are followed by an implied StoreStore. */
11652 implied |= StoreStore;
11653
11654 /* If we're not looking for a raw barrier (before+after), then atomic
11655 operations get the benefit of being both load and store. */
11656 if (load_store == 3 && before_after == 1)
11657 implied |= StoreLoad;
11658 /* FALLTHRU */
11659
11660 case SMM_PSO:
11661 /* Partial Store Ordering: all memory transactions with load semantics
11662 are followed by an implied LoadLoad | LoadStore. */
11663 implied |= LoadLoad | LoadStore;
11664
11665 /* If we're not looking for a raw barrier (before+after), then atomic
11666 operations get the benefit of being both load and store. */
11667 if (load_store == 3 && before_after == 2)
11668 implied |= StoreLoad | StoreStore;
11669 /* FALLTHRU */
11670
11671 case SMM_RMO:
11672 /* Relaxed Memory Ordering: no implicit bits. */
11673 break;
11674
11675 default:
11676 gcc_unreachable ();
11677 }
11678
11679 if (before_after & 1)
11680 {
11681 if (is_mm_release (model) || is_mm_acq_rel (model)
11682 || is_mm_seq_cst (model))
11683 {
11684 if (load_store & 1)
11685 mm |= LoadLoad | StoreLoad;
11686 if (load_store & 2)
11687 mm |= LoadStore | StoreStore;
11688 }
11689 }
11690 if (before_after & 2)
11691 {
11692 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11693 || is_mm_seq_cst (model))
11694 {
11695 if (load_store & 1)
11696 mm |= LoadLoad | LoadStore;
11697 if (load_store & 2)
11698 mm |= StoreLoad | StoreStore;
11699 }
11700 }
11701
11702 /* Remove the bits implied by the system memory model. */
11703 mm &= ~implied;
11704
11705 /* For raw barriers (before+after), always emit a barrier.
11706 This will become a compile-time barrier if needed. */
11707 if (mm || before_after == 3)
11708 emit_insn (gen_membar (GEN_INT (mm)));
11709 }
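
/* A worked example of the logic above: for a seq_cst atomic
   read-modify-write under the default TSO model, the pre-barrier is
   requested with LOAD_STORE == 3 and BEFORE_AFTER == 1.  TSO implies
   StoreStore, the atomic operation gets StoreLoad for free, and the
   fallthrough to PSO adds LoadLoad | LoadStore, so every requested
   bit is already implied and no membar is emitted.  Under SMM_RMO,
   nothing is implied and the same call emits a membar with all four
   mmask bits set.  */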
11710
11711 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing a
11712 32-bit compare-and-swap on the word containing the byte or half-word. */
11713
11714 static void
11715 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11716 rtx oldval, rtx newval)
11717 {
11718 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11719 rtx addr = gen_reg_rtx (Pmode);
11720 rtx off = gen_reg_rtx (SImode);
11721 rtx oldv = gen_reg_rtx (SImode);
11722 rtx newv = gen_reg_rtx (SImode);
11723 rtx oldvalue = gen_reg_rtx (SImode);
11724 rtx newvalue = gen_reg_rtx (SImode);
11725 rtx res = gen_reg_rtx (SImode);
11726 rtx resv = gen_reg_rtx (SImode);
11727 rtx memsi, val, mask, cc;
11728
11729 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11730
11731 if (Pmode != SImode)
11732 addr1 = gen_lowpart (SImode, addr1);
11733 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11734
11735 memsi = gen_rtx_MEM (SImode, addr);
11736 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11737 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11738
11739 val = copy_to_reg (memsi);
11740
11741 emit_insn (gen_rtx_SET (off,
11742 gen_rtx_XOR (SImode, off,
11743 GEN_INT (GET_MODE (mem) == QImode
11744 ? 3 : 2))));
11745
11746 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11747
11748 if (GET_MODE (mem) == QImode)
11749 mask = force_reg (SImode, GEN_INT (0xff));
11750 else
11751 mask = force_reg (SImode, GEN_INT (0xffff));
11752
11753 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11754
11755 emit_insn (gen_rtx_SET (val,
11756 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11757 val)));
11758
11759 oldval = gen_lowpart (SImode, oldval);
11760 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11761
11762 newval = gen_lowpart_common (SImode, newval);
11763 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11764
11765 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11766
11767 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11768
11769 rtx_code_label *end_label = gen_label_rtx ();
11770 rtx_code_label *loop_label = gen_label_rtx ();
11771 emit_label (loop_label);
11772
11773 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11774
11775 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11776
11777 emit_move_insn (bool_result, const1_rtx);
11778
11779 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11780
11781 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11782
11783 emit_insn (gen_rtx_SET (resv,
11784 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11785 res)));
11786
11787 emit_move_insn (bool_result, const0_rtx);
11788
11789 cc = gen_compare_reg_1 (NE, resv, val);
11790 emit_insn (gen_rtx_SET (val, resv));
11791
11792 /* Use cbranchcc4 to separate the compare and branch! */
11793 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11794 cc, const0_rtx, loop_label));
11795
11796 emit_label (end_label);
11797
11798 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11799
11800 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11801
11802 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11803 }
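
/* In C-like pseudocode, the loop built above is roughly (a sketch,
   not the exact RTL):

     word = *aligned & ~mask;           // bytes outside the field
     for (;;)
       {
         old = (oldval << off) | word;
         new = (newval << off) | word;
         res = cas (aligned, old, new);
         if (res == old)
           break;                       // success, bool_result == 1
         word2 = res & ~mask;
         if (word2 == word)
           break;                       // field mismatched: failure
         word = word2;                  // other bytes changed: retry
       }
     result = (res & mask) >> off;  */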
11804
11805 /* Expand code to perform a compare-and-swap. */
11806
11807 void
11808 sparc_expand_compare_and_swap (rtx operands[])
11809 {
11810 rtx bval, retval, mem, oldval, newval;
11811 machine_mode mode;
11812 enum memmodel model;
11813
11814 bval = operands[0];
11815 retval = operands[1];
11816 mem = operands[2];
11817 oldval = operands[3];
11818 newval = operands[4];
11819 model = (enum memmodel) INTVAL (operands[6]);
11820 mode = GET_MODE (mem);
11821
11822 sparc_emit_membar_for_model (model, 3, 1);
11823
11824 if (reg_overlap_mentioned_p (retval, oldval))
11825 oldval = copy_to_reg (oldval);
11826
11827 if (mode == QImode || mode == HImode)
11828 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11829 else
11830 {
11831 rtx (*gen) (rtx, rtx, rtx, rtx);
11832 rtx x;
11833
11834 if (mode == SImode)
11835 gen = gen_atomic_compare_and_swapsi_1;
11836 else
11837 gen = gen_atomic_compare_and_swapdi_1;
11838 emit_insn (gen (retval, mem, oldval, newval));
11839
11840 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11841 if (x != bval)
11842 convert_move (bval, x, 1);
11843 }
11844
11845 sparc_emit_membar_for_model (model, 3, 2);
11846 }
11847
11848 void
11849 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11850 {
11851 rtx t_1, t_2, t_3;
11852
11853 sel = gen_lowpart (DImode, sel);
11854 switch (vmode)
11855 {
11856 case V2SImode:
11857 /* inp = xxxxxxxAxxxxxxxB */
11858 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11859 NULL_RTX, 1, OPTAB_DIRECT);
11860 /* t_1 = ....xxxxxxxAxxx. */
11861 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11862 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11863 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11864 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11865 /* sel = .......B */
11866 /* t_1 = ...A.... */
11867 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11868 /* sel = ...A...B */
11869 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11870 /* sel = AAAABBBB * 4 */
11871 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11872 /* sel = { A*4, A*4+1, A*4+2, ... } */
11873 break;
11874
11875 case V4HImode:
11876 /* inp = xxxAxxxBxxxCxxxD */
11877 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11878 NULL_RTX, 1, OPTAB_DIRECT);
11879 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11880 NULL_RTX, 1, OPTAB_DIRECT);
11881 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11882 NULL_RTX, 1, OPTAB_DIRECT);
11883 /* t_1 = ..xxxAxxxBxxxCxx */
11884 /* t_2 = ....xxxAxxxBxxxC */
11885 /* t_3 = ......xxxAxxxBxx */
11886 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11887 GEN_INT (0x07),
11888 NULL_RTX, 1, OPTAB_DIRECT);
11889 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11890 GEN_INT (0x0700),
11891 NULL_RTX, 1, OPTAB_DIRECT);
11892 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11893 GEN_INT (0x070000),
11894 NULL_RTX, 1, OPTAB_DIRECT);
11895 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11896 GEN_INT (0x07000000),
11897 NULL_RTX, 1, OPTAB_DIRECT);
11898 /* sel = .......D */
11899 /* t_1 = .....C.. */
11900 /* t_2 = ...B.... */
11901 /* t_3 = .A...... */
11902 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11903 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11904 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11905 /* sel = .A.B.C.D */
11906 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11907 /* sel = AABBCCDD * 2 */
11908 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11909 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11910 break;
11911
11912 case V8QImode:
11913 /* input = xAxBxCxDxExFxGxH */
11914 sel = expand_simple_binop (DImode, AND, sel,
11915 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11916 | 0x0f0f0f0f),
11917 NULL_RTX, 1, OPTAB_DIRECT);
11918 /* sel = .A.B.C.D.E.F.G.H */
11919 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11920 NULL_RTX, 1, OPTAB_DIRECT);
11921 /* t_1 = ..A.B.C.D.E.F.G. */
11922 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11923 NULL_RTX, 1, OPTAB_DIRECT);
11924 /* sel = .AABBCCDDEEFFGGH */
11925 sel = expand_simple_binop (DImode, AND, sel,
11926 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11927 | 0xff00ff),
11928 NULL_RTX, 1, OPTAB_DIRECT);
11929 /* sel = ..AB..CD..EF..GH */
11930 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11931 NULL_RTX, 1, OPTAB_DIRECT);
11932 /* t_1 = ....AB..CD..EF.. */
11933 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11934 NULL_RTX, 1, OPTAB_DIRECT);
11935 /* sel = ..ABABCDCDEFEFGH */
11936 sel = expand_simple_binop (DImode, AND, sel,
11937 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11938 NULL_RTX, 1, OPTAB_DIRECT);
11939 /* sel = ....ABCD....EFGH */
11940 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11941 NULL_RTX, 1, OPTAB_DIRECT);
11942 /* t_1 = ........ABCD.... */
11943 sel = gen_lowpart (SImode, sel);
11944 t_1 = gen_lowpart (SImode, t_1);
11945 break;
11946
11947 default:
11948 gcc_unreachable ();
11949 }
11950
11951 /* Always perform the final addition/merge within the bmask insn. */
11952 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11953 }
11954
11955 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11956
11957 static bool
11958 sparc_frame_pointer_required (void)
11959 {
11960 /* If the stack pointer is dynamically modified in the function, it cannot
11961 serve as the frame pointer. */
11962 if (cfun->calls_alloca)
11963 return true;
11964
11965 /* If the function receives nonlocal gotos, it needs to save the frame
11966 pointer in the nonlocal_goto_save_area object. */
11967 if (cfun->has_nonlocal_label)
11968 return true;
11969
11970 /* In flat mode, that's it. */
11971 if (TARGET_FLAT)
11972 return false;
11973
11974 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11975 return !(crtl->is_leaf && only_leaf_regs_used ());
11976 }
11977
11978 /* The way this is structured, we can't eliminate SFP in favor of SP
11979 if the frame pointer is required: we want to use the SFP->HFP elimination
11980 in that case. But the test in update_eliminables doesn't know we are
11981 assuming below that we only do the former elimination. */
11982
11983 static bool
11984 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11985 {
11986 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11987 }
11988
11989 /* Return the hard frame pointer directly to bypass the stack bias. */
11990
11991 static rtx
11992 sparc_builtin_setjmp_frame_value (void)
11993 {
11994 return hard_frame_pointer_rtx;
11995 }
11996
11997 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11998 they won't be allocated. */
11999
12000 static void
12001 sparc_conditional_register_usage (void)
12002 {
12003 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12004 {
12005 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12006 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12007 }
12008 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12009 then honor it. */
12010 if (TARGET_ARCH32 && fixed_regs[5])
12011 fixed_regs[5] = 1;
12012 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12013 fixed_regs[5] = 0;
12014 if (! TARGET_V9)
12015 {
12016 int regno;
12017 for (regno = SPARC_FIRST_V9_FP_REG;
12018 regno <= SPARC_LAST_V9_FP_REG;
12019 regno++)
12020 fixed_regs[regno] = 1;
12021 /* %fcc0 is used by v8 and v9. */
12022 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12023 regno <= SPARC_LAST_V9_FCC_REG;
12024 regno++)
12025 fixed_regs[regno] = 1;
12026 }
12027 if (! TARGET_FPU)
12028 {
12029 int regno;
12030 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12031 fixed_regs[regno] = 1;
12032 }
12033 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12034 then honor it.  Likewise with g3 and g4. */
12035 if (fixed_regs[2] == 2)
12036 fixed_regs[2] = ! TARGET_APP_REGS;
12037 if (fixed_regs[3] == 2)
12038 fixed_regs[3] = ! TARGET_APP_REGS;
12039 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12040 fixed_regs[4] = ! TARGET_APP_REGS;
12041 else if (TARGET_CM_EMBMEDANY)
12042 fixed_regs[4] = 1;
12043 else if (fixed_regs[4] == 2)
12044 fixed_regs[4] = 0;
12045 if (TARGET_FLAT)
12046 {
12047 int regno;
12048 /* Disable leaf functions. */
12049 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12050 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12051 leaf_reg_remap [regno] = regno;
12052 }
12053 if (TARGET_VIS)
12054 global_regs[SPARC_GSR_REG] = 1;
12055 }
12056
12057 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12058
12059 - We can't load constants into FP registers.
12060 - We can't load FP constants into integer registers when soft-float,
12061 because there is no soft-float pattern with a r/F constraint.
12062 - We can't load FP constants into integer registers for TFmode unless
12063 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12064 - Try and reload integer constants (symbolic or otherwise) back into
12065 registers directly, rather than having them dumped to memory. */
12066
12067 static reg_class_t
12068 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12069 {
12070 machine_mode mode = GET_MODE (x);
12071 if (CONSTANT_P (x))
12072 {
12073 if (FP_REG_CLASS_P (rclass)
12074 || rclass == GENERAL_OR_FP_REGS
12075 || rclass == GENERAL_OR_EXTRA_FP_REGS
12076 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12077 || (mode == TFmode && ! const_zero_operand (x, mode)))
12078 return NO_REGS;
12079
12080 if (GET_MODE_CLASS (mode) == MODE_INT)
12081 return GENERAL_REGS;
12082
12083 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12084 {
12085 if (! FP_REG_CLASS_P (rclass)
12086 || !(const_zero_operand (x, mode)
12087 || const_all_ones_operand (x, mode)))
12088 return NO_REGS;
12089 }
12090 }
12091
12092 if (TARGET_VIS3
12093 && ! TARGET_ARCH64
12094 && (rclass == EXTRA_FP_REGS
12095 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12096 {
12097 int regno = true_regnum (x);
12098
12099 if (SPARC_INT_REG_P (regno))
12100 return (rclass == EXTRA_FP_REGS
12101 ? FP_REGS : GENERAL_OR_FP_REGS);
12102 }
12103
12104 return rclass;
12105 }
12106
12107 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12108 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12109
12110 const char *
12111 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12112 {
12113 char mulstr[32];
12114
12115 gcc_assert (! TARGET_ARCH64);
12116
12117 if (sparc_check_64 (operands[1], insn) <= 0)
12118 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12119 if (which_alternative == 1)
12120 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12121 if (GET_CODE (operands[2]) == CONST_INT)
12122 {
12123 if (which_alternative == 1)
12124 {
12125 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12126 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12127 output_asm_insn (mulstr, operands);
12128 return "srlx\t%L0, 32, %H0";
12129 }
12130 else
12131 {
12132 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12133 output_asm_insn ("or\t%L1, %3, %3", operands);
12134 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12135 output_asm_insn (mulstr, operands);
12136 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12137 return "mov\t%3, %L0";
12138 }
12139 }
12140 else if (rtx_equal_p (operands[1], operands[2]))
12141 {
12142 if (which_alternative == 1)
12143 {
12144 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12145 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12146 output_asm_insn (mulstr, operands);
12147 return "srlx\t%L0, 32, %H0";
12148 }
12149 else
12150 {
12151 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12152 output_asm_insn ("or\t%L1, %3, %3", operands);
12153 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12154 output_asm_insn (mulstr, operands);
12155 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12156 return "mov\t%3, %L0";
12157 }
12158 }
12159 if (sparc_check_64 (operands[2], insn) <= 0)
12160 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12161 if (which_alternative == 1)
12162 {
12163 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12164 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12165 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12166 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12167 output_asm_insn (mulstr, operands);
12168 return "srlx\t%L0, 32, %H0";
12169 }
12170 else
12171 {
12172 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12173 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12174 output_asm_insn ("or\t%L1, %3, %3", operands);
12175 output_asm_insn ("or\t%L2, %4, %4", operands);
12176 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12177 output_asm_insn (mulstr, operands);
12178 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12179 return "mov\t%3, %L0";
12180 }
12181 }
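
/* For the register/register alternative with distinct operands, the
   code above emits a sequence along these lines (shown for "mulx";
   the initial srl insns appear only when the high words are not
   already known to be zero):

     srl   %L1, 0, %L1
     srl   %L2, 0, %L2
     sllx  %H1, 32, %3
     sllx  %H2, 32, %4
     or    %L1, %3, %3
     or    %L2, %4, %4
     mulx  %3, %4, %3
     srlx  %3, 32, %H0
     mov   %3, %L0  */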
12182
12183 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12184 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
12185 and INNER_MODE are the modes describing TARGET. */
12186
12187 static void
12188 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12189 machine_mode inner_mode)
12190 {
12191 rtx t1, final_insn, sel;
12192 int bmask;
12193
12194 t1 = gen_reg_rtx (mode);
12195
12196 elt = convert_modes (SImode, inner_mode, elt, true);
12197 emit_move_insn (gen_lowpart(SImode, t1), elt);
12198
12199 switch (mode)
12200 {
12201 case V2SImode:
12202 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12203 bmask = 0x45674567;
12204 break;
12205 case V4HImode:
12206 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12207 bmask = 0x67676767;
12208 break;
12209 case V8QImode:
12210 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12211 bmask = 0x77777777;
12212 break;
12213 default:
12214 gcc_unreachable ();
12215 }
12216
12217 sel = force_reg (SImode, GEN_INT (bmask));
12218 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12219 emit_insn (final_insn);
12220 }
12221
12222 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12223 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12224
12225 static void
12226 vector_init_fpmerge (rtx target, rtx elt)
12227 {
12228 rtx t1, t2, t2_low, t3, t3_low;
12229
12230 t1 = gen_reg_rtx (V4QImode);
12231 elt = convert_modes (SImode, QImode, elt, true);
12232 emit_move_insn (gen_lowpart (SImode, t1), elt);
12233
12234 t2 = gen_reg_rtx (V8QImode);
12235 t2_low = gen_lowpart (V4QImode, t2);
12236 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12237
12238 t3 = gen_reg_rtx (V8QImode);
12239 t3_low = gen_lowpart (V4QImode, t3);
12240 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12241
12242 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12243 }
12244
12245 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12246 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12247
12248 static void
12249 vector_init_faligndata (rtx target, rtx elt)
12250 {
12251 rtx t1 = gen_reg_rtx (V4HImode);
12252 int i;
12253
12254 elt = convert_modes (SImode, HImode, elt, true);
12255 emit_move_insn (gen_lowpart (SImode, t1), elt);
12256
12257 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12258 force_reg (SImode, GEN_INT (6)),
12259 const0_rtx));
12260
12261 for (i = 0; i < 4; i++)
12262 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12263 }
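
/* How the loop above replicates the element: with GSR.align == 6, each
   faligndata extracts bytes 6..13 of the 16-byte concatenation
   t1:target, i.e. it prepends the two element bytes sitting at the
   end of t1 while shifting TARGET down by one half-word, so four
   iterations fill all four V4HI fields.  */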
12264
12265 /* Emit code to initialize TARGET to values for individual fields VALS. */
12266
12267 void
12268 sparc_expand_vector_init (rtx target, rtx vals)
12269 {
12270 const machine_mode mode = GET_MODE (target);
12271 const machine_mode inner_mode = GET_MODE_INNER (mode);
12272 const int n_elts = GET_MODE_NUNITS (mode);
12273 int i, n_var = 0;
12274 bool all_same;
12275 rtx mem;
12276
12277 all_same = true;
12278 for (i = 0; i < n_elts; i++)
12279 {
12280 rtx x = XVECEXP (vals, 0, i);
12281 if (!CONSTANT_P (x))
12282 n_var++;
12283
12284 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12285 all_same = false;
12286 }
12287
12288 if (n_var == 0)
12289 {
12290 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12291 return;
12292 }
12293
12294 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12295 {
12296 if (GET_MODE_SIZE (inner_mode) == 4)
12297 {
12298 emit_move_insn (gen_lowpart (SImode, target),
12299 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12300 return;
12301 }
12302 else if (GET_MODE_SIZE (inner_mode) == 8)
12303 {
12304 emit_move_insn (gen_lowpart (DImode, target),
12305 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12306 return;
12307 }
12308 }
12309 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12310 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12311 {
12312 emit_move_insn (gen_highpart (word_mode, target),
12313 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12314 emit_move_insn (gen_lowpart (word_mode, target),
12315 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12316 return;
12317 }
12318
12319 if (all_same && GET_MODE_SIZE (mode) == 8)
12320 {
12321 if (TARGET_VIS2)
12322 {
12323 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12324 return;
12325 }
12326 if (mode == V8QImode)
12327 {
12328 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12329 return;
12330 }
12331 if (mode == V4HImode)
12332 {
12333 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12334 return;
12335 }
12336 }
12337
12338 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12339 for (i = 0; i < n_elts; i++)
12340 emit_move_insn (adjust_address_nv (mem, inner_mode,
12341 i * GET_MODE_SIZE (inner_mode)),
12342 XVECEXP (vals, 0, i));
12343 emit_move_insn (target, mem);
12344 }
12345
12346 /* Implement TARGET_SECONDARY_RELOAD. */
12347
12348 static reg_class_t
12349 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12350 machine_mode mode, secondary_reload_info *sri)
12351 {
12352 enum reg_class rclass = (enum reg_class) rclass_i;
12353
12354 sri->icode = CODE_FOR_nothing;
12355 sri->extra_cost = 0;
12356
12357 /* We need a temporary when loading/storing a HImode/QImode value
12358 between memory and the FPU registers. This can happen when combine puts
12359 a paradoxical subreg in a float/fix conversion insn. */
12360 if (FP_REG_CLASS_P (rclass)
12361 && (mode == HImode || mode == QImode)
12362 && (GET_CODE (x) == MEM
12363 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12364 && true_regnum (x) == -1)))
12365 return GENERAL_REGS;
12366
12367 /* On 32-bit we need a temporary when loading/storing a DFmode value
12368 between unaligned memory and the upper FPU registers. */
12369 if (TARGET_ARCH32
12370 && rclass == EXTRA_FP_REGS
12371 && mode == DFmode
12372 && GET_CODE (x) == MEM
12373 && ! mem_min_alignment (x, 8))
12374 return FP_REGS;
12375
12376 if (((TARGET_CM_MEDANY
12377 && symbolic_operand (x, mode))
12378 || (TARGET_CM_EMBMEDANY
12379 && text_segment_operand (x, mode)))
12380 && ! flag_pic)
12381 {
12382 if (in_p)
12383 sri->icode = direct_optab_handler (reload_in_optab, mode);
12384 else
12385 sri->icode = direct_optab_handler (reload_out_optab, mode);
12386 return NO_REGS;
12387 }
12388
12389 if (TARGET_VIS3 && TARGET_ARCH32)
12390 {
12391 int regno = true_regnum (x);
12392
12393 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12394 to move 8-byte values in 4-byte pieces. This only works via
12395 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12396 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12397 an FP_REGS intermediate move. */
12398 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12399 || ((general_or_i64_p (rclass)
12400 || rclass == GENERAL_OR_FP_REGS)
12401 && SPARC_FP_REG_P (regno)))
12402 {
12403 sri->extra_cost = 2;
12404 return FP_REGS;
12405 }
12406 }
12407
12408 return NO_REGS;
12409 }
12410
12411 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12412 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12413
12414 bool
12415 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12416 {
12417 enum rtx_code rc = GET_CODE (operands[1]);
12418 machine_mode cmp_mode;
12419 rtx cc_reg, dst, cmp;
12420
12421 cmp = operands[1];
12422 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12423 return false;
12424
12425 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12426 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12427
12428 cmp_mode = GET_MODE (XEXP (cmp, 0));
12429 rc = GET_CODE (cmp);
12430
12431 dst = operands[0];
12432 if (! rtx_equal_p (operands[2], dst)
12433 && ! rtx_equal_p (operands[3], dst))
12434 {
12435 if (reg_overlap_mentioned_p (dst, cmp))
12436 dst = gen_reg_rtx (mode);
12437
12438 emit_move_insn (dst, operands[3]);
12439 }
12440 else if (operands[2] == dst)
12441 {
12442 operands[2] = operands[3];
12443
12444 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12445 rc = reverse_condition_maybe_unordered (rc);
12446 else
12447 rc = reverse_condition (rc);
12448 }
12449
12450 if (XEXP (cmp, 1) == const0_rtx
12451 && GET_CODE (XEXP (cmp, 0)) == REG
12452 && cmp_mode == DImode
12453 && v9_regcmp_p (rc))
12454 cc_reg = XEXP (cmp, 0);
12455 else
12456 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12457
12458 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12459
12460 emit_insn (gen_rtx_SET (dst,
12461 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12462
12463 if (dst != operands[0])
12464 emit_move_insn (operands[0], dst);
12465
12466 return true;
12467 }
12468
12469 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12470 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12471 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12472 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12473 code to be used for the condition mask. */
12474
12475 void
12476 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12477 {
12478 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12479 enum rtx_code code = GET_CODE (operands[3]);
12480
12481 mask = gen_reg_rtx (Pmode);
12482 cop0 = operands[4];
12483 cop1 = operands[5];
12484 if (code == LT || code == GE)
12485 {
12486 rtx t;
12487
12488 code = swap_condition (code);
12489 t = cop0; cop0 = cop1; cop1 = t;
12490 }
12491
12492 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12493
12494 fcmp = gen_rtx_UNSPEC (Pmode,
12495 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12496 fcode);
12497
12498 cmask = gen_rtx_UNSPEC (DImode,
12499 gen_rtvec (2, mask, gsr),
12500 ccode);
12501
12502 bshuf = gen_rtx_UNSPEC (mode,
12503 gen_rtvec (3, operands[1], operands[2], gsr),
12504 UNSPEC_BSHUFFLE);
12505
12506 emit_insn (gen_rtx_SET (mask, fcmp));
12507 emit_insn (gen_rtx_SET (gsr, cmask));
12508
12509 emit_insn (gen_rtx_SET (operands[0], bshuf));
12510 }
12511
12512 /* On SPARC, this should return 4 for any mode which naturally
12513 allocates into the float registers. */
12514
12515 unsigned int
12516 sparc_regmode_natural_size (machine_mode mode)
12517 {
12518 int size = UNITS_PER_WORD;
12519
12520 if (TARGET_ARCH64)
12521 {
12522 enum mode_class mclass = GET_MODE_CLASS (mode);
12523
12524 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12525 size = 4;
12526 }
12527
12528 return size;
12529 }
12530
12531 /* Return TRUE if it is a good idea to tie two pseudo registers
12532 when one has mode MODE1 and one has mode MODE2.
12533 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12534 for any hard reg, then this must be FALSE for correct output.
12535
12536 For V9 we have to deal with the fact that only the lower 32 floating
12537 point registers are 32-bit addressable. */
12538
12539 bool
12540 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12541 {
12542 enum mode_class mclass1, mclass2;
12543 unsigned short size1, size2;
12544
12545 if (mode1 == mode2)
12546 return true;
12547
12548 mclass1 = GET_MODE_CLASS (mode1);
12549 mclass2 = GET_MODE_CLASS (mode2);
12550 if (mclass1 != mclass2)
12551 return false;
12552
12553 if (! TARGET_V9)
12554 return true;
12555
12556 /* Classes are the same and we are V9 so we have to deal with upper
12557 vs. lower floating point registers. If one of the modes is a
12558 4-byte mode, and the other is not, we have to mark them as not
12559 tieable because only the lower 32 floating point register are
12560 addressable 32-bits at a time.
12561
12562 We can't just test explicitly for SFmode, otherwise we won't
12563 cover the vector mode cases properly. */
12564
12565 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12566 return true;
12567
12568 size1 = GET_MODE_SIZE (mode1);
12569 size2 = GET_MODE_SIZE (mode2);
12570 if ((size1 > 4 && size2 == 4)
12571 || (size2 > 4 && size1 == 4))
12572 return false;
12573
12574 return true;
12575 }
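
/* For example, on V9 this makes SFmode (4 bytes) and DFmode (8 bytes)
   untieable: a DFmode pseudo may end up in the upper half of the FP
   register file, where no 4-byte access is possible.  */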
12576
12577 /* Implement TARGET_CSTORE_MODE. */
12578
12579 static machine_mode
12580 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12581 {
12582 return (TARGET_ARCH64 ? DImode : SImode);
12583 }
12584
12585 /* Return the compound expression made of T1 and T2. */
12586
12587 static inline tree
12588 compound_expr (tree t1, tree t2)
12589 {
12590 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12591 }
12592
12593 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12594
12595 static void
12596 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12597 {
12598 if (!TARGET_FPU)
12599 return;
12600
12601 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12602 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12603
12604 /* We generate the equivalent of feholdexcept (&fenv_var):
12605
12606 unsigned int fenv_var;
12607 __builtin_store_fsr (&fenv_var);
12608
12609 unsigned int tmp1_var;
12610 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12611
12612 __builtin_load_fsr (&tmp1_var); */
12613
12614 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12615 TREE_ADDRESSABLE (fenv_var) = 1;
12616 tree fenv_addr = build_fold_addr_expr (fenv_var);
12617 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12618 tree hold_stfsr
12619 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12620 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12621
12622 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12623 TREE_ADDRESSABLE (tmp1_var) = 1;
12624 tree masked_fenv_var
12625 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12626 build_int_cst (unsigned_type_node,
12627 ~(accrued_exception_mask | trap_enable_mask)));
12628 tree hold_mask
12629 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12630 NULL_TREE, NULL_TREE);
12631
12632 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12633 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12634 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12635
12636 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12637
12638 /* We reload the value of tmp1_var to clear the exceptions:
12639
12640 __builtin_load_fsr (&tmp1_var); */
12641
12642 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12643
12644 /* We generate the equivalent of feupdateenv (&fenv_var):
12645
12646 unsigned int tmp2_var;
12647 __builtin_store_fsr (&tmp2_var);
12648
12649 __builtin_load_fsr (&fenv_var);
12650
12651 if (SPARC_LOW_FE_EXCEPT_VALUES)
12652 tmp2_var >>= 5;
12653 __atomic_feraiseexcept ((int) tmp2_var); */
12654
12655 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12656 TREE_ADDRESSABLE (tmp2_var) = 1;
12657 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12658 tree update_stfsr
12659 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12660 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12661
12662 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12663
12664 tree atomic_feraiseexcept
12665 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12666 tree update_call
12667 = build_call_expr (atomic_feraiseexcept, 1,
12668 fold_convert (integer_type_node, tmp2_var));
12669
12670 if (SPARC_LOW_FE_EXCEPT_VALUES)
12671 {
12672 tree shifted_tmp2_var
12673 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12674 build_int_cst (unsigned_type_node, 5));
12675 tree update_shift
12676 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12677 update_call = compound_expr (update_shift, update_call);
12678 }
12679
12680 *update
12681 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12682 }
12683
12684 #include "gt-sparc.h"