/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2016 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed
     rather than variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
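
/* Illustrative sketch (not part of the original sources) of how the
   variable multiply cost described above would be evaluated for an
   immediate operand RS1.  The helper name is hypothetical; floor_log2
   is assumed to behave as the hwint.h helper of the same name.  */
#if 0
static int
example_int_mul_cost (const struct processor_costs *costs, HOST_WIDE_INT rs1)
{
  /* For negative values, track the highest clear bit instead.  */
  int highest_bit = (rs1 < 0) ? floor_log2 (~rs1) : floor_log2 (rs1);
  if (highest_bit < 3)
    highest_bit = 3;
  /* A zero bit factor means the multiply cost is fixed.  */
  if (costs->int_mul_bit_factor == 0)
    return costs->int_mul;
  return costs->int_mul + (highest_bit - 3) / costs->int_mul_bit_factor;
}
#endif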

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether anything branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
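
/* For example, in a leaf function no SAVE instruction is executed, so
   the register window does not rotate: the incoming registers %i0-%i5
   (regs 24-29) are remapped above onto the outgoing registers %o0-%o5
   (regs 8-13) and %i7 (reg 31) onto %o7 (reg 15), while the locals and
   the remaining in/out registers (entries of -1) are unavailable.  */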

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
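
/* For instance, applied to (sign_extend:DI (mem:SI (reg:DI %o0))) the
   function above returns the inner (mem:SI ...); applied to a bare REG
   or any other non-memory rtx it returns NULL_RTX.  (The rtl here is an
   illustrative sketch, not taken from an actual dump.)  */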

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			 ld [address], %fx+1
			 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			 ld [address], %fx+1
			 FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		     ld [address], %fx+1
		     fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction, so we play it safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
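
/* Illustrative example (the register choices are hypothetical, not taken
   from a real dump): for one of the AT697F cases 1-4 above, the pass
   transforms

     ld [%o0], %f1
     faddd %f0, %f2, %f0

   into

     ld [%o0], %f1
     nop
     faddd %f0, %f2, %f0

   because the load writes the odd register %f1 while the double-precision
   add both reads and writes the enclosing register pair starting at %f0.  */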

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
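
/* A minimal sketch (an assumption: the actual registration site is not
   shown in this excerpt) of how the errata pass would be scheduled after
   the delayed-branch scheduling pass using the generic pass manager API
   from tree-pass.h; G is the global gcc::context from context.h.  */
#if 0
static void
example_register_errata_pass (void)
{
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,           /* pass */
      "dbr",                 /* reference_pass_name */
      1,                     /* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER  /* pos_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
#endif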

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the more recent sparclite chip, with an FPU.  */
1307 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1308 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1309 { "sparclet", MASK_ISA, MASK_SPARCLET },
1310 /* TEMIC sparclet */
1311 { "tsc701", MASK_ISA, MASK_SPARCLET },
1312 { "v9", MASK_ISA, MASK_V9 },
1313 /* UltraSPARC I, II, IIi */
1314 { "ultrasparc", MASK_ISA,
1315 /* Although insns using %y are deprecated, it is a clear win. */
1316 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1317 /* UltraSPARC III */
1318 /* ??? Check if %y issue still holds true. */
1319 { "ultrasparc3", MASK_ISA,
1320 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1321 /* UltraSPARC T1 */
1322 { "niagara", MASK_ISA,
1323 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1324 /* UltraSPARC T2 */
1325 { "niagara2", MASK_ISA,
1326 MASK_V9|MASK_POPC|MASK_VIS2 },
1327 /* UltraSPARC T3 */
1328 { "niagara3", MASK_ISA,
1329 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1330 /* UltraSPARC T4 */
1331 { "niagara4", MASK_ISA,
1332 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1333 /* UltraSPARC M7 */
1334 { "niagara7", MASK_ISA,
1335 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1336 };
1337 const struct cpu_table *cpu;
1338 unsigned int i;
1339 int fpu;
1340
1341 if (sparc_debug_string != NULL)
1342 {
1343 const char *q;
1344 char *p;
1345
1346 p = ASTRDUP (sparc_debug_string);
1347 while ((q = strtok (p, ",")) != NULL)
1348 {
1349 bool invert;
1350 int mask;
1351
1352 p = NULL;
1353 if (*q == '!')
1354 {
1355 invert = true;
1356 q++;
1357 }
1358 else
1359 invert = false;
1360
1361 if (! strcmp (q, "all"))
1362 mask = MASK_DEBUG_ALL;
1363 else if (! strcmp (q, "options"))
1364 mask = MASK_DEBUG_OPTIONS;
1365 else
1366 error ("unknown -mdebug-%s switch", q);
1367
1368 if (invert)
1369 sparc_debug &= ~mask;
1370 else
1371 sparc_debug |= mask;
1372 }
1373 }
1374
1375 if (TARGET_DEBUG_OPTIONS)
1376 {
1377 dump_target_flags("Initial target_flags", target_flags);
1378 dump_target_flags("target_flags_explicit", target_flags_explicit);
1379 }
1380
1381 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1382 SUBTARGET_OVERRIDE_OPTIONS;
1383 #endif
1384
1385 #ifndef SPARC_BI_ARCH
1386 /* Check for unsupported architecture size. */
1387 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1388 error ("%s is not supported by this configuration",
1389 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1390 #endif
1391
  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis.  */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
		      | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4
	  || sparc_cpu == PROCESSOR_NIAGARA7))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
     can run at the same time.  More important, it is the threshold
     defining when additional prefetches will be dropped by the
     hardware.

     The UltraSPARC-III features a documented prefetch queue with a
     size of 8.  Additional prefetches issued in the cpu are
     dropped.

     Niagara processors are different.  In these processors prefetches
     are handled much like regular loads.  The L1 miss buffer is 32
     entries, but prefetches start getting affected when 30 entries
     become occupied.  That occupation could be a mix of regular loads
     and prefetches though.  And that buffer is shared by all threads.
     Once the threshold is reached, if the core is running a single
     thread the prefetch will retry.  If more than one thread is
     running, the prefetch will be dropped.

     All this makes it very difficult to determine how many
     simultaneous prefetches can be issued simultaneously, even in a
     single-threaded program.  Experimental results show that setting
     this parameter to 32 works well when the number of threads is not
     high.  */
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
				    ? 32 : 3))),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
     params.def), so no maybe_set_param_value is needed.

     The Oracle SPARC Architecture (previously the UltraSPARC
     Architecture) specification states that when a PREFETCH[A]
     instruction is executed an implementation-specific amount of data
     is prefetched, and that it is at least 64 bytes long (aligned to
     at least 64 bytes).

     However, this is not correct.  The M7 (and implementations prior
     to that) does not guarantee a 64B prefetch into a cache if the
     line size is smaller.  A single cache line is all that is ever
     prefetched.  So for the M7, where the L1D$ has 32B lines and the
     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
     L2 and L3, but only 32B are brought into the L1D$.  (Assuming it
     is a read_n prefetch, which is the only type which allocates to
     the L1.)  */

1681 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1682 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1683 Niagara processors feature an L1D$ of 16KB. */
1684 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1685 ((sparc_cpu == PROCESSOR_ULTRASPARC
1686 || sparc_cpu == PROCESSOR_ULTRASPARC3
1687 || sparc_cpu == PROCESSOR_NIAGARA
1688 || sparc_cpu == PROCESSOR_NIAGARA2
1689 || sparc_cpu == PROCESSOR_NIAGARA3
1690 || sparc_cpu == PROCESSOR_NIAGARA4
1691 || sparc_cpu == PROCESSOR_NIAGARA7)
1692 ? 16 : 64),
1693 global_options.x_param_values,
1694 global_options_set.x_param_values);
1695
1696
1697 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1698 that 512 is the default in params.def. */
1699 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1700 (sparc_cpu == PROCESSOR_NIAGARA4
1701 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1702 ? 256 : 512)),
1703 global_options.x_param_values,
1704 global_options_set.x_param_values);
1705
1706
1707 /* Disable save slot sharing for call-clobbered registers by default.
1708 The IRA sharing algorithm works on single registers only and this
1709 pessimizes for double floating-point registers. */
1710 if (!global_options_set.x_flag_ira_share_save_slots)
1711 flag_ira_share_save_slots = 0;
1712 }
1713 \f
1714 /* Miscellaneous utilities. */
1715
1716 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1717 or branch on register contents instructions. */
1718
1719 int
1720 v9_regcmp_p (enum rtx_code code)
1721 {
1722 return (code == EQ || code == NE || code == GE || code == LT
1723 || code == LE || code == GT);
1724 }
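/* For reference, these are the comparisons implemented against zero by
   the v9 branch-on-register-contents and move-on-register-contents
   insns: brz/movrz (EQ), brnz/movrnz (NE), brgez/movrgez (GE),
   brlz/movrlz (LT), brlez/movrlez (LE) and brgz/movrgz (GT). */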
1725
1726 /* Nonzero if OP is a floating point constant which can
1727 be loaded into an integer register using a single
1728 sethi instruction. */
1729
1730 int
1731 fp_sethi_p (rtx op)
1732 {
1733 if (GET_CODE (op) == CONST_DOUBLE)
1734 {
1735 long i;
1736
1737 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1738 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1739 }
1740
1741 return 0;
1742 }
1743
1744 /* Nonzero if OP is a floating point constant which can
1745 be loaded into an integer register using a single
1746 mov instruction. */
1747
1748 int
1749 fp_mov_p (rtx op)
1750 {
1751 if (GET_CODE (op) == CONST_DOUBLE)
1752 {
1753 long i;
1754
1755 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1756 return SPARC_SIMM13_P (i);
1757 }
1758
1759 return 0;
1760 }
1761
1762 /* Nonzero if OP is a floating point constant which can
1763 be loaded into an integer register using a high/losum
1764 instruction sequence. */
1765
1766 int
1767 fp_high_losum_p (rtx op)
1768 {
1769 /* The constraints calling this should only be in
1770 SFmode move insns, so any constant which cannot
1771 be moved using a single insn will do. */
1772 if (GET_CODE (op) == CONST_DOUBLE)
1773 {
1774 long i;
1775
1776 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1777 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1778 }
1779
1780 return 0;
1781 }
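/* Illustrative examples for the three predicates above, using IEEE
   single-precision bit images: 1.5f is 0x3fc00000, which does not fit
   in a simm13 but has its low 10 bits clear, so fp_sethi_p holds (one
   sethi suffices). An image such as 0x00000123 fits in a simm13, so
   fp_mov_p holds (one mov suffices). An image such as 0x3fc00001
   satisfies neither test, so fp_high_losum_p holds and a sethi/or
   pair is needed. */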
1782
1783 /* Return true if the address of LABEL can be loaded by means of the
1784 mov{si,di}_pic_label_ref patterns in PIC mode. */
1785
1786 static bool
1787 can_use_mov_pic_label_ref (rtx label)
1788 {
1789 /* VxWorks does not impose a fixed gap between segments; the run-time
1790 gap can be different from the object-file gap. We therefore can't
1791 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1792 are absolutely sure that X is in the same segment as the GOT.
1793 Unfortunately, the flexibility of linker scripts means that we
1794 can't be sure of that in general, so assume that GOT-relative
1795 accesses are never valid on VxWorks. */
1796 if (TARGET_VXWORKS_RTP)
1797 return false;
1798
1799 /* Similarly, if the label is non-local, it might end up being placed
1800 in a different section than the current one, but mov_pic_label_ref
1801 requires the label and the code to be in the same section. */
1802 if (LABEL_REF_NONLOCAL_P (label))
1803 return false;
1804
1805 /* Finally, if we are reordering basic blocks and partitioning into hot
1806 and cold sections, this might happen for any label. */
1807 if (flag_reorder_blocks_and_partition)
1808 return false;
1809
1810 return true;
1811 }
1812
1813 /* Expand a move instruction. Return true if all work is done. */
1814
1815 bool
1816 sparc_expand_move (machine_mode mode, rtx *operands)
1817 {
1818 /* Handle sets of MEM first. */
1819 if (GET_CODE (operands[0]) == MEM)
1820 {
1821 /* 0 is a register (or a pair of registers) on SPARC. */
1822 if (register_or_zero_operand (operands[1], mode))
1823 return false;
1824
1825 if (!reload_in_progress)
1826 {
1827 operands[0] = validize_mem (operands[0]);
1828 operands[1] = force_reg (mode, operands[1]);
1829 }
1830 }
1831
1832 /* Fixup TLS cases. */
1833 if (TARGET_HAVE_TLS
1834 && CONSTANT_P (operands[1])
1835 && sparc_tls_referenced_p (operands [1]))
1836 {
1837 operands[1] = sparc_legitimize_tls_address (operands[1]);
1838 return false;
1839 }
1840
1841 /* Fixup PIC cases. */
1842 if (flag_pic && CONSTANT_P (operands[1]))
1843 {
1844 if (pic_address_needs_scratch (operands[1]))
1845 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1846
1847 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1848 if (GET_CODE (operands[1]) == LABEL_REF
1849 && can_use_mov_pic_label_ref (operands[1]))
1850 {
1851 if (mode == SImode)
1852 {
1853 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1854 return true;
1855 }
1856
1857 if (mode == DImode)
1858 {
1859 gcc_assert (TARGET_ARCH64);
1860 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1861 return true;
1862 }
1863 }
1864
1865 if (symbolic_operand (operands[1], mode))
1866 {
1867 operands[1]
1868 = sparc_legitimize_pic_address (operands[1],
1869 reload_in_progress
1870 ? operands[0] : NULL_RTX);
1871 return false;
1872 }
1873 }
1874
1875 /* If we are trying to toss an integer constant into FP registers,
1876 or loading a FP or vector constant, force it into memory. */
1877 if (CONSTANT_P (operands[1])
1878 && REG_P (operands[0])
1879 && (SPARC_FP_REG_P (REGNO (operands[0]))
1880 || SCALAR_FLOAT_MODE_P (mode)
1881 || VECTOR_MODE_P (mode)))
1882 {
1883 /* emit_group_store will send such bogosity to us when it is
1884 not storing directly into memory. So fix this up to avoid
1885 crashes in output_constant_pool. */
1886 if (operands [1] == const0_rtx)
1887 operands[1] = CONST0_RTX (mode);
1888
1889 /* We can clear FP registers or set them to all ones only if
1890 TARGET_VIS; we can always do so for the other registers. */
1891 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1892 && (const_zero_operand (operands[1], mode)
1893 || const_all_ones_operand (operands[1], mode)))
1894 return false;
1895
1896 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1897 /* We are able to build any SF constant in integer registers
1898 with at most 2 instructions. */
1899 && (mode == SFmode
1900 /* And any DF constant in integer registers. */
1901 || (mode == DFmode
1902 && ! can_create_pseudo_p ())))
1903 return false;
1904
1905 operands[1] = force_const_mem (mode, operands[1]);
1906 if (!reload_in_progress)
1907 operands[1] = validize_mem (operands[1]);
1908 return false;
1909 }
1910
1911 /* Accept non-constants and valid constants unmodified. */
1912 if (!CONSTANT_P (operands[1])
1913 || GET_CODE (operands[1]) == HIGH
1914 || input_operand (operands[1], mode))
1915 return false;
1916
1917 switch (mode)
1918 {
1919 case QImode:
1920 /* All QImode constants require only one insn, so proceed. */
1921 break;
1922
1923 case HImode:
1924 case SImode:
1925 sparc_emit_set_const32 (operands[0], operands[1]);
1926 return true;
1927
1928 case DImode:
1929 /* input_operand should have filtered out 32-bit mode. */
1930 sparc_emit_set_const64 (operands[0], operands[1]);
1931 return true;
1932
1933 case TImode:
1934 {
1935 rtx high, low;
1936 /* TImode isn't available in 32-bit mode. */
1937 split_double (operands[1], &high, &low);
1938 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1939 high));
1940 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1941 low));
1942 }
1943 return true;
1944
1945 default:
1946 gcc_unreachable ();
1947 }
1948
1949 return false;
1950 }
1951
1952 /* Load OP1, a 32-bit constant, into OP0, a register.
1953 We know it can't be done in one insn when we get
1954 here; the move expander guarantees this. */
1955
1956 static void
1957 sparc_emit_set_const32 (rtx op0, rtx op1)
1958 {
1959 machine_mode mode = GET_MODE (op0);
1960 rtx temp = op0;
1961
1962 if (can_create_pseudo_p ())
1963 temp = gen_reg_rtx (mode);
1964
1965 if (GET_CODE (op1) == CONST_INT)
1966 {
1967 gcc_assert (!small_int_operand (op1, mode)
1968 && !const_high_operand (op1, mode));
1969
1970 /* Emit them as real moves instead of a HIGH/LO_SUM,
1971 this way CSE can see everything and reuse intermediate
1972 values if it wants. */
1973 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1974 & ~(HOST_WIDE_INT) 0x3ff)));
1975
1976 emit_insn (gen_rtx_SET (op0,
1977 gen_rtx_IOR (mode, temp,
1978 GEN_INT (INTVAL (op1) & 0x3ff))));
1979 }
1980 else
1981 {
1982 /* A symbol, emit in the traditional way. */
1983 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1984 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1985 }
1986 }
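/* Illustrative sketch (register names are hypothetical): loading
   0x12345678 this way splits it as

	sethi	%hi(0x12345678), %tmp	! %tmp = 0x12345400
	or	%tmp, 0x278, %reg	! 0x12345678 & 0x3ff = 0x278

   except that, per the comment above, both halves are emitted as plain
   moves on the masked constants so that CSE can see them. */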
1987
1988 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1989 If TEMP is nonzero, we are forbidden to use any other scratch
1990 registers. Otherwise, we are allowed to generate them as needed.
1991
1992 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1993 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1994
1995 void
1996 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1997 {
1998 rtx temp1, temp2, temp3, temp4, temp5;
1999 rtx ti_temp = 0;
2000
2001 if (temp && GET_MODE (temp) == TImode)
2002 {
2003 ti_temp = temp;
2004 temp = gen_rtx_REG (DImode, REGNO (temp));
2005 }
2006
2007 /* SPARC-V9 code-model support. */
2008 switch (sparc_cmodel)
2009 {
2010 case CM_MEDLOW:
2011 /* The range spanned by all instructions in the object is less
2012 than 2^31 bytes (2GB) and the distance from any instruction
2013 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2014 than 2^31 bytes (2GB).
2015
2016 The executable must be in the low 4GB of the virtual address
2017 space.
2018
2019 sethi %hi(symbol), %temp1
2020 or %temp1, %lo(symbol), %reg */
2021 if (temp)
2022 temp1 = temp; /* op0 is allowed. */
2023 else
2024 temp1 = gen_reg_rtx (DImode);
2025
2026 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2027 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2028 break;
2029
2030 case CM_MEDMID:
2031 /* The range spanned by all instructions in the object is less
2032 than 2^31 bytes (2GB) and the distance from any instruction
2033 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2034 than 2^31 bytes (2GB).
2035
2036 The executable must be in the low 16TB of the virtual address
2037 space.
2038
2039 sethi %h44(symbol), %temp1
2040 or %temp1, %m44(symbol), %temp2
2041 sllx %temp2, 12, %temp3
2042 or %temp3, %l44(symbol), %reg */
2043 if (temp)
2044 {
2045 temp1 = op0;
2046 temp2 = op0;
2047 temp3 = temp; /* op0 is allowed. */
2048 }
2049 else
2050 {
2051 temp1 = gen_reg_rtx (DImode);
2052 temp2 = gen_reg_rtx (DImode);
2053 temp3 = gen_reg_rtx (DImode);
2054 }
2055
2056 emit_insn (gen_seth44 (temp1, op1));
2057 emit_insn (gen_setm44 (temp2, temp1, op1));
2058 emit_insn (gen_rtx_SET (temp3,
2059 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2060 emit_insn (gen_setl44 (op0, temp3, op1));
2061 break;
2062
2063 case CM_MEDANY:
2064 /* The range spanned by all instructions in the object is less
2065 than 2^31 bytes (2GB) and the distance from any instruction
2066 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2067 than 2^31 bytes (2GB).
2068
2069 The executable can be placed anywhere in the virtual address
2070 space.
2071
2072 sethi %hh(symbol), %temp1
2073 sethi %lm(symbol), %temp2
2074 or %temp1, %hm(symbol), %temp3
2075 sllx %temp3, 32, %temp4
2076 or %temp4, %temp2, %temp5
2077 or %temp5, %lo(symbol), %reg */
2078 if (temp)
2079 {
2080 /* It is possible that one of the registers we got for operands[2]
2081 might coincide with that of operands[0] (which is why we made
2082 it TImode). Pick the other one to use as our scratch. */
2083 if (rtx_equal_p (temp, op0))
2084 {
2085 gcc_assert (ti_temp);
2086 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2087 }
2088 temp1 = op0;
2089 temp2 = temp; /* op0 is _not_ allowed, see above. */
2090 temp3 = op0;
2091 temp4 = op0;
2092 temp5 = op0;
2093 }
2094 else
2095 {
2096 temp1 = gen_reg_rtx (DImode);
2097 temp2 = gen_reg_rtx (DImode);
2098 temp3 = gen_reg_rtx (DImode);
2099 temp4 = gen_reg_rtx (DImode);
2100 temp5 = gen_reg_rtx (DImode);
2101 }
2102
2103 emit_insn (gen_sethh (temp1, op1));
2104 emit_insn (gen_setlm (temp2, op1));
2105 emit_insn (gen_sethm (temp3, temp1, op1));
2106 emit_insn (gen_rtx_SET (temp4,
2107 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2108 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2109 emit_insn (gen_setlo (op0, temp5, op1));
2110 break;
2111
2112 case CM_EMBMEDANY:
2113 /* Old old old backwards-compatibility cruft here.
2114 Essentially it is MEDLOW with a fixed 64-bit
2115 virtual base added to all data segment addresses.
2116 Text-segment stuff is computed like MEDANY, we can't
2117 reuse the code above because the relocation knobs
2118 look different.
2119
2120 Data segment: sethi %hi(symbol), %temp1
2121 add %temp1, EMBMEDANY_BASE_REG, %temp2
2122 or %temp2, %lo(symbol), %reg */
2123 if (data_segment_operand (op1, GET_MODE (op1)))
2124 {
2125 if (temp)
2126 {
2127 temp1 = temp; /* op0 is allowed. */
2128 temp2 = op0;
2129 }
2130 else
2131 {
2132 temp1 = gen_reg_rtx (DImode);
2133 temp2 = gen_reg_rtx (DImode);
2134 }
2135
2136 emit_insn (gen_embmedany_sethi (temp1, op1));
2137 emit_insn (gen_embmedany_brsum (temp2, temp1));
2138 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2139 }
2140
2141 /* Text segment: sethi %uhi(symbol), %temp1
2142 sethi %hi(symbol), %temp2
2143 or %temp1, %ulo(symbol), %temp3
2144 sllx %temp3, 32, %temp4
2145 or %temp4, %temp2, %temp5
2146 or %temp5, %lo(symbol), %reg */
2147 else
2148 {
2149 if (temp)
2150 {
2151 /* It is possible that one of the registers we got for operands[2]
2152 might coincide with that of operands[0] (which is why we made
2153 it TImode). Pick the other one to use as our scratch. */
2154 if (rtx_equal_p (temp, op0))
2155 {
2156 gcc_assert (ti_temp);
2157 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2158 }
2159 temp1 = op0;
2160 temp2 = temp; /* op0 is _not_ allowed, see above. */
2161 temp3 = op0;
2162 temp4 = op0;
2163 temp5 = op0;
2164 }
2165 else
2166 {
2167 temp1 = gen_reg_rtx (DImode);
2168 temp2 = gen_reg_rtx (DImode);
2169 temp3 = gen_reg_rtx (DImode);
2170 temp4 = gen_reg_rtx (DImode);
2171 temp5 = gen_reg_rtx (DImode);
2172 }
2173
2174 emit_insn (gen_embmedany_textuhi (temp1, op1));
2175 emit_insn (gen_embmedany_texthi (temp2, op1));
2176 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2177 emit_insn (gen_rtx_SET (temp4,
2178 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2179 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2180 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2181 }
2182 break;
2183
2184 default:
2185 gcc_unreachable ();
2186 }
2187 }
2188
2189 /* These avoid problems when cross-compiling. If we do not
2190 go through all this hair, then the optimizer will see
2191 invalid REG_EQUAL notes or in some cases none at all. */
2192 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2193 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2194 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2195 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2196
2197 /* The optimizer is not allowed to assume anything about exactly
2198 which bits are set for a HIGH; they are unspecified.
2199 Unfortunately this leads to many missed optimizations
2200 during CSE. We mask out the non-HIGH bits and match
2201 a plain movdi to alleviate this problem. */
2202 static rtx
2203 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2204 {
2205 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2206 }
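/* For example, gen_safe_HIGH64 (reg, 0x12345678) yields
   (set reg (const_int 0x12345400)): the low 10 bits are masked out, so
   the RTL is an ordinary move with fully specified bits even though it
   will ultimately be emitted as a sethi. */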
2207
2208 static rtx
2209 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2210 {
2211 return gen_rtx_SET (dest, GEN_INT (val));
2212 }
2213
2214 static rtx
2215 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2216 {
2217 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2218 }
2219
2220 static rtx
2221 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2222 {
2223 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2224 }
2225
2226 /* Worker routines for 64-bit constant formation on arch64.
2227 One of the key things to do in these emissions is
2228 to create as many temp REGs as possible, so that
2229 half-built constants can be reused when similar
2230 values are required later on.
2231 Without doing this, the optimizer cannot see such
2232 opportunities. */
2233
2234 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2235 unsigned HOST_WIDE_INT, int);
2236
2237 static void
2238 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2239 unsigned HOST_WIDE_INT low_bits, int is_neg)
2240 {
2241 unsigned HOST_WIDE_INT high_bits;
2242
2243 if (is_neg)
2244 high_bits = (~low_bits) & 0xffffffff;
2245 else
2246 high_bits = low_bits;
2247
2248 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2249 if (!is_neg)
2250 {
2251 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2252 }
2253 else
2254 {
2255 /* If we are XOR'ing with -1, then we should emit a one's complement
2256 instead. This way the combiner will notice logical operations
2257 such as ANDN later on and substitute. */
2258 if ((low_bits & 0x3ff) == 0x3ff)
2259 {
2260 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2261 }
2262 else
2263 {
2264 emit_insn (gen_rtx_SET (op0,
2265 gen_safe_XOR64 (temp,
2266 (-(HOST_WIDE_INT)0x400
2267 | (low_bits & 0x3ff)))));
2268 }
2269 }
2270 }
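/* Worked example, illustrative only: for the all-ones-high constant
   0xfffffffffffff234, low_bits is 0xfffff234 and is_neg is set, so
   high_bits = ~low_bits & 0xffffffff = 0x00000dcb and we emit roughly

	sethi	%hi(0xdcb), %tmp	! %tmp = 0xc00
	xor	%tmp, -0x1cc, %reg	! -0x400 | 0x234 = -0x1cc

   since 0xc00 ^ 0xfffffffffffffe34 = 0xfffffffffffff234. */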
2271
2272 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2273 unsigned HOST_WIDE_INT, int);
2274
2275 static void
2276 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2277 unsigned HOST_WIDE_INT high_bits,
2278 unsigned HOST_WIDE_INT low_immediate,
2279 int shift_count)
2280 {
2281 rtx temp2 = op0;
2282
2283 if ((high_bits & 0xfffffc00) != 0)
2284 {
2285 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2286 if ((high_bits & ~0xfffffc00) != 0)
2287 emit_insn (gen_rtx_SET (op0,
2288 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2289 else
2290 temp2 = temp;
2291 }
2292 else
2293 {
2294 emit_insn (gen_safe_SET64 (temp, high_bits));
2295 temp2 = temp;
2296 }
2297
2298 /* Now shift it up into place. */
2299 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2300 GEN_INT (shift_count))));
2301
2302 /* If there is a low immediate part piece, finish up by
2303 putting that in as well. */
2304 if (low_immediate != 0)
2305 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2306 }
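/* Worked example, illustrative only: for 0x1234567800000000 we have
   high_bits = 0x12345678, low_immediate = 0 and shift_count = 32,
   giving roughly

	sethi	%hi(0x12345678), %tmp
	or	%tmp, 0x278, %reg
	sllx	%reg, 32, %reg  */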
2307
2308 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2309 unsigned HOST_WIDE_INT);
2310
2311 /* Full 64-bit constant decomposition. Even though this is the
2312 'worst' case, we still optimize a few things away. */
2313 static void
2314 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2315 unsigned HOST_WIDE_INT high_bits,
2316 unsigned HOST_WIDE_INT low_bits)
2317 {
2318 rtx sub_temp = op0;
2319
2320 if (can_create_pseudo_p ())
2321 sub_temp = gen_reg_rtx (DImode);
2322
2323 if ((high_bits & 0xfffffc00) != 0)
2324 {
2325 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2326 if ((high_bits & ~0xfffffc00) != 0)
2327 emit_insn (gen_rtx_SET (sub_temp,
2328 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2329 else
2330 sub_temp = temp;
2331 }
2332 else
2333 {
2334 emit_insn (gen_safe_SET64 (temp, high_bits));
2335 sub_temp = temp;
2336 }
2337
2338 if (can_create_pseudo_p ())
2339 {
2340 rtx temp2 = gen_reg_rtx (DImode);
2341 rtx temp3 = gen_reg_rtx (DImode);
2342 rtx temp4 = gen_reg_rtx (DImode);
2343
2344 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2345 GEN_INT (32))));
2346
2347 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2348 if ((low_bits & ~0xfffffc00) != 0)
2349 {
2350 emit_insn (gen_rtx_SET (temp3,
2351 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2352 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2353 }
2354 else
2355 {
2356 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2357 }
2358 }
2359 else
2360 {
2361 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2362 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2363 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2364 int to_shift = 12;
2365
2366 /* We are in the middle of reload, so this is really
2367 painful. However, we still make an attempt to
2368 avoid emitting truly stupid code. */
2369 if (low1 != const0_rtx)
2370 {
2371 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2372 GEN_INT (to_shift))));
2373 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2374 sub_temp = op0;
2375 to_shift = 12;
2376 }
2377 else
2378 {
2379 to_shift += 12;
2380 }
2381 if (low2 != const0_rtx)
2382 {
2383 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2384 GEN_INT (to_shift))));
2385 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2386 sub_temp = op0;
2387 to_shift = 8;
2388 }
2389 else
2390 {
2391 to_shift += 8;
2392 }
2393 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2394 GEN_INT (to_shift))));
2395 if (low3 != const0_rtx)
2396 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2397 /* phew... */
2398 }
2399 }
2400
2401 /* Analyze a 64-bit constant for certain properties. */
2402 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2403 unsigned HOST_WIDE_INT,
2404 int *, int *, int *);
2405
2406 static void
2407 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2408 unsigned HOST_WIDE_INT low_bits,
2409 int *hbsp, int *lbsp, int *abbasp)
2410 {
2411 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2412 int i;
2413
2414 lowest_bit_set = highest_bit_set = -1;
2415 i = 0;
2416 do
2417 {
2418 if ((lowest_bit_set == -1)
2419 && ((low_bits >> i) & 1))
2420 lowest_bit_set = i;
2421 if ((highest_bit_set == -1)
2422 && ((high_bits >> (32 - i - 1)) & 1))
2423 highest_bit_set = (64 - i - 1);
2424 }
2425 while (++i < 32
2426 && ((highest_bit_set == -1)
2427 || (lowest_bit_set == -1)));
2428 if (i == 32)
2429 {
2430 i = 0;
2431 do
2432 {
2433 if ((lowest_bit_set == -1)
2434 && ((high_bits >> i) & 1))
2435 lowest_bit_set = i + 32;
2436 if ((highest_bit_set == -1)
2437 && ((low_bits >> (32 - i - 1)) & 1))
2438 highest_bit_set = 32 - i - 1;
2439 }
2440 while (++i < 32
2441 && ((highest_bit_set == -1)
2442 || (lowest_bit_set == -1)));
2443 }
2444 /* If there are no bits set, this should have gone out
2445 as one instruction! */
2446 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2447 all_bits_between_are_set = 1;
2448 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2449 {
2450 if (i < 32)
2451 {
2452 if ((low_bits & (1 << i)) != 0)
2453 continue;
2454 }
2455 else
2456 {
2457 if ((high_bits & (1 << (i - 32))) != 0)
2458 continue;
2459 }
2460 all_bits_between_are_set = 0;
2461 break;
2462 }
2463 *hbsp = highest_bit_set;
2464 *lbsp = lowest_bit_set;
2465 *abbasp = all_bits_between_are_set;
2466 }
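/* Illustrative example: for the constant 0x0000000ff0000000 we have
   high_bits = 0x0000000f and low_bits = 0xf0000000, so this returns
   lowest_bit_set = 28, highest_bit_set = 35 and
   all_bits_between_are_set = 1 (bits 28..35 form one contiguous run). */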
2467
2468 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2469
2470 static int
2471 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2472 unsigned HOST_WIDE_INT low_bits)
2473 {
2474 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2475
2476 if (high_bits == 0
2477 || high_bits == 0xffffffff)
2478 return 1;
2479
2480 analyze_64bit_constant (high_bits, low_bits,
2481 &highest_bit_set, &lowest_bit_set,
2482 &all_bits_between_are_set);
2483
2484 if ((highest_bit_set == 63
2485 || lowest_bit_set == 0)
2486 && all_bits_between_are_set != 0)
2487 return 1;
2488
2489 if ((highest_bit_set - lowest_bit_set) < 21)
2490 return 1;
2491
2492 return 0;
2493 }
2494
2495 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2496 unsigned HOST_WIDE_INT,
2497 int, int);
2498
2499 static unsigned HOST_WIDE_INT
2500 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2501 unsigned HOST_WIDE_INT low_bits,
2502 int lowest_bit_set, int shift)
2503 {
2504 HOST_WIDE_INT hi, lo;
2505
2506 if (lowest_bit_set < 32)
2507 {
2508 lo = (low_bits >> lowest_bit_set) << shift;
2509 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2510 }
2511 else
2512 {
2513 lo = 0;
2514 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2515 }
2516 gcc_assert (! (hi & lo));
2517 return (hi | lo);
2518 }
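/* Continuing the example above: with high_bits = 0x0000000f,
   low_bits = 0xf0000000, lowest_bit_set = 28 and shift = 0, this
   returns (0xf0000000 >> 28) | (0xf << 4) = 0xff, i.e. the run of set
   bits normalized to start at bit 0 (plus any requested shift). */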
2519
2520 /* Here we are sure to be arch64 and this is an integer constant
2521 being loaded into a register. Emit the most efficient
2522 insn sequence possible. Detection of all the 1-insn cases
2523 has been done already. */
2524 static void
2525 sparc_emit_set_const64 (rtx op0, rtx op1)
2526 {
2527 unsigned HOST_WIDE_INT high_bits, low_bits;
2528 int lowest_bit_set, highest_bit_set;
2529 int all_bits_between_are_set;
2530 rtx temp = 0;
2531
2532 /* Sanity check that we know what we are working with. */
2533 gcc_assert (TARGET_ARCH64
2534 && (GET_CODE (op0) == SUBREG
2535 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2536
2537 if (! can_create_pseudo_p ())
2538 temp = op0;
2539
2540 if (GET_CODE (op1) != CONST_INT)
2541 {
2542 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2543 return;
2544 }
2545
2546 if (! temp)
2547 temp = gen_reg_rtx (DImode);
2548
2549 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2550 low_bits = (INTVAL (op1) & 0xffffffff);
2551
2552 /* low_bits bits 0 --> 31
2553 high_bits bits 32 --> 63 */
2554
2555 analyze_64bit_constant (high_bits, low_bits,
2556 &highest_bit_set, &lowest_bit_set,
2557 &all_bits_between_are_set);
2558
2559 /* First try for a 2-insn sequence. */
2560
2561 /* These situations are preferred because the optimizer can
2562 * do more things with them:
2563 * 1) mov -1, %reg
2564 * sllx %reg, shift, %reg
2565 * 2) mov -1, %reg
2566 * srlx %reg, shift, %reg
2567 * 3) mov some_small_const, %reg
2568 * sllx %reg, shift, %reg
2569 */
2570 if (((highest_bit_set == 63
2571 || lowest_bit_set == 0)
2572 && all_bits_between_are_set != 0)
2573 || ((highest_bit_set - lowest_bit_set) < 12))
2574 {
2575 HOST_WIDE_INT the_const = -1;
2576 int shift = lowest_bit_set;
2577
2578 if ((highest_bit_set != 63
2579 && lowest_bit_set != 0)
2580 || all_bits_between_are_set == 0)
2581 {
2582 the_const =
2583 create_simple_focus_bits (high_bits, low_bits,
2584 lowest_bit_set, 0);
2585 }
2586 else if (lowest_bit_set == 0)
2587 shift = -(63 - highest_bit_set);
2588
2589 gcc_assert (SPARC_SIMM13_P (the_const));
2590 gcc_assert (shift != 0);
2591
2592 emit_insn (gen_safe_SET64 (temp, the_const));
2593 if (shift > 0)
2594 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2595 GEN_INT (shift))));
2596 else if (shift < 0)
2597 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2598 GEN_INT (-shift))));
2599 return;
2600 }
2601
2602 /* Now a range of 22 or fewer bits set somewhere.
2603 * 1) sethi %hi(focus_bits), %reg
2604 * sllx %reg, shift, %reg
2605 * 2) sethi %hi(focus_bits), %reg
2606 * srlx %reg, shift, %reg
2607 */
2608 if ((highest_bit_set - lowest_bit_set) < 21)
2609 {
2610 unsigned HOST_WIDE_INT focus_bits =
2611 create_simple_focus_bits (high_bits, low_bits,
2612 lowest_bit_set, 10);
2613
2614 gcc_assert (SPARC_SETHI_P (focus_bits));
2615 gcc_assert (lowest_bit_set != 10);
2616
2617 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2618
2619 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2620 if (lowest_bit_set < 10)
2621 emit_insn (gen_rtx_SET (op0,
2622 gen_rtx_LSHIFTRT (DImode, temp,
2623 GEN_INT (10 - lowest_bit_set))));
2624 else if (lowest_bit_set > 10)
2625 emit_insn (gen_rtx_SET (op0,
2626 gen_rtx_ASHIFT (DImode, temp,
2627 GEN_INT (lowest_bit_set - 10))));
2628 return;
2629 }
2630
2631 /* 1) sethi %hi(low_bits), %reg
2632 * or %reg, %lo(low_bits), %reg
2633 * 2) sethi %hi(~low_bits), %reg
2634 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2635 */
2636 if (high_bits == 0
2637 || high_bits == 0xffffffff)
2638 {
2639 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2640 (high_bits == 0xffffffff));
2641 return;
2642 }
2643
2644 /* Now, try 3-insn sequences. */
2645
2646 /* 1) sethi %hi(high_bits), %reg
2647 * or %reg, %lo(high_bits), %reg
2648 * sllx %reg, 32, %reg
2649 */
2650 if (low_bits == 0)
2651 {
2652 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2653 return;
2654 }
2655
2656 /* We may be able to do something quick
2657 when the constant is negated, so try that. */
2658 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2659 (~low_bits) & 0xfffffc00))
2660 {
2661 /* NOTE: The trailing bits get XOR'd so we need the
2662 non-negated bits, not the negated ones. */
2663 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2664
2665 if ((((~high_bits) & 0xffffffff) == 0
2666 && ((~low_bits) & 0x80000000) == 0)
2667 || (((~high_bits) & 0xffffffff) == 0xffffffff
2668 && ((~low_bits) & 0x80000000) != 0))
2669 {
2670 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2671
2672 if ((SPARC_SETHI_P (fast_int)
2673 && (~high_bits & 0xffffffff) == 0)
2674 || SPARC_SIMM13_P (fast_int))
2675 emit_insn (gen_safe_SET64 (temp, fast_int));
2676 else
2677 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2678 }
2679 else
2680 {
2681 rtx negated_const;
2682 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2683 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2684 sparc_emit_set_const64 (temp, negated_const);
2685 }
2686
2687 /* If we are XOR'ing with -1, then we should emit a one's complement
2688 instead. This way the combiner will notice logical operations
2689 such as ANDN later on and substitute. */
2690 if (trailing_bits == 0x3ff)
2691 {
2692 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2693 }
2694 else
2695 {
2696 emit_insn (gen_rtx_SET (op0,
2697 gen_safe_XOR64 (temp,
2698 (-0x400 | trailing_bits))));
2699 }
2700 return;
2701 }
2702
2703 /* 1) sethi %hi(xxx), %reg
2704 * or %reg, %lo(xxx), %reg
2705 * sllx %reg, yyy, %reg
2706 *
2707 * ??? This is just a generalized version of the low_bits==0
2708 * thing above, FIXME...
2709 */
2710 if ((highest_bit_set - lowest_bit_set) < 32)
2711 {
2712 unsigned HOST_WIDE_INT focus_bits =
2713 create_simple_focus_bits (high_bits, low_bits,
2714 lowest_bit_set, 0);
2715
2716 /* We can't get here in this state. */
2717 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2718
2719 /* So what we know is that the set bits straddle the
2720 middle of the 64-bit word. */
2721 sparc_emit_set_const64_quick2 (op0, temp,
2722 focus_bits, 0,
2723 lowest_bit_set);
2724 return;
2725 }
2726
2727 /* 1) sethi %hi(high_bits), %reg
2728 * or %reg, %lo(high_bits), %reg
2729 * sllx %reg, 32, %reg
2730 * or %reg, low_bits, %reg
2731 */
2732 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2733 {
2734 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2735 return;
2736 }
2737
2738 /* The easiest way when all else fails, is full decomposition. */
2739 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2740 }
2741
2742 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2743
2744 static bool
2745 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2746 {
2747 *p1 = SPARC_ICC_REG;
2748 *p2 = SPARC_FCC_REG;
2749 return true;
2750 }
2751
2752 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2753 return the mode to be used for the comparison. For floating-point,
2754 CCFP[E]mode is used. CCNZmode should be used when the first operand
2755 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2756 processing is needed. */
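/* A rough sketch of the mapping, for 32-bit modes:

     (compare (plus:SI a b) (const_int 0))	-> CCNZmode
     (compare (not:SI a) (const_int -1))	-> CCCmode
     (compare (reg:SI a) (reg:SI b))		-> CCmode
     (compare (reg:SF a) (reg:SF b)), EQ/NE	-> CCFPmode
     (compare (reg:SF a) (reg:SF b)), LT/GT	-> CCFPEmode

   with the CCX variants used for DImode operands on 64-bit. */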
2757
2758 machine_mode
2759 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2760 {
2761 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2762 {
2763 switch (op)
2764 {
2765 case EQ:
2766 case NE:
2767 case UNORDERED:
2768 case ORDERED:
2769 case UNLT:
2770 case UNLE:
2771 case UNGT:
2772 case UNGE:
2773 case UNEQ:
2774 case LTGT:
2775 return CCFPmode;
2776
2777 case LT:
2778 case LE:
2779 case GT:
2780 case GE:
2781 return CCFPEmode;
2782
2783 default:
2784 gcc_unreachable ();
2785 }
2786 }
2787 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2788 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2789 {
2790 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2791 return CCXNZmode;
2792 else
2793 return CCNZmode;
2794 }
2795 else
2796 {
2797 /* This is for the cmp<mode>_sne pattern. */
2798 if (GET_CODE (x) == NOT && y == constm1_rtx)
2799 {
2800 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2801 return CCXCmode;
2802 else
2803 return CCCmode;
2804 }
2805
2806 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2807 return CCXmode;
2808 else
2809 return CCmode;
2810 }
2811 }
2812
2813 /* Emit the compare insn and return the CC reg for a CODE comparison
2814 with operands X and Y. */
2815
2816 static rtx
2817 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2818 {
2819 machine_mode mode;
2820 rtx cc_reg;
2821
2822 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2823 return x;
2824
2825 mode = SELECT_CC_MODE (code, x, y);
2826
2827 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2828 fcc regs (cse can't tell they're really call clobbered regs and will
2829 remove a duplicate comparison even if there is an intervening function
2830 call - it will then try to reload the cc reg via an int reg which is why
2831 we need the movcc patterns). It is possible to provide the movcc
2832 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2833 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2834 to tell cse that CCFPE mode registers (even pseudos) are call
2835 clobbered. */
2836
2837 /* ??? This is an experiment. Rather than making changes to cse which may
2838 or may not be easy/clean, we do our own cse. This is possible because
2839 we will generate hard registers. Cse knows they're call clobbered (it
2840 doesn't know the same thing about pseudos). If we guess wrong, no big
2841 deal, but if we win, great! */
2842
2843 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2844 #if 1 /* experiment */
2845 {
2846 int reg;
2847 /* We cycle through the registers to ensure they're all exercised. */
2848 static int next_fcc_reg = 0;
2849 /* Previous x,y for each fcc reg. */
2850 static rtx prev_args[4][2];
2851
2852 /* Scan prev_args for x,y. */
2853 for (reg = 0; reg < 4; reg++)
2854 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2855 break;
2856 if (reg == 4)
2857 {
2858 reg = next_fcc_reg;
2859 prev_args[reg][0] = x;
2860 prev_args[reg][1] = y;
2861 next_fcc_reg = (next_fcc_reg + 1) & 3;
2862 }
2863 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2864 }
2865 #else
2866 cc_reg = gen_reg_rtx (mode);
2867 #endif /* ! experiment */
2868 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2869 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2870 else
2871 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2872
2873 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2874 will only result in an unrecognizable insn, so there is no point in asserting. */
2875 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2876
2877 return cc_reg;
2878 }
2879
2880
2881 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2882
2883 rtx
2884 gen_compare_reg (rtx cmp)
2885 {
2886 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2887 }
2888
2889 /* This function is used for v9 only.
2890 DEST is the target of the Scc insn.
2891 CODE is the code for an Scc's comparison.
2892 X and Y are the values we compare.
2893
2894 This function is needed to turn
2895
2896 (set (reg:SI 110)
2897 (gt (reg:CCX 100 %icc)
2898 (const_int 0)))
2899 into
2900 (set (reg:SI 110)
2901 (gt:DI (reg:CCX 100 %icc)
2902 (const_int 0)))
2903
2904 I.e. the instruction recognizer needs to see the mode of the comparison to
2905 find the right instruction. We could use "gt:DI" right in the
2906 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2907
2908 static int
2909 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2910 {
2911 if (! TARGET_ARCH64
2912 && (GET_MODE (x) == DImode
2913 || GET_MODE (dest) == DImode))
2914 return 0;
2915
2916 /* Try to use the movrCC insns. */
2917 if (TARGET_ARCH64
2918 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2919 && y == const0_rtx
2920 && v9_regcmp_p (compare_code))
2921 {
2922 rtx op0 = x;
2923 rtx temp;
2924
2925 /* Special case for op0 != 0. This can be done with one instruction if
2926 dest == x. */
2927
2928 if (compare_code == NE
2929 && GET_MODE (dest) == DImode
2930 && rtx_equal_p (op0, dest))
2931 {
2932 emit_insn (gen_rtx_SET (dest,
2933 gen_rtx_IF_THEN_ELSE (DImode,
2934 gen_rtx_fmt_ee (compare_code, DImode,
2935 op0, const0_rtx),
2936 const1_rtx,
2937 dest)));
2938 return 1;
2939 }
2940
2941 if (reg_overlap_mentioned_p (dest, op0))
2942 {
2943 /* Handle the case where dest == x.
2944 We "early clobber" the result. */
2945 op0 = gen_reg_rtx (GET_MODE (x));
2946 emit_move_insn (op0, x);
2947 }
2948
2949 emit_insn (gen_rtx_SET (dest, const0_rtx));
2950 if (GET_MODE (op0) != DImode)
2951 {
2952 temp = gen_reg_rtx (DImode);
2953 convert_move (temp, op0, 0);
2954 }
2955 else
2956 temp = op0;
2957 emit_insn (gen_rtx_SET (dest,
2958 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2959 gen_rtx_fmt_ee (compare_code, DImode,
2960 temp, const0_rtx),
2961 const1_rtx,
2962 dest)));
2963 return 1;
2964 }
2965 else
2966 {
2967 x = gen_compare_reg_1 (compare_code, x, y);
2968 y = const0_rtx;
2969
2970 emit_insn (gen_rtx_SET (dest, const0_rtx));
2971 emit_insn (gen_rtx_SET (dest,
2972 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2973 gen_rtx_fmt_ee (compare_code,
2974 GET_MODE (x), x, y),
2975 const1_rtx, dest)));
2976 return 1;
2977 }
2978 }
2979
2980
2981 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2982 without jumps using the addx/subx instructions. */
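/* As a sketch (register names are illustrative), the classic jump-free
   sequence for "dest = (x != y)" is roughly

	xor	x, y, %tmp		! %tmp == 0 iff x == y
	subcc	%g0, %tmp, %g0		! set carry iff %tmp != 0
	addx	%g0, 0, dest		! dest = carry

   and "dest = (x == y)" replaces the last insn with
   subx %g0, -1, dest, i.e. 1 - carry. */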
2983
2984 bool
2985 emit_scc_insn (rtx operands[])
2986 {
2987 rtx tem, x, y;
2988 enum rtx_code code;
2989 machine_mode mode;
2990
2991 /* The quad-word fp compare library routines all return nonzero to indicate
2992 true, which is different from the equivalent libgcc routines, so we must
2993 handle them specially here. */
2994 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2995 {
2996 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2997 GET_CODE (operands[1]));
2998 operands[2] = XEXP (operands[1], 0);
2999 operands[3] = XEXP (operands[1], 1);
3000 }
3001
3002 code = GET_CODE (operands[1]);
3003 x = operands[2];
3004 y = operands[3];
3005 mode = GET_MODE (x);
3006
3007 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3008 more applications). The exception to this is "reg != 0" which can
3009 be done in one instruction on v9 (so we do it). */
3010 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3011 {
3012 if (y != const0_rtx)
3013 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3014
3015 rtx pat = gen_rtx_SET (operands[0],
3016 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3017 x, const0_rtx));
3018
3019 /* If we can use addx/subx or addxc, add a clobber for CC. */
3020 if (mode == SImode || (code == NE && TARGET_VIS3))
3021 {
3022 rtx clobber
3023 = gen_rtx_CLOBBER (VOIDmode,
3024 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3025 SPARC_ICC_REG));
3026 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3027 }
3028
3029 emit_insn (pat);
3030 return true;
3031 }
3032
3033 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3034 if (TARGET_ARCH64
3035 && mode == DImode
3036 && !((code == LTU || code == GTU) && TARGET_VIS3)
3037 && gen_v9_scc (operands[0], code, x, y))
3038 return true;
3039
3040 /* We can do LTU and GEU using the addx/subx instructions too. And
3041 for GTU/LEU, if both operands are registers, swap them and fall
3042 back to the easy case. */
3043 if (code == GTU || code == LEU)
3044 {
3045 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3046 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3047 {
3048 tem = x;
3049 x = y;
3050 y = tem;
3051 code = swap_condition (code);
3052 }
3053 }
3054
3055 if (code == LTU || code == GEU)
3056 {
3057 emit_insn (gen_rtx_SET (operands[0],
3058 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3059 gen_compare_reg_1 (code, x, y),
3060 const0_rtx)));
3061 return true;
3062 }
3063
3064 /* All the possibilities to use addx/subx-based sequences have been
3065 exhausted; try for a 3-instruction sequence using v9 conditional
3066 moves. */
3067 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3068 return true;
3069
3070 /* Nope, do branches. */
3071 return false;
3072 }
3073
3074 /* Emit a conditional jump insn for the v9 architecture using comparison code
3075 CODE and jump target LABEL.
3076 This function exists to take advantage of the v9 brxx insns. */
3077
3078 static void
3079 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3080 {
3081 emit_jump_insn (gen_rtx_SET (pc_rtx,
3082 gen_rtx_IF_THEN_ELSE (VOIDmode,
3083 gen_rtx_fmt_ee (code, GET_MODE (op0),
3084 op0, const0_rtx),
3085 gen_rtx_LABEL_REF (VOIDmode, label),
3086 pc_rtx)));
3087 }
3088
3089 /* Emit a conditional jump insn for the UA2011 architecture using
3090 comparison code CODE and jump target LABEL. This function exists
3091 to take advantage of the UA2011 Compare and Branch insns. */
3092
3093 static void
3094 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3095 {
3096 rtx if_then_else;
3097
3098 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3099 gen_rtx_fmt_ee(code, GET_MODE(op0),
3100 op0, op1),
3101 gen_rtx_LABEL_REF (VOIDmode, label),
3102 pc_rtx);
3103
3104 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3105 }
3106
3107 void
3108 emit_conditional_branch_insn (rtx operands[])
3109 {
3110 /* The quad-word fp compare library routines all return nonzero to indicate
3111 true, which is different from the equivalent libgcc routines, so we must
3112 handle them specially here. */
3113 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3114 {
3115 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3116 GET_CODE (operands[0]));
3117 operands[1] = XEXP (operands[0], 0);
3118 operands[2] = XEXP (operands[0], 1);
3119 }
3120
3121 /* If we can tell early on that the comparison is against a constant
3122 that won't fit in the 5-bit signed immediate field of a cbcond,
3123 use one of the other v9 conditional branch sequences. */
3124 if (TARGET_CBCOND
3125 && GET_CODE (operands[1]) == REG
3126 && (GET_MODE (operands[1]) == SImode
3127 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3128 && (GET_CODE (operands[2]) != CONST_INT
3129 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3130 {
3131 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3132 return;
3133 }
3134
3135 if (TARGET_ARCH64 && operands[2] == const0_rtx
3136 && GET_CODE (operands[1]) == REG
3137 && GET_MODE (operands[1]) == DImode)
3138 {
3139 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3140 return;
3141 }
3142
3143 operands[1] = gen_compare_reg (operands[0]);
3144 operands[2] = const0_rtx;
3145 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3146 operands[1], operands[2]);
3147 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3148 operands[3]));
3149 }
3150
3151
3152 /* Generate a DFmode part of a hard TFmode register.
3153 REG is the TFmode hard register, LOW is 1 for the
3154 low 64 bits of the register and 0 otherwise. */
3156 rtx
3157 gen_df_reg (rtx reg, int low)
3158 {
3159 int regno = REGNO (reg);
3160
3161 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3162 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3163 return gen_rtx_REG (DFmode, regno);
3164 }
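/* For instance, assuming the usual big-endian word order: for the
   TFmode register starting at %f0, gen_df_reg returns %f0 for LOW == 0
   (the high-order half) and %f2 for LOW == 1 (the low-order half); for
   a TFmode value held in integer registers on 64-bit, the halves are
   adjacent DImode registers instead, hence the +1. */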
3165 \f
3166 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3167 Unlike normal calls, TFmode operands are passed by reference. It is
3168 assumed that no more than 3 operands are required. */
3169
3170 static void
3171 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3172 {
3173 rtx ret_slot = NULL, arg[3], func_sym;
3174 int i;
3175
3176 /* We only expect to be called for conversions, unary, and binary ops. */
3177 gcc_assert (nargs == 2 || nargs == 3);
3178
3179 for (i = 0; i < nargs; ++i)
3180 {
3181 rtx this_arg = operands[i];
3182 rtx this_slot;
3183
3184 /* TFmode arguments and return values are passed by reference. */
3185 if (GET_MODE (this_arg) == TFmode)
3186 {
3187 int force_stack_temp;
3188
3189 force_stack_temp = 0;
3190 if (TARGET_BUGGY_QP_LIB && i == 0)
3191 force_stack_temp = 1;
3192
3193 if (GET_CODE (this_arg) == MEM
3194 && ! force_stack_temp)
3195 {
3196 tree expr = MEM_EXPR (this_arg);
3197 if (expr)
3198 mark_addressable (expr);
3199 this_arg = XEXP (this_arg, 0);
3200 }
3201 else if (CONSTANT_P (this_arg)
3202 && ! force_stack_temp)
3203 {
3204 this_slot = force_const_mem (TFmode, this_arg);
3205 this_arg = XEXP (this_slot, 0);
3206 }
3207 else
3208 {
3209 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3210
3211 /* Operand 0 is the return value. We'll copy it out later. */
3212 if (i > 0)
3213 emit_move_insn (this_slot, this_arg);
3214 else
3215 ret_slot = this_slot;
3216
3217 this_arg = XEXP (this_slot, 0);
3218 }
3219 }
3220
3221 arg[i] = this_arg;
3222 }
3223
3224 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3225
3226 if (GET_MODE (operands[0]) == TFmode)
3227 {
3228 if (nargs == 2)
3229 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3230 arg[0], GET_MODE (arg[0]),
3231 arg[1], GET_MODE (arg[1]));
3232 else
3233 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3234 arg[0], GET_MODE (arg[0]),
3235 arg[1], GET_MODE (arg[1]),
3236 arg[2], GET_MODE (arg[2]));
3237
3238 if (ret_slot)
3239 emit_move_insn (operands[0], ret_slot);
3240 }
3241 else
3242 {
3243 rtx ret;
3244
3245 gcc_assert (nargs == 2);
3246
3247 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3248 GET_MODE (operands[0]), 1,
3249 arg[1], GET_MODE (arg[1]));
3250
3251 if (ret != operands[0])
3252 emit_move_insn (operands[0], ret);
3253 }
3254 }
3255
3256 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3257
3258 static void
3259 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3260 {
3261 const char *func;
3262
3263 switch (code)
3264 {
3265 case PLUS:
3266 func = "_Qp_add";
3267 break;
3268 case MINUS:
3269 func = "_Qp_sub";
3270 break;
3271 case MULT:
3272 func = "_Qp_mul";
3273 break;
3274 case DIV:
3275 func = "_Qp_div";
3276 break;
3277 default:
3278 gcc_unreachable ();
3279 }
3280
3281 emit_soft_tfmode_libcall (func, 3, operands);
3282 }
3283
3284 static void
3285 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3286 {
3287 const char *func;
3288
3289 gcc_assert (code == SQRT);
3290 func = "_Qp_sqrt";
3291
3292 emit_soft_tfmode_libcall (func, 2, operands);
3293 }
3294
3295 static void
3296 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3297 {
3298 const char *func;
3299
3300 switch (code)
3301 {
3302 case FLOAT_EXTEND:
3303 switch (GET_MODE (operands[1]))
3304 {
3305 case SFmode:
3306 func = "_Qp_stoq";
3307 break;
3308 case DFmode:
3309 func = "_Qp_dtoq";
3310 break;
3311 default:
3312 gcc_unreachable ();
3313 }
3314 break;
3315
3316 case FLOAT_TRUNCATE:
3317 switch (GET_MODE (operands[0]))
3318 {
3319 case SFmode:
3320 func = "_Qp_qtos";
3321 break;
3322 case DFmode:
3323 func = "_Qp_qtod";
3324 break;
3325 default:
3326 gcc_unreachable ();
3327 }
3328 break;
3329
3330 case FLOAT:
3331 switch (GET_MODE (operands[1]))
3332 {
3333 case SImode:
3334 func = "_Qp_itoq";
3335 if (TARGET_ARCH64)
3336 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3337 break;
3338 case DImode:
3339 func = "_Qp_xtoq";
3340 break;
3341 default:
3342 gcc_unreachable ();
3343 }
3344 break;
3345
3346 case UNSIGNED_FLOAT:
3347 switch (GET_MODE (operands[1]))
3348 {
3349 case SImode:
3350 func = "_Qp_uitoq";
3351 if (TARGET_ARCH64)
3352 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3353 break;
3354 case DImode:
3355 func = "_Qp_uxtoq";
3356 break;
3357 default:
3358 gcc_unreachable ();
3359 }
3360 break;
3361
3362 case FIX:
3363 switch (GET_MODE (operands[0]))
3364 {
3365 case SImode:
3366 func = "_Qp_qtoi";
3367 break;
3368 case DImode:
3369 func = "_Qp_qtox";
3370 break;
3371 default:
3372 gcc_unreachable ();
3373 }
3374 break;
3375
3376 case UNSIGNED_FIX:
3377 switch (GET_MODE (operands[0]))
3378 {
3379 case SImode:
3380 func = "_Qp_qtoui";
3381 break;
3382 case DImode:
3383 func = "_Qp_qtoux";
3384 break;
3385 default:
3386 gcc_unreachable ();
3387 }
3388 break;
3389
3390 default:
3391 gcc_unreachable ();
3392 }
3393
3394 emit_soft_tfmode_libcall (func, 2, operands);
3395 }
3396
3397 /* Expand a hard-float TFmode operation. All arguments must be in
3398 registers. */
3399
3400 static void
3401 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3402 {
3403 rtx op, dest;
3404
3405 if (GET_RTX_CLASS (code) == RTX_UNARY)
3406 {
3407 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3408 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3409 }
3410 else
3411 {
3412 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3413 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3414 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3415 operands[1], operands[2]);
3416 }
3417
3418 if (register_operand (operands[0], VOIDmode))
3419 dest = operands[0];
3420 else
3421 dest = gen_reg_rtx (GET_MODE (operands[0]));
3422
3423 emit_insn (gen_rtx_SET (dest, op));
3424
3425 if (dest != operands[0])
3426 emit_move_insn (operands[0], dest);
3427 }
3428
3429 void
3430 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3431 {
3432 if (TARGET_HARD_QUAD)
3433 emit_hard_tfmode_operation (code, operands);
3434 else
3435 emit_soft_tfmode_binop (code, operands);
3436 }
3437
3438 void
3439 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3440 {
3441 if (TARGET_HARD_QUAD)
3442 emit_hard_tfmode_operation (code, operands);
3443 else
3444 emit_soft_tfmode_unop (code, operands);
3445 }
3446
3447 void
3448 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3449 {
3450 if (TARGET_HARD_QUAD)
3451 emit_hard_tfmode_operation (code, operands);
3452 else
3453 emit_soft_tfmode_cvt (code, operands);
3454 }
3455 \f
3456 /* Return nonzero if a branch/jump/call instruction will be emitting a
3457 nop into its delay slot. */
3458
3459 int
3460 empty_delay_slot (rtx_insn *insn)
3461 {
3462 rtx seq;
3463
3464 /* If no previous instruction (should not happen), return true. */
3465 if (PREV_INSN (insn) == NULL)
3466 return 1;
3467
3468 seq = NEXT_INSN (PREV_INSN (insn));
3469 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3470 return 0;
3471
3472 return 1;
3473 }
3474
3475 /* Return nonzero if we should emit a nop after a cbcond instruction.
3476 The cbcond instruction does not have a delay slot, however there is
3477 a severe performance penalty if a control transfer appears right
3478 after a cbcond. Therefore we emit a nop when we detect this
3479 situation. */
3480
3481 int
3482 emit_cbcond_nop (rtx_insn *insn)
3483 {
3484 rtx next = next_active_insn (insn);
3485
3486 if (!next)
3487 return 1;
3488
3489 if (NONJUMP_INSN_P (next)
3490 && GET_CODE (PATTERN (next)) == SEQUENCE)
3491 next = XVECEXP (PATTERN (next), 0, 0);
3492 else if (CALL_P (next)
3493 && GET_CODE (PATTERN (next)) == PARALLEL)
3494 {
3495 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3496
3497 if (GET_CODE (delay) == RETURN)
3498 {
3499 /* It's a sibling call. Do not emit the nop if we're going
3500 to emit something other than the jump itself as the first
3501 instruction of the sibcall sequence. */
3502 if (sparc_leaf_function_p || TARGET_FLAT)
3503 return 0;
3504 }
3505 }
3506
3507 if (NONJUMP_INSN_P (next))
3508 return 0;
3509
3510 return 1;
3511 }
3512
3513 /* Return nonzero if TRIAL can go into the call delay slot. */
3514
3515 int
3516 eligible_for_call_delay (rtx_insn *trial)
3517 {
3518 rtx pat;
3519
3520 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3521 return 0;
3522
3523 /* Binutils allows
3524 call __tls_get_addr, %tgd_call (foo)
3525 add %l7, %o0, %o0, %tgd_add (foo)
3526 while Sun as/ld does not. */
3527 if (TARGET_GNU_TLS || !TARGET_TLS)
3528 return 1;
3529
3530 pat = PATTERN (trial);
3531
3532 /* We must reject tgd_add{32|64}, i.e.
3533 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3534 and tldm_add{32|64}, i.e.
3535 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3536 for Sun as/ld. */
3537 if (GET_CODE (pat) == SET
3538 && GET_CODE (SET_SRC (pat)) == PLUS)
3539 {
3540 rtx unspec = XEXP (SET_SRC (pat), 1);
3541
3542 if (GET_CODE (unspec) == UNSPEC
3543 && (XINT (unspec, 1) == UNSPEC_TLSGD
3544 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3545 return 0;
3546 }
3547
3548 return 1;
3549 }
3550
3551 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3552 instruction. RETURN_P is true if the v9 variant 'return' is to be
3553 considered in the test too.
3554
3555 TRIAL must be a SET whose destination is a REG appropriate for the
3556 'restore' instruction or, if RETURN_P is true, for the 'return'
3557 instruction. */
3558
3559 static int
3560 eligible_for_restore_insn (rtx trial, bool return_p)
3561 {
3562 rtx pat = PATTERN (trial);
3563 rtx src = SET_SRC (pat);
3564 bool src_is_freg = false;
3565 rtx src_reg;
3566
3567 /* Since we can now do moves between float and integer registers when
3568 VIS3 is enabled, we have to catch this case. We can allow such
3569 moves when doing a 'return', however. */
3570 src_reg = src;
3571 if (GET_CODE (src_reg) == SUBREG)
3572 src_reg = SUBREG_REG (src_reg);
3573 if (GET_CODE (src_reg) == REG
3574 && SPARC_FP_REG_P (REGNO (src_reg)))
3575 src_is_freg = true;
3576
3577 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3578 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3579 && arith_operand (src, GET_MODE (src))
3580 && ! src_is_freg)
3581 {
3582 if (TARGET_ARCH64)
3583 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3584 else
3585 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3586 }
3587
3588 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3589 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3590 && arith_double_operand (src, GET_MODE (src))
3591 && ! src_is_freg)
3592 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3593
3594 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3595 else if (! TARGET_FPU && register_operand (src, SFmode))
3596 return 1;
3597
3598 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3599 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3600 return 1;
3601
3602 /* If we have the 'return' instruction, anything that does not use
3603 local or output registers and can go into a delay slot wins. */
3604 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3605 return 1;
3606
3607 /* The 'restore src1,src2,dest' pattern for SImode. */
3608 else if (GET_CODE (src) == PLUS
3609 && register_operand (XEXP (src, 0), SImode)
3610 && arith_operand (XEXP (src, 1), SImode))
3611 return 1;
3612
3613 /* The 'restore src1,src2,dest' pattern for DImode. */
3614 else if (GET_CODE (src) == PLUS
3615 && register_operand (XEXP (src, 0), DImode)
3616 && arith_double_operand (XEXP (src, 1), DImode))
3617 return 1;
3618
3619 /* The 'restore src1,%lo(src2),dest' pattern. */
3620 else if (GET_CODE (src) == LO_SUM
3621 && ! TARGET_CM_MEDMID
3622 && ((register_operand (XEXP (src, 0), SImode)
3623 && immediate_operand (XEXP (src, 1), SImode))
3624 || (TARGET_ARCH64
3625 && register_operand (XEXP (src, 0), DImode)
3626 && immediate_operand (XEXP (src, 1), DImode))))
3627 return 1;
3628
3629 /* The 'restore src,src,dest' pattern. */
3630 else if (GET_CODE (src) == ASHIFT
3631 && (register_operand (XEXP (src, 0), SImode)
3632 || register_operand (XEXP (src, 0), DImode))
3633 && XEXP (src, 1) == const1_rtx)
3634 return 1;
3635
3636 return 0;
3637 }
3638
3639 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3640
3641 int
3642 eligible_for_return_delay (rtx_insn *trial)
3643 {
3644 int regno;
3645 rtx pat;
3646
3647 /* If the function uses __builtin_eh_return, the eh_return machinery
3648 occupies the delay slot. */
3649 if (crtl->calls_eh_return)
3650 return 0;
3651
3652 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3653 return 0;
3654
3655 /* In the case of a leaf or flat function, anything can go into the slot. */
3656 if (sparc_leaf_function_p || TARGET_FLAT)
3657 return 1;
3658
3659 if (!NONJUMP_INSN_P (trial))
3660 return 0;
3661
3662 pat = PATTERN (trial);
3663 if (GET_CODE (pat) == PARALLEL)
3664 {
3665 int i;
3666
3667 if (! TARGET_V9)
3668 return 0;
3669 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3670 {
3671 rtx expr = XVECEXP (pat, 0, i);
3672 if (GET_CODE (expr) != SET)
3673 return 0;
3674 if (GET_CODE (SET_DEST (expr)) != REG)
3675 return 0;
3676 regno = REGNO (SET_DEST (expr));
3677 if (regno >= 8 && regno < 24)
3678 return 0;
3679 }
3680 return !epilogue_renumber (&pat, 1);
3681 }
3682
3683 if (GET_CODE (pat) != SET)
3684 return 0;
3685
3686 if (GET_CODE (SET_DEST (pat)) != REG)
3687 return 0;
3688
3689 regno = REGNO (SET_DEST (pat));
3690
3691 /* Otherwise, only operations which can be done in tandem with
3692 a `restore' or `return' insn can go into the delay slot. */
3693 if (regno >= 8 && regno < 24)
3694 return 0;
3695
3696   /* If this instruction sets up a floating-point register and we have a
3697      'return' instruction, it can probably go in.  But 'restore' will not
3698      work with FP_REGS.  */
3699 if (! SPARC_INT_REG_P (regno))
3700 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3701
3702 return eligible_for_restore_insn (trial, true);
3703 }
3704
3705 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3706
3707 int
3708 eligible_for_sibcall_delay (rtx_insn *trial)
3709 {
3710 rtx pat;
3711
3712 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3713 return 0;
3714
3715 if (!NONJUMP_INSN_P (trial))
3716 return 0;
3717
3718 pat = PATTERN (trial);
3719
3720 if (sparc_leaf_function_p || TARGET_FLAT)
3721 {
3722 /* If the tail call is done using the call instruction,
3723 we have to restore %o7 in the delay slot. */
3724 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3725 return 0;
3726
3727       /* %g1 is used to build the function address.  */
3728 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3729 return 0;
3730
3731 return 1;
3732 }
3733
3734 if (GET_CODE (pat) != SET)
3735 return 0;
3736
3737 /* Otherwise, only operations which can be done in tandem with
3738 a `restore' insn can go into the delay slot. */
3739 if (GET_CODE (SET_DEST (pat)) != REG
3740 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3741 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3742 return 0;
3743
3744 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3745 in most cases. */
3746 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3747 return 0;
3748
3749 return eligible_for_restore_insn (trial, false);
3750 }
3751 \f
3752 /* Determine if it's legal to put X into the constant pool. This
3753 is not possible if X contains the address of a symbol that is
3754 not constant (TLS) or not known at final link time (PIC). */
3755
3756 static bool
3757 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3758 {
3759 switch (GET_CODE (x))
3760 {
3761 case CONST_INT:
3762 case CONST_WIDE_INT:
3763 case CONST_DOUBLE:
3764 case CONST_VECTOR:
3765 /* Accept all non-symbolic constants. */
3766 return false;
3767
3768 case LABEL_REF:
3769 /* Labels are OK iff we are non-PIC. */
3770 return flag_pic != 0;
3771
3772 case SYMBOL_REF:
3773     case SYMBOL_REF:
3774       /* 'Naked' TLS symbol references are never OK;
3775 if (SYMBOL_REF_TLS_MODEL (x))
3776 return true;
3777 else
3778 return flag_pic != 0;
3779
3780 case CONST:
3781 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3782 case PLUS:
3783 case MINUS:
3784 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3785 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3786 case UNSPEC:
3787 return true;
3788 default:
3789 gcc_unreachable ();
3790 }
3791 }
3792 \f
3793 /* Global Offset Table support. */
3794 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3795 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3796
3797 /* Return the SYMBOL_REF for the Global Offset Table. */
3798
3799 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3800
3801 static rtx
3802 sparc_got (void)
3803 {
3804 if (!sparc_got_symbol)
3805 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3806
3807 return sparc_got_symbol;
3808 }
3809
3810 /* Ensure that we are not using patterns that are not OK with PIC. */
3811
3812 int
3813 check_pic (int i)
3814 {
3815 rtx op;
3816
3817 switch (flag_pic)
3818 {
3819 case 1:
3820 op = recog_data.operand[i];
3821 gcc_assert (GET_CODE (op) != SYMBOL_REF
3822 && (GET_CODE (op) != CONST
3823 || (GET_CODE (XEXP (op, 0)) == MINUS
3824 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3825 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3826 /* fallthrough */
3827 case 2:
3828 default:
3829 return 1;
3830 }
3831 }
3832
3833 /* Return true if X is an address which needs a temporary register when
3834 reloaded while generating PIC code. */
3835
3836 int
3837 pic_address_needs_scratch (rtx x)
3838 {
3839   /* An address which is a symbolic operand plus a non-SMALL_INT constant
        needs a temp reg.  */
3840 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3842 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3843 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3844 return 1;
3845
3846 return 0;
3847 }
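
/* Example (illustrative): 'sym + 0x2000' needs a scratch register because
   0x2000 does not fit in the 13-bit signed immediate range checked by
   SMALL_INT, whereas 'sym + 8' can be handled directly.  */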
3848
3849 /* Determine if a given RTX is a valid constant. We already know this
3850 satisfies CONSTANT_P. */
3851
3852 static bool
3853 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3854 {
3855 switch (GET_CODE (x))
3856 {
3857 case CONST:
3858 case SYMBOL_REF:
3859 if (sparc_tls_referenced_p (x))
3860 return false;
3861 break;
3862
3863 case CONST_DOUBLE:
3864       /* Floating-point constants are generally not OK.
3865 	 The only exceptions are 0.0 and all-ones when VIS is enabled.  */
3866 if (TARGET_VIS
3867 && SCALAR_FLOAT_MODE_P (mode)
3868 && (const_zero_operand (x, mode)
3869 || const_all_ones_operand (x, mode)))
3870 return true;
3871
3872 return false;
3873
3874 case CONST_VECTOR:
3875       /* Vector constants are generally not OK.
3876 	 The only exceptions are 0 and -1 when VIS is enabled.  */
3877 if (TARGET_VIS
3878 && (const_zero_operand (x, mode)
3879 || const_all_ones_operand (x, mode)))
3880 return true;
3881
3882 return false;
3883
3884 default:
3885 break;
3886 }
3887
3888 return true;
3889 }
3890
3891 /* Determine if a given RTX is a valid constant address. */
3892
3893 bool
3894 constant_address_p (rtx x)
3895 {
3896 switch (GET_CODE (x))
3897 {
3898 case LABEL_REF:
3899 case CONST_INT:
3900 case HIGH:
3901 return true;
3902
3903 case CONST:
3904 if (flag_pic && pic_address_needs_scratch (x))
3905 return false;
3906 return sparc_legitimate_constant_p (Pmode, x);
3907
3908 case SYMBOL_REF:
3909 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3910
3911 default:
3912 return false;
3913 }
3914 }
3915
3916 /* Nonzero if the constant value X is a legitimate general operand
3917 when generating PIC code. It is given that flag_pic is on and
3918 that X satisfies CONSTANT_P. */
3919
3920 bool
3921 legitimate_pic_operand_p (rtx x)
3922 {
3923 if (pic_address_needs_scratch (x))
3924 return false;
3925 if (sparc_tls_referenced_p (x))
3926 return false;
3927 return true;
3928 }
3929
3930 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3931 (CONST_INT_P (X) \
3932 && INTVAL (X) >= -0x1000 \
3933 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3934
3935 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3936 (CONST_INT_P (X) \
3937 && INTVAL (X) >= -0x1000 \
3938 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
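
/* For instance, with MODE == DFmode (size 8) RTX_OK_FOR_OFFSET_P accepts
   [-0x1000, 0xff8] so that the last byte of the access still lies within
   the 13-bit signed displacement.  The OLO10 bound is tighter (0xc00 - size)
   because the offset is later added to a %lo() value of up to 0x3ff and the
   sum must still fit in the simm13 field.  */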
3939
3940 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3941
3942 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3943 ordinarily. This changes a bit when generating PIC. */
3944
3945 static bool
3946 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3947 {
3948 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3949
3950 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3951 rs1 = addr;
3952 else if (GET_CODE (addr) == PLUS)
3953 {
3954 rs1 = XEXP (addr, 0);
3955 rs2 = XEXP (addr, 1);
3956
3957       /* Canonicalize: a REG comes first; if there are no REGs,
3958 	 a LO_SUM comes first.  */
3959 if (!REG_P (rs1)
3960 && GET_CODE (rs1) != SUBREG
3961 && (REG_P (rs2)
3962 || GET_CODE (rs2) == SUBREG
3963 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3964 {
3965 rs1 = XEXP (addr, 1);
3966 rs2 = XEXP (addr, 0);
3967 }
3968
3969 if ((flag_pic == 1
3970 && rs1 == pic_offset_table_rtx
3971 && !REG_P (rs2)
3972 && GET_CODE (rs2) != SUBREG
3973 && GET_CODE (rs2) != LO_SUM
3974 && GET_CODE (rs2) != MEM
3975 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3976 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3977 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3978 || ((REG_P (rs1)
3979 || GET_CODE (rs1) == SUBREG)
3980 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3981 {
3982 imm1 = rs2;
3983 rs2 = NULL;
3984 }
3985 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3986 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3987 {
3988 /* We prohibit REG + REG for TFmode when there are no quad move insns
3989 and we consequently need to split. We do this because REG+REG
3990 is not an offsettable address. If we get the situation in reload
3991 where source and destination of a movtf pattern are both MEMs with
3992 REG+REG address, then only one of them gets converted to an
3993 offsettable address. */
3994 if (mode == TFmode
3995 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3996 return 0;
3997
3998 /* Likewise for TImode, but in all cases. */
3999 if (mode == TImode)
4000 return 0;
4001
4002 	  /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
4003 	     optimizing, because mem_min_alignment is then likely to be zero
4004 	     after reload and the forced split would lack a matching splitter
4005 	     pattern.  */
4006 if (TARGET_ARCH32 && !optimize
4007 && (mode == DFmode || mode == DImode))
4008 return 0;
4009 }
4010 else if (USE_AS_OFFSETABLE_LO10
4011 && GET_CODE (rs1) == LO_SUM
4012 && TARGET_ARCH64
4013 && ! TARGET_CM_MEDMID
4014 && RTX_OK_FOR_OLO10_P (rs2, mode))
4015 {
4016 rs2 = NULL;
4017 imm1 = XEXP (rs1, 1);
4018 rs1 = XEXP (rs1, 0);
4019 if (!CONSTANT_P (imm1)
4020 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4021 return 0;
4022 }
4023 }
4024 else if (GET_CODE (addr) == LO_SUM)
4025 {
4026 rs1 = XEXP (addr, 0);
4027 imm1 = XEXP (addr, 1);
4028
4029 if (!CONSTANT_P (imm1)
4030 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4031 return 0;
4032
4033 /* We can't allow TFmode in 32-bit mode, because an offset greater
4034 than the alignment (8) may cause the LO_SUM to overflow. */
4035 if (mode == TFmode && TARGET_ARCH32)
4036 return 0;
4037
4038 /* During reload, accept the HIGH+LO_SUM construct generated by
4039 sparc_legitimize_reload_address. */
4040 if (reload_in_progress
4041 && GET_CODE (rs1) == HIGH
4042 && XEXP (rs1, 0) == imm1)
4043 return 1;
4044 }
4045 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4046 return 1;
4047 else
4048 return 0;
4049
4050 if (GET_CODE (rs1) == SUBREG)
4051 rs1 = SUBREG_REG (rs1);
4052 if (!REG_P (rs1))
4053 return 0;
4054
4055 if (rs2)
4056 {
4057 if (GET_CODE (rs2) == SUBREG)
4058 rs2 = SUBREG_REG (rs2);
4059 if (!REG_P (rs2))
4060 return 0;
4061 }
4062
4063 if (strict)
4064 {
4065 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4066 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4067 return 0;
4068 }
4069 else
4070 {
4071 if ((! SPARC_INT_REG_P (REGNO (rs1))
4072 && REGNO (rs1) != FRAME_POINTER_REGNUM
4073 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4074 || (rs2
4075 && (! SPARC_INT_REG_P (REGNO (rs2))
4076 && REGNO (rs2) != FRAME_POINTER_REGNUM
4077 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4078 return 0;
4079 }
4080 return 1;
4081 }
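
/* Examples (illustrative): [%i0 + %i1] and [%fp - 8] are legitimate
   addresses, whereas [%i0 + 4096] is not, since 4096 exceeds the 13-bit
   signed displacement; the latter must first be legitimized, e.g. into
   the REG+REG form

	sethi	%hi(4096), %g1
	ld	[%i0 + %g1], %o0

   (register choices are arbitrary here).  */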
4082
4083 /* Return the SYMBOL_REF for the tls_get_addr function. */
4084
4085 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4086
4087 static rtx
4088 sparc_tls_get_addr (void)
4089 {
4090 if (!sparc_tls_symbol)
4091 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4092
4093 return sparc_tls_symbol;
4094 }
4095
4096 /* Return the Global Offset Table to be used in TLS mode. */
4097
4098 static rtx
4099 sparc_tls_got (void)
4100 {
4101 /* In PIC mode, this is just the PIC offset table. */
4102 if (flag_pic)
4103 {
4104 crtl->uses_pic_offset_table = 1;
4105 return pic_offset_table_rtx;
4106 }
4107
4108 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4109 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4110 if (TARGET_SUN_TLS && TARGET_ARCH32)
4111 {
4112 load_got_register ();
4113 return global_offset_table_rtx;
4114 }
4115
4116 /* In all other cases, we load a new pseudo with the GOT symbol. */
4117 return copy_to_reg (sparc_got ());
4118 }
4119
4120 /* Return true if X contains a thread-local symbol. */
4121
4122 static bool
4123 sparc_tls_referenced_p (rtx x)
4124 {
4125 if (!TARGET_HAVE_TLS)
4126 return false;
4127
4128 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4129 x = XEXP (XEXP (x, 0), 0);
4130
4131 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4132 return true;
4133
4134 /* That's all we handle in sparc_legitimize_tls_address for now. */
4135 return false;
4136 }
4137
4138 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4139 this (thread-local) address. */
4140
4141 static rtx
4142 sparc_legitimize_tls_address (rtx addr)
4143 {
4144 rtx temp1, temp2, temp3, ret, o0, got;
4145 rtx_insn *insn;
4146
4147 gcc_assert (can_create_pseudo_p ());
4148
4149 if (GET_CODE (addr) == SYMBOL_REF)
4150 switch (SYMBOL_REF_TLS_MODEL (addr))
4151 {
4152 case TLS_MODEL_GLOBAL_DYNAMIC:
4153 start_sequence ();
4154 temp1 = gen_reg_rtx (SImode);
4155 temp2 = gen_reg_rtx (SImode);
4156 ret = gen_reg_rtx (Pmode);
4157 o0 = gen_rtx_REG (Pmode, 8);
4158 got = sparc_tls_got ();
4159 emit_insn (gen_tgd_hi22 (temp1, addr));
4160 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4161 if (TARGET_ARCH32)
4162 {
4163 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4164 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4165 addr, const1_rtx));
4166 }
4167 else
4168 {
4169 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4170 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4171 addr, const1_rtx));
4172 }
4173 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4174 insn = get_insns ();
4175 end_sequence ();
4176 emit_libcall_block (insn, ret, o0, addr);
4177 break;
4178
4179 case TLS_MODEL_LOCAL_DYNAMIC:
4180 start_sequence ();
4181 temp1 = gen_reg_rtx (SImode);
4182 temp2 = gen_reg_rtx (SImode);
4183 temp3 = gen_reg_rtx (Pmode);
4184 ret = gen_reg_rtx (Pmode);
4185 o0 = gen_rtx_REG (Pmode, 8);
4186 got = sparc_tls_got ();
4187 emit_insn (gen_tldm_hi22 (temp1));
4188 emit_insn (gen_tldm_lo10 (temp2, temp1));
4189 if (TARGET_ARCH32)
4190 {
4191 emit_insn (gen_tldm_add32 (o0, got, temp2));
4192 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4193 const1_rtx));
4194 }
4195 else
4196 {
4197 emit_insn (gen_tldm_add64 (o0, got, temp2));
4198 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4199 const1_rtx));
4200 }
4201 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4202 insn = get_insns ();
4203 end_sequence ();
4204 emit_libcall_block (insn, temp3, o0,
4205 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4206 UNSPEC_TLSLD_BASE));
4207 temp1 = gen_reg_rtx (SImode);
4208 temp2 = gen_reg_rtx (SImode);
4209 emit_insn (gen_tldo_hix22 (temp1, addr));
4210 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4211 if (TARGET_ARCH32)
4212 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4213 else
4214 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4215 break;
4216
4217 case TLS_MODEL_INITIAL_EXEC:
4218 temp1 = gen_reg_rtx (SImode);
4219 temp2 = gen_reg_rtx (SImode);
4220 temp3 = gen_reg_rtx (Pmode);
4221 got = sparc_tls_got ();
4222 emit_insn (gen_tie_hi22 (temp1, addr));
4223 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4224 if (TARGET_ARCH32)
4225 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4226 else
4227 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4228 if (TARGET_SUN_TLS)
4229 {
4230 ret = gen_reg_rtx (Pmode);
4231 if (TARGET_ARCH32)
4232 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4233 temp3, addr));
4234 else
4235 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4236 temp3, addr));
4237 }
4238 else
4239 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4240 break;
4241
4242 case TLS_MODEL_LOCAL_EXEC:
4243 temp1 = gen_reg_rtx (Pmode);
4244 temp2 = gen_reg_rtx (Pmode);
4245 if (TARGET_ARCH32)
4246 {
4247 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4248 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4249 }
4250 else
4251 {
4252 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4253 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4254 }
4255 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4256 break;
4257
4258 default:
4259 gcc_unreachable ();
4260 }
4261
4262 else if (GET_CODE (addr) == CONST)
4263 {
4264 rtx base, offset;
4265
4266 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4267
4268 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4269 offset = XEXP (XEXP (addr, 0), 1);
4270
4271 base = force_operand (base, NULL_RTX);
4272 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4273 offset = force_reg (Pmode, offset);
4274 ret = gen_rtx_PLUS (Pmode, base, offset);
4275 }
4276
4277 else
4278 gcc_unreachable (); /* for now ... */
4279
4280 return ret;
4281 }
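
/* As an illustration, the 32-bit global-dynamic sequence built above
   corresponds to assembly along these lines, assuming the standard TLS
   relocation operators (register choices are arbitrary):

	sethi	%tgd_hi22(sym), %o1
	add	%o1, %tgd_lo10(sym), %o1
	add	%l7, %o1, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   with the resulting address returned in %o0.  */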
4282
4283 /* Legitimize PIC addresses. If the address is already position-independent,
4284 we return ORIG. Newly generated position-independent addresses go into a
4285 reg. This is REG if nonzero, otherwise we allocate register(s) as
4286 necessary. */
4287
4288 static rtx
4289 sparc_legitimize_pic_address (rtx orig, rtx reg)
4290 {
4291 bool gotdata_op = false;
4292
4293 if (GET_CODE (orig) == SYMBOL_REF
4294 /* See the comment in sparc_expand_move. */
4295 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4296 {
4297 rtx pic_ref, address;
4298 rtx_insn *insn;
4299
4300 if (reg == 0)
4301 {
4302 gcc_assert (can_create_pseudo_p ());
4303 reg = gen_reg_rtx (Pmode);
4304 }
4305
4306 if (flag_pic == 2)
4307 {
4308 /* If not during reload, allocate another temp reg here for loading
4309 in the address, so that these instructions can be optimized
4310 properly. */
4311 rtx temp_reg = (! can_create_pseudo_p ()
4312 ? reg : gen_reg_rtx (Pmode));
4313
4314 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4315 won't get confused into thinking that these two instructions
4316 are loading in the true address of the symbol. If in the
4317 future a PIC rtx exists, that should be used instead. */
4318 if (TARGET_ARCH64)
4319 {
4320 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4321 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4322 }
4323 else
4324 {
4325 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4326 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4327 }
4328 address = temp_reg;
4329 gotdata_op = true;
4330 }
4331 else
4332 address = orig;
4333
4334 crtl->uses_pic_offset_table = 1;
4335 if (gotdata_op)
4336 {
4337 if (TARGET_ARCH64)
4338 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4339 pic_offset_table_rtx,
4340 address, orig));
4341 else
4342 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4343 pic_offset_table_rtx,
4344 address, orig));
4345 }
4346 else
4347 {
4348 pic_ref
4349 = gen_const_mem (Pmode,
4350 gen_rtx_PLUS (Pmode,
4351 pic_offset_table_rtx, address));
4352 insn = emit_move_insn (reg, pic_ref);
4353 }
4354
4355       /* Put a REG_EQUAL note on this insn, so that it can be optimized
4356 	 by the loop optimizer.  */
4357 set_unique_reg_note (insn, REG_EQUAL, orig);
4358 return reg;
4359 }
4360 else if (GET_CODE (orig) == CONST)
4361 {
4362 rtx base, offset;
4363
4364 if (GET_CODE (XEXP (orig, 0)) == PLUS
4365 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4366 return orig;
4367
4368 if (reg == 0)
4369 {
4370 gcc_assert (can_create_pseudo_p ());
4371 reg = gen_reg_rtx (Pmode);
4372 }
4373
4374 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4375 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4376 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4377 base == reg ? NULL_RTX : reg);
4378
4379 if (GET_CODE (offset) == CONST_INT)
4380 {
4381 if (SMALL_INT (offset))
4382 return plus_constant (Pmode, base, INTVAL (offset));
4383 else if (can_create_pseudo_p ())
4384 offset = force_reg (Pmode, offset);
4385 else
4386 /* If we reach here, then something is seriously wrong. */
4387 gcc_unreachable ();
4388 }
4389 return gen_rtx_PLUS (Pmode, base, offset);
4390 }
4391 else if (GET_CODE (orig) == LABEL_REF)
4392 /* ??? We ought to be checking that the register is live instead, in case
4393 it is eliminated. */
4394 crtl->uses_pic_offset_table = 1;
4395
4396 return orig;
4397 }
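
/* Illustrative output for a global 'sym': with -fpic (flag_pic == 1) a
   single GOT load suffices,

	ld	[%l7 + sym], %reg

   while with -fPIC (flag_pic == 2), assuming an assembler that supports
   the GOT data operators, the GOT slot offset is built first:

	sethi	%gdop_hix22(sym), %tmp
	xor	%tmp, %gdop_lox10(sym), %tmp
	ld	[%l7 + %tmp], %reg, %gdop(sym)

   (%reg and %tmp stand for allocated registers).  */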
4398
4399 /* Try machine-dependent ways of modifying an illegitimate address X
4400 to be legitimate. If we find one, return the new, valid address.
4401
4402 OLDX is the address as it was before break_out_memory_refs was called.
4403 In some cases it is useful to look at this to decide what needs to be done.
4404
4405 MODE is the mode of the operand pointed to by X.
4406
4407 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4408
4409 static rtx
4410 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4411 machine_mode mode)
4412 {
4413 rtx orig_x = x;
4414
4415 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4416 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4417 force_operand (XEXP (x, 0), NULL_RTX));
4418 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4419 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4420 force_operand (XEXP (x, 1), NULL_RTX));
4421 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4422 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4423 XEXP (x, 1));
4424 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4425 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4426 force_operand (XEXP (x, 1), NULL_RTX));
4427
4428 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4429 return x;
4430
4431 if (sparc_tls_referenced_p (x))
4432 x = sparc_legitimize_tls_address (x);
4433 else if (flag_pic)
4434 x = sparc_legitimize_pic_address (x, NULL_RTX);
4435 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4436 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4437 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4438 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4439 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4440 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4441 else if (GET_CODE (x) == SYMBOL_REF
4442 || GET_CODE (x) == CONST
4443 || GET_CODE (x) == LABEL_REF)
4444 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4445
4446 return x;
4447 }
4448
4449 /* Delegitimize an address that was legitimized by the above function. */
4450
4451 static rtx
4452 sparc_delegitimize_address (rtx x)
4453 {
4454 x = delegitimize_mem_from_attrs (x);
4455
4456 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4457 switch (XINT (XEXP (x, 1), 1))
4458 {
4459 case UNSPEC_MOVE_PIC:
4460 case UNSPEC_TLSLE:
4461 x = XVECEXP (XEXP (x, 1), 0, 0);
4462 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4463 break;
4464 default:
4465 break;
4466 }
4467
4468 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4469 if (GET_CODE (x) == MINUS
4470 && REG_P (XEXP (x, 0))
4471 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4472 && GET_CODE (XEXP (x, 1)) == LO_SUM
4473 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4474 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4475 {
4476 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4477 gcc_assert (GET_CODE (x) == LABEL_REF);
4478 }
4479
4480 return x;
4481 }
4482
4483 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4484 replace the input X, or the original X if no replacement is called for.
4485 The output parameter *WIN is 1 if the calling macro should goto WIN,
4486 0 if it should not.
4487
4488 For SPARC, we wish to handle addresses by splitting them into
4489 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4490 This cuts the number of extra insns by one.
4491
4492 Do nothing when generating PIC code and the address is a symbolic
4493 operand or requires a scratch register. */
4494
4495 rtx
4496 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4497 int opnum, int type,
4498 int ind_levels ATTRIBUTE_UNUSED, int *win)
4499 {
4500 /* Decompose SImode constants into HIGH+LO_SUM. */
4501 if (CONSTANT_P (x)
4502 && (mode != TFmode || TARGET_ARCH64)
4503 && GET_MODE (x) == SImode
4504 && GET_CODE (x) != LO_SUM
4505 && GET_CODE (x) != HIGH
4506 && sparc_cmodel <= CM_MEDLOW
4507 && !(flag_pic
4508 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4509 {
4510 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4511 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4512 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4513 opnum, (enum reload_type)type);
4514 *win = 1;
4515 return x;
4516 }
4517
4518 /* We have to recognize what we have already generated above. */
4519 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4520 {
4521 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4522 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4523 opnum, (enum reload_type)type);
4524 *win = 1;
4525 return x;
4526 }
4527
4528 *win = 0;
4529 return x;
4530 }
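
/* The effect, for illustration: a constant memory address 'x' reloaded
   through this hook assembles to something like

	sethi	%hi(x), %g1
	ld	[%g1 + %lo(x)], %o0

   i.e. the HIGH part goes into the reload register while the LO_SUM is
   retained in the memory reference (register choices are arbitrary).  */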
4531
4532 /* Return true if ADDR (a legitimate address expression)
4533 has an effect that depends on the machine mode it is used for.
4534
4535 In PIC mode,
4536
4537 (mem:HI [%l7+a])
4538
4539 is not equivalent to
4540
4541 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4542
4543 because [%l7+a+1] is interpreted as the address of (a+1). */
4544 
4546 static bool
4547 sparc_mode_dependent_address_p (const_rtx addr,
4548 addr_space_t as ATTRIBUTE_UNUSED)
4549 {
4550 if (flag_pic && GET_CODE (addr) == PLUS)
4551 {
4552 rtx op0 = XEXP (addr, 0);
4553 rtx op1 = XEXP (addr, 1);
4554 if (op0 == pic_offset_table_rtx
4555 && symbolic_operand (op1, VOIDmode))
4556 return true;
4557 }
4558
4559 return false;
4560 }
4561
4562 #ifdef HAVE_GAS_HIDDEN
4563 # define USE_HIDDEN_LINKONCE 1
4564 #else
4565 # define USE_HIDDEN_LINKONCE 0
4566 #endif
4567
4568 static void
4569 get_pc_thunk_name (char name[32], unsigned int regno)
4570 {
4571 const char *reg_name = reg_names[regno];
4572
4573 /* Skip the leading '%' as that cannot be used in a
4574 symbol name. */
4575 reg_name += 1;
4576
4577 if (USE_HIDDEN_LINKONCE)
4578 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4579 else
4580 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4581 }
4582
4583 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4584
4585 static rtx
4586 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4587 {
4588 int orig_flag_pic = flag_pic;
4589 rtx insn;
4590
4591 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4592 flag_pic = 0;
4593 if (TARGET_ARCH64)
4594 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4595 else
4596 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4597 flag_pic = orig_flag_pic;
4598
4599 return insn;
4600 }
4601
4602 /* Emit code to load the GOT register. */
4603
4604 void
4605 load_got_register (void)
4606 {
4607 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4608 if (!global_offset_table_rtx)
4609 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4610
4611 if (TARGET_VXWORKS_RTP)
4612 emit_insn (gen_vxworks_load_got ());
4613 else
4614 {
4615 /* The GOT symbol is subject to a PC-relative relocation so we need a
4616 helper function to add the PC value and thus get the final value. */
4617 if (!got_helper_rtx)
4618 {
4619 char name[32];
4620 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4621 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4622 }
4623
4624 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4625 got_helper_rtx,
4626 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4627 }
4628
4629   /* Need to emit this whether or not we obey regdecls,
4630      since setjmp/longjmp can corrupt the liveness info.
4631      ??? In the case where we don't obey regdecls, this is not sufficient
4632      since we may not fall out the bottom.  */
4633 emit_use (global_offset_table_rtx);
4634 }
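
/* A sketch of the resulting 32-bit PIC sequence (the thunk body itself is
   emitted elsewhere; the register follows PIC_OFFSET_TABLE_REGNUM):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the PC saved in %o7 into %l7.  */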
4635
4636 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4637 address of the call target. */
4638
4639 void
4640 sparc_emit_call_insn (rtx pat, rtx addr)
4641 {
4642 rtx_insn *insn;
4643
4644 insn = emit_call_insn (pat);
4645
4646 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4647 if (TARGET_VXWORKS_RTP
4648 && flag_pic
4649 && GET_CODE (addr) == SYMBOL_REF
4650 && (SYMBOL_REF_DECL (addr)
4651 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4652 : !SYMBOL_REF_LOCAL_P (addr)))
4653 {
4654 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4655 crtl->uses_pic_offset_table = 1;
4656 }
4657 }
4658 \f
4659 /* Return 1 if RTX is a MEM which is known to be aligned to at
4660 least a DESIRED byte boundary. */
4661
4662 int
4663 mem_min_alignment (rtx mem, int desired)
4664 {
4665 rtx addr, base, offset;
4666
4667 /* If it's not a MEM we can't accept it. */
4668 if (GET_CODE (mem) != MEM)
4669 return 0;
4670
4671 /* Obviously... */
4672 if (!TARGET_UNALIGNED_DOUBLES
4673 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4674 return 1;
4675
4676 /* ??? The rest of the function predates MEM_ALIGN so
4677 there is probably a bit of redundancy. */
4678 addr = XEXP (mem, 0);
4679 base = offset = NULL_RTX;
4680 if (GET_CODE (addr) == PLUS)
4681 {
4682 if (GET_CODE (XEXP (addr, 0)) == REG)
4683 {
4684 base = XEXP (addr, 0);
4685
4686 	  /* What we are saying here is that if the base
4687 	     REG is aligned properly, the compiler will make
4688 	     sure any REG-based index upon it will be
4689 	     aligned as well.  */
4690 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4691 offset = XEXP (addr, 1);
4692 else
4693 offset = const0_rtx;
4694 }
4695 }
4696 else if (GET_CODE (addr) == REG)
4697 {
4698 base = addr;
4699 offset = const0_rtx;
4700 }
4701
4702 if (base != NULL_RTX)
4703 {
4704 int regno = REGNO (base);
4705
4706 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4707 {
4708 /* Check if the compiler has recorded some information
4709 about the alignment of the base REG. If reload has
4710 completed, we already matched with proper alignments.
4711 	     If not running global_alloc, reload might give us
4712 	     an unaligned pointer to the local stack, though.  */
4713 if (((cfun != 0
4714 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4715 || (optimize && reload_completed))
4716 && (INTVAL (offset) & (desired - 1)) == 0)
4717 return 1;
4718 }
4719 else
4720 {
4721 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4722 return 1;
4723 }
4724 }
4725 else if (! TARGET_UNALIGNED_DOUBLES
4726 || CONSTANT_P (addr)
4727 || GET_CODE (addr) == LO_SUM)
4728 {
4729 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4730 is true, in which case we can only assume that an access is aligned if
4731 it is to a constant address, or the address involves a LO_SUM. */
4732 return 1;
4733 }
4734
4735 /* An obviously unaligned address. */
4736 return 0;
4737 }
4738
4739 \f
4740 /* Vectors to keep interesting information about registers where it can easily
4741    be found.  We used to use the actual mode value as the bit number, but there
4742 are more than 32 modes now. Instead we use two tables: one indexed by
4743 hard register number, and one indexed by mode. */
4744
4745 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4746 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4747 mapped into one sparc_mode_class mode. */
4748
4749 enum sparc_mode_class {
4750 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4751 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4752 CC_MODE, CCFP_MODE
4753 };
4754
4755 /* Modes for single-word and smaller quantities. */
4756 #define S_MODES \
4757 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4758
4759 /* Modes for double-word and smaller quantities. */
4760 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4761
4762 /* Modes for quad-word and smaller quantities. */
4763 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4764
4765 /* Modes for 8-word and smaller quantities. */
4766 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4767
4768 /* Modes for single-float quantities. */
4769 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4770
4771 /* Modes for double-float and smaller quantities. */
4772 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4773
4774 /* Modes for quad-float and smaller quantities. */
4775 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4776
4777 /* Modes for quad-float pairs and smaller quantities. */
4778 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4779
4780 /* Modes for double-float only quantities. */
4781 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4782
4783 /* Modes for quad-float and double-float only quantities. */
4784 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4785
4786 /* Modes for quad-float pairs and double-float only quantities. */
4787 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4788
4789 /* Modes for condition codes. */
4790 #define CC_MODES (1 << (int) CC_MODE)
4791 #define CCFP_MODES (1 << (int) CCFP_MODE)
4792
4793 /* Value is 1 if register/mode pair is acceptable on sparc.
4794
4795 The funny mixture of D and T modes is because integer operations
4796 do not specially operate on tetra quantities, so non-quad-aligned
4797 registers can hold quadword quantities (except %o4 and %i4 because
4798 they cross fixed registers).
4799
4800 ??? Note that, despite the settings, non-double-aligned parameter
4801 registers can hold double-word quantities in 32-bit mode. */
4802
4803 /* This points to either the 32 bit or the 64 bit version. */
4804 const int *hard_regno_mode_classes;
4805
4806 static const int hard_32bit_mode_classes[] = {
4807 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4808 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4809 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4810 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4811
4812 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4813 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4814 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4815 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4816
4817 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4818 and none can hold SFmode/SImode values. */
4819 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4820 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4821 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4822 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4823
4824 /* %fcc[0123] */
4825 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4826
4827 /* %icc, %sfp, %gsr */
4828 CC_MODES, 0, D_MODES
4829 };
4830
4831 static const int hard_64bit_mode_classes[] = {
4832 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4833 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4834 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4835 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4836
4837 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4838 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4839 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4840 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4841
4842 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4843 and none can hold SFmode/SImode values. */
4844 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4845 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4846 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4847 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4848
4849 /* %fcc[0123] */
4850 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4851
4852 /* %icc, %sfp, %gsr */
4853 CC_MODES, 0, D_MODES
4854 };
4855
4856 int sparc_mode_class [NUM_MACHINE_MODES];
4857
4858 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4859
4860 static void
4861 sparc_init_modes (void)
4862 {
4863 int i;
4864
4865 for (i = 0; i < NUM_MACHINE_MODES; i++)
4866 {
4867 machine_mode m = (machine_mode) i;
4868 unsigned int size = GET_MODE_SIZE (m);
4869
4870 switch (GET_MODE_CLASS (m))
4871 {
4872 case MODE_INT:
4873 case MODE_PARTIAL_INT:
4874 case MODE_COMPLEX_INT:
4875 if (size < 4)
4876 sparc_mode_class[i] = 1 << (int) H_MODE;
4877 else if (size == 4)
4878 sparc_mode_class[i] = 1 << (int) S_MODE;
4879 else if (size == 8)
4880 sparc_mode_class[i] = 1 << (int) D_MODE;
4881 else if (size == 16)
4882 sparc_mode_class[i] = 1 << (int) T_MODE;
4883 else if (size == 32)
4884 sparc_mode_class[i] = 1 << (int) O_MODE;
4885 else
4886 sparc_mode_class[i] = 0;
4887 break;
4888 case MODE_VECTOR_INT:
4889 if (size == 4)
4890 sparc_mode_class[i] = 1 << (int) SF_MODE;
4891 else if (size == 8)
4892 sparc_mode_class[i] = 1 << (int) DF_MODE;
4893 else
4894 sparc_mode_class[i] = 0;
4895 break;
4896 case MODE_FLOAT:
4897 case MODE_COMPLEX_FLOAT:
4898 if (size == 4)
4899 sparc_mode_class[i] = 1 << (int) SF_MODE;
4900 else if (size == 8)
4901 sparc_mode_class[i] = 1 << (int) DF_MODE;
4902 else if (size == 16)
4903 sparc_mode_class[i] = 1 << (int) TF_MODE;
4904 else if (size == 32)
4905 sparc_mode_class[i] = 1 << (int) OF_MODE;
4906 else
4907 sparc_mode_class[i] = 0;
4908 break;
4909 case MODE_CC:
4910 if (m == CCFPmode || m == CCFPEmode)
4911 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4912 else
4913 sparc_mode_class[i] = 1 << (int) CC_MODE;
4914 break;
4915 default:
4916 sparc_mode_class[i] = 0;
4917 break;
4918 }
4919 }
4920
4921 if (TARGET_ARCH64)
4922 hard_regno_mode_classes = hard_64bit_mode_classes;
4923 else
4924 hard_regno_mode_classes = hard_32bit_mode_classes;
4925
4926 /* Initialize the array used by REGNO_REG_CLASS. */
4927 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4928 {
4929 if (i < 16 && TARGET_V8PLUS)
4930 sparc_regno_reg_class[i] = I64_REGS;
4931 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4932 sparc_regno_reg_class[i] = GENERAL_REGS;
4933 else if (i < 64)
4934 sparc_regno_reg_class[i] = FP_REGS;
4935 else if (i < 96)
4936 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4937 else if (i < 100)
4938 sparc_regno_reg_class[i] = FPCC_REGS;
4939 else
4940 sparc_regno_reg_class[i] = NO_REGS;
4941 }
4942 }
4943 \f
4944 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4945
4946 static inline bool
4947 save_global_or_fp_reg_p (unsigned int regno,
4948 int leaf_function ATTRIBUTE_UNUSED)
4949 {
4950 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4951 }
4952
4953 /* Return whether the return address register (%i7) is needed. */
4954
4955 static inline bool
4956 return_addr_reg_needed_p (int leaf_function)
4957 {
4958 /* If it is live, for example because of __builtin_return_address (0). */
4959 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4960 return true;
4961
4962 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4963 if (!leaf_function
4964 /* Loading the GOT register clobbers %o7. */
4965 || crtl->uses_pic_offset_table
4966 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4967 return true;
4968
4969 return false;
4970 }
4971
4972 /* Return whether REGNO, a local or in register, must be saved/restored. */
4973
4974 static bool
4975 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4976 {
4977 /* General case: call-saved registers live at some point. */
4978 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4979 return true;
4980
4981 /* Frame pointer register (%fp) if needed. */
4982 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4983 return true;
4984
4985 /* Return address register (%i7) if needed. */
4986 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4987 return true;
4988
4989 /* GOT register (%l7) if needed. */
4990 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4991 return true;
4992
4993 /* If the function accesses prior frames, the frame pointer and the return
4994 address of the previous frame must be saved on the stack. */
4995 if (crtl->accesses_prior_frames
4996 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4997 return true;
4998
4999 return false;
5000 }
5001
5002 /* Compute the frame size required by the function. This function is called
5003 during the reload pass and also by sparc_expand_prologue. */
5004
5005 HOST_WIDE_INT
5006 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5007 {
5008 HOST_WIDE_INT frame_size, apparent_frame_size;
5009 int args_size, n_global_fp_regs = 0;
5010 bool save_local_in_regs_p = false;
5011 unsigned int i;
5012
5013 /* If the function allocates dynamic stack space, the dynamic offset is
5014 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5015 if (leaf_function && !cfun->calls_alloca)
5016 args_size = 0;
5017 else
5018 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5019
5020 /* Calculate space needed for global registers. */
5021 if (TARGET_ARCH64)
5022 {
5023 for (i = 0; i < 8; i++)
5024 if (save_global_or_fp_reg_p (i, 0))
5025 n_global_fp_regs += 2;
5026 }
5027 else
5028 {
5029 for (i = 0; i < 8; i += 2)
5030 if (save_global_or_fp_reg_p (i, 0)
5031 || save_global_or_fp_reg_p (i + 1, 0))
5032 n_global_fp_regs += 2;
5033 }
5034
5035 /* In the flat window model, find out which local and in registers need to
5036 be saved. We don't reserve space in the current frame for them as they
5037 will be spilled into the register window save area of the caller's frame.
5038 However, as soon as we use this register window save area, we must create
5039 that of the current frame to make it the live one. */
5040 if (TARGET_FLAT)
5041 for (i = 16; i < 32; i++)
5042 if (save_local_or_in_reg_p (i, leaf_function))
5043 {
5044 save_local_in_regs_p = true;
5045 break;
5046 }
5047
5048 /* Calculate space needed for FP registers. */
5049 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5050 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5051 n_global_fp_regs += 2;
5052
5053 if (size == 0
5054 && n_global_fp_regs == 0
5055 && args_size == 0
5056 && !save_local_in_regs_p)
5057 frame_size = apparent_frame_size = 0;
5058 else
5059 {
5060 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5061 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5062 apparent_frame_size += n_global_fp_regs * 4;
5063
5064 /* We need to add the size of the outgoing argument area. */
5065 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5066
5067 /* And that of the register window save area. */
5068 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5069
5070 /* Finally, bump to the appropriate alignment. */
5071 frame_size = SPARC_STACK_ALIGN (frame_size);
5072 }
5073
5074 /* Set up values for use in prologue and epilogue. */
5075 sparc_frame_size = frame_size;
5076 sparc_apparent_frame_size = apparent_frame_size;
5077 sparc_n_global_fp_regs = n_global_fp_regs;
5078 sparc_save_local_in_regs_p = save_local_in_regs_p;
5079
5080 return frame_size;
5081 }
5082
5083 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5084
5085 int
5086 sparc_initial_elimination_offset (int to)
5087 {
5088 int offset;
5089
5090 if (to == STACK_POINTER_REGNUM)
5091 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5092 else
5093 offset = 0;
5094
5095 offset += SPARC_STACK_BIAS;
5096 return offset;
5097 }
5098
5099 /* Output any necessary .register pseudo-ops. */
5100
5101 void
5102 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5103 {
5104 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5105 int i;
5106
5107 if (TARGET_ARCH32)
5108 return;
5109
5110 /* Check if %g[2367] were used without
5111 .register being printed for them already. */
5112 for (i = 2; i < 8; i++)
5113 {
5114 if (df_regs_ever_live_p (i)
5115 && ! sparc_hard_reg_printed [i])
5116 {
5117 sparc_hard_reg_printed [i] = 1;
5118 /* %g7 is used as TLS base register, use #ignore
5119 for it instead of #scratch. */
5120 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5121 i == 7 ? "ignore" : "scratch");
5122 }
5123 if (i == 3) i = 5;
5124 }
5125 #endif
5126 }
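
/* Sample output (illustrative) for a 64-bit function that clobbers %g2 and
   uses %g7 as the TLS base register:

	.register	%g2, #scratch
	.register	%g7, #ignore
 */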
5127
5128 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5129
5130 #if PROBE_INTERVAL > 4096
5131 #error Cannot use indexed addressing mode for stack probing
5132 #endif
5133
5134 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5135 inclusive. These are offsets from the current stack pointer.
5136
5137 Note that we don't use the REG+REG addressing mode for the probes because
5138    of the stack bias in 64-bit mode.  And it doesn't really buy us anything,
5139    so the advantages of having a single code path win here.  */
5140
5141 static void
5142 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5143 {
5144 rtx g1 = gen_rtx_REG (Pmode, 1);
5145
5146 /* See if we have a constant small number of probes to generate. If so,
5147 that's the easy case. */
5148 if (size <= PROBE_INTERVAL)
5149 {
5150 emit_move_insn (g1, GEN_INT (first));
5151 emit_insn (gen_rtx_SET (g1,
5152 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5153 emit_stack_probe (plus_constant (Pmode, g1, -size));
5154 }
5155
5156   /* The run-time loop is made up of 9 insns in the generic case while the
5157      compile-time loop is made up of 4 + 2*(n-2) insns for n intervals.  */
5158 else if (size <= 4 * PROBE_INTERVAL)
5159 {
5160 HOST_WIDE_INT i;
5161
5162 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5163 emit_insn (gen_rtx_SET (g1,
5164 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5165 emit_stack_probe (g1);
5166
5167 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5168 it exceeds SIZE. If only two probes are needed, this will not
5169 generate any code. Then probe at FIRST + SIZE. */
5170 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5171 {
5172 emit_insn (gen_rtx_SET (g1,
5173 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5174 emit_stack_probe (g1);
5175 }
5176
5177 emit_stack_probe (plus_constant (Pmode, g1,
5178 (i - PROBE_INTERVAL) - size));
5179 }
5180
5181 /* Otherwise, do the same as above, but in a loop. Note that we must be
5182 extra careful with variables wrapping around because we might be at
5183 the very top (or the very bottom) of the address space and we have
5184 to be able to handle this case properly; in particular, we use an
5185 equality test for the loop condition. */
5186 else
5187 {
5188 HOST_WIDE_INT rounded_size;
5189 rtx g4 = gen_rtx_REG (Pmode, 4);
5190
5191 emit_move_insn (g1, GEN_INT (first));
5192
5193
5194 /* Step 1: round SIZE to the previous multiple of the interval. */
5195
5196 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5197 emit_move_insn (g4, GEN_INT (rounded_size));
5198
5199
5200 /* Step 2: compute initial and final value of the loop counter. */
5201
5202 /* TEST_ADDR = SP + FIRST. */
5203 emit_insn (gen_rtx_SET (g1,
5204 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5205
5206 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5207 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5208
5209
5210 /* Step 3: the loop
5211
5212 while (TEST_ADDR != LAST_ADDR)
5213 {
5214 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5215 probe at TEST_ADDR
5216 }
5217
5218 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5219 until it is equal to ROUNDED_SIZE. */
5220
5221 if (TARGET_ARCH64)
5222 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5223 else
5224 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5225
5226
5227 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5228 that SIZE is equal to ROUNDED_SIZE. */
5229
5230 if (size != rounded_size)
5231 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5232 }
5233
5234 /* Make sure nothing is scheduled before we are done. */
5235 emit_insn (gen_blockage ());
5236 }
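
/* A worked example, assuming the default PROBE_INTERVAL of 4096: for
   FIRST = 4096 and SIZE = 10000, the second case above applies and probes
   are emitted at 8192, 12288 and finally 14096 (= FIRST + SIZE) bytes
   below the incoming stack pointer.  */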
5237
5238 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5239 absolute addresses. */
5240
5241 const char *
5242 output_probe_stack_range (rtx reg1, rtx reg2)
5243 {
5244 static int labelno = 0;
5245 char loop_lab[32];
5246 rtx xops[2];
5247
5248 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5249
5250 /* Loop. */
5251 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5252
5253 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5254 xops[0] = reg1;
5255 xops[1] = GEN_INT (-PROBE_INTERVAL);
5256 output_asm_insn ("add\t%0, %1, %0", xops);
5257
5258 /* Test if TEST_ADDR == LAST_ADDR. */
5259 xops[1] = reg2;
5260 output_asm_insn ("cmp\t%0, %1", xops);
5261
5262 /* Probe at TEST_ADDR and branch. */
5263 if (TARGET_ARCH64)
5264 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5265 else
5266 fputs ("\tbne\t", asm_out_file);
5267 assemble_name_raw (asm_out_file, loop_lab);
5268 fputc ('\n', asm_out_file);
5269 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5270 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5271
5272 return "";
5273 }
5274
5275 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5276 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5277 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5278 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5279 the action to be performed if it returns false. Return the new offset. */
5280
5281 typedef bool (*sorr_pred_t) (unsigned int, int);
5282 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5283
5284 static int
5285 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5286 int offset, int leaf_function, sorr_pred_t save_p,
5287 sorr_act_t action_true, sorr_act_t action_false)
5288 {
5289 unsigned int i;
5290 rtx mem;
5291 rtx_insn *insn;
5292
5293 if (TARGET_ARCH64 && high <= 32)
5294 {
5295 int fp_offset = -1;
5296
5297 for (i = low; i < high; i++)
5298 {
5299 if (save_p (i, leaf_function))
5300 {
5301 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5302 base, offset));
5303 if (action_true == SORR_SAVE)
5304 {
5305 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5306 RTX_FRAME_RELATED_P (insn) = 1;
5307 }
5308 else /* action_true == SORR_RESTORE */
5309 {
5310 	      /* The frame pointer must be restored last since its old
5311 		 value may be used as a base address for the frame.  This
5312 		 is problematic in 64-bit mode only because of the lack
5313 		 of a double-word load instruction.  */
5314 if (i == HARD_FRAME_POINTER_REGNUM)
5315 fp_offset = offset;
5316 else
5317 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5318 }
5319 offset += 8;
5320 }
5321 else if (action_false == SORR_ADVANCE)
5322 offset += 8;
5323 }
5324
5325 if (fp_offset >= 0)
5326 {
5327 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5328 emit_move_insn (hard_frame_pointer_rtx, mem);
5329 }
5330 }
5331 else
5332 {
5333 for (i = low; i < high; i += 2)
5334 {
5335 bool reg0 = save_p (i, leaf_function);
5336 bool reg1 = save_p (i + 1, leaf_function);
5337 machine_mode mode;
5338 int regno;
5339
5340 if (reg0 && reg1)
5341 {
5342 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5343 regno = i;
5344 }
5345 else if (reg0)
5346 {
5347 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5348 regno = i;
5349 }
5350 else if (reg1)
5351 {
5352 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5353 regno = i + 1;
5354 offset += 4;
5355 }
5356 else
5357 {
5358 if (action_false == SORR_ADVANCE)
5359 offset += 8;
5360 continue;
5361 }
5362
5363 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5364 if (action_true == SORR_SAVE)
5365 {
5366 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5367 RTX_FRAME_RELATED_P (insn) = 1;
5368 if (mode == DImode)
5369 {
5370 rtx set1, set2;
5371 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5372 offset));
5373 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5374 RTX_FRAME_RELATED_P (set1) = 1;
5375 mem
5376 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5377 offset + 4));
5378 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5379 RTX_FRAME_RELATED_P (set2) = 1;
5380 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5381 gen_rtx_PARALLEL (VOIDmode,
5382 gen_rtvec (2, set1, set2)));
5383 }
5384 }
5385 else /* action_true == SORR_RESTORE */
5386 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5387
5388 /* Bump and round down to double word
5389 in case we already bumped by 4. */
5390 offset = ROUND_DOWN (offset + 8, 8);
5391 }
5392 }
5393
5394 return offset;
5395 }
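
/* In 32-bit mode, for instance, if both %l0 and %l1 must be saved, the
   loop above uses a single DImode (std/ldd) move for the pair; if only
   %l1 must be saved, a SImode move at offset + 4 is used instead.  */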
5396
5397 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5398
5399 static rtx
5400 emit_adjust_base_to_offset (rtx base, int offset)
5401 {
5402 /* ??? This might be optimized a little as %g1 might already have a
5403 value close enough that a single add insn will do. */
5404 /* ??? Although, all of this is probably only a temporary fix because
5405 if %g1 can hold a function result, then sparc_expand_epilogue will
5406 lose (the result will be clobbered). */
5407 rtx new_base = gen_rtx_REG (Pmode, 1);
5408 emit_move_insn (new_base, GEN_INT (offset));
5409 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5410 return new_base;
5411 }
5412
5413 /* Emit code to save/restore call-saved global and FP registers. */
5414
5415 static void
5416 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5417 {
5418 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5419 {
5420 base = emit_adjust_base_to_offset (base, offset);
5421 offset = 0;
5422 }
5423
5424 offset
5425 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5426 save_global_or_fp_reg_p, action, SORR_NONE);
5427 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5428 save_global_or_fp_reg_p, action, SORR_NONE);
5429 }
5430
5431 /* Emit code to save/restore call-saved local and in registers. */
5432
5433 static void
5434 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5435 {
5436 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5437 {
5438 base = emit_adjust_base_to_offset (base, offset);
5439 offset = 0;
5440 }
5441
5442 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5443 save_local_or_in_reg_p, action, SORR_ADVANCE);
5444 }
5445
5446 /* Emit a window_save insn. */
5447
5448 static rtx_insn *
5449 emit_window_save (rtx increment)
5450 {
5451 rtx_insn *insn = emit_insn (gen_window_save (increment));
5452 RTX_FRAME_RELATED_P (insn) = 1;
5453
5454 /* The incoming return address (%o7) is saved in %i7. */
5455 add_reg_note (insn, REG_CFA_REGISTER,
5456 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5457 gen_rtx_REG (Pmode,
5458 INCOMING_RETURN_ADDR_REGNUM)));
5459
5460 /* The window save event. */
5461 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5462
5463 /* The CFA is %fp, the hard frame pointer. */
5464 add_reg_note (insn, REG_CFA_DEF_CFA,
5465 plus_constant (Pmode, hard_frame_pointer_rtx,
5466 INCOMING_FRAME_SP_OFFSET));
5467
5468 return insn;
5469 }
5470
5471 /* Generate an increment for the stack pointer. */
5472
5473 static rtx
5474 gen_stack_pointer_inc (rtx increment)
5475 {
5476 return gen_rtx_SET (stack_pointer_rtx,
5477 gen_rtx_PLUS (Pmode,
5478 stack_pointer_rtx,
5479 increment));
5480 }
5481
5482 /* Expand the function prologue. The prologue is responsible for reserving
5483 storage for the frame, saving the call-saved registers and loading the
5484 GOT register if needed. */
5485
5486 void
5487 sparc_expand_prologue (void)
5488 {
5489 HOST_WIDE_INT size;
5490 rtx_insn *insn;
5491
5492 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5493 on the final value of the flag means deferring the prologue/epilogue
5494 expansion until just before the second scheduling pass, which is too
5495 late to emit multiple epilogues or return insns.
5496
5497 Of course we are making the assumption that the value of the flag
5498 will not change between now and its final value. Of the three parts
5499 of the formula, only the last one can reasonably vary. Let's take a
5500 closer look, assuming that the first two are true (otherwise the
5501 last one is effectively silenced).
5502
5503 If only_leaf_regs_used returns false, the global predicate will also
5504 be false so the actual frame size calculated below will be positive.
5505 As a consequence, the save_register_window insn will be emitted in
5506 the instruction stream; now this insn explicitly references %fp
5507 which is not a leaf register so only_leaf_regs_used will always
5508 return false subsequently.
5509
5510 If only_leaf_regs_used returns true, we hope that the subsequent
5511 optimization passes won't cause non-leaf registers to pop up. For
5512 example, the regrename pass has special provisions to not rename to
5513 non-leaf registers in a leaf function. */
5514 sparc_leaf_function_p
5515 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5516
5517 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5518
5519 if (flag_stack_usage_info)
5520 current_function_static_stack_size = size;
5521
5522 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5523 {
5524 if (crtl->is_leaf && !cfun->calls_alloca)
5525 {
5526 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5527 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5528 size - STACK_CHECK_PROTECT);
5529 }
5530 else if (size > 0)
5531 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5532 }
5533
5534 if (size == 0)
5535 ; /* do nothing. */
5536 else if (sparc_leaf_function_p)
5537 {
5538 rtx size_int_rtx = GEN_INT (-size);
5539
5540 if (size <= 4096)
5541 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5542 else if (size <= 8192)
5543 {
5544 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5545 RTX_FRAME_RELATED_P (insn) = 1;
5546
5547 /* %sp is still the CFA register. */
5548 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5549 }
5550 else
5551 {
5552 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5553 emit_move_insn (size_rtx, size_int_rtx);
5554 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5555 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5556 gen_stack_pointer_inc (size_int_rtx));
5557 }
5558
5559 RTX_FRAME_RELATED_P (insn) = 1;
5560 }
5561 else
5562 {
5563 rtx size_int_rtx = GEN_INT (-size);
5564
5565 if (size <= 4096)
5566 emit_window_save (size_int_rtx);
5567 else if (size <= 8192)
5568 {
5569 emit_window_save (GEN_INT (-4096));
5570
5571 /* %sp is not the CFA register anymore. */
5572 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5573
5574 /* Make sure no %fp-based store is issued until after the frame is
5575 established. The offset between the frame pointer and the stack
5576 pointer is calculated relative to the value of the stack pointer
5577 at the end of the function prologue, and moving instructions that
5578 access the stack via the frame pointer between the instructions
5579 that decrement the stack pointer could result in accessing the
5580 register window save area, which is volatile. */
5581 emit_insn (gen_frame_blockage ());
5582 }
5583 else
5584 {
5585 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5586 emit_move_insn (size_rtx, size_int_rtx);
5587 emit_window_save (size_rtx);
5588 }
5589 }
5590
5591 if (sparc_leaf_function_p)
5592 {
5593 sparc_frame_base_reg = stack_pointer_rtx;
5594 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5595 }
5596 else
5597 {
5598 sparc_frame_base_reg = hard_frame_pointer_rtx;
5599 sparc_frame_base_offset = SPARC_STACK_BIAS;
5600 }
5601
5602 if (sparc_n_global_fp_regs > 0)
5603 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5604 sparc_frame_base_offset
5605 - sparc_apparent_frame_size,
5606 SORR_SAVE);
5607
5608 /* Load the GOT register if needed. */
5609 if (crtl->uses_pic_offset_table)
5610 load_got_register ();
5611
5612 /* Advertise that the data calculated just above are now valid. */
5613 sparc_prologue_data_valid_p = true;
5614 }
5615
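/* For illustration, a rough sketch of the sequences the above can emit,
   assuming 32-bit mode; the frame sizes are made-up examples and the
   exact code varies with flags and frame layout:

     leaf function, size <= 4096:	add	%sp, -256, %sp

     non-leaf function, size <= 4096:	save	%sp, -256, %sp

     non-leaf function, size > 8192:	sethi	%hi(-12288), %g1
     (constant moved into %g1 first)	save	%sp, %g1, %sp  */
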
5616 /* Expand the function prologue for the flat register window model. Like
5617 sparc_expand_prologue above, it reserves storage for the frame, saves
5618 the call-saved registers and loads the GOT register if needed. */
5619
5620 void
5621 sparc_flat_expand_prologue (void)
5622 {
5623 HOST_WIDE_INT size;
5624 rtx_insn *insn;
5625
5626 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5627
5628 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5629
5630 if (flag_stack_usage_info)
5631 current_function_static_stack_size = size;
5632
5633 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5634 {
5635 if (crtl->is_leaf && !cfun->calls_alloca)
5636 {
5637 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5638 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5639 size - STACK_CHECK_PROTECT);
5640 }
5641 else if (size > 0)
5642 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5643 }
5644
5645 if (sparc_save_local_in_regs_p)
5646 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5647 SORR_SAVE);
5648
5649 if (size == 0)
5650 ; /* do nothing. */
5651 else
5652 {
5653 rtx size_int_rtx, size_rtx;
5654
5655 size_rtx = size_int_rtx = GEN_INT (-size);
5656
5657 /* We establish the frame (i.e. decrement the stack pointer) first, even
5658 if we use a frame pointer, because we cannot clobber any call-saved
5659 registers, including the frame pointer, if we haven't created a new
5660 register save area, for the sake of compatibility with the ABI. */
5661 if (size <= 4096)
5662 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5663 else if (size <= 8192 && !frame_pointer_needed)
5664 {
5665 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5666 RTX_FRAME_RELATED_P (insn) = 1;
5667 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5668 }
5669 else
5670 {
5671 size_rtx = gen_rtx_REG (Pmode, 1);
5672 emit_move_insn (size_rtx, size_int_rtx);
5673 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5674 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5675 gen_stack_pointer_inc (size_int_rtx));
5676 }
5677 RTX_FRAME_RELATED_P (insn) = 1;
5678
5679 /* Ensure nothing is scheduled until after the frame is established. */
5680 emit_insn (gen_blockage ());
5681
5682 if (frame_pointer_needed)
5683 {
5684 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5685 gen_rtx_MINUS (Pmode,
5686 stack_pointer_rtx,
5687 size_rtx)));
5688 RTX_FRAME_RELATED_P (insn) = 1;
5689
5690 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5691 gen_rtx_SET (hard_frame_pointer_rtx,
5692 plus_constant (Pmode, stack_pointer_rtx,
5693 size)));
5694 }
5695
5696 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5697 {
5698 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5699 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5700
5701 insn = emit_move_insn (i7, o7);
5702 RTX_FRAME_RELATED_P (insn) = 1;
5703
5704 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5705
5706 /* Prevent this instruction from ever being considered dead,
5707 even if this function has no epilogue. */
5708 emit_use (i7);
5709 }
5710 }
5711
5712 if (frame_pointer_needed)
5713 {
5714 sparc_frame_base_reg = hard_frame_pointer_rtx;
5715 sparc_frame_base_offset = SPARC_STACK_BIAS;
5716 }
5717 else
5718 {
5719 sparc_frame_base_reg = stack_pointer_rtx;
5720 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5721 }
5722
5723 if (sparc_n_global_fp_regs > 0)
5724 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5725 sparc_frame_base_offset
5726 - sparc_apparent_frame_size,
5727 SORR_SAVE);
5728
5729 /* Load the GOT register if needed. */
5730 if (crtl->uses_pic_offset_table)
5731 load_got_register ();
5732
5733 /* Advertise that the data calculated just above are now valid. */
5734 sparc_prologue_data_valid_p = true;
5735 }
5736
5737 /* This function generates the assembly code for function entry, which boils
5738 down to emitting the necessary .register directives. */
5739
5740 static void
5741 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5742 {
5743 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5744 if (!TARGET_FLAT)
5745 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5746
5747 sparc_output_scratch_registers (file);
5748 }
5749
5750 /* Expand the function epilogue, either normal or part of a sibcall.
5751 We emit all the instructions except the return or the call. */
5752
5753 void
5754 sparc_expand_epilogue (bool for_eh)
5755 {
5756 HOST_WIDE_INT size = sparc_frame_size;
5757
5758 if (sparc_n_global_fp_regs > 0)
5759 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5760 sparc_frame_base_offset
5761 - sparc_apparent_frame_size,
5762 SORR_RESTORE);
5763
5764 if (size == 0 || for_eh)
5765 ; /* do nothing. */
5766 else if (sparc_leaf_function_p)
5767 {
5768 if (size <= 4096)
5769 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5770 else if (size <= 8192)
5771 {
5772 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5773 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5774 }
5775 else
5776 {
5777 rtx reg = gen_rtx_REG (Pmode, 1);
5778 emit_move_insn (reg, GEN_INT (size));
5779 emit_insn (gen_stack_pointer_inc (reg));
5780 }
5781 }
5782 }
5783
5784 /* Expand the function epilogue for the flat register window model, either
5785 normal or part of a sibcall. We emit all the instructions except the return or the call. */
5786
5787 void
5788 sparc_flat_expand_epilogue (bool for_eh)
5789 {
5790 HOST_WIDE_INT size = sparc_frame_size;
5791
5792 if (sparc_n_global_fp_regs > 0)
5793 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5794 sparc_frame_base_offset
5795 - sparc_apparent_frame_size,
5796 SORR_RESTORE);
5797
5798 /* If we have a frame pointer, we need both to restore it before the
5799 frame is destroyed and to use its current value while destroying the frame.
5800 Since we don't have an atomic way to do that in the flat window model,
5801 we save the current value into a temporary register (%g1). */
5802 if (frame_pointer_needed && !for_eh)
5803 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5804
5805 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5806 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5807 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5808
5809 if (sparc_save_local_in_regs_p)
5810 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5811 sparc_frame_base_offset,
5812 SORR_RESTORE);
5813
5814 if (size == 0 || for_eh)
5815 ; /* do nothing. */
5816 else if (frame_pointer_needed)
5817 {
5818 /* Make sure the frame is destroyed after everything else is done. */
5819 emit_insn (gen_blockage ());
5820
5821 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5822 }
5823 else
5824 {
5825 /* Likewise. */
5826 emit_insn (gen_blockage ());
5827
5828 if (size <= 4096)
5829 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5830 else if (size <= 8192)
5831 {
5832 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5833 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5834 }
5835 else
5836 {
5837 rtx reg = gen_rtx_REG (Pmode, 1);
5838 emit_move_insn (reg, GEN_INT (size));
5839 emit_insn (gen_stack_pointer_inc (reg));
5840 }
5841 }
5842 }
5843
5844 /* Return true if it is appropriate to emit `return' instructions in the
5845 body of a function. */
5846
5847 bool
5848 sparc_can_use_return_insn_p (void)
5849 {
5850 return sparc_prologue_data_valid_p
5851 && sparc_n_global_fp_regs == 0
5852 && (TARGET_FLAT
5853 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5854 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5855 }
5856
5857 /* This function generates the assembly code for function exit. */
5858
5859 static void
5860 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5861 {
5862 /* If the last two instructions of a function are "call foo; dslot;"
5863 the return address might point to the first instruction in the next
5864 function and we have to output a dummy nop for the sake of sane
5865 backtraces in such cases. This is pointless for sibling calls since
5866 the return address is explicitly adjusted. */
5867
5868 rtx_insn *insn = get_last_insn ();
5869
5870 rtx last_real_insn = prev_real_insn (insn);
5871 if (last_real_insn
5872 && NONJUMP_INSN_P (last_real_insn)
5873 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5874 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5875
5876 if (last_real_insn
5877 && CALL_P (last_real_insn)
5878 && !SIBLING_CALL_P (last_real_insn))
5879 fputs ("\tnop\n", file);
5880
5881 sparc_output_deferred_case_vectors ();
5882 }
5883
5884 /* Output a 'restore' instruction. */
5885
5886 static void
5887 output_restore (rtx pat)
5888 {
5889 rtx operands[3];
5890
5891 if (! pat)
5892 {
5893 fputs ("\t restore\n", asm_out_file);
5894 return;
5895 }
5896
5897 gcc_assert (GET_CODE (pat) == SET);
5898
5899 operands[0] = SET_DEST (pat);
5900 pat = SET_SRC (pat);
5901
5902 switch (GET_CODE (pat))
5903 {
5904 case PLUS:
5905 operands[1] = XEXP (pat, 0);
5906 operands[2] = XEXP (pat, 1);
5907 output_asm_insn (" restore %r1, %2, %Y0", operands);
5908 break;
5909 case LO_SUM:
5910 operands[1] = XEXP (pat, 0);
5911 operands[2] = XEXP (pat, 1);
5912 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5913 break;
5914 case ASHIFT:
5915 operands[1] = XEXP (pat, 0);
5916 gcc_assert (XEXP (pat, 1) == const1_rtx);
5917 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5918 break;
5919 default:
5920 operands[1] = pat;
5921 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5922 break;
5923 }
5924 }
5925
5926 /* Output a return. */
5927
5928 const char *
5929 output_return (rtx_insn *insn)
5930 {
5931 if (crtl->calls_eh_return)
5932 {
5933 /* If the function uses __builtin_eh_return, the eh_return
5934 machinery occupies the delay slot. */
5935 gcc_assert (!final_sequence);
5936
5937 if (flag_delayed_branch)
5938 {
5939 if (!TARGET_FLAT && TARGET_V9)
5940 fputs ("\treturn\t%i7+8\n", asm_out_file);
5941 else
5942 {
5943 if (!TARGET_FLAT)
5944 fputs ("\trestore\n", asm_out_file);
5945
5946 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5947 }
5948
5949 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5950 }
5951 else
5952 {
5953 if (!TARGET_FLAT)
5954 fputs ("\trestore\n", asm_out_file);
5955
5956 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5957 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5958 }
5959 }
5960 else if (sparc_leaf_function_p || TARGET_FLAT)
5961 {
5962 /* This is a leaf or flat function so we don't have to bother restoring
5963 the register window, which frees us from dealing with the convoluted
5964 semantics of restore/return. We simply output the jump to the
5965 return address and the insn in the delay slot (if any). */
5966
5967 return "jmp\t%%o7+%)%#";
5968 }
5969 else
5970 {
5971 /* This is a regular function so we have to restore the register window.
5972 We may have a pending insn for the delay slot, which will be either
5973 combined with the 'restore' instruction or put in the delay slot of
5974 the 'return' instruction. */
5975
5976 if (final_sequence)
5977 {
5978 rtx delay, pat;
5979
5980 delay = NEXT_INSN (insn);
5981 gcc_assert (delay);
5982
5983 pat = PATTERN (delay);
5984
5985 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5986 {
5987 epilogue_renumber (&pat, 0);
5988 return "return\t%%i7+%)%#";
5989 }
5990 else
5991 {
5992 output_asm_insn ("jmp\t%%i7+%)", NULL);
5993 output_restore (pat);
5994 PATTERN (delay) = gen_blockage ();
5995 INSN_CODE (delay) = -1;
5996 }
5997 }
5998 else
5999 {
6000 /* The delay slot is empty. */
6001 if (TARGET_V9)
6002 return "return\t%%i7+%)\n\t nop";
6003 else if (flag_delayed_branch)
6004 return "jmp\t%%i7+%)\n\t restore";
6005 else
6006 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6007 }
6008 }
6009
6010 return "";
6011 }
6012
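/* For illustration, the concrete sequences returned above when the delay
   slot is empty.  The %) punctuation expands to the return offset,
   normally 8, or 12 for 32-bit code that must skip a struct-return
   unimp insn, and %# emits a nop for an empty delay slot:

     V9:				return	%i7+8
					 nop

     pre-V9, delayed branches:		jmp	%i7+8
					 restore

     no delayed branches:		restore
					jmp	%o7+8
					 nop  */
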
6013 /* Output a sibling call. */
6014
6015 const char *
6016 output_sibcall (rtx_insn *insn, rtx call_operand)
6017 {
6018 rtx operands[1];
6019
6020 gcc_assert (flag_delayed_branch);
6021
6022 operands[0] = call_operand;
6023
6024 if (sparc_leaf_function_p || TARGET_FLAT)
6025 {
6026 /* This is a leaf or flat function so we don't have to bother restoring
6027 the register window. We simply output the jump to the function and
6028 the insn in the delay slot (if any). */
6029
6030 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6031
6032 if (final_sequence)
6033 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6034 operands);
6035 else
6036 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6037 it into a branch if possible. */
6038 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6039 operands);
6040 }
6041 else
6042 {
6043 /* This is a regular function so we have to restore the register window.
6044 We may have a pending insn for the delay slot, which will be combined
6045 with the 'restore' instruction. */
6046
6047 output_asm_insn ("call\t%a0, 0", operands);
6048
6049 if (final_sequence)
6050 {
6051 rtx_insn *delay = NEXT_INSN (insn);
6052 gcc_assert (delay);
6053
6054 output_restore (PATTERN (delay));
6055
6056 PATTERN (delay) = gen_blockage ();
6057 INSN_CODE (delay) = -1;
6058 }
6059 else
6060 output_restore (NULL_RTX);
6061 }
6062
6063 return "";
6064 }
6065 \f
6066 /* Functions for handling argument passing.
6067
6068 For 32-bit, the first 6 args are normally in registers and the rest are
6069 pushed. Any arg that starts within the first 6 words is at least
6070 partially passed in a register unless its data type forbids it.
6071
6072 For 64-bit, the argument registers are laid out as an array of 16 elements
6073 and arguments are added sequentially. The first 6 int args and up to the
6074 first 16 fp args (depending on size) are passed in regs.
6075
6076 Slot Stack Integral Float Float in structure Double Long Double
6077 ---- ----- -------- ----- ------------------ ------ -----------
6078 15 [SP+248] %f31 %f30,%f31 %d30
6079 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6080 13 [SP+232] %f27 %f26,%f27 %d26
6081 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6082 11 [SP+216] %f23 %f22,%f23 %d22
6083 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6084 9 [SP+200] %f19 %f18,%f19 %d18
6085 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6086 7 [SP+184] %f15 %f14,%f15 %d14
6087 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6088 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6089 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6090 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6091 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6092 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6093 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6094
6095 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6096
6097 Integral arguments are always passed as 64-bit quantities appropriately
6098 extended.
6099
6100 Passing of floating point values is handled as follows.
6101 If a prototype is in scope:
6102 If the value is a named argument (i.e. the function is not stdarg, or
6103 the value is not part of the `...') then the value is passed in the
6104 appropriate fp reg.
6105 If the value is part of the `...' and is passed in one of the first 6
6106 slots then the value is passed in the appropriate int reg.
6107 If the value is part of the `...' and is not passed in one of the first 6
6108 slots then the value is passed in memory.
6109 If a prototype is not in scope:
6110 If the value is one of the first 6 arguments the value is passed in the
6111 appropriate integer reg and the appropriate fp reg.
6112 If the value is not one of the first 6 arguments the value is passed in
6113 the appropriate fp reg and in memory.
6114
6115
6116 Summary of the calling conventions implemented by GCC on the SPARC:
6117
6118 32-bit ABI:
6119 size argument return value
6120
6121 small integer <4 int. reg. int. reg.
6122 word 4 int. reg. int. reg.
6123 double word 8 int. reg. int. reg.
6124
6125 _Complex small integer <8 int. reg. int. reg.
6126 _Complex word 8 int. reg. int. reg.
6127 _Complex double word 16 memory int. reg.
6128
6129 vector integer <=8 int. reg. FP reg.
6130 vector integer >8 memory memory
6131
6132 float 4 int. reg. FP reg.
6133 double 8 int. reg. FP reg.
6134 long double 16 memory memory
6135
6136 _Complex float 8 memory FP reg.
6137 _Complex double 16 memory FP reg.
6138 _Complex long double 32 memory FP reg.
6139
6140 vector float any memory memory
6141
6142 aggregate any memory memory
6143
6144
6145
6146 64-bit ABI:
6147 size argument return value
6148
6149 small integer <8 int. reg. int. reg.
6150 word 8 int. reg. int. reg.
6151 double word 16 int. reg. int. reg.
6152
6153 _Complex small integer <16 int. reg. int. reg.
6154 _Complex word 16 int. reg. int. reg.
6155 _Complex double word 32 memory int. reg.
6156
6157 vector integer <=16 FP reg. FP reg.
6158 vector integer 16<s<=32 memory FP reg.
6159 vector integer >32 memory memory
6160
6161 float 4 FP reg. FP reg.
6162 double 8 FP reg. FP reg.
6163 long double 16 FP reg. FP reg.
6164
6165 _Complex float 8 FP reg. FP reg.
6166 _Complex double 16 FP reg. FP reg.
6167 _Complex long double 32 memory FP reg.
6168
6169 vector float <=16 FP reg. FP reg.
6170 vector float 16<s<=32 memory FP reg.
6171 vector float >32 memory memory
6172
6173 aggregate <=16 reg. reg.
6174 aggregate 16<s<=32 memory reg.
6175 aggregate >32 memory memory
6176
6177
6178
6179 Note #1: complex floating-point types follow the extended SPARC ABIs as
6180 implemented by the Sun compiler.
6181
6182 Note #2: integral vector types follow the scalar floating-point types
6183 conventions to match what is implemented by the Sun VIS SDK.
6184
6185 Note #3: floating-point vector types follow the aggregate types
6186 conventions. */
6187
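/* As a worked example of the 64-bit table above, take the hypothetical
   prototype

     void f (int a, double b, float c);

   a occupies slot 0 and is passed in %o0 (extended to 64 bits), b
   occupies slot 1 and is passed in %d2, and c occupies slot 2 and is
   passed in %f5, i.e. right-justified in the %f4,%f5 pair.  */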
6188
6189 /* Maximum number of int regs for args. */
6190 #define SPARC_INT_ARG_MAX 6
6191 /* Maximum number of fp regs for args. */
6192 #define SPARC_FP_ARG_MAX 16
6193 /* Number of words (partially) occupied for a given size in units. */
6194 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6195
6196 /* Handle the INIT_CUMULATIVE_ARGS macro.
6197 Initialize a variable CUM of type CUMULATIVE_ARGS
6198 for a call to a function whose data type is FNTYPE.
6199 For a library call, FNTYPE is 0. */
6200
6201 void
6202 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6203 {
6204 cum->words = 0;
6205 cum->prototype_p = fntype && prototype_p (fntype);
6206 cum->libcall_p = !fntype;
6207 }
6208
6209 /* Handle promotion of pointer and integer arguments. */
6210
6211 static machine_mode
6212 sparc_promote_function_mode (const_tree type, machine_mode mode,
6213 int *punsignedp, const_tree, int)
6214 {
6215 if (type && POINTER_TYPE_P (type))
6216 {
6217 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6218 return Pmode;
6219 }
6220
6221 /* Integral arguments are passed as full words, as per the ABI. */
6222 if (GET_MODE_CLASS (mode) == MODE_INT
6223 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6224 return word_mode;
6225
6226 return mode;
6227 }
6228
6229 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6230
6231 static bool
6232 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6233 {
6234 return TARGET_ARCH64;
6235 }
6236
6237 /* Traverse the record TYPE recursively and call FUNC on its fields.
6238 NAMED is true if this is for a named parameter. DATA is passed
6239 to FUNC for each field. OFFSET is the starting position and
6240 PACKED is true if we are inside a packed record. */
6241
6242 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6243 static void
6244 traverse_record_type (const_tree type, bool named, T *data,
6245 HOST_WIDE_INT offset = 0, bool packed = false)
6246 {
6247 /* The ABI obviously doesn't specify how packed structures are passed.
6248 These are passed in integer regs if possible, otherwise memory. */
6249 if (!packed)
6250 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6251 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6252 {
6253 packed = true;
6254 break;
6255 }
6256
6257 /* Walk the real fields, but skip those with no size or a zero size.
6258 ??? Fields with variable offset are handled as having zero offset. */
6259 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6260 if (TREE_CODE (field) == FIELD_DECL)
6261 {
6262 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6263 continue;
6264
6265 HOST_WIDE_INT bitpos = offset;
6266 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6267 bitpos += int_bit_position (field);
6268
6269 tree field_type = TREE_TYPE (field);
6270 if (TREE_CODE (field_type) == RECORD_TYPE)
6271 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6272 packed);
6273 else
6274 {
6275 const bool fp_type
6276 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6277 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6278 data);
6279 }
6280 }
6281 }
6282
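/* For example, given the hypothetical type

     struct { int i; struct { float f; } inner; }

   and a named parameter with TARGET_FPU enabled, the traversal above
   invokes Func (i, 0, false, data) for the integer field and, through
   the recursive call on inner, Func (f, 32, true, data) for the float
   field.  A DECL_PACKED field at any enclosing level would force the
   FP flag to false.  */
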
6283 /* Handle recursive register classifying for structure layout. */
6284
6285 typedef struct
6286 {
6287 bool fp_regs; /* true if field eligible to FP registers. */
6288 bool fp_regs_in_first_word; /* true if such field in first word. */
6289 } classify_data_t;
6290
6291 /* A subroutine of function_arg_slotno. Classify the field. */
6292
6293 inline void
6294 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6295 classify_data_t *data)
6296 {
6297 if (fp)
6298 {
6299 data->fp_regs = true;
6300 if (bitpos < BITS_PER_WORD)
6301 data->fp_regs_in_first_word = true;
6302 }
6303 }
6304
6305 /* Compute the slot number to pass an argument in.
6306 Return the slot number or -1 if passing on the stack.
6307
6308 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6309 the preceding args and about the function being called.
6310 MODE is the argument's machine mode.
6311 TYPE is the data type of the argument (as a tree).
6312 This is null for libcalls where that information may
6313 not be available.
6314 NAMED is nonzero if this argument is a named parameter
6315 (otherwise it is an extra parameter matching an ellipsis).
6316 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6317 *PREGNO records the register number to use if scalar type.
6318 *PPADDING records the amount of padding needed in words. */
6319
6320 static int
6321 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6322 const_tree type, bool named, bool incoming,
6323 int *pregno, int *ppadding)
6324 {
6325 int regbase = (incoming
6326 ? SPARC_INCOMING_INT_ARG_FIRST
6327 : SPARC_OUTGOING_INT_ARG_FIRST);
6328 int slotno = cum->words;
6329 enum mode_class mclass;
6330 int regno;
6331
6332 *ppadding = 0;
6333
6334 if (type && TREE_ADDRESSABLE (type))
6335 return -1;
6336
6337 if (TARGET_ARCH32
6338 && mode == BLKmode
6339 && type
6340 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6341 return -1;
6342
6343 /* For SPARC64, objects requiring 16-byte alignment get it. */
6344 if (TARGET_ARCH64
6345 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6346 && (slotno & 1) != 0)
6347 slotno++, *ppadding = 1;
6348
6349 mclass = GET_MODE_CLASS (mode);
6350 if (type && TREE_CODE (type) == VECTOR_TYPE)
6351 {
6352 /* Vector types deserve special treatment because they are
6353 polymorphic wrt their mode, depending upon whether VIS
6354 instructions are enabled. */
6355 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6356 {
6357 /* The SPARC port defines no floating-point vector modes. */
6358 gcc_assert (mode == BLKmode);
6359 }
6360 else
6361 {
6362 /* Integral vector types should either have a vector
6363 mode or an integral mode, because we are guaranteed
6364 by pass_by_reference that their size is not greater
6365 than 16 bytes and TImode is 16-byte wide. */
6366 gcc_assert (mode != BLKmode);
6367
6368 /* Vector integers are handled like floats according to
6369 the Sun VIS SDK. */
6370 mclass = MODE_FLOAT;
6371 }
6372 }
6373
6374 switch (mclass)
6375 {
6376 case MODE_FLOAT:
6377 case MODE_COMPLEX_FLOAT:
6378 case MODE_VECTOR_INT:
6379 if (TARGET_ARCH64 && TARGET_FPU && named)
6380 {
6381 /* If all arg slots are filled, then must pass on stack. */
6382 if (slotno >= SPARC_FP_ARG_MAX)
6383 return -1;
6384
6385 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6386 /* Arguments filling only one single FP register are
6387 right-justified in the outer double FP register. */
6388 if (GET_MODE_SIZE (mode) <= 4)
6389 regno++;
6390 break;
6391 }
6392 /* fallthrough */
6393
6394 case MODE_INT:
6395 case MODE_COMPLEX_INT:
6396 /* If all arg slots are filled, then must pass on stack. */
6397 if (slotno >= SPARC_INT_ARG_MAX)
6398 return -1;
6399
6400 regno = regbase + slotno;
6401 break;
6402
6403 case MODE_RANDOM:
6404 if (mode == VOIDmode)
6405 /* MODE is VOIDmode when generating the actual call. */
6406 return -1;
6407
6408 gcc_assert (mode == BLKmode);
6409
6410 if (TARGET_ARCH32
6411 || !type
6412 || (TREE_CODE (type) != RECORD_TYPE
6413 && TREE_CODE (type) != VECTOR_TYPE))
6414 {
6415 /* If all arg slots are filled, then must pass on stack. */
6416 if (slotno >= SPARC_INT_ARG_MAX)
6417 return -1;
6418
6419 regno = regbase + slotno;
6420 }
6421 else /* TARGET_ARCH64 && type */
6422 {
6423 /* If all arg slots are filled, then must pass on stack. */
6424 if (slotno >= SPARC_FP_ARG_MAX)
6425 return -1;
6426
6427 if (TREE_CODE (type) == RECORD_TYPE)
6428 {
6429 classify_data_t data = { false, false };
6430 traverse_record_type<classify_data_t, classify_registers>
6431 (type, named, &data);
6432
6433 if (data.fp_regs)
6434 {
6435 /* If all FP slots are filled except for the last one and
6436 there is no FP field in the first word, then must pass
6437 on stack. */
6438 if (slotno >= SPARC_FP_ARG_MAX - 1
6439 && !data.fp_regs_in_first_word)
6440 return -1;
6441 }
6442 else
6443 {
6444 /* If all int slots are filled, then must pass on stack. */
6445 if (slotno >= SPARC_INT_ARG_MAX)
6446 return -1;
6447 }
6448 }
6449
6450 /* PREGNO isn't set since both int and FP regs can be used. */
6451 return slotno;
6452 }
6453 break;
6454
6455 default:
6456 gcc_unreachable ();
6457 }
6458
6459 *pregno = regno;
6460 return slotno;
6461 }
6462
6463 /* Handle recursive register counting/assigning for structure layout. */
6464
6465 typedef struct
6466 {
6467 int slotno; /* slot number of the argument. */
6468 int regbase; /* regno of the base register. */
6469 int intoffset; /* offset of the first pending integer field. */
6470 int nregs; /* number of words passed in registers. */
6471 bool stack; /* true if part of the argument is on the stack. */
6472 rtx ret; /* return expression being built. */
6473 } assign_data_t;
6474
6475 /* A subroutine of function_arg_record_value. Compute the number of integer
6476 registers to be assigned between DATA->intoffset and BITPOS. Return
6477 true if at least one integer register is assigned or false otherwise. */
6478
6479 static bool
6480 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6481 {
6482 if (data->intoffset < 0)
6483 return false;
6484
6485 const int intoffset = data->intoffset;
6486 data->intoffset = -1;
6487
6488 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6489 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6490 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6491 int nregs = (endbit - startbit) / BITS_PER_WORD;
6492
6493 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6494 {
6495 nregs = SPARC_INT_ARG_MAX - this_slotno;
6496
6497 /* We need to pass this field (partly) on the stack. */
6498 data->stack = 1;
6499 }
6500
6501 if (nregs <= 0)
6502 return false;
6503
6504 *pnregs = nregs;
6505 return true;
6506 }
6507
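/* For example, with BITS_PER_WORD == 64, DATA->intoffset == 32 and
   BITPOS == 192: startbit = 0 and endbit = 192, hence nregs = 3 words
   starting at slot DATA->slotno.  If fewer than 3 integer slots remain
   before SPARC_INT_ARG_MAX, nregs is truncated and DATA->stack is set
   to flag a partial stack pass.  */
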
6508 /* A subroutine of function_arg_record_value. Compute the number and the mode
6509 of the FP registers to be assigned for FIELD. Return true if at least one
6510 FP register is assigned or false otherwise. */
6511
6512 static bool
6513 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6514 assign_data_t *data,
6515 int *pnregs, machine_mode *pmode)
6516 {
6517 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6518 machine_mode mode = DECL_MODE (field);
6519 int nregs, nslots;
6520
6521 /* Slots are counted as words while regs are counted as having the size of
6522 the (inner) mode. */
6523 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6524 {
6525 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6526 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6527 }
6528 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6529 {
6530 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6531 nregs = 2;
6532 }
6533 else
6534 nregs = 1;
6535
6536 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6537
6538 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6539 {
6540 nslots = SPARC_FP_ARG_MAX - this_slotno;
6541 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6542
6543 /* We need to pass this field (partly) on the stack. */
6544 data->stack = 1;
6545
6546 if (nregs <= 0)
6547 return false;
6548 }
6549
6550 *pnregs = nregs;
6551 *pmode = mode;
6552 return true;
6553 }
6554
6555 /* A subroutine of function_arg_record_value. Count the number of registers
6556 to be assigned for FIELD and between DATA->intoffset and BITPOS. */
6557
6558 inline void
6559 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6560 assign_data_t *data)
6561 {
6562 if (fp)
6563 {
6564 int nregs;
6565 machine_mode mode;
6566
6567 if (compute_int_layout (bitpos, data, &nregs))
6568 data->nregs += nregs;
6569
6570 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6571 data->nregs += nregs;
6572 }
6573 else
6574 {
6575 if (data->intoffset < 0)
6576 data->intoffset = bitpos;
6577 }
6578 }
6579
6580 /* A subroutine of function_arg_record_value. Assign the bits of the
6581 structure between DATA->intoffset and BITPOS to integer registers. */
6582
6583 static void
6584 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6585 {
6586 int intoffset = data->intoffset;
6587 machine_mode mode;
6588 int nregs;
6589
6590 if (!compute_int_layout (bitpos, data, &nregs))
6591 return;
6592
6593 /* If this is the trailing part of a word, only load that much into
6594 the register. Otherwise load the whole register. Note that in
6595 the latter case we may pick up unwanted bits. It's not a problem
6596 at the moment but we may wish to revisit this. */
6597 if (intoffset % BITS_PER_WORD != 0)
6598 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6599 MODE_INT);
6600 else
6601 mode = word_mode;
6602
6603 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6604 unsigned int regno = data->regbase + this_slotno;
6605 intoffset /= BITS_PER_UNIT;
6606
6607 do
6608 {
6609 rtx reg = gen_rtx_REG (mode, regno);
6610 XVECEXP (data->ret, 0, data->stack + data->nregs)
6611 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6612 data->nregs += 1;
6613 mode = word_mode;
6614 regno += 1;
6615 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1; /* next word boundary */
6616 }
6617 while (--nregs > 0);
6618 }
6619
6620 /* A subroutine of function_arg_record_value. Assign FIELD at position
6621 BITPOS to FP registers. */
6622
6623 static void
6624 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6625 assign_data_t *data)
6626 {
6627 int nregs;
6628 machine_mode mode;
6629
6630 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6631 return;
6632
6633 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6634 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6635 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6636 regno++;
6637 int pos = bitpos / BITS_PER_UNIT;
6638
6639 do
6640 {
6641 rtx reg = gen_rtx_REG (mode, regno);
6642 XVECEXP (data->ret, 0, data->stack + data->nregs)
6643 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6644 data->nregs += 1;
6645 regno += GET_MODE_SIZE (mode) / 4;
6646 pos += GET_MODE_SIZE (mode);
6647 }
6648 while (--nregs > 0);
6649 }
6650
6651 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6652 the structure between DATA->intoffset and BITPOS to registers. */
6653
6654 inline void
6655 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6656 assign_data_t *data)
6657 {
6658 if (fp)
6659 {
6660 assign_int_registers (bitpos, data);
6661
6662 assign_fp_registers (field, bitpos, data);
6663 }
6664 else
6665 {
6666 if (data->intoffset < 0)
6667 data->intoffset = bitpos;
6668 }
6669 }
6670
6671 /* Used by function_arg and sparc_function_value_1 to implement the complex
6672 conventions of the 64-bit ABI for passing and returning structures.
6673 Return an expression valid as a return value for the FUNCTION_ARG
6674 and TARGET_FUNCTION_VALUE.
6675
6676 TYPE is the data type of the argument (as a tree).
6677 This is null for libcalls where that information may
6678 not be available.
6679 MODE is the argument's machine mode.
6680 SLOTNO is the index number of the argument's slot in the parameter array.
6681 NAMED is true if this argument is a named parameter
6682 (otherwise it is an extra parameter matching an ellipsis).
6683 REGBASE is the regno of the base register for the parameter array. */
6684
6685 static rtx
6686 function_arg_record_value (const_tree type, machine_mode mode,
6687 int slotno, bool named, int regbase)
6688 {
6689 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6690 assign_data_t data;
6691 int nregs;
6692
6693 data.slotno = slotno;
6694 data.regbase = regbase;
6695
6696 /* Count how many registers we need. */
6697 data.nregs = 0;
6698 data.intoffset = 0;
6699 data.stack = false;
6700 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6701
6702 /* Take into account pending integer fields. */
6703 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6704 data.nregs += nregs;
6705
6706 /* Allocate the vector and handle some annoying special cases. */
6707 nregs = data.nregs;
6708
6709 if (nregs == 0)
6710 {
6711 /* ??? Empty structure has no value? Duh? */
6712 if (typesize <= 0)
6713 {
6714 /* Though there's nothing really to store, return a word register
6715 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6716 would lead to breakage because there are zero bytes to
6717 load. */
6718 return gen_rtx_REG (mode, regbase);
6719 }
6720
6721 /* ??? C++ has structures with no fields, and yet a size. Give up
6722 for now and pass everything back in integer registers. */
6723 nregs = CEIL_NWORDS (typesize);
6724 if (nregs + slotno > SPARC_INT_ARG_MAX)
6725 nregs = SPARC_INT_ARG_MAX - slotno;
6726 }
6727
6728 gcc_assert (nregs > 0);
6729
6730 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6731
6732 /* If at least one field must be passed on the stack, generate
6733 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6734 also be passed on the stack. We can't do much better because the
6735 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6736 of structures for which the fields passed exclusively in registers
6737 are not at the beginning of the structure. */
6738 if (data.stack)
6739 XVECEXP (data.ret, 0, 0)
6740 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6741
6742 /* Assign the registers. */
6743 data.nregs = 0;
6744 data.intoffset = 0;
6745 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6746
6747 /* Assign pending integer fields. */
6748 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6749
6750 gcc_assert (data.nregs == nregs);
6751
6752 return data.ret;
6753 }
6754
6755 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6756 of the 64-bit ABI for passing and returning unions.
6757 Return an expression valid as a return value for the FUNCTION_ARG
6758 and TARGET_FUNCTION_VALUE.
6759
6760 SIZE is the size in bytes of the union.
6761 MODE is the argument's machine mode.  SLOTNO is the index number of the argument's slot in the parameter array.
6762 REGNO is the hard register the union will be passed in. */
6763
6764 static rtx
6765 function_arg_union_value (int size, machine_mode mode, int slotno,
6766 int regno)
6767 {
6768 int nwords = CEIL_NWORDS (size), i;
6769 rtx regs;
6770
6771 /* See comment in previous function for empty structures. */
6772 if (nwords == 0)
6773 return gen_rtx_REG (mode, regno);
6774
6775 if (slotno == SPARC_INT_ARG_MAX - 1)
6776 nwords = 1;
6777
6778 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6779
6780 for (i = 0; i < nwords; i++)
6781 {
6782 /* Unions are passed left-justified. */
6783 XVECEXP (regs, 0, i)
6784 = gen_rtx_EXPR_LIST (VOIDmode,
6785 gen_rtx_REG (word_mode, regno),
6786 GEN_INT (UNITS_PER_WORD * i));
6787 regno++;
6788 }
6789
6790 return regs;
6791 }
6792
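/* For example, a 12-byte union on TARGET_ARCH64 yields nwords == 2, so
   with REGNO == %o0 the result is
   (parallel [(reg:DI %o0) 0, (reg:DI %o1) 8]); if SLOTNO is the last
   integer slot, only the first word is passed in a register.  */
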
6793 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6794 for passing and returning BLKmode vectors.
6795 Return an expression valid as a return value for the FUNCTION_ARG
6796 and TARGET_FUNCTION_VALUE.
6797
6798 SIZE is the size in bytes of the vector.
6799 REGNO is the FP hard register the vector will be passed in. */
6800
6801 static rtx
6802 function_arg_vector_value (int size, int regno)
6803 {
6804 const int nregs = MAX (1, size / 8);
6805 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6806
6807 if (size < 8)
6808 XVECEXP (regs, 0, 0)
6809 = gen_rtx_EXPR_LIST (VOIDmode,
6810 gen_rtx_REG (SImode, regno),
6811 const0_rtx);
6812 else
6813 for (int i = 0; i < nregs; i++)
6814 XVECEXP (regs, 0, i)
6815 = gen_rtx_EXPR_LIST (VOIDmode,
6816 gen_rtx_REG (DImode, regno + 2*i),
6817 GEN_INT (i*8));
6818
6819 return regs;
6820 }
6821
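/* For example, a 16-byte BLKmode vector with REGNO == %f0 yields
   (parallel [(reg:DI %f0) 0, (reg:DI %f2) 8]), i.e. the %d0 and %d2
   register pairs, while a 4-byte vector is described by a single
   (reg:SI %f0) at offset 0.  */
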
6822 /* Determine where to put an argument to a function.
6823 Value is zero to push the argument on the stack,
6824 or a hard register in which to store the argument.
6825
6826 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6827 the preceding args and about the function being called.
6828 MODE is the argument's machine mode.
6829 TYPE is the data type of the argument (as a tree).
6830 This is null for libcalls where that information may
6831 not be available.
6832 NAMED is true if this argument is a named parameter
6833 (otherwise it is an extra parameter matching an ellipsis).
6834 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6835 TARGET_FUNCTION_INCOMING_ARG. */
6836
6837 static rtx
6838 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6839 const_tree type, bool named, bool incoming)
6840 {
6841 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6842
6843 int regbase = (incoming
6844 ? SPARC_INCOMING_INT_ARG_FIRST
6845 : SPARC_OUTGOING_INT_ARG_FIRST);
6846 int slotno, regno, padding;
6847 enum mode_class mclass = GET_MODE_CLASS (mode);
6848
6849 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6850 &regno, &padding);
6851 if (slotno == -1)
6852 return 0;
6853
6854 /* Vector types deserve special treatment because they are polymorphic wrt
6855 their mode, depending upon whether VIS instructions are enabled. */
6856 if (type && TREE_CODE (type) == VECTOR_TYPE)
6857 {
6858 HOST_WIDE_INT size = int_size_in_bytes (type);
6859 gcc_assert ((TARGET_ARCH32 && size <= 8)
6860 || (TARGET_ARCH64 && size <= 16));
6861
6862 if (mode == BLKmode)
6863 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6864
6865 mclass = MODE_FLOAT;
6866 }
6867
6868 if (TARGET_ARCH32)
6869 return gen_rtx_REG (mode, regno);
6870
6871 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6872 and are promoted to registers if possible. */
6873 if (type && TREE_CODE (type) == RECORD_TYPE)
6874 {
6875 HOST_WIDE_INT size = int_size_in_bytes (type);
6876 gcc_assert (size <= 16);
6877
6878 return function_arg_record_value (type, mode, slotno, named, regbase);
6879 }
6880
6881 /* Unions up to 16 bytes in size are passed in integer registers. */
6882 else if (type && TREE_CODE (type) == UNION_TYPE)
6883 {
6884 HOST_WIDE_INT size = int_size_in_bytes (type);
6885 gcc_assert (size <= 16);
6886
6887 return function_arg_union_value (size, mode, slotno, regno);
6888 }
6889
6890 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6891 but also have the slot allocated for them.
6892 If no prototype is in scope fp values in register slots get passed
6893 in two places, either fp regs and int regs or fp regs and memory. */
6894 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6895 && SPARC_FP_REG_P (regno))
6896 {
6897 rtx reg = gen_rtx_REG (mode, regno);
6898 if (cum->prototype_p || cum->libcall_p)
6899 return reg;
6900 else
6901 {
6902 rtx v0, v1;
6903
6904 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6905 {
6906 int intreg;
6907
6908 /* On incoming, we don't need to know that the value
6909 is passed in %f0 and %i0, and it confuses other parts
6910 causing needless spillage in even the simplest cases. */
6911 if (incoming)
6912 return reg;
6913
6914 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6915 + (regno - SPARC_FP_ARG_FIRST) / 2);
6916
6917 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6918 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6919 const0_rtx);
6920 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6921 }
6922 else
6923 {
6924 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6925 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6926 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6927 }
6928 }
6929 }
6930
6931 /* All other aggregate types are passed in an integer register in a mode
6932 corresponding to the size of the type. */
6933 else if (type && AGGREGATE_TYPE_P (type))
6934 {
6935 HOST_WIDE_INT size = int_size_in_bytes (type);
6936 gcc_assert (size <= 16);
6937
6938 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6939 }
6940
6941 return gen_rtx_REG (mode, regno);
6942 }
6943
6944 /* Handle the TARGET_FUNCTION_ARG target hook. */
6945
6946 static rtx
6947 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6948 const_tree type, bool named)
6949 {
6950 return sparc_function_arg_1 (cum, mode, type, named, false);
6951 }
6952
6953 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6954
6955 static rtx
6956 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6957 const_tree type, bool named)
6958 {
6959 return sparc_function_arg_1 (cum, mode, type, named, true);
6960 }
6961
6962 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6963
6964 static unsigned int
6965 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6966 {
6967 return ((TARGET_ARCH64
6968 && (GET_MODE_ALIGNMENT (mode) == 128
6969 || (type && TYPE_ALIGN (type) == 128)))
6970 ? 128
6971 : PARM_BOUNDARY);
6972 }
6973
6974 /* For an arg passed partly in registers and partly in memory,
6975 this is the number of bytes of registers used.
6976 For args passed entirely in registers or entirely in memory, zero.
6977
6978 Any arg that starts in the first 6 regs but won't entirely fit in them
6979 needs partial registers on v8. On v9, structures with integer
6980 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6981 values that begin in the last fp reg [where "last fp reg" varies with the
6982 mode] will be split between that reg and memory. */
6983
6984 static int
6985 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
6986 tree type, bool named)
6987 {
6988 int slotno, regno, padding;
6989
6990 /* We pass false for incoming here; it doesn't matter. */
6991 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6992 false, &regno, &padding);
6993
6994 if (slotno == -1)
6995 return 0;
6996
6997 if (TARGET_ARCH32)
6998 {
6999 if ((slotno + (mode == BLKmode
7000 ? CEIL_NWORDS (int_size_in_bytes (type))
7001 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7002 > SPARC_INT_ARG_MAX)
7003 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7004 }
7005 else
7006 {
7007 /* We are guaranteed by pass_by_reference that the size of the
7008 argument is not greater than 16 bytes, so we only need to return
7009 one word if the argument is partially passed in registers. */
7010
7011 if (type && AGGREGATE_TYPE_P (type))
7012 {
7013 int size = int_size_in_bytes (type);
7014
7015 if (size > UNITS_PER_WORD
7016 && (slotno == SPARC_INT_ARG_MAX - 1
7017 || slotno == SPARC_FP_ARG_MAX - 1))
7018 return UNITS_PER_WORD;
7019 }
7020 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7021 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7022 && ! (TARGET_FPU && named)))
7023 {
7024 /* The complex types are passed as packed types. */
7025 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7026 && slotno == SPARC_INT_ARG_MAX - 1)
7027 return UNITS_PER_WORD;
7028 }
7029 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7030 {
7031 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7032 > SPARC_FP_ARG_MAX)
7033 return UNITS_PER_WORD;
7034 }
7035 }
7036
7037 return 0;
7038 }
7039
7040 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7041 Specify whether to pass the argument by reference. */
7042
7043 static bool
7044 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7045 machine_mode mode, const_tree type,
7046 bool named ATTRIBUTE_UNUSED)
7047 {
7048 if (TARGET_ARCH32)
7049 /* Original SPARC 32-bit ABI says that structures, unions,
7050 and quad-precision floats are passed by reference. For Pascal,
7051 also pass arrays by reference. All other base types are passed
7052 in registers.
7053
7054 Extended ABI (as implemented by the Sun compiler) says that all
7055 complex floats are passed by reference. Pass complex integers
7056 in registers up to 8 bytes. More generally, enforce the 2-word
7057 cap for passing arguments in registers.
7058
7059 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7060 integers are passed like floats of the same size, that is in
7061 registers up to 8 bytes. Pass all vector floats by reference
7062 like structure and unions. */
7063 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7064 || mode == SCmode
7065 /* Catch CDImode, TFmode, DCmode and TCmode. */
7066 || GET_MODE_SIZE (mode) > 8
7067 || (type
7068 && TREE_CODE (type) == VECTOR_TYPE
7069 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7070 else
7071 /* Original SPARC 64-bit ABI says that structures and unions
7072 smaller than 16 bytes are passed in registers, as well as
7073 all other base types.
7074
7075 Extended ABI (as implemented by the Sun compiler) says that
7076 complex floats are passed in registers up to 16 bytes. Pass
7077 all complex integers in registers up to 16 bytes. More generally,
7078 enforce the 2-word cap for passing arguments in registers.
7079
7080 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7081 integers are passed like floats of the same size, that is in
7082 registers (up to 16 bytes). Pass all vector floats like structure
7083 and unions. */
7084 return ((type
7085 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7086 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7087 /* Catch CTImode and TCmode. */
7088 || GET_MODE_SIZE (mode) > 16);
7089 }
7090
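/* Concrete examples: in 32-bit mode a structure, a long double (TFmode),
   a _Complex float (SCmode) and a _Complex double all go by reference,
   while long long and an 8-byte _Complex int stay in registers.  In
   64-bit mode only objects larger than 16 bytes go by reference, e.g.
   _Complex long double (TCmode) or a 24-byte structure.  */
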
7091 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7092 Update the data in CUM to advance over an argument
7093 of mode MODE and data type TYPE.
7094 TYPE is null for libcalls where that information may not be available. */
7095
7096 static void
7097 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7098 const_tree type, bool named)
7099 {
7100 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7101 int regno, padding;
7102
7103 /* We pass false for incoming here; it doesn't matter. */
7104 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7105
7106 /* If argument requires leading padding, add it. */
7107 cum->words += padding;
7108
7109 if (TARGET_ARCH32)
7110 cum->words += (mode == BLKmode
7111 ? CEIL_NWORDS (int_size_in_bytes (type))
7112 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7113 else
7114 {
7115 if (type && AGGREGATE_TYPE_P (type))
7116 {
7117 int size = int_size_in_bytes (type);
7118
7119 if (size <= 8)
7120 ++cum->words;
7121 else if (size <= 16)
7122 cum->words += 2;
7123 else /* passed by reference */
7124 ++cum->words;
7125 }
7126 else
7127 cum->words += (mode == BLKmode
7128 ? CEIL_NWORDS (int_size_in_bytes (type))
7129 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7130 }
7131 }
7132
7133 /* Handle the FUNCTION_ARG_PADDING macro.
7134 For the 64-bit ABI, structs are always stored left-shifted in their
7135 argument slot. */
7136
7137 enum direction
7138 function_arg_padding (machine_mode mode, const_tree type)
7139 {
7140 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7141 return upward;
7142
7143 /* Fall back to the default. */
7144 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7145 }
7146
7147 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7148 Specify whether to return the return value in memory. */
7149
7150 static bool
7151 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7152 {
7153 if (TARGET_ARCH32)
7154 /* Original SPARC 32-bit ABI says that structures, unions,
7155 and quad-precision floats are returned in memory. All other
7156 base types are returned in registers.
7157
7158 Extended ABI (as implemented by the Sun compiler) says that
7159 all complex floats are returned in registers (8 FP registers
7160 at most for '_Complex long double'). Return all complex integers
7161 in registers (4 at most for '_Complex long long').
7162
7163 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7164 integers are returned like floats of the same size, that is in
7165 registers up to 8 bytes and in memory otherwise. Return all
7166 vector floats in memory like structure and unions; note that
7167 they always have BLKmode like the latter. */
7168 return (TYPE_MODE (type) == BLKmode
7169 || TYPE_MODE (type) == TFmode
7170 || (TREE_CODE (type) == VECTOR_TYPE
7171 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7172 else
7173 /* Original SPARC 64-bit ABI says that structures and unions
7174 smaller than 32 bytes are returned in registers, as well as
7175 all other base types.
7176
7177 Extended ABI (as implemented by the Sun compiler) says that all
7178 complex floats are returned in registers (8 FP registers at most
7179 for '_Complex long double'). Return all complex integers in
7180 registers (4 at most for '_Complex TItype').
7181
7182 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7183 integers are returned like floats of the same size, that is in
7184 registers. Return all vector floats like structure and unions;
7185 note that they always have BLKmode like the latter. */
7186 return (TYPE_MODE (type) == BLKmode
7187 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7188 }
7189
7190 /* Handle the TARGET_STRUCT_VALUE target hook.
7191 Return where to find the structure return value address. */
7192
7193 static rtx
7194 sparc_struct_value_rtx (tree fndecl, int incoming)
7195 {
7196 if (TARGET_ARCH64)
7197 return 0;
7198 else
7199 {
7200 rtx mem;
7201
7202 if (incoming)
7203 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7204 STRUCT_VALUE_OFFSET));
7205 else
7206 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7207 STRUCT_VALUE_OFFSET));
7208
7209 /* Only follow the SPARC ABI for fixed-size structure returns.
7210 Variable-size structure returns are handled per the normal
7211 procedures in GCC. This is enabled by -mstd-struct-return. */
7212 if (incoming == 2
7213 && sparc_std_struct_return
7214 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7215 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7216 {
7217 /* We must check and adjust the return address, as it is optional
7218 whether the caller really provides the return object. */
7219 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7220 rtx scratch = gen_reg_rtx (SImode);
7221 rtx_code_label *endlab = gen_label_rtx ();
7222
7223 /* Calculate the return object size. */
7224 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7225 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7226 /* Construct a temporary return value. */
7227 rtx temp_val
7228 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7229
7230 /* Implement SPARC 32-bit psABI callee return struct checking:
7231
7232 Fetch the instruction where we will return to and see if
7233 it's an unimp instruction (the most significant 10 bits
7234 will be zero). */
7235 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7236 plus_constant (Pmode,
7237 ret_reg, 8)));
7238 /* Assume the size is valid and pre-adjust. */
7239 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7240 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7241 0, endlab);
7242 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7243 /* Write the address of the memory pointed to by temp_val into
7244 the memory pointed to by mem. */
7245 emit_move_insn (mem, XEXP (temp_val, 0));
7246 emit_label (endlab);
7247 }
7248
7249 return mem;
7250 }
7251 }
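
/* Sketch of the 32-bit psABI protocol implemented above.  A caller
   that expects a structure return emits

	call	foo
	 nop
	unimp	8		! low 12 bits encode sizeof (struct)

   The code emitted above in the callee loads the word at the return
   address + 8; if its low bits match the expected size, the caller
   really provided the return object, and the return address is bumped
   by 4 so that the callee returns past the unimp.  Otherwise the
   address of a freshly allocated temporary is stored into the slot at
   STRUCT_VALUE_OFFSET, giving the callee's stores a harmless target.  */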
7252
7253 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7254 For v9, function return values are subject to the same rules as arguments,
7255 except that up to 32 bytes may be returned in registers. */
7256
7257 static rtx
7258 sparc_function_value_1 (const_tree type, machine_mode mode,
7259 bool outgoing)
7260 {
7261 /* Beware that the two values are swapped here wrt function_arg. */
7262 int regbase = (outgoing
7263 ? SPARC_INCOMING_INT_ARG_FIRST
7264 : SPARC_OUTGOING_INT_ARG_FIRST);
7265 enum mode_class mclass = GET_MODE_CLASS (mode);
7266 int regno;
7267
7268 /* Vector types deserve special treatment because they are polymorphic wrt
7269 their mode, depending upon whether VIS instructions are enabled. */
7270 if (type && TREE_CODE (type) == VECTOR_TYPE)
7271 {
7272 HOST_WIDE_INT size = int_size_in_bytes (type);
7273 gcc_assert ((TARGET_ARCH32 && size <= 8)
7274 || (TARGET_ARCH64 && size <= 32));
7275
7276 if (mode == BLKmode)
7277 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7278
7279 mclass = MODE_FLOAT;
7280 }
7281
7282 if (TARGET_ARCH64 && type)
7283 {
7284 /* Structures up to 32 bytes in size are returned in registers. */
7285 if (TREE_CODE (type) == RECORD_TYPE)
7286 {
7287 HOST_WIDE_INT size = int_size_in_bytes (type);
7288 gcc_assert (size <= 32);
7289
7290 return function_arg_record_value (type, mode, 0, 1, regbase);
7291 }
7292
7293 /* Unions up to 32 bytes in size are returned in integer registers. */
7294 else if (TREE_CODE (type) == UNION_TYPE)
7295 {
7296 HOST_WIDE_INT size = int_size_in_bytes (type);
7297 gcc_assert (size <= 32);
7298
7299 return function_arg_union_value (size, mode, 0, regbase);
7300 }
7301
7302 /* Objects that require it are returned in FP registers. */
7303 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7304 ;
7305
7306 /* All other aggregate types are returned in an integer register in a
7307 mode corresponding to the size of the type. */
7308 else if (AGGREGATE_TYPE_P (type))
7309 {
7310 /* All other aggregate types are passed in an integer register
7311 in a mode corresponding to the size of the type. */
7312 HOST_WIDE_INT size = int_size_in_bytes (type);
7313 gcc_assert (size <= 32);
7314
7315 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7316
7317 /* ??? We probably should have made the same ABI change in
7318 3.4.0 as the one we made for unions. The latter was
7319 required by the SCD though, while the former is not
7320 specified, so we favored compatibility and efficiency.
7321
7322 Now we're stuck for aggregates larger than 16 bytes,
7323 because OImode vanished in the meantime. Let's not
7324 try to be unduly clever, and simply follow the ABI
7325 for unions in that case. */
7326 if (mode == BLKmode)
7327 return function_arg_union_value (size, mode, 0, regbase);
7328 else
7329 mclass = MODE_INT;
7330 }
7331
7332 /* We should only have pointer and integer types at this point. This
7333 must match sparc_promote_function_mode. */
7334 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7335 mode = word_mode;
7336 }
7337
7338 /* We should only have pointer and integer types at this point, except with
7339 -freg-struct-return. This must match sparc_promote_function_mode. */
7340 else if (TARGET_ARCH32
7341 && !(type && AGGREGATE_TYPE_P (type))
7342 && mclass == MODE_INT
7343 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7344 mode = word_mode;
7345
7346 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7347 regno = SPARC_FP_ARG_FIRST;
7348 else
7349 regno = regbase;
7350
7351 return gen_rtx_REG (mode, regno);
7352 }
7353
7354 /* Handle TARGET_FUNCTION_VALUE.
7355 On the SPARC, the value is found in the first "output" register, but the
7356 called function leaves it in the first "input" register. */
7357
7358 static rtx
7359 sparc_function_value (const_tree valtype,
7360 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7361 bool outgoing)
7362 {
7363 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7364 }
7365
7366 /* Handle TARGET_LIBCALL_VALUE. */
7367
7368 static rtx
7369 sparc_libcall_value (machine_mode mode,
7370 const_rtx fun ATTRIBUTE_UNUSED)
7371 {
7372 return sparc_function_value_1 (NULL_TREE, mode, false);
7373 }
7374
7375 /* Handle FUNCTION_VALUE_REGNO_P.
7376 On the SPARC, the first "output" reg is used for integer values, and the
7377 first floating point register is used for floating point values. */
7378
7379 static bool
7380 sparc_function_value_regno_p (const unsigned int regno)
7381 {
7382 return (regno == 8 || (TARGET_FPU && regno == 32));
7383 }
7384
7385 /* Do what is necessary for `va_start'. We look at the current function
7386 to determine if stdarg or varargs is used and return the address of
7387 the first unnamed parameter. */
7388
7389 static rtx
7390 sparc_builtin_saveregs (void)
7391 {
7392 int first_reg = crtl->args.info.words;
7393 rtx address;
7394 int regno;
7395
7396 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7397 emit_move_insn (gen_rtx_MEM (word_mode,
7398 gen_rtx_PLUS (Pmode,
7399 frame_pointer_rtx,
7400 GEN_INT (FIRST_PARM_OFFSET (0)
7401 + (UNITS_PER_WORD
7402 * regno)))),
7403 gen_rtx_REG (word_mode,
7404 SPARC_INCOMING_INT_ARG_FIRST + regno));
7405
7406 address = gen_rtx_PLUS (Pmode,
7407 frame_pointer_rtx,
7408 GEN_INT (FIRST_PARM_OFFSET (0)
7409 + UNITS_PER_WORD * first_reg));
7410
7411 return address;
7412 }
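
/* Illustrative example: for 'int f (int a, int b, ...)' in 32-bit
   mode, crtl->args.info.words is 2, so the loop above stores %i2..%i5
   into their canonical parameter slots starting at
   %fp + FIRST_PARM_OFFSET (0) + 8 (one word apart), and the returned
   address points at the %i2 slot, i.e. at the first unnamed word.  */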
7413
7414 /* Implement `va_start' for stdarg. */
7415
7416 static void
7417 sparc_va_start (tree valist, rtx nextarg)
7418 {
7419 nextarg = expand_builtin_saveregs ();
7420 std_expand_builtin_va_start (valist, nextarg);
7421 }
7422
7423 /* Implement `va_arg' for stdarg. */
7424
7425 static tree
7426 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7427 gimple_seq *post_p)
7428 {
7429 HOST_WIDE_INT size, rsize, align;
7430 tree addr, incr;
7431 bool indirect;
7432 tree ptrtype = build_pointer_type (type);
7433
7434 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7435 {
7436 indirect = true;
7437 size = rsize = UNITS_PER_WORD;
7438 align = 0;
7439 }
7440 else
7441 {
7442 indirect = false;
7443 size = int_size_in_bytes (type);
7444 rsize = ROUND_UP (size, UNITS_PER_WORD);
7445 align = 0;
7446
7447 if (TARGET_ARCH64)
7448 {
7449 /* For SPARC64, objects requiring 16-byte alignment get it. */
7450 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7451 align = 2 * UNITS_PER_WORD;
7452
7453 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7454 are left-justified in their slots. */
7455 if (AGGREGATE_TYPE_P (type))
7456 {
7457 if (size == 0)
7458 size = rsize = UNITS_PER_WORD;
7459 else
7460 size = rsize;
7461 }
7462 }
7463 }
7464
7465 incr = valist;
7466 if (align)
7467 {
7468 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7469 incr = fold_convert (sizetype, incr);
7470 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7471 size_int (-align));
7472 incr = fold_convert (ptr_type_node, incr);
7473 }
7474
7475 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7476 addr = incr;
7477
7478 if (BYTES_BIG_ENDIAN && size < rsize)
7479 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7480
7481 if (indirect)
7482 {
7483 addr = fold_convert (build_pointer_type (ptrtype), addr);
7484 addr = build_va_arg_indirect_ref (addr);
7485 }
7486
7487 /* If the address isn't aligned properly for the type, we need a temporary.
7488 FIXME: This is inefficient, usually we can do this in registers. */
7489 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7490 {
7491 tree tmp = create_tmp_var (type, "va_arg_tmp");
7492 tree dest_addr = build_fold_addr_expr (tmp);
7493 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7494 3, dest_addr, addr, size_int (rsize));
7495 TREE_ADDRESSABLE (tmp) = 1;
7496 gimplify_and_add (copy, pre_p);
7497 addr = dest_addr;
7498 }
7499
7500 else
7501 addr = fold_convert (ptrtype, addr);
7502
7503 incr = fold_build_pointer_plus_hwi (incr, rsize);
7504 gimplify_assign (valist, incr, post_p);
7505
7506 return build_va_arg_indirect_ref (addr);
7507 }
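
/* Worked example of the arithmetic above: va_arg (ap, int) in 64-bit
   mode has size = 4 and rsize = 8; since SPARC is big-endian, the
   value sits in the low-order half of its slot, so the address used is
   ap + 4 while ap itself is advanced by 8.  A type requiring 16-byte
   alignment first rounds ap up to a multiple of 16 (align == 16).  */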
7508 \f
7509 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7510 Specify whether the vector mode is supported by the hardware. */
7511
7512 static bool
7513 sparc_vector_mode_supported_p (machine_mode mode)
7514 {
7515 return TARGET_VIS && VECTOR_MODE_P (mode);
7516 }
7517 \f
7518 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7519
7520 static machine_mode
7521 sparc_preferred_simd_mode (machine_mode mode)
7522 {
7523 if (TARGET_VIS)
7524 switch (mode)
7525 {
7526 case SImode:
7527 return V2SImode;
7528 case HImode:
7529 return V4HImode;
7530 case QImode:
7531 return V8QImode;
7532
7533 default:;
7534 }
7535
7536 return word_mode;
7537 }
7538 \f
7539 /* Return the string to output an unconditional branch to LABEL, which is
7540 the operand number of the label.
7541
7542 DEST is the destination insn (i.e. the label), INSN is the source. */
7543
7544 const char *
7545 output_ubranch (rtx dest, rtx_insn *insn)
7546 {
7547 static char string[64];
7548 bool v9_form = false;
7549 int delta;
7550 char *p;
7551
7552 /* Even if we are trying to use cbcond for this, evaluate
7553 whether we can use V9 branches as our backup plan. */
7554
7555 delta = 5000000;
7556 if (INSN_ADDRESSES_SET_P ())
7557 delta = (INSN_ADDRESSES (INSN_UID (dest))
7558 - INSN_ADDRESSES (INSN_UID (insn)));
7559
7560 /* Leave some instructions for "slop". */
7561 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7562 v9_form = true;
7563
7564 if (TARGET_CBCOND)
7565 {
7566 bool emit_nop = emit_cbcond_nop (insn);
7567 bool far = false;
7568 const char *rval;
7569
7570 if (delta < -500 || delta > 500)
7571 far = true;
7572
7573 if (far)
7574 {
7575 if (v9_form)
7576 rval = "ba,a,pt\t%%xcc, %l0";
7577 else
7578 rval = "b,a\t%l0";
7579 }
7580 else
7581 {
7582 if (emit_nop)
7583 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7584 else
7585 rval = "cwbe\t%%g0, %%g0, %l0";
7586 }
7587 return rval;
7588 }
7589
7590 if (v9_form)
7591 strcpy (string, "ba%*,pt\t%%xcc, ");
7592 else
7593 strcpy (string, "b%*\t");
7594
7595 p = strchr (string, '\0');
7596 *p++ = '%';
7597 *p++ = 'l';
7598 *p++ = '0';
7599 *p++ = '%';
7600 *p++ = '(';
7601 *p = '\0';
7602
7603 return string;
7604 }
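
/* The %* and %( sequences appended above are punctuation codes handled
   by sparc_print_operand below: when the delay slot is unfilled, %*
   prints an annul flag if optimizing for a pre-V9 processor, and %(
   prints a compensating nop otherwise.  */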
7605
7606 /* Return the string to output a conditional branch to LABEL, which is
7607 the operand number of the label. OP is the conditional expression.
7608 XEXP (OP, 0) is assumed to be a condition code register (integer or
7609 floating point) and its mode specifies what kind of comparison we made.
7610
7611 DEST is the destination insn (i.e. the label), INSN is the source.
7612
7613 REVERSED is nonzero if we should reverse the sense of the comparison.
7614
7615 ANNUL is nonzero if we should generate an annulling branch. */
7616
7617 const char *
7618 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7619 rtx_insn *insn)
7620 {
7621 static char string[64];
7622 enum rtx_code code = GET_CODE (op);
7623 rtx cc_reg = XEXP (op, 0);
7624 machine_mode mode = GET_MODE (cc_reg);
7625 const char *labelno, *branch;
7626 int spaces = 8, far;
7627 char *p;
7628
7629 /* v9 branches are limited to +-1MB. If it is too far away,
7630 change
7631
7632 bne,pt %xcc, .LC30
7633
7634 to
7635
7636 be,pn %xcc, .+12
7637 nop
7638 ba .LC30
7639
7640 and
7641
7642 fbne,a,pn %fcc2, .LC29
7643
7644 to
7645
7646 fbe,pt %fcc2, .+16
7647 nop
7648 ba .LC29 */
7649
7650 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7651 if (reversed ^ far)
7652 {
7653 /* Reversal of FP compares needs care -- an ordered compare
7654 becomes an unordered compare and vice versa. */
7655 if (mode == CCFPmode || mode == CCFPEmode)
7656 code = reverse_condition_maybe_unordered (code);
7657 else
7658 code = reverse_condition (code);
7659 }
7660
7661 /* Start by writing the branch condition. */
7662 if (mode == CCFPmode || mode == CCFPEmode)
7663 {
7664 switch (code)
7665 {
7666 case NE:
7667 branch = "fbne";
7668 break;
7669 case EQ:
7670 branch = "fbe";
7671 break;
7672 case GE:
7673 branch = "fbge";
7674 break;
7675 case GT:
7676 branch = "fbg";
7677 break;
7678 case LE:
7679 branch = "fble";
7680 break;
7681 case LT:
7682 branch = "fbl";
7683 break;
7684 case UNORDERED:
7685 branch = "fbu";
7686 break;
7687 case ORDERED:
7688 branch = "fbo";
7689 break;
7690 case UNGT:
7691 branch = "fbug";
7692 break;
7693 case UNLT:
7694 branch = "fbul";
7695 break;
7696 case UNEQ:
7697 branch = "fbue";
7698 break;
7699 case UNGE:
7700 branch = "fbuge";
7701 break;
7702 case UNLE:
7703 branch = "fbule";
7704 break;
7705 case LTGT:
7706 branch = "fblg";
7707 break;
7708 default:
7709 gcc_unreachable ();
7710 }
7711
7712 /* ??? !v9: FP branches cannot be preceded by another floating point
7713 insn. Because there is currently no concept of pre-delay slots,
7714 we can fix this only by always emitting a nop before a floating
7715 point branch. */
7716
7717 string[0] = '\0';
7718 if (! TARGET_V9)
7719 strcpy (string, "nop\n\t");
7720 strcat (string, branch);
7721 }
7722 else
7723 {
7724 switch (code)
7725 {
7726 case NE:
7727 branch = "bne";
7728 break;
7729 case EQ:
7730 branch = "be";
7731 break;
7732 case GE:
7733 if (mode == CCNZmode || mode == CCXNZmode)
7734 branch = "bpos";
7735 else
7736 branch = "bge";
7737 break;
7738 case GT:
7739 branch = "bg";
7740 break;
7741 case LE:
7742 branch = "ble";
7743 break;
7744 case LT:
7745 if (mode == CCNZmode || mode == CCXNZmode)
7746 branch = "bneg";
7747 else
7748 branch = "bl";
7749 break;
7750 case GEU:
7751 branch = "bgeu";
7752 break;
7753 case GTU:
7754 branch = "bgu";
7755 break;
7756 case LEU:
7757 branch = "bleu";
7758 break;
7759 case LTU:
7760 branch = "blu";
7761 break;
7762 default:
7763 gcc_unreachable ();
7764 }
7765 strcpy (string, branch);
7766 }
7767 spaces -= strlen (branch);
7768 p = strchr (string, '\0');
7769
7770 /* Now add the annulling, the label, and a possible noop. */
7771 if (annul && ! far)
7772 {
7773 strcpy (p, ",a");
7774 p += 2;
7775 spaces -= 2;
7776 }
7777
7778 if (TARGET_V9)
7779 {
7780 rtx note;
7781 int v8 = 0;
7782
7783 if (! far && insn && INSN_ADDRESSES_SET_P ())
7784 {
7785 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7786 - INSN_ADDRESSES (INSN_UID (insn)));
7787 /* Leave some instructions for "slop". */
7788 if (delta < -260000 || delta >= 260000)
7789 v8 = 1;
7790 }
7791
7792 switch (mode)
7793 {
7794 case CCmode:
7795 case CCNZmode:
7796 case CCCmode:
7797 labelno = "%%icc, ";
7798 if (v8)
7799 labelno = "";
7800 break;
7801 case CCXmode:
7802 case CCXNZmode:
7803 case CCXCmode:
7804 labelno = "%%xcc, ";
7805 gcc_assert (!v8);
7806 break;
7807 case CCFPmode:
7808 case CCFPEmode:
7809 {
7810 static char v9_fcc_labelno[] = "%%fccX, ";
7811 /* Set the char indicating the number of the fcc reg to use. */
7812 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7813 labelno = v9_fcc_labelno;
7814 if (v8)
7815 {
7816 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7817 labelno = "";
7818 }
7819 }
7820 break;
7821 default:
7822 gcc_unreachable ();
7823 }
7824
7825 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7826 {
7827 strcpy (p,
7828 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7829 ? ",pt" : ",pn");
7830 p += 3;
7831 spaces -= 3;
7832 }
7833 }
7834 else
7835 labelno = "";
7836
7837 if (spaces > 0)
7838 *p++ = '\t';
7839 else
7840 *p++ = ' ';
7841 strcpy (p, labelno);
7842 p = strchr (p, '\0');
7843 if (far)
7844 {
7845 strcpy (p, ".+12\n\t nop\n\tb\t");
7846 /* Skip the next insn if requested or
7847 if we know that it will be a nop. */
7848 if (annul || ! final_sequence)
7849 p[3] = '6';
7850 p += 14;
7851 }
7852 *p++ = '%';
7853 *p++ = 'l';
7854 *p++ = label + '0';
7855 *p++ = '%';
7856 *p++ = '#';
7857 *p = '\0';
7858
7859 return string;
7860 }
7861
7862 /* Emit a library call comparison between floating point X and Y.
7863 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7864 Return the new operator to be used in the comparison sequence.
7865
7866 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7867 values as arguments instead of the TFmode registers themselves,
7868 that's why we cannot call emit_float_lib_cmp. */
7869
7870 rtx
7871 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7872 {
7873 const char *qpfunc;
7874 rtx slot0, slot1, result, tem, tem2, libfunc;
7875 machine_mode mode;
7876 enum rtx_code new_comparison;
7877
7878 switch (comparison)
7879 {
7880 case EQ:
7881 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7882 break;
7883
7884 case NE:
7885 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7886 break;
7887
7888 case GT:
7889 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7890 break;
7891
7892 case GE:
7893 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7894 break;
7895
7896 case LT:
7897 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7898 break;
7899
7900 case LE:
7901 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7902 break;
7903
7904 case ORDERED:
7905 case UNORDERED:
7906 case UNGT:
7907 case UNLT:
7908 case UNEQ:
7909 case UNGE:
7910 case UNLE:
7911 case LTGT:
7912 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7913 break;
7914
7915 default:
7916 gcc_unreachable ();
7917 }
7918
7919 if (TARGET_ARCH64)
7920 {
7921 if (MEM_P (x))
7922 {
7923 tree expr = MEM_EXPR (x);
7924 if (expr)
7925 mark_addressable (expr);
7926 slot0 = x;
7927 }
7928 else
7929 {
7930 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7931 emit_move_insn (slot0, x);
7932 }
7933
7934 if (MEM_P (y))
7935 {
7936 tree expr = MEM_EXPR (y);
7937 if (expr)
7938 mark_addressable (expr);
7939 slot1 = y;
7940 }
7941 else
7942 {
7943 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7944 emit_move_insn (slot1, y);
7945 }
7946
7947 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7948 emit_library_call (libfunc, LCT_NORMAL,
7949 DImode, 2,
7950 XEXP (slot0, 0), Pmode,
7951 XEXP (slot1, 0), Pmode);
7952 mode = DImode;
7953 }
7954 else
7955 {
7956 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7957 emit_library_call (libfunc, LCT_NORMAL,
7958 SImode, 2,
7959 x, TFmode, y, TFmode);
7960 mode = SImode;
7961 }
7962
7963
7964 /* Immediately move the result of the libcall into a pseudo
7965 register so reload doesn't clobber the value if it needs
7966 the return register for a spill reg. */
7967 result = gen_reg_rtx (mode);
7968 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7969
7970 switch (comparison)
7971 {
7972 default:
7973 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7974 case ORDERED:
7975 case UNORDERED:
7976 new_comparison = (comparison == UNORDERED ? EQ : NE);
7977 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7978 case UNGT:
7979 case UNGE:
7980 new_comparison = (comparison == UNGT ? GT : NE);
7981 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7982 case UNLE:
7983 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7984 case UNLT:
7985 tem = gen_reg_rtx (mode);
7986 if (TARGET_ARCH32)
7987 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7988 else
7989 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7990 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7991 case UNEQ:
7992 case LTGT:
7993 tem = gen_reg_rtx (mode);
7994 if (TARGET_ARCH32)
7995 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7996 else
7997 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7998 tem2 = gen_reg_rtx (mode);
7999 if (TARGET_ARCH32)
8000 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8001 else
8002 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8003 new_comparison = (comparison == UNEQ ? EQ : NE);
8004 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8005 }
8006
8007 gcc_unreachable ();
8008 }
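
/* A minimal sketch, not part of GCC proper, of the bit tricks used
   above to decode the libcall result, assuming the usual
   _Q_cmp/_Qp_cmp encoding 0 = equal, 1 = less, 2 = greater,
   3 = unordered; the q_* helpers are hypothetical names used purely
   for exposition.  */
#if 0
static int q_unordered (int r) { return r == 3; }
static int q_ordered (int r) { return r != 3; }
static int q_ungt (int r) { return r > 1; }		 /* greater or unordered */
static int q_unge (int r) { return r != 1; }		 /* anything but less */
static int q_unle (int r) { return r != 2; }		 /* anything but greater */
static int q_unlt (int r) { return (r & 1) != 0; }	 /* 1 or 3 */
static int q_uneq (int r) { return ((r + 1) & 2) == 0; } /* 0 or 3 */
static int q_ltgt (int r) { return ((r + 1) & 2) != 0; } /* 1 or 2 */
#endif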
8009
8010 /* Generate an unsigned DImode to FP conversion. This is the same code
8011 optabs would emit if we didn't have TFmode patterns. */
8012
8013 void
8014 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8015 {
8016 rtx i0, i1, f0, in, out;
8017
8018 out = operands[0];
8019 in = force_reg (DImode, operands[1]);
8020 rtx_code_label *neglab = gen_label_rtx ();
8021 rtx_code_label *donelab = gen_label_rtx ();
8022 i0 = gen_reg_rtx (DImode);
8023 i1 = gen_reg_rtx (DImode);
8024 f0 = gen_reg_rtx (mode);
8025
8026 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8027
8028 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8029 emit_jump_insn (gen_jump (donelab));
8030 emit_barrier ();
8031
8032 emit_label (neglab);
8033
8034 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8035 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8036 emit_insn (gen_iordi3 (i0, i0, i1));
8037 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8038 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8039
8040 emit_label (donelab);
8041 }
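
/* The same algorithm in plain C, as a sketch only (u64_to_fp is a
   hypothetical name): a negative signed view means bit 63 is set, so
   halve the value while folding the lost bit back into bit 0 to keep
   the rounding correct, convert, and double.  */
#if 0
static double
u64_to_fp (unsigned long long x)
{
  if ((long long) x >= 0)
    return (double) (long long) x;		/* fits the signed range */
  unsigned long long h = (x >> 1) | (x & 1);	/* halve, keep sticky bit */
  double d = (double) (long long) h;
  return d + d;					/* undo the halving */
}
#endif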
8042
8043 /* Generate an FP to unsigned DImode conversion. This is the same code
8044 optabs would emit if we didn't have TFmode patterns. */
8045
8046 void
8047 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8048 {
8049 rtx i0, i1, f0, in, out, limit;
8050
8051 out = operands[0];
8052 in = force_reg (mode, operands[1]);
8053 rtx_code_label *neglab = gen_label_rtx ();
8054 rtx_code_label *donelab = gen_label_rtx ();
8055 i0 = gen_reg_rtx (DImode);
8056 i1 = gen_reg_rtx (DImode);
8057 limit = gen_reg_rtx (mode);
8058 f0 = gen_reg_rtx (mode);
8059
8060 emit_move_insn (limit,
8061 const_double_from_real_value (
8062 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8063 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8064
8065 emit_insn (gen_rtx_SET (out,
8066 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8067 emit_jump_insn (gen_jump (donelab));
8068 emit_barrier ();
8069
8070 emit_label (neglab);
8071
8072 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8073 emit_insn (gen_rtx_SET (i0,
8074 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8075 emit_insn (gen_movdi (i1, const1_rtx));
8076 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8077 emit_insn (gen_xordi3 (out, i0, i1));
8078
8079 emit_label (donelab);
8080 }
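
/* And the reverse direction in plain C, again only a sketch (fp_to_u64
   is a hypothetical name): values below 2^63 convert directly; larger
   ones are biased down by 2^63, converted as signed, and the top bit
   is put back with an XOR.  */
#if 0
static unsigned long long
fp_to_u64 (double x)
{
  const double two63 = 9223372036854775808.0;	/* 2^63 */
  if (x < two63)
    return (unsigned long long) (long long) x;
  long long i = (long long) (x - two63);
  return (unsigned long long) i ^ (1ULL << 63);
}
#endif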
8081
8082 /* Return the string to output a compare and branch instruction to DEST.
8083 DEST is the destination insn (i.e. the label), INSN is the source,
8084 and OP is the conditional expression. */
8085
8086 const char *
8087 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8088 {
8089 machine_mode mode = GET_MODE (XEXP (op, 0));
8090 enum rtx_code code = GET_CODE (op);
8091 const char *cond_str, *tmpl;
8092 int far, emit_nop, len;
8093 static char string[64];
8094 char size_char;
8095
8096 /* Compare and Branch is limited to +-2KB. If it is too far away,
8097 change
8098
8099 cxbne X, Y, .LC30
8100
8101 to
8102
8103 cxbe X, Y, .+16
8104 nop
8105 ba,pt %xcc, .LC30
8106 nop */
8107
8108 len = get_attr_length (insn);
8109
8110 far = len == 4;
8111 emit_nop = len == 2;
8112
8113 if (far)
8114 code = reverse_condition (code);
8115
8116 size_char = ((mode == SImode) ? 'w' : 'x');
8117
8118 switch (code)
8119 {
8120 case NE:
8121 cond_str = "ne";
8122 break;
8123
8124 case EQ:
8125 cond_str = "e";
8126 break;
8127
8128 case GE:
8129 cond_str = "ge";
8130 break;
8131
8132 case GT:
8133 cond_str = "g";
8134 break;
8135
8136 case LE:
8137 cond_str = "le";
8138 break;
8139
8140 case LT:
8141 cond_str = "l";
8142 break;
8143
8144 case GEU:
8145 cond_str = "cc";
8146 break;
8147
8148 case GTU:
8149 cond_str = "gu";
8150 break;
8151
8152 case LEU:
8153 cond_str = "leu";
8154 break;
8155
8156 case LTU:
8157 cond_str = "cs";
8158 break;
8159
8160 default:
8161 gcc_unreachable ();
8162 }
8163
8164 if (far)
8165 {
8166 int veryfar = 1, delta;
8167
8168 if (INSN_ADDRESSES_SET_P ())
8169 {
8170 delta = (INSN_ADDRESSES (INSN_UID (dest))
8171 - INSN_ADDRESSES (INSN_UID (insn)));
8172 /* Leave some instructions for "slop". */
8173 if (delta >= -260000 && delta < 260000)
8174 veryfar = 0;
8175 }
8176
8177 if (veryfar)
8178 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8179 else
8180 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8181 }
8182 else
8183 {
8184 if (emit_nop)
8185 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8186 else
8187 tmpl = "c%cb%s\t%%1, %%2, %%3";
8188 }
8189
8190 snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8191
8192 return string;
8193 }
8194
8195 /* Return the string to output a conditional branch to LABEL, testing
8196 register REG. LABEL is the operand number of the label; REG is the
8197 operand number of the reg. OP is the conditional expression. The mode
8198 of REG says what kind of comparison we made.
8199
8200 DEST is the destination insn (i.e. the label), INSN is the source.
8201
8202 REVERSED is nonzero if we should reverse the sense of the comparison.
8203
8204 ANNUL is nonzero if we should generate an annulling branch. */
8205
8206 const char *
8207 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8208 int annul, rtx_insn *insn)
8209 {
8210 static char string[64];
8211 enum rtx_code code = GET_CODE (op);
8212 machine_mode mode = GET_MODE (XEXP (op, 0));
8213 rtx note;
8214 int far;
8215 char *p;
8216
8217 /* Branches on a register are limited to +-128KB. If it is too far away,
8218 change
8219
8220 brnz,pt %g1, .LC30
8221
8222 to
8223
8224 brz,pn %g1, .+12
8225 nop
8226 ba,pt %xcc, .LC30
8227
8228 and
8229
8230 brgez,a,pn %o1, .LC29
8231
8232 to
8233
8234 brlz,pt %o1, .+16
8235 nop
8236 ba,pt %xcc, .LC29 */
8237
8238 far = get_attr_length (insn) >= 3;
8239
8240 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8241 if (reversed ^ far)
8242 code = reverse_condition (code);
8243
8244 /* Only 64 bit versions of these instructions exist. */
8245 gcc_assert (mode == DImode);
8246
8247 /* Start by writing the branch condition. */
8248
8249 switch (code)
8250 {
8251 case NE:
8252 strcpy (string, "brnz");
8253 break;
8254
8255 case EQ:
8256 strcpy (string, "brz");
8257 break;
8258
8259 case GE:
8260 strcpy (string, "brgez");
8261 break;
8262
8263 case LT:
8264 strcpy (string, "brlz");
8265 break;
8266
8267 case LE:
8268 strcpy (string, "brlez");
8269 break;
8270
8271 case GT:
8272 strcpy (string, "brgz");
8273 break;
8274
8275 default:
8276 gcc_unreachable ();
8277 }
8278
8279 p = strchr (string, '\0');
8280
8281 /* Now add the annulling, reg, label, and nop. */
8282 if (annul && ! far)
8283 {
8284 strcpy (p, ",a");
8285 p += 2;
8286 }
8287
8288 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8289 {
8290 strcpy (p,
8291 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8292 ? ",pt" : ",pn");
8293 p += 3;
8294 }
8295
8296 *p = p < string + 8 ? '\t' : ' ';
8297 p++;
8298 *p++ = '%';
8299 *p++ = '0' + reg;
8300 *p++ = ',';
8301 *p++ = ' ';
8302 if (far)
8303 {
8304 int veryfar = 1, delta;
8305
8306 if (INSN_ADDRESSES_SET_P ())
8307 {
8308 delta = (INSN_ADDRESSES (INSN_UID (dest))
8309 - INSN_ADDRESSES (INSN_UID (insn)));
8310 /* Leave some instructions for "slop". */
8311 if (delta >= -260000 && delta < 260000)
8312 veryfar = 0;
8313 }
8314
8315 strcpy (p, ".+12\n\t nop\n\t");
8316 /* Skip the next insn if requested or
8317 if we know that it will be a nop. */
8318 if (annul || ! final_sequence)
8319 p[3] = '6';
8320 p += 12;
8321 if (veryfar)
8322 {
8323 strcpy (p, "b\t");
8324 p += 2;
8325 }
8326 else
8327 {
8328 strcpy (p, "ba,pt\t%%xcc, ");
8329 p += 13;
8330 }
8331 }
8332 *p++ = '%';
8333 *p++ = 'l';
8334 *p++ = '0' + label;
8335 *p++ = '%';
8336 *p++ = '#';
8337 *p = '\0';
8338
8339 return string;
8340 }
8341
8342 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8343 Such instructions cannot be used in the delay slot of a return insn on v9.
8344 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8345 */
8346
8347 static int
8348 epilogue_renumber (register rtx *where, int test)
8349 {
8350 register const char *fmt;
8351 register int i;
8352 register enum rtx_code code;
8353
8354 if (*where == 0)
8355 return 0;
8356
8357 code = GET_CODE (*where);
8358
8359 switch (code)
8360 {
8361 case REG:
8362 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8363 return 1;
8364 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8365 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8366 /* fallthrough */
8367 case SCRATCH:
8368 case CC0:
8369 case PC:
8370 case CONST_INT:
8371 case CONST_WIDE_INT:
8372 case CONST_DOUBLE:
8373 return 0;
8374
8375 /* Do not replace the frame pointer with the stack pointer because
8376 it can cause the delayed instruction to load below the stack.
8377 This occurs when instructions like:
8378
8379 (set (reg/i:SI 24 %i0)
8380 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8381 (const_int -20 [0xffffffec])) 0))
8382
8383 are in the return delayed slot. */
8384 case PLUS:
8385 if (GET_CODE (XEXP (*where, 0)) == REG
8386 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8387 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8388 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8389 return 1;
8390 break;
8391
8392 case MEM:
8393 if (SPARC_STACK_BIAS
8394 && GET_CODE (XEXP (*where, 0)) == REG
8395 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8396 return 1;
8397 break;
8398
8399 default:
8400 break;
8401 }
8402
8403 fmt = GET_RTX_FORMAT (code);
8404
8405 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8406 {
8407 if (fmt[i] == 'E')
8408 {
8409 register int j;
8410 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8411 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8412 return 1;
8413 }
8414 else if (fmt[i] == 'e'
8415 && epilogue_renumber (&(XEXP (*where, i)), test))
8416 return 1;
8417 }
8418 return 0;
8419 }
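
/* For instance, with TEST == 0 the value copy

     (set (reg:SI 24 %i0) (reg:SI 25 %i1))

   sitting in the delay slot of a v9 return insn is rewritten into

     (set (reg:SI 8 %o0) (reg:SI 9 %o1))

   because the delay slot executes after the register window has been
   restored, where the callee's %i registers are visible under their
   caller-window %o names.  */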
8420 \f
8421 /* Leaf functions and non-leaf functions have different needs. */
8422
8423 static const int
8424 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8425
8426 static const int
8427 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8428
8429 static const int *const reg_alloc_orders[] = {
8430 reg_leaf_alloc_order,
8431 reg_nonleaf_alloc_order};
8432
8433 void
8434 order_regs_for_local_alloc (void)
8435 {
8436 static int last_order_nonleaf = 1;
8437
8438 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8439 {
8440 last_order_nonleaf = !last_order_nonleaf;
8441 memcpy ((char *) reg_alloc_order,
8442 (const char *) reg_alloc_orders[last_order_nonleaf],
8443 FIRST_PSEUDO_REGISTER * sizeof (int));
8444 }
8445 }
8446 \f
8447 /* Return 1 if REG and MEM are legitimate enough to allow the various
8448 mem<-->reg splits to be run. */
8449
8450 int
8451 sparc_splitdi_legitimate (rtx reg, rtx mem)
8452 {
8453 /* Punt if we are here by mistake. */
8454 gcc_assert (reload_completed);
8455
8456 /* We must have an offsettable memory reference. */
8457 if (! offsettable_memref_p (mem))
8458 return 0;
8459
8460 /* If we have legitimate args for ldd/std, we do not want
8461 the split to happen. */
8462 if ((REGNO (reg) % 2) == 0
8463 && mem_min_alignment (mem, 8))
8464 return 0;
8465
8466 /* Success. */
8467 return 1;
8468 }
8469
8470 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8471
8472 int
8473 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8474 {
8475 int regno1, regno2;
8476
8477 if (GET_CODE (reg1) == SUBREG)
8478 reg1 = SUBREG_REG (reg1);
8479 if (GET_CODE (reg1) != REG)
8480 return 0;
8481 regno1 = REGNO (reg1);
8482
8483 if (GET_CODE (reg2) == SUBREG)
8484 reg2 = SUBREG_REG (reg2);
8485 if (GET_CODE (reg2) != REG)
8486 return 0;
8487 regno2 = REGNO (reg2);
8488
8489 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8490 return 1;
8491
8492 if (TARGET_VIS3)
8493 {
8494 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8495 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8496 return 1;
8497 }
8498
8499 return 0;
8500 }
8501
8502 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8503 This makes them candidates for using ldd and std insns.
8504
8505 Note reg1 and reg2 *must* be hard registers. */
8506
8507 int
8508 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8509 {
8510 /* We might have been passed a SUBREG. */
8511 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8512 return 0;
8513
8514 if (REGNO (reg1) % 2 != 0)
8515 return 0;
8516
8517 /* Integer ldd is deprecated in SPARC V9. */
8518 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8519 return 0;
8520
8521 return (REGNO (reg1) == REGNO (reg2) - 1);
8522 }
8523
8524 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8525 an ldd or std insn.
8526
8527 This can only happen when addr1 and addr2, the addresses in mem1
8528 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8529 addr1 must also be aligned on a 64-bit boundary.
8530
8531 Also, if dependent_reg_rtx is not null, it should not be used to
8532 compute the address for mem1, i.e. we cannot optimize a sequence
8533 like:
8534 ld [%o0], %o0
8535 ld [%o0 + 4], %o1
8536 to
8537 ldd [%o0], %o0
8538 nor:
8539 ld [%g3 + 4], %g3
8540 ld [%g3], %g2
8541 to
8542 ldd [%g3], %g2
8543
8544 But, note that the transformation from:
8545 ld [%g2 + 4], %g3
8546 ld [%g2], %g2
8547 to
8548 ldd [%g2], %g2
8549 is perfectly fine. Thus, the peephole2 patterns always pass us
8550 the destination register of the first load, never the second one.
8551
8552 For stores we don't have a similar problem, so dependent_reg_rtx is
8553 NULL_RTX. */
8554
8555 int
8556 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8557 {
8558 rtx addr1, addr2;
8559 unsigned int reg1;
8560 HOST_WIDE_INT offset1;
8561
8562 /* The mems cannot be volatile. */
8563 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8564 return 0;
8565
8566 /* MEM1 should be aligned on a 64-bit boundary. */
8567 if (MEM_ALIGN (mem1) < 64)
8568 return 0;
8569
8570 addr1 = XEXP (mem1, 0);
8571 addr2 = XEXP (mem2, 0);
8572
8573 /* Extract a register number and offset (if used) from the first addr. */
8574 if (GET_CODE (addr1) == PLUS)
8575 {
8576 /* If not a REG, return zero. */
8577 if (GET_CODE (XEXP (addr1, 0)) != REG)
8578 return 0;
8579 else
8580 {
8581 reg1 = REGNO (XEXP (addr1, 0));
8582 /* The offset must be constant! */
8583 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8584 return 0;
8585 offset1 = INTVAL (XEXP (addr1, 1));
8586 }
8587 }
8588 else if (GET_CODE (addr1) != REG)
8589 return 0;
8590 else
8591 {
8592 reg1 = REGNO (addr1);
8593 /* This was a simple (mem (reg)) expression. Offset is 0. */
8594 offset1 = 0;
8595 }
8596
8597 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8598 if (GET_CODE (addr2) != PLUS)
8599 return 0;
8600
8601 if (GET_CODE (XEXP (addr2, 0)) != REG
8602 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8603 return 0;
8604
8605 if (reg1 != REGNO (XEXP (addr2, 0)))
8606 return 0;
8607
8608 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8609 return 0;
8610
8611 /* The first offset must be evenly divisible by 8 to ensure the
8612 address is 64 bit aligned. */
8613 if (offset1 % 8 != 0)
8614 return 0;
8615
8616 /* The offset for the second addr must be 4 more than the first addr. */
8617 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8618 return 0;
8619
8620 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8621 instructions. */
8622 return 1;
8623 }
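
/* Examples, with mem1 known to be 64-bit aligned:

     ld [%o0 + 8], %g2; ld [%o0 + 12], %g3  - valid ldd candidate
     ld [%o0 + 4], %g2; ld [%o0 + 8], %g3   - rejected, offset1 % 8 != 0
     ld [%o0], %o0;     ld [%o0 + 4], %o1   - rejected via dependent_reg_rtx  */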
8624
8625 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8626
8627 rtx
8628 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8629 {
8630 rtx x = widen_memory_access (mem1, mode, 0);
8631 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8632 return x;
8633 }
8634
8635 /* Return 1 if reg is a pseudo, or is the first register in
8636 a hard register pair. This makes it suitable for use in
8637 ldd and std insns. */
8638
8639 int
8640 register_ok_for_ldd (rtx reg)
8641 {
8642 /* We might have been passed a SUBREG. */
8643 if (!REG_P (reg))
8644 return 0;
8645
8646 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8647 return (REGNO (reg) % 2 == 0);
8648
8649 return 1;
8650 }
8651
8652 /* Return 1 if OP, a MEM, has an address which is known to be
8653 aligned to an 8-byte boundary. */
8654
8655 int
8656 memory_ok_for_ldd (rtx op)
8657 {
8658 /* In 64-bit mode, we assume that the address is word-aligned. */
8659 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8660 return 0;
8661
8662 if (! can_create_pseudo_p ()
8663 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8664 return 0;
8665
8666 return 1;
8667 }
8668 \f
8669 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8670
8671 static bool
8672 sparc_print_operand_punct_valid_p (unsigned char code)
8673 {
8674 if (code == '#'
8675 || code == '*'
8676 || code == '('
8677 || code == ')'
8678 || code == '_'
8679 || code == '&')
8680 return true;
8681
8682 return false;
8683 }
8684
8685 /* Implement TARGET_PRINT_OPERAND.
8686 Print operand X (an rtx) in assembler syntax to file FILE.
8687 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8688 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8689
8690 static void
8691 sparc_print_operand (FILE *file, rtx x, int code)
8692 {
8693 const char *s;
8694
8695 switch (code)
8696 {
8697 case '#':
8698 /* Output an insn in a delay slot. */
8699 if (final_sequence)
8700 sparc_indent_opcode = 1;
8701 else
8702 fputs ("\n\t nop", file);
8703 return;
8704 case '*':
8705 /* Output an annul flag if there's nothing for the delay slot and we
8706 are optimizing. This is always used with '(' below.
8707 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8708 this is a dbx bug. So, we only do this when optimizing.
8709 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8710 Always emit a nop in case the next instruction is a branch. */
8711 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8712 fputs (",a", file);
8713 return;
8714 case '(':
8715 /* Output a 'nop' if there's nothing for the delay slot and we are
8716 not optimizing. This is always used with '*' above. */
8717 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8718 fputs ("\n\t nop", file);
8719 else if (final_sequence)
8720 sparc_indent_opcode = 1;
8721 return;
8722 case ')':
8723 /* Output the right displacement from the saved PC on function return.
8724 The caller may have placed an "unimp" insn immediately after the call
8725 so we have to account for it. This insn is used in the 32-bit ABI
8726 when calling a function that returns a non-zero-sized structure. The
8727 64-bit ABI doesn't have it. Be careful to have this test be the same
8728 as that for the call. The exception is when sparc_std_struct_return
8729 is enabled, the psABI is followed exactly and the adjustment is made
8730 by the code in sparc_struct_value_rtx. The call emitted is the same
8731 when sparc_std_struct_return is enabled. */
8732 if (!TARGET_ARCH64
8733 && cfun->returns_struct
8734 && !sparc_std_struct_return
8735 && DECL_SIZE (DECL_RESULT (current_function_decl))
8736 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8737 == INTEGER_CST
8738 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8739 fputs ("12", file);
8740 else
8741 fputc ('8', file);
8742 return;
8743 case '_':
8744 /* Output the Embedded Medium/Anywhere code model base register. */
8745 fputs (EMBMEDANY_BASE_REG, file);
8746 return;
8747 case '&':
8748 /* Print some local dynamic TLS name. */
8749 if (const char *name = get_some_local_dynamic_name ())
8750 assemble_name (file, name);
8751 else
8752 output_operand_lossage ("'%%&' used without any "
8753 "local dynamic TLS references");
8754 return;
8755
8756 case 'Y':
8757 /* Adjust the operand to take into account a RESTORE operation. */
8758 if (GET_CODE (x) == CONST_INT)
8759 break;
8760 else if (GET_CODE (x) != REG)
8761 output_operand_lossage ("invalid %%Y operand");
8762 else if (REGNO (x) < 8)
8763 fputs (reg_names[REGNO (x)], file);
8764 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8765 fputs (reg_names[REGNO (x)-16], file);
8766 else
8767 output_operand_lossage ("invalid %%Y operand");
8768 return;
8769 case 'L':
8770 /* Print out the low order register name of a register pair. */
8771 if (WORDS_BIG_ENDIAN)
8772 fputs (reg_names[REGNO (x)+1], file);
8773 else
8774 fputs (reg_names[REGNO (x)], file);
8775 return;
8776 case 'H':
8777 /* Print out the high order register name of a register pair. */
8778 if (WORDS_BIG_ENDIAN)
8779 fputs (reg_names[REGNO (x)], file);
8780 else
8781 fputs (reg_names[REGNO (x)+1], file);
8782 return;
8783 case 'R':
8784 /* Print out the second register name of a register pair or quad.
8785 I.e., R (%o0) => %o1. */
8786 fputs (reg_names[REGNO (x)+1], file);
8787 return;
8788 case 'S':
8789 /* Print out the third register name of a register quad.
8790 I.e., S (%o0) => %o2. */
8791 fputs (reg_names[REGNO (x)+2], file);
8792 return;
8793 case 'T':
8794 /* Print out the fourth register name of a register quad.
8795 I.e., T (%o0) => %o3. */
8796 fputs (reg_names[REGNO (x)+3], file);
8797 return;
8798 case 'x':
8799 /* Print a condition code register. */
8800 if (REGNO (x) == SPARC_ICC_REG)
8801 {
8802 switch (GET_MODE (x))
8803 {
8804 case CCmode:
8805 case CCNZmode:
8806 case CCCmode:
8807 s = "%icc";
8808 break;
8809 case CCXmode:
8810 case CCXNZmode:
8811 case CCXCmode:
8812 s = "%xcc";
8813 break;
8814 default:
8815 gcc_unreachable ();
8816 }
8817 fputs (s, file);
8818 }
8819 else
8820 /* %fccN register */
8821 fputs (reg_names[REGNO (x)], file);
8822 return;
8823 case 'm':
8824 /* Print the operand's address only. */
8825 output_address (GET_MODE (x), XEXP (x, 0));
8826 return;
8827 case 'r':
8828 /* In this case we need a register. Use %g0 if the
8829 operand is const0_rtx. */
8830 if (x == const0_rtx
8831 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8832 {
8833 fputs ("%g0", file);
8834 return;
8835 }
8836 else
8837 break;
8838
8839 case 'A':
8840 switch (GET_CODE (x))
8841 {
8842 case IOR:
8843 s = "or";
8844 break;
8845 case AND:
8846 s = "and";
8847 break;
8848 case XOR:
8849 s = "xor";
8850 break;
8851 default:
8852 output_operand_lossage ("invalid %%A operand");
8853 s = "";
8854 break;
8855 }
8856 fputs (s, file);
8857 return;
8858
8859 case 'B':
8860 switch (GET_CODE (x))
8861 {
8862 case IOR:
8863 s = "orn";
8864 break;
8865 case AND:
8866 s = "andn";
8867 break;
8868 case XOR:
8869 s = "xnor";
8870 break;
8871 default:
8872 output_operand_lossage ("invalid %%B operand");
8873 s = "";
8874 break;
8875 }
8876 fputs (s, file);
8877 return;
8878
8879 /* This is used by the conditional move instructions. */
8880 case 'C':
8881 {
8882 machine_mode mode = GET_MODE (XEXP (x, 0));
8883 switch (GET_CODE (x))
8884 {
8885 case NE:
8886 s = "ne";
8887 break;
8888 case EQ:
8889 s = "e";
8890 break;
8891 case GE:
8892 if (mode == CCNZmode || mode == CCXNZmode)
8893 s = "pos";
8894 else
8895 s = "ge";
8896 break;
8897 case GT:
8898 s = "g";
8899 break;
8900 case LE:
8901 s = "le";
8902 break;
8903 case LT:
8904 if (mode == CCNZmode || mode == CCXNZmode)
8905 s = "neg";
8906 else
8907 s = "l";
8908 break;
8909 case GEU:
8910 s = "geu";
8911 break;
8912 case GTU:
8913 s = "gu";
8914 break;
8915 case LEU:
8916 s = "leu";
8917 break;
8918 case LTU:
8919 s = "lu";
8920 break;
8921 case LTGT:
8922 s = "lg";
8923 break;
8924 case UNORDERED:
8925 s = "u";
8926 break;
8927 case ORDERED:
8928 s = "o";
8929 break;
8930 case UNLT:
8931 s = "ul";
8932 break;
8933 case UNLE:
8934 s = "ule";
8935 break;
8936 case UNGT:
8937 s = "ug";
8938 break;
8939 case UNGE:
8940 s = "uge"
8941 ; break;
8942 case UNEQ:
8943 s = "ue";
8944 break;
8945 default:
8946 output_operand_lossage ("invalid %%C operand");
8947 s = "";
8948 break;
8949 }
8950 fputs (s, file);
8951 return;
8952 }
8953
8954 /* These are used by the movr instruction pattern. */
8955 case 'D':
8956 {
8957 switch (GET_CODE (x))
8958 {
8959 case NE:
8960 s = "ne";
8961 break;
8962 case EQ:
8963 s = "e";
8964 break;
8965 case GE:
8966 s = "gez";
8967 break;
8968 case LT:
8969 s = "lz";
8970 break;
8971 case LE:
8972 s = "lez";
8973 break;
8974 case GT:
8975 s = "gz";
8976 break;
8977 default:
8978 output_operand_lossage ("invalid %%D operand");
8979 s = "";
8980 break;
8981 }
8982 fputs (s, file);
8983 return;
8984 }
8985
8986 case 'b':
8987 {
8988 /* Print a sign-extended character. */
8989 int i = trunc_int_for_mode (INTVAL (x), QImode);
8990 fprintf (file, "%d", i);
8991 return;
8992 }
8993
8994 case 'f':
8995 /* Operand must be a MEM; write its address. */
8996 if (GET_CODE (x) != MEM)
8997 output_operand_lossage ("invalid %%f operand");
8998 output_address (GET_MODE (x), XEXP (x, 0));
8999 return;
9000
9001 case 's':
9002 {
9003 /* Print a sign-extended 32-bit value. */
9004 HOST_WIDE_INT i;
9005 if (GET_CODE (x) == CONST_INT)
9006 i = INTVAL (x);
9007 else
9008 {
9009 output_operand_lossage ("invalid %%s operand");
9010 return;
9011 }
9012 i = trunc_int_for_mode (i, SImode);
9013 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9014 return;
9015 }
9016
9017 case 0:
9018 /* Do nothing special. */
9019 break;
9020
9021 default:
9022 /* Undocumented flag. */
9023 output_operand_lossage ("invalid operand output code");
9024 }
9025
9026 if (GET_CODE (x) == REG)
9027 fputs (reg_names[REGNO (x)], file);
9028 else if (GET_CODE (x) == MEM)
9029 {
9030 fputc ('[', file);
9031 /* Poor Sun assembler doesn't understand absolute addressing. */
9032 if (CONSTANT_P (XEXP (x, 0)))
9033 fputs ("%g0+", file);
9034 output_address (GET_MODE (x), XEXP (x, 0));
9035 fputc (']', file);
9036 }
9037 else if (GET_CODE (x) == HIGH)
9038 {
9039 fputs ("%hi(", file);
9040 output_addr_const (file, XEXP (x, 0));
9041 fputc (')', file);
9042 }
9043 else if (GET_CODE (x) == LO_SUM)
9044 {
9045 sparc_print_operand (file, XEXP (x, 0), 0);
9046 if (TARGET_CM_MEDMID)
9047 fputs ("+%l44(", file);
9048 else
9049 fputs ("+%lo(", file);
9050 output_addr_const (file, XEXP (x, 1));
9051 fputc (')', file);
9052 }
9053 else if (GET_CODE (x) == CONST_DOUBLE)
9054 output_operand_lossage ("floating-point constant not a valid immediate operand");
9055 else
9056 output_addr_const (file, x);
9057 }
9058
9059 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9060
9061 static void
9062 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9063 {
9064 register rtx base, index = 0;
9065 int offset = 0;
9066 register rtx addr = x;
9067
9068 if (REG_P (addr))
9069 fputs (reg_names[REGNO (addr)], file);
9070 else if (GET_CODE (addr) == PLUS)
9071 {
9072 if (CONST_INT_P (XEXP (addr, 0)))
9073 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9074 else if (CONST_INT_P (XEXP (addr, 1)))
9075 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9076 else
9077 base = XEXP (addr, 0), index = XEXP (addr, 1);
9078 if (GET_CODE (base) == LO_SUM)
9079 {
9080 gcc_assert (USE_AS_OFFSETABLE_LO10
9081 && TARGET_ARCH64
9082 && ! TARGET_CM_MEDMID);
9083 output_operand (XEXP (base, 0), 0);
9084 fputs ("+%lo(", file);
9085 output_address (VOIDmode, XEXP (base, 1));
9086 fprintf (file, ")+%d", offset);
9087 }
9088 else
9089 {
9090 fputs (reg_names[REGNO (base)], file);
9091 if (index == 0)
9092 fprintf (file, "%+d", offset);
9093 else if (REG_P (index))
9094 fprintf (file, "+%s", reg_names[REGNO (index)]);
9095 else if (GET_CODE (index) == SYMBOL_REF
9096 || GET_CODE (index) == LABEL_REF
9097 || GET_CODE (index) == CONST)
9098 fputc ('+', file), output_addr_const (file, index);
9099 else gcc_unreachable ();
9100 }
9101 }
9102 else if (GET_CODE (addr) == MINUS
9103 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9104 {
9105 output_addr_const (file, XEXP (addr, 0));
9106 fputs ("-(", file);
9107 output_addr_const (file, XEXP (addr, 1));
9108 fputs ("-.)", file);
9109 }
9110 else if (GET_CODE (addr) == LO_SUM)
9111 {
9112 output_operand (XEXP (addr, 0), 0);
9113 if (TARGET_CM_MEDMID)
9114 fputs ("+%l44(", file);
9115 else
9116 fputs ("+%lo(", file);
9117 output_address (VOIDmode, XEXP (addr, 1));
9118 fputc (')', file);
9119 }
9120 else if (flag_pic
9121 && GET_CODE (addr) == CONST
9122 && GET_CODE (XEXP (addr, 0)) == MINUS
9123 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9124 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9125 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9126 {
9127 addr = XEXP (addr, 0);
9128 output_addr_const (file, XEXP (addr, 0));
9129 /* Group the args of the second CONST in parentheses. */
9130 fputs ("-(", file);
9131 /* Skip past the second CONST--it does nothing for us. */
9132 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9133 /* Close the parenthesis. */
9134 fputc (')', file);
9135 }
9136 else
9137 {
9138 output_addr_const (file, addr);
9139 }
9140 }
9141 \f
9142 /* Target hook for assembling integer objects. The sparc version has
9143 special handling for aligned DI-mode objects. */
9144
9145 static bool
9146 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9147 {
9148 /* ??? We only output .xword's for symbols and only then in environments
9149 where the assembler can handle them. */
9150 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9151 {
9152 if (TARGET_V9)
9153 {
9154 assemble_integer_with_op ("\t.xword\t", x);
9155 return true;
9156 }
9157 else
9158 {
9159 assemble_aligned_integer (4, const0_rtx);
9160 assemble_aligned_integer (4, x);
9161 return true;
9162 }
9163 }
9164 return default_assemble_integer (x, size, aligned_p);
9165 }
9166 \f
9167 /* Return the value of a code used in the .proc pseudo-op that says
9168 what kind of result this function returns. For non-C types, we pick
9169 the closest C type. */
9170
9171 #ifndef SHORT_TYPE_SIZE
9172 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9173 #endif
9174
9175 #ifndef INT_TYPE_SIZE
9176 #define INT_TYPE_SIZE BITS_PER_WORD
9177 #endif
9178
9179 #ifndef LONG_TYPE_SIZE
9180 #define LONG_TYPE_SIZE BITS_PER_WORD
9181 #endif
9182
9183 #ifndef LONG_LONG_TYPE_SIZE
9184 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9185 #endif
9186
9187 #ifndef FLOAT_TYPE_SIZE
9188 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9189 #endif
9190
9191 #ifndef DOUBLE_TYPE_SIZE
9192 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9193 #endif
9194
9195 #ifndef LONG_DOUBLE_TYPE_SIZE
9196 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9197 #endif
9198
9199 unsigned long
9200 sparc_type_code (register tree type)
9201 {
9202 register unsigned long qualifiers = 0;
9203 register unsigned shift;
9204
9205 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9206 setting more, since some assemblers will give an error for this. Also,
9207 we must be careful to avoid shifts of 32 bits or more to avoid getting
9208 unpredictable results. */
9209
9210 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9211 {
9212 switch (TREE_CODE (type))
9213 {
9214 case ERROR_MARK:
9215 return qualifiers;
9216
9217 case ARRAY_TYPE:
9218 qualifiers |= (3 << shift);
9219 break;
9220
9221 case FUNCTION_TYPE:
9222 case METHOD_TYPE:
9223 qualifiers |= (2 << shift);
9224 break;
9225
9226 case POINTER_TYPE:
9227 case REFERENCE_TYPE:
9228 case OFFSET_TYPE:
9229 qualifiers |= (1 << shift);
9230 break;
9231
9232 case RECORD_TYPE:
9233 return (qualifiers | 8);
9234
9235 case UNION_TYPE:
9236 case QUAL_UNION_TYPE:
9237 return (qualifiers | 9);
9238
9239 case ENUMERAL_TYPE:
9240 return (qualifiers | 10);
9241
9242 case VOID_TYPE:
9243 return (qualifiers | 16);
9244
9245 case INTEGER_TYPE:
9246 /* If this is a range type, consider it to be the underlying
9247 type. */
9248 if (TREE_TYPE (type) != 0)
9249 break;
9250
9251 /* Carefully distinguish all the standard types of C,
9252 without messing up if the language is not C. We do this by
9253 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9254 look at both the names and the above fields, but that's redundant.
9255 Any type whose size is between two C types will be considered
9256 to be the wider of the two types. Also, we do not have a
9257 special code to use for "long long", so anything wider than
9258 long is treated the same. Note that we can't distinguish
9259 between "int" and "long" in this code if they are the same
9260 size, but that's fine, since neither can the assembler. */
9261
9262 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9263 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9264
9265 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9266 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9267
9268 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9269 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9270
9271 else
9272 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9273
9274 case REAL_TYPE:
9275 /* If this is a range type, consider it to be the underlying
9276 type. */
9277 if (TREE_TYPE (type) != 0)
9278 break;
9279
9280 /* Carefully distinguish all the standard types of C,
9281 without messing up if the language is not C. */
9282
9283 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9284 return (qualifiers | 6);
9285
9286 else
9287 return (qualifiers | 7);
9288
9289 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9290 /* ??? We need to distinguish between double and float complex types,
9291 but I don't know how yet because I can't reach this code from
9292 existing front-ends. */
9293 return (qualifiers | 7); /* Who knows? */
9294
9295 case VECTOR_TYPE:
9296 case BOOLEAN_TYPE: /* Boolean truth value type. */
9297 case LANG_TYPE:
9298 case NULLPTR_TYPE:
9299 return qualifiers;
9300
9301 default:
9302 gcc_unreachable (); /* Not a type! */
9303 }
9304 }
9305
9306 return qualifiers;
9307 }
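
/* Worked example: for 'short **' the loop above runs twice through
   POINTER_TYPE, setting bits 1 << 6 and 1 << 8, then reaches the
   INTEGER_TYPE case with a precision of SHORT_TYPE_SIZE, returning
   0x40 | 0x100 | 3 == 0x143.  */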
9308 \f
9309 /* Nested function support. */
9310
9311 /* Emit RTL insns to initialize the variable parts of a trampoline.
9312 FNADDR is an RTX for the address of the function's pure code.
9313 CXT is an RTX for the static chain value for the function.
9314
9315 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9316 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9317 (to store insns). This is a bit excessive. Perhaps a different
9318 mechanism would be better here.
9319
9320 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9321
9322 static void
9323 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9324 {
9325 /* SPARC 32-bit trampoline:
9326
9327 sethi %hi(fn), %g1
9328 sethi %hi(static), %g2
9329 jmp %g1+%lo(fn)
9330 or %g2, %lo(static), %g2
9331
9332 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9333 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9334 */
9335
9336 emit_move_insn
9337 (adjust_address (m_tramp, SImode, 0),
9338 expand_binop (SImode, ior_optab,
9339 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9340 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9341 NULL_RTX, 1, OPTAB_DIRECT));
9342
9343 emit_move_insn
9344 (adjust_address (m_tramp, SImode, 4),
9345 expand_binop (SImode, ior_optab,
9346 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9347 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9348 NULL_RTX, 1, OPTAB_DIRECT));
9349
9350 emit_move_insn
9351 (adjust_address (m_tramp, SImode, 8),
9352 expand_binop (SImode, ior_optab,
9353 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9354 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9355 NULL_RTX, 1, OPTAB_DIRECT));
9356
9357 emit_move_insn
9358 (adjust_address (m_tramp, SImode, 12),
9359 expand_binop (SImode, ior_optab,
9360 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9361 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9362 NULL_RTX, 1, OPTAB_DIRECT));
9363
9364 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9365 aligned on a 16 byte boundary so one flush clears it all. */
9366 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9367 if (sparc_cpu != PROCESSOR_ULTRASPARC
9368 && sparc_cpu != PROCESSOR_ULTRASPARC3
9369 && sparc_cpu != PROCESSOR_NIAGARA
9370 && sparc_cpu != PROCESSOR_NIAGARA2
9371 && sparc_cpu != PROCESSOR_NIAGARA3
9372 && sparc_cpu != PROCESSOR_NIAGARA4
9373 && sparc_cpu != PROCESSOR_NIAGARA7)
9374 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9375
9376 /* Call __enable_execute_stack after writing onto the stack to make sure
9377 the stack address is accessible. */
9378 #ifdef HAVE_ENABLE_EXECUTE_STACK
9379 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9380 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9381 #endif
9382
9383 }
9384
9385 /* The 64-bit version is simpler because it makes more sense to load the
9386 values as "immediate" data out of the trampoline. It's also easier since
9387 we can read the PC without clobbering a register. */
9388
9389 static void
9390 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9391 {
9392 /* SPARC 64-bit trampoline:
9393
9394 rd %pc, %g1
9395 ldx [%g1+24], %g5
9396 jmp %g5
9397 ldx [%g1+16], %g5
9398 +16 bytes data
9399 */
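/* Annotation of the constants stored below (informal, not part of
   the original source):

     0x83414000  rd  %pc, %g1
     0xca586018  ldx [%g1+24], %g5    ! %g5 <- FNADDR (written at +24)
     0x81c14000  jmp %g5
     0xca586010  ldx [%g1+16], %g5    ! delay slot: %g5 <- CXT (at +16)

   The jmp reads %g5 while it still contains FNADDR; the ldx in the
   delay slot then reloads %g5 with the static chain value before the
   callee executes, which is exactly what the 64-bit static chain
   register (%g5) must hold on entry.  */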
9400
9401 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9402 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9403 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9404 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9405 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9406 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9407 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9408 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9409 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9410 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9411 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9412
9413 if (sparc_cpu != PROCESSOR_ULTRASPARC
9414 && sparc_cpu != PROCESSOR_ULTRASPARC3
9415 && sparc_cpu != PROCESSOR_NIAGARA
9416 && sparc_cpu != PROCESSOR_NIAGARA2
9417 && sparc_cpu != PROCESSOR_NIAGARA3
9418 && sparc_cpu != PROCESSOR_NIAGARA4
9419 && sparc_cpu != PROCESSOR_NIAGARA7)
9420 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9421
9422 /* Call __enable_execute_stack after writing onto the stack to make sure
9423 the stack address is accessible. */
9424 #ifdef HAVE_ENABLE_EXECUTE_STACK
9425 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9426 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9427 #endif
9428 }
9429
9430 /* Worker for TARGET_TRAMPOLINE_INIT. */
9431
9432 static void
9433 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9434 {
9435 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9436 cxt = force_reg (Pmode, cxt);
9437 if (TARGET_ARCH64)
9438 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9439 else
9440 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9441 }
9442 \f
9443 /* Adjust the cost of a scheduling dependency. Return the new cost of
9444 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9445
9446 static int
9447 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9448 int cost)
9449 {
9450 enum attr_type insn_type;
9451
9452 if (recog_memoized (insn) < 0)
9453 return cost;
9454
9455 insn_type = get_attr_type (insn);
9456
9457 if (dep_type == 0)
9458 {
9459 /* Data dependency; DEP_INSN writes a register that INSN reads some
9460 cycles later. */
9461
9462 /* if a load, then the dependence must be on the memory address;
9463 add an extra "cycle". Note that the cost could be two cycles
9464 if the reg was written late in an instruction group; we can't tell
9465 here. */
9466 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9467 return cost + 3;
9468
9469 /* Get the delay only if the address of the store is the dependence. */
9470 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9471 {
9472 rtx pat = PATTERN (insn);
9473 rtx dep_pat = PATTERN (dep_insn);
9474
9475 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9476 return cost; /* This should not happen! */
9477
9478 /* The dependency between the two instructions was on the data that
9479 is being stored. Assume that this implies that the address of the
9480 store is not dependent. */
9481 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9482 return cost;
9483
9484 return cost + 3; /* An approximation. */
9485 }
9486
9487 /* A shift instruction cannot receive its data from an instruction
9488 in the same cycle; add a one cycle penalty. */
9489 if (insn_type == TYPE_SHIFT)
9490 return cost + 3; /* Split before cascade into shift. */
9491 }
9492 else
9493 {
9494 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9495 INSN writes some cycles later. */
9496
9497 /* These are only significant for the fpu unit; writing a fp reg before
9498 the fpu has finished with it stalls the processor. */
9499
9500 /* Reusing an integer register causes no problems. */
9501 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9502 return 0;
9503 }
9504
9505 return cost;
9506 }
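/* Informal example of the store test above (not in the original
   source).  Given the pair

     (set (reg:SI 1) (plus:SI ...))           <- DEP_INSN
     (set (mem:SI (reg:SI 2)) (reg:SI 1))     <- INSN, a store

   SET_DEST (dep_pat) equals SET_SRC (pat), so the dependence is on
   the data being stored and the cost is left unchanged; had
   (reg:SI 1) appeared in the address instead, the cost + 3
   approximation would apply.  */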
9507
9508 static int
9509 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9510 int cost)
9511 {
9512 enum attr_type insn_type, dep_type;
9513 rtx pat = PATTERN (insn);
9514 rtx dep_pat = PATTERN (dep_insn);
9515
9516 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9517 return cost;
9518
9519 insn_type = get_attr_type (insn);
9520 dep_type = get_attr_type (dep_insn);
9521
9522 switch (dtype)
9523 {
9524 case 0:
9525 /* Data dependency; DEP_INSN writes a register that INSN reads some
9526 cycles later. */
9527
9528 switch (insn_type)
9529 {
9530 case TYPE_STORE:
9531 case TYPE_FPSTORE:
9532 /* Get the delay iff the address of the store is the dependence. */
9533 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9534 return cost;
9535
9536 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9537 return cost;
9538 return cost + 3;
9539
9540 case TYPE_LOAD:
9541 case TYPE_SLOAD:
9542 case TYPE_FPLOAD:
9543 /* If a load, then the dependence must be on the memory address. If
9544 the addresses aren't equal, then it might be a false dependency.  */
9545 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9546 {
9547 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9548 || GET_CODE (SET_DEST (dep_pat)) != MEM
9549 || GET_CODE (SET_SRC (pat)) != MEM
9550 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9551 XEXP (SET_SRC (pat), 0)))
9552 return cost + 2;
9553
9554 return cost + 8;
9555 }
9556 break;
9557
9558 case TYPE_BRANCH:
9559 /* Compare to branch latency is 0. There is no benefit from
9560 separating compare and branch. */
9561 if (dep_type == TYPE_COMPARE)
9562 return 0;
9563 /* Floating point compare to branch latency is less than
9564 compare to conditional move. */
9565 if (dep_type == TYPE_FPCMP)
9566 return cost - 1;
9567 break;
9568 default:
9569 break;
9570 }
9571 break;
9572
9573 case REG_DEP_ANTI:
9574 /* Anti-dependencies only penalize the fpu unit. */
9575 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9576 return 0;
9577 break;
9578
9579 default:
9580 break;
9581 }
9582
9583 return cost;
9584 }
9585
9586 static int
9587 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9588 unsigned int)
9589 {
9590 switch (sparc_cpu)
9591 {
9592 case PROCESSOR_SUPERSPARC:
9593 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9594 break;
9595 case PROCESSOR_HYPERSPARC:
9596 case PROCESSOR_SPARCLITE86X:
9597 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9598 break;
9599 default:
9600 break;
9601 }
9602 return cost;
9603 }
9604
9605 static void
9606 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9607 int sched_verbose ATTRIBUTE_UNUSED,
9608 int max_ready ATTRIBUTE_UNUSED)
9609 {}
9610
9611 static int
9612 sparc_use_sched_lookahead (void)
9613 {
9614 if (sparc_cpu == PROCESSOR_NIAGARA
9615 || sparc_cpu == PROCESSOR_NIAGARA2
9616 || sparc_cpu == PROCESSOR_NIAGARA3)
9617 return 0;
9618 if (sparc_cpu == PROCESSOR_NIAGARA4
9619 || sparc_cpu == PROCESSOR_NIAGARA7)
9620 return 2;
9621 if (sparc_cpu == PROCESSOR_ULTRASPARC
9622 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9623 return 4;
9624 if ((1 << sparc_cpu) &
9625 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9626 (1 << PROCESSOR_SPARCLITE86X)))
9627 return 3;
9628 return 0;
9629 }
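/* The (1 << sparc_cpu) expression above is a compact set-membership
   test: it is equivalent to comparing sparc_cpu against
   PROCESSOR_SUPERSPARC, PROCESSOR_HYPERSPARC and
   PROCESSOR_SPARCLITE86X in turn.  (Informal note, not in the
   original source.)  */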
9630
9631 static int
9632 sparc_issue_rate (void)
9633 {
9634 switch (sparc_cpu)
9635 {
9636 case PROCESSOR_NIAGARA:
9637 case PROCESSOR_NIAGARA2:
9638 case PROCESSOR_NIAGARA3:
9639 default:
9640 return 1;
9641 case PROCESSOR_NIAGARA4:
9642 case PROCESSOR_NIAGARA7:
9643 case PROCESSOR_V9:
9644 /* Assume V9 processors are capable of at least dual-issue. */
9645 return 2;
9646 case PROCESSOR_SUPERSPARC:
9647 return 3;
9648 case PROCESSOR_HYPERSPARC:
9649 case PROCESSOR_SPARCLITE86X:
9650 return 2;
9651 case PROCESSOR_ULTRASPARC:
9652 case PROCESSOR_ULTRASPARC3:
9653 return 4;
9654 }
9655 }
9656
9657 static int
9658 set_extends (rtx_insn *insn)
9659 {
9660 register rtx pat = PATTERN (insn);
9661
9662 switch (GET_CODE (SET_SRC (pat)))
9663 {
9664 /* Load and some shift instructions zero extend. */
9665 case MEM:
9666 case ZERO_EXTEND:
9667 /* sethi clears the high bits */
9668 case HIGH:
9669 /* LO_SUM is used with sethi; sethi clears the high
9670 bits and the values used with lo_sum are positive.  */
9671 case LO_SUM:
9672 /* Store flag stores 0 or 1 */
9673 case LT: case LTU:
9674 case GT: case GTU:
9675 case LE: case LEU:
9676 case GE: case GEU:
9677 case EQ:
9678 case NE:
9679 return 1;
9680 case AND:
9681 {
9682 rtx op0 = XEXP (SET_SRC (pat), 0);
9683 rtx op1 = XEXP (SET_SRC (pat), 1);
9684 if (GET_CODE (op1) == CONST_INT)
9685 return INTVAL (op1) >= 0;
9686 if (GET_CODE (op0) != REG)
9687 return 0;
9688 if (sparc_check_64 (op0, insn) == 1)
9689 return 1;
9690 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9691 }
9692 case IOR:
9693 case XOR:
9694 {
9695 rtx op0 = XEXP (SET_SRC (pat), 0);
9696 rtx op1 = XEXP (SET_SRC (pat), 1);
9697 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9698 return 0;
9699 if (GET_CODE (op1) == CONST_INT)
9700 return INTVAL (op1) >= 0;
9701 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9702 }
9703 case LSHIFTRT:
9704 return GET_MODE (SET_SRC (pat)) == SImode;
9705 /* Positive integers leave the high bits zero. */
9706 case CONST_INT:
9707 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9708 case ASHIFTRT:
9709 case SIGN_EXTEND:
9710 return - (GET_MODE (SET_SRC (pat)) == SImode);
9711 case REG:
9712 return sparc_check_64 (SET_SRC (pat), insn);
9713 default:
9714 return 0;
9715 }
9716 }
9717
9718 /* We _ought_ to have only one kind per function, but... */
9719 static GTY(()) rtx sparc_addr_diff_list;
9720 static GTY(()) rtx sparc_addr_list;
9721
9722 void
9723 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9724 {
9725 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9726 if (diff)
9727 sparc_addr_diff_list
9728 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9729 else
9730 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9731 }
9732
9733 static void
9734 sparc_output_addr_vec (rtx vec)
9735 {
9736 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9737 int idx, vlen = XVECLEN (body, 0);
9738
9739 #ifdef ASM_OUTPUT_ADDR_VEC_START
9740 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9741 #endif
9742
9743 #ifdef ASM_OUTPUT_CASE_LABEL
9744 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9745 NEXT_INSN (lab));
9746 #else
9747 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9748 #endif
9749
9750 for (idx = 0; idx < vlen; idx++)
9751 {
9752 ASM_OUTPUT_ADDR_VEC_ELT
9753 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9754 }
9755
9756 #ifdef ASM_OUTPUT_ADDR_VEC_END
9757 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9758 #endif
9759 }
9760
9761 static void
9762 sparc_output_addr_diff_vec (rtx vec)
9763 {
9764 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9765 rtx base = XEXP (XEXP (body, 0), 0);
9766 int idx, vlen = XVECLEN (body, 1);
9767
9768 #ifdef ASM_OUTPUT_ADDR_VEC_START
9769 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9770 #endif
9771
9772 #ifdef ASM_OUTPUT_CASE_LABEL
9773 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9774 NEXT_INSN (lab));
9775 #else
9776 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9777 #endif
9778
9779 for (idx = 0; idx < vlen; idx++)
9780 {
9781 ASM_OUTPUT_ADDR_DIFF_ELT
9782 (asm_out_file,
9783 body,
9784 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9785 CODE_LABEL_NUMBER (base));
9786 }
9787
9788 #ifdef ASM_OUTPUT_ADDR_VEC_END
9789 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9790 #endif
9791 }
9792
9793 static void
9794 sparc_output_deferred_case_vectors (void)
9795 {
9796 rtx t;
9797 int align;
9798
9799 if (sparc_addr_list == NULL_RTX
9800 && sparc_addr_diff_list == NULL_RTX)
9801 return;
9802
9803 /* Align to cache line in the function's code section. */
9804 switch_to_section (current_function_section ());
9805
9806 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9807 if (align > 0)
9808 ASM_OUTPUT_ALIGN (asm_out_file, align);
9809
9810 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9811 sparc_output_addr_vec (XEXP (t, 0));
9812 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9813 sparc_output_addr_diff_vec (XEXP (t, 0));
9814
9815 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9816 }
9817
9818 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9819 unknown. Return 1 if the high bits are zero, -1 if the register is
9820 sign extended. */
9821 int
9822 sparc_check_64 (rtx x, rtx_insn *insn)
9823 {
9824 /* If a register is set only once it is safe to ignore insns this
9825 code does not know how to handle. The loop will either recognize
9826 the single set and return the correct value or fail to recognize
9827 it and return 0. */
9828 int set_once = 0;
9829 rtx y = x;
9830
9831 gcc_assert (GET_CODE (x) == REG);
9832
9833 if (GET_MODE (x) == DImode)
9834 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9835
9836 if (flag_expensive_optimizations
9837 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9838 set_once = 1;
9839
9840 if (insn == 0)
9841 {
9842 if (set_once)
9843 insn = get_last_insn_anywhere ();
9844 else
9845 return 0;
9846 }
9847
9848 while ((insn = PREV_INSN (insn)))
9849 {
9850 switch (GET_CODE (insn))
9851 {
9852 case JUMP_INSN:
9853 case NOTE:
9854 break;
9855 case CODE_LABEL:
9856 case CALL_INSN:
9857 default:
9858 if (! set_once)
9859 return 0;
9860 break;
9861 case INSN:
9862 {
9863 rtx pat = PATTERN (insn);
9864 if (GET_CODE (pat) != SET)
9865 return 0;
9866 if (rtx_equal_p (x, SET_DEST (pat)))
9867 return set_extends (insn);
9868 if (y && rtx_equal_p (y, SET_DEST (pat)))
9869 return set_extends (insn);
9870 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9871 return 0;
9872 }
9873 }
9874 }
9875 return 0;
9876 }
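/* Informal usage note (not in the original source): callers treat
   the result as a tri-state.  For instance, output_v8plus_shift
   below only emits an explicit "srl %L1, 0, %L1" to clear the high
   bits when sparc_check_64 (operands[1], insn) <= 0, i.e. when the
   low word is not already known to be zero-extended.  */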
9877
9878 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9879 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9880
9881 const char *
9882 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9883 {
9884 static char asm_code[60];
9885
9886 /* The scratch register is only required when the destination
9887 register is not a 64-bit global or out register. */
9888 if (which_alternative != 2)
9889 operands[3] = operands[0];
9890
9891 /* We can only shift by constants <= 63. */
9892 if (GET_CODE (operands[2]) == CONST_INT)
9893 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9894
9895 if (GET_CODE (operands[1]) == CONST_INT)
9896 {
9897 output_asm_insn ("mov\t%1, %3", operands);
9898 }
9899 else
9900 {
9901 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9902 if (sparc_check_64 (operands[1], insn) <= 0)
9903 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9904 output_asm_insn ("or\t%L1, %3, %3", operands);
9905 }
9906
9907 strcpy (asm_code, opcode);
9908
9909 if (which_alternative != 2)
9910 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9911 else
9912 return
9913 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9914 }
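/* Informal expansion example (not in the original source): for
   OPCODE "sllx" with a 64-bit global or out destination register
   (which_alternative != 2, so %3 is %0), the insns emitted above
   plus the returned template amount to

     sllx  %H1, 32, %0      ! high word into bits 63:32
     srl   %L1, 0, %L1      ! only if the low word may be dirty
     or    %L1, %0, %0      ! assemble the 64-bit source
     sllx  %0, %2, %L0      ! the shift proper
     srlx  %L0, 32, %H0     ! split the result back into the pair
*/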
9915 \f
9916 /* Output rtl to increment the profiler label LABELNO
9917 for profiling a function entry. */
9918
9919 void
9920 sparc_profile_hook (int labelno)
9921 {
9922 char buf[32];
9923 rtx lab, fun;
9924
9925 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9926 if (NO_PROFILE_COUNTERS)
9927 {
9928 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9929 }
9930 else
9931 {
9932 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9933 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9934 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9935 }
9936 }
9937 \f
9938 #ifdef TARGET_SOLARIS
9939 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9940
9941 static void
9942 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9943 tree decl ATTRIBUTE_UNUSED)
9944 {
9945 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9946 {
9947 solaris_elf_asm_comdat_section (name, flags, decl);
9948 return;
9949 }
9950
9951 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9952
9953 if (!(flags & SECTION_DEBUG))
9954 fputs (",#alloc", asm_out_file);
9955 if (flags & SECTION_WRITE)
9956 fputs (",#write", asm_out_file);
9957 if (flags & SECTION_TLS)
9958 fputs (",#tls", asm_out_file);
9959 if (flags & SECTION_CODE)
9960 fputs (",#execinstr", asm_out_file);
9961
9962 if (flags & SECTION_NOTYPE)
9963 ;
9964 else if (flags & SECTION_BSS)
9965 fputs (",#nobits", asm_out_file);
9966 else
9967 fputs (",#progbits", asm_out_file);
9968
9969 fputc ('\n', asm_out_file);
9970 }
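/* Informal examples of the directives emitted above (section names
   are hypothetical, not from the original source):

     .section ".rodata",#alloc,#progbits
     .section ".data",#alloc,#write,#progbits
     .section ".bss",#alloc,#write,#nobits
     .section ".text.foo",#alloc,#execinstr,#progbits
*/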
9971 #endif /* TARGET_SOLARIS */
9972
9973 /* We do not allow indirect calls to be optimized into sibling calls.
9974
9975 We cannot use sibling calls when delayed branches are disabled
9976 because they will likely require the call delay slot to be filled.
9977
9978 Also, on SPARC 32-bit we cannot emit a sibling call when the
9979 current function returns a structure. This is because the "unimp
9980 after call" convention would cause the callee to return to the
9981 wrong place. The generic code already disallows cases where the
9982 function being called returns a structure.
9983
9984 It may seem strange how this last case could occur. Usually there
9985 is code after the call which jumps to epilogue code which dumps the
9986 return value into the struct return area. That ought to invalidate
9987 the sibling call right? Well, in the C++ case we can end up passing
9988 the pointer to the struct return area to a constructor (which returns
9989 void) and then nothing else happens. Such a sibling call would look
9990 valid without the added check here.
9991
9992 VxWorks PIC PLT entries require the global pointer to be initialized
9993 on entry. We therefore can't emit sibling calls to them. */
9994 static bool
9995 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9996 {
9997 return (decl
9998 && flag_delayed_branch
9999 && (TARGET_ARCH64 || ! cfun->returns_struct)
10000 && !(TARGET_VXWORKS_RTP
10001 && flag_pic
10002 && !targetm.binds_local_p (decl)));
10003 }
10004 \f
10005 /* libfunc renaming. */
10006
10007 static void
10008 sparc_init_libfuncs (void)
10009 {
10010 if (TARGET_ARCH32)
10011 {
10012 /* Use the subroutines that Sun's library provides for integer
10013 multiply and divide. The `*' prevents an underscore from
10014 being prepended by the compiler. .umul is a little faster
10015 than .mul. */
10016 set_optab_libfunc (smul_optab, SImode, "*.umul");
10017 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10018 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10019 set_optab_libfunc (smod_optab, SImode, "*.rem");
10020 set_optab_libfunc (umod_optab, SImode, "*.urem");
10021
10022 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10023 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10024 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10025 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10026 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10027 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10028
10029 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10030 is because with soft-float, the SFmode and DFmode sqrt
10031 instructions will be absent, and the compiler will notice and
10032 try to use the TFmode sqrt instruction for calls to the
10033 builtin function sqrt, but this fails. */
10034 if (TARGET_FPU)
10035 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10036
10037 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10038 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10039 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10040 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10041 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10042 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10043
10044 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10045 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10046 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10047 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10048
10049 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10050 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10051 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10052 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10053
10054 if (DITF_CONVERSION_LIBFUNCS)
10055 {
10056 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10057 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10058 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10059 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10060 }
10061
10062 if (SUN_CONVERSION_LIBFUNCS)
10063 {
10064 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10065 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10066 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10067 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10068 }
10069 }
10070 if (TARGET_ARCH64)
10071 {
10072 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10073 do not exist in the library. Make sure the compiler does not
10074 emit calls to them by accident. (It should always use the
10075 hardware instructions.) */
10076 set_optab_libfunc (smul_optab, SImode, 0);
10077 set_optab_libfunc (sdiv_optab, SImode, 0);
10078 set_optab_libfunc (udiv_optab, SImode, 0);
10079 set_optab_libfunc (smod_optab, SImode, 0);
10080 set_optab_libfunc (umod_optab, SImode, 0);
10081
10082 if (SUN_INTEGER_MULTIPLY_64)
10083 {
10084 set_optab_libfunc (smul_optab, DImode, "__mul64");
10085 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10086 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10087 set_optab_libfunc (smod_optab, DImode, "__rem64");
10088 set_optab_libfunc (umod_optab, DImode, "__urem64");
10089 }
10090
10091 if (SUN_CONVERSION_LIBFUNCS)
10092 {
10093 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10094 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10095 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10096 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10097 }
10098 }
10099 }
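/* Informal illustration (not in the original source): with the
   32-bit renaming above, a TFmode addition such as

     long double f (long double x, long double y) { return x + y; }

   compiles to a call to _Q_add, and an SImode multiply to a call to
   .umul; the leading '*' in "*.umul" merely tells the compiler not
   to prepend an underscore, it is not part of the symbol name.  */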
10100 \f
10101 /* SPARC builtins. */
10102 enum sparc_builtins
10103 {
10104 /* FPU builtins. */
10105 SPARC_BUILTIN_LDFSR,
10106 SPARC_BUILTIN_STFSR,
10107
10108 /* VIS 1.0 builtins. */
10109 SPARC_BUILTIN_FPACK16,
10110 SPARC_BUILTIN_FPACK32,
10111 SPARC_BUILTIN_FPACKFIX,
10112 SPARC_BUILTIN_FEXPAND,
10113 SPARC_BUILTIN_FPMERGE,
10114 SPARC_BUILTIN_FMUL8X16,
10115 SPARC_BUILTIN_FMUL8X16AU,
10116 SPARC_BUILTIN_FMUL8X16AL,
10117 SPARC_BUILTIN_FMUL8SUX16,
10118 SPARC_BUILTIN_FMUL8ULX16,
10119 SPARC_BUILTIN_FMULD8SUX16,
10120 SPARC_BUILTIN_FMULD8ULX16,
10121 SPARC_BUILTIN_FALIGNDATAV4HI,
10122 SPARC_BUILTIN_FALIGNDATAV8QI,
10123 SPARC_BUILTIN_FALIGNDATAV2SI,
10124 SPARC_BUILTIN_FALIGNDATADI,
10125 SPARC_BUILTIN_WRGSR,
10126 SPARC_BUILTIN_RDGSR,
10127 SPARC_BUILTIN_ALIGNADDR,
10128 SPARC_BUILTIN_ALIGNADDRL,
10129 SPARC_BUILTIN_PDIST,
10130 SPARC_BUILTIN_EDGE8,
10131 SPARC_BUILTIN_EDGE8L,
10132 SPARC_BUILTIN_EDGE16,
10133 SPARC_BUILTIN_EDGE16L,
10134 SPARC_BUILTIN_EDGE32,
10135 SPARC_BUILTIN_EDGE32L,
10136 SPARC_BUILTIN_FCMPLE16,
10137 SPARC_BUILTIN_FCMPLE32,
10138 SPARC_BUILTIN_FCMPNE16,
10139 SPARC_BUILTIN_FCMPNE32,
10140 SPARC_BUILTIN_FCMPGT16,
10141 SPARC_BUILTIN_FCMPGT32,
10142 SPARC_BUILTIN_FCMPEQ16,
10143 SPARC_BUILTIN_FCMPEQ32,
10144 SPARC_BUILTIN_FPADD16,
10145 SPARC_BUILTIN_FPADD16S,
10146 SPARC_BUILTIN_FPADD32,
10147 SPARC_BUILTIN_FPADD32S,
10148 SPARC_BUILTIN_FPSUB16,
10149 SPARC_BUILTIN_FPSUB16S,
10150 SPARC_BUILTIN_FPSUB32,
10151 SPARC_BUILTIN_FPSUB32S,
10152 SPARC_BUILTIN_ARRAY8,
10153 SPARC_BUILTIN_ARRAY16,
10154 SPARC_BUILTIN_ARRAY32,
10155
10156 /* VIS 2.0 builtins. */
10157 SPARC_BUILTIN_EDGE8N,
10158 SPARC_BUILTIN_EDGE8LN,
10159 SPARC_BUILTIN_EDGE16N,
10160 SPARC_BUILTIN_EDGE16LN,
10161 SPARC_BUILTIN_EDGE32N,
10162 SPARC_BUILTIN_EDGE32LN,
10163 SPARC_BUILTIN_BMASK,
10164 SPARC_BUILTIN_BSHUFFLEV4HI,
10165 SPARC_BUILTIN_BSHUFFLEV8QI,
10166 SPARC_BUILTIN_BSHUFFLEV2SI,
10167 SPARC_BUILTIN_BSHUFFLEDI,
10168
10169 /* VIS 3.0 builtins. */
10170 SPARC_BUILTIN_CMASK8,
10171 SPARC_BUILTIN_CMASK16,
10172 SPARC_BUILTIN_CMASK32,
10173 SPARC_BUILTIN_FCHKSM16,
10174 SPARC_BUILTIN_FSLL16,
10175 SPARC_BUILTIN_FSLAS16,
10176 SPARC_BUILTIN_FSRL16,
10177 SPARC_BUILTIN_FSRA16,
10178 SPARC_BUILTIN_FSLL32,
10179 SPARC_BUILTIN_FSLAS32,
10180 SPARC_BUILTIN_FSRL32,
10181 SPARC_BUILTIN_FSRA32,
10182 SPARC_BUILTIN_PDISTN,
10183 SPARC_BUILTIN_FMEAN16,
10184 SPARC_BUILTIN_FPADD64,
10185 SPARC_BUILTIN_FPSUB64,
10186 SPARC_BUILTIN_FPADDS16,
10187 SPARC_BUILTIN_FPADDS16S,
10188 SPARC_BUILTIN_FPSUBS16,
10189 SPARC_BUILTIN_FPSUBS16S,
10190 SPARC_BUILTIN_FPADDS32,
10191 SPARC_BUILTIN_FPADDS32S,
10192 SPARC_BUILTIN_FPSUBS32,
10193 SPARC_BUILTIN_FPSUBS32S,
10194 SPARC_BUILTIN_FUCMPLE8,
10195 SPARC_BUILTIN_FUCMPNE8,
10196 SPARC_BUILTIN_FUCMPGT8,
10197 SPARC_BUILTIN_FUCMPEQ8,
10198 SPARC_BUILTIN_FHADDS,
10199 SPARC_BUILTIN_FHADDD,
10200 SPARC_BUILTIN_FHSUBS,
10201 SPARC_BUILTIN_FHSUBD,
10202 SPARC_BUILTIN_FNHADDS,
10203 SPARC_BUILTIN_FNHADDD,
10204 SPARC_BUILTIN_UMULXHI,
10205 SPARC_BUILTIN_XMULX,
10206 SPARC_BUILTIN_XMULXHI,
10207
10208 /* VIS 4.0 builtins. */
10209 SPARC_BUILTIN_FPADD8,
10210 SPARC_BUILTIN_FPADDS8,
10211 SPARC_BUILTIN_FPADDUS8,
10212 SPARC_BUILTIN_FPADDUS16,
10213 SPARC_BUILTIN_FPCMPLE8,
10214 SPARC_BUILTIN_FPCMPGT8,
10215 SPARC_BUILTIN_FPCMPULE16,
10216 SPARC_BUILTIN_FPCMPUGT16,
10217 SPARC_BUILTIN_FPCMPULE32,
10218 SPARC_BUILTIN_FPCMPUGT32,
10219 SPARC_BUILTIN_FPMAX8,
10220 SPARC_BUILTIN_FPMAX16,
10221 SPARC_BUILTIN_FPMAX32,
10222 SPARC_BUILTIN_FPMAXU8,
10223 SPARC_BUILTIN_FPMAXU16,
10224 SPARC_BUILTIN_FPMAXU32,
10225 SPARC_BUILTIN_FPMIN8,
10226 SPARC_BUILTIN_FPMIN16,
10227 SPARC_BUILTIN_FPMIN32,
10228 SPARC_BUILTIN_FPMINU8,
10229 SPARC_BUILTIN_FPMINU16,
10230 SPARC_BUILTIN_FPMINU32,
10231 SPARC_BUILTIN_FPSUB8,
10232 SPARC_BUILTIN_FPSUBS8,
10233 SPARC_BUILTIN_FPSUBUS8,
10234 SPARC_BUILTIN_FPSUBUS16,
10235
10236 SPARC_BUILTIN_MAX
10237 };
10238
10239 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10240 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10241
10242 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10243 function decl or NULL_TREE if the builtin was not added. */
10244
10245 static tree
10246 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10247 tree type)
10248 {
10249 tree t
10250 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10251
10252 if (t)
10253 {
10254 sparc_builtins[code] = t;
10255 sparc_builtins_icode[code] = icode;
10256 }
10257
10258 return t;
10259 }
10260
10261 /* Likewise, but also marks the function as "const". */
10262
10263 static tree
10264 def_builtin_const (const char *name, enum insn_code icode,
10265 enum sparc_builtins code, tree type)
10266 {
10267 tree t = def_builtin (name, icode, code, type);
10268
10269 if (t)
10270 TREE_READONLY (t) = 1;
10271
10272 return t;
10273 }
10274
10275 /* Implement the TARGET_INIT_BUILTINS target hook.
10276 Create builtin functions for special SPARC instructions. */
10277
10278 static void
10279 sparc_init_builtins (void)
10280 {
10281 if (TARGET_FPU)
10282 sparc_fpu_init_builtins ();
10283
10284 if (TARGET_VIS)
10285 sparc_vis_init_builtins ();
10286 }
10287
10288 /* Create builtin functions for FPU instructions. */
10289
10290 static void
10291 sparc_fpu_init_builtins (void)
10292 {
10293 tree ftype
10294 = build_function_type_list (void_type_node,
10295 build_pointer_type (unsigned_type_node), 0);
10296 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10297 SPARC_BUILTIN_LDFSR, ftype);
10298 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10299 SPARC_BUILTIN_STFSR, ftype);
10300 }
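/* Informal usage sketch (not in the original source):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   // copy %fsr to memory
     fsr &= ~0x1f;                 // e.g. clear the cexc field (bits 4:0)
     __builtin_load_fsr (&fsr);    // load the modified value into %fsr
*/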
10301
10302 /* Create builtin functions for VIS instructions. */
10303
10304 static void
10305 sparc_vis_init_builtins (void)
10306 {
10307 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10308 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10309 tree v4hi = build_vector_type (intHI_type_node, 4);
10310 tree v2hi = build_vector_type (intHI_type_node, 2);
10311 tree v2si = build_vector_type (intSI_type_node, 2);
10312 tree v1si = build_vector_type (intSI_type_node, 1);
10313
10314 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10315 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10316 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10317 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10318 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10319 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10320 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10321 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10322 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10323 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10324 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10325 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10326 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10327 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10328 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10329 v8qi, v8qi,
10330 intDI_type_node, 0);
10331 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10332 v8qi, v8qi, 0);
10333 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10334 v8qi, v8qi, 0);
10335 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10336 intDI_type_node,
10337 intDI_type_node, 0);
10338 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10339 intSI_type_node,
10340 intSI_type_node, 0);
10341 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10342 ptr_type_node,
10343 intSI_type_node, 0);
10344 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10345 ptr_type_node,
10346 intDI_type_node, 0);
10347 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10348 ptr_type_node,
10349 ptr_type_node, 0);
10350 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10351 ptr_type_node,
10352 ptr_type_node, 0);
10353 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10354 v4hi, v4hi, 0);
10355 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10356 v2si, v2si, 0);
10357 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10358 v4hi, v4hi, 0);
10359 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10360 v2si, v2si, 0);
10361 tree void_ftype_di = build_function_type_list (void_type_node,
10362 intDI_type_node, 0);
10363 tree di_ftype_void = build_function_type_list (intDI_type_node,
10364 void_type_node, 0);
10365 tree void_ftype_si = build_function_type_list (void_type_node,
10366 intSI_type_node, 0);
10367 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10368 float_type_node,
10369 float_type_node, 0);
10370 tree df_ftype_df_df = build_function_type_list (double_type_node,
10371 double_type_node,
10372 double_type_node, 0);
10373
10374 /* Packing and expanding vectors. */
10375 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10376 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10377 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10378 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10379 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10380 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10381 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10382 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10383 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10384 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10385
10386 /* Multiplications. */
10387 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10388 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10389 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10390 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10391 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10392 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10393 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10394 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10395 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10396 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10397 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10398 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10399 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10400 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10401
10402 /* Data aligning. */
10403 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10404 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10405 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10406 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10407 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10408 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10409 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10410 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10411
10412 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10413 SPARC_BUILTIN_WRGSR, void_ftype_di);
10414 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10415 SPARC_BUILTIN_RDGSR, di_ftype_void);
10416
10417 if (TARGET_ARCH64)
10418 {
10419 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10420 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10421 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10422 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10423 }
10424 else
10425 {
10426 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10427 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10428 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10429 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10430 }
10431
10432 /* Pixel distance. */
10433 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10434 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10435
10436 /* Edge handling. */
10437 if (TARGET_ARCH64)
10438 {
10439 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10440 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10441 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10442 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10443 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10444 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10445 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10446 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10447 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10448 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10449 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10450 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10451 }
10452 else
10453 {
10454 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10455 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10456 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10457 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10458 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10459 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10460 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10461 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10462 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10463 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10464 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10465 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10466 }
10467
10468 /* Pixel compare. */
10469 if (TARGET_ARCH64)
10470 {
10471 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10472 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10473 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10474 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10475 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10476 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10477 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10478 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10479 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10480 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10481 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10482 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10483 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10484 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10485 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10486 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10487 }
10488 else
10489 {
10490 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10491 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10492 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10493 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10494 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10495 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10496 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10497 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10498 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10499 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10500 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10501 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10502 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10503 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10504 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10505 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10506 }
10507
10508 /* Addition and subtraction. */
10509 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10510 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10511 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10512 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10513 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10514 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10515 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10516 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10517 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10518 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10519 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10520 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10521 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10522 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10523 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10524 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10525
10526 /* Three-dimensional array addressing. */
10527 if (TARGET_ARCH64)
10528 {
10529 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10530 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10531 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10532 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10533 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10534 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10535 }
10536 else
10537 {
10538 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10539 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10540 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10541 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10542 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10543 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10544 }
10545
10546 if (TARGET_VIS2)
10547 {
10548 /* Edge handling. */
10549 if (TARGET_ARCH64)
10550 {
10551 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10552 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10553 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10554 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10555 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10556 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10557 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10558 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10559 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10560 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10561 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10562 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10563 }
10564 else
10565 {
10566 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10567 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10568 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10569 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10570 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10571 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10572 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10573 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10574 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10575 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10576 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10577 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10578 }
10579
10580 /* Byte mask and shuffle. */
10581 if (TARGET_ARCH64)
10582 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10583 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10584 else
10585 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10586 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10587 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10588 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10589 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10590 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10591 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10592 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10593 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10594 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10595 }
10596
10597 if (TARGET_VIS3)
10598 {
10599 if (TARGET_ARCH64)
10600 {
10601 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10602 SPARC_BUILTIN_CMASK8, void_ftype_di);
10603 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10604 SPARC_BUILTIN_CMASK16, void_ftype_di);
10605 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10606 SPARC_BUILTIN_CMASK32, void_ftype_di);
10607 }
10608 else
10609 {
10610 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10611 SPARC_BUILTIN_CMASK8, void_ftype_si);
10612 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10613 SPARC_BUILTIN_CMASK16, void_ftype_si);
10614 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10615 SPARC_BUILTIN_CMASK32, void_ftype_si);
10616 }
10617
10618 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10619 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10620
10621 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10622 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10623 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10624 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10625 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10626 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10627 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10628 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10629 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10630 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10631 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10632 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10633 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10634 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10635 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10636 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10637
10638 if (TARGET_ARCH64)
10639 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10640 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10641 else
10642 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10643 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10644
10645 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10646 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10647 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10648 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10649 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10650 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10651
10652 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10653 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10654 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10655 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10656 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10657 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10658 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10659 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10660 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10661 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10662 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10663 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10664 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10665 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10666 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10667 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10668
10669 if (TARGET_ARCH64)
10670 {
10671 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10672 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10673 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10674 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10675 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10676 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10677 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10678 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10679 }
10680 else
10681 {
10682 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10683 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10684 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10685 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10686 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10687 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10688 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10689 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10690 }
10691
10692 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10693 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10694 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10695 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10696 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10697 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10698 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10699 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10700 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10701 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10702 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10703 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10704
10705 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10706 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10707 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10708 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10709 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10710 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10711 }
10712
10713 if (TARGET_VIS4)
10714 {
10715 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10716 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10717 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10718 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10719 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10720 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10721 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10722 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10723
10725 if (TARGET_ARCH64)
10726 {
10727 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10728 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10729 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10730 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10731 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10732 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10733 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10734 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10735 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10736 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10737 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10738 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10739 }
10740 else
10741 {
10742 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10743 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10744 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10745 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10746 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10747 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10748 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10749 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10750 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10751 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10752 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10753 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10754 }
10755
10756 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10757 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10758 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10759 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10760 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10761 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10762 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10763 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10764 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10765 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10766 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10767 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10768 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10769 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10770 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10771 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10772 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10773 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10774 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10775 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10776 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10777 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10778 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10779 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10780 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10781 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10782 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10783 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10784 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10785 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10786 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10787 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10788 }
10789 }
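/* Informal usage sketch (not in the original source; needs a
   VIS-capable target, e.g. -mvis):

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add4 (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);   // a single fpadd16 insn
     }
*/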
10790
10791 /* Implement TARGET_BUILTIN_DECL hook. */
10792
10793 static tree
10794 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10795 {
10796 if (code >= SPARC_BUILTIN_MAX)
10797 return error_mark_node;
10798
10799 return sparc_builtins[code];
10800 }
10801
10802 /* Implement TARGET_EXPAND_BUILTIN hook. */
10803
10804 static rtx
10805 sparc_expand_builtin (tree exp, rtx target,
10806 rtx subtarget ATTRIBUTE_UNUSED,
10807 machine_mode tmode ATTRIBUTE_UNUSED,
10808 int ignore ATTRIBUTE_UNUSED)
10809 {
10810 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10811 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10812 enum insn_code icode = sparc_builtins_icode[code];
10813 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10814 call_expr_arg_iterator iter;
10815 int arg_count = 0;
10816 rtx pat, op[4];
10817 tree arg;
10818
10819 if (nonvoid)
10820 {
10821 machine_mode tmode = insn_data[icode].operand[0].mode;
10822 if (!target
10823 || GET_MODE (target) != tmode
10824 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10825 op[0] = gen_reg_rtx (tmode);
10826 else
10827 op[0] = target;
10828 }
10829
10830 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10831 {
10832 const struct insn_operand_data *insn_op;
10833 int idx;
10834
10835 if (arg == error_mark_node)
10836 return NULL_RTX;
10837
10838 arg_count++;
10839 idx = arg_count - !nonvoid;
10840 insn_op = &insn_data[icode].operand[idx];
10841 op[arg_count] = expand_normal (arg);
10842
10843 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10844 {
10845 if (!address_operand (op[arg_count], SImode))
10846 {
10847 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10848 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10849 }
10850 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10851 }
10852
10853 else if (insn_op->mode == V1DImode
10854 && GET_MODE (op[arg_count]) == DImode)
10855 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10856
10857 else if (insn_op->mode == V1SImode
10858 && GET_MODE (op[arg_count]) == SImode)
10859 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10860
10861 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10862 insn_op->mode))
10863 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10864 }
10865
10866 switch (arg_count)
10867 {
10868 case 0:
10869 pat = GEN_FCN (icode) (op[0]);
10870 break;
10871 case 1:
10872 if (nonvoid)
10873 pat = GEN_FCN (icode) (op[0], op[1]);
10874 else
10875 pat = GEN_FCN (icode) (op[1]);
10876 break;
10877 case 2:
10878 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10879 break;
10880 case 3:
10881 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10882 break;
10883 default:
10884 gcc_unreachable ();
10885 }
10886
10887 if (!pat)
10888 return NULL_RTX;
10889
10890 emit_insn (pat);
10891
10892 return (nonvoid ? op[0] : const0_rtx);
10893 }
10894
10895 /* Return the upper 16 bits of the 8x16 multiplication. */
10896
10897 static int
10898 sparc_vis_mul8x16 (int e8, int e16)
10899 {
10900 return (e8 * e16 + 128) / 256;
10901 }
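/* Informal worked example (not in the original source): the +128
   rounds the product to the nearest representable value before the
   low byte is discarded.  E.g. for e8 = 129, e16 = 1 the product is
   129 (fraction 129/256 > 1/2), so (129 + 128) / 256 = 1, whereas
   plain truncation would yield 0.  */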
10902
10903 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10904 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10905
10906 static void
10907 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10908 tree inner_type, tree cst0, tree cst1)
10909 {
10910 unsigned i, num = VECTOR_CST_NELTS (cst0);
10911 int scale;
10912
10913 switch (fncode)
10914 {
10915 case SPARC_BUILTIN_FMUL8X16:
10916 for (i = 0; i < num; ++i)
10917 {
10918 int val
10919 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10920 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10921 n_elts[i] = build_int_cst (inner_type, val);
10922 }
10923 break;
10924
10925 case SPARC_BUILTIN_FMUL8X16AU:
10926 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10927
10928 for (i = 0; i < num; ++i)
10929 {
10930 int val
10931 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10932 scale);
10933 n_elts[i] = build_int_cst (inner_type, val);
10934 }
10935 break;
10936
10937 case SPARC_BUILTIN_FMUL8X16AL:
10938 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10939
10940 for (i = 0; i < num; ++i)
10941 {
10942 int val
10943 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10944 scale);
10945 n_elts[i] = build_int_cst (inner_type, val);
10946 }
10947 break;
10948
10949 default:
10950 gcc_unreachable ();
10951 }
10952 }
10953
10954 /* Implement TARGET_FOLD_BUILTIN hook.
10955
10956 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10957 result of the function call is ignored. NULL_TREE is returned if the
10958 function could not be folded. */
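/* For instance, a call to __builtin_vis_fexpand on the constant vector
   { 1, 2, 3, 4 } folds to the constant { 16, 32, 48, 64 } below, since
   FEXPAND shifts each 8-bit element left by 4 into 16-bit fixed-point
   form.  (Example values are illustrative only.)  */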
10959
10960 static tree
10961 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10962 tree *args, bool ignore)
10963 {
10964 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10965 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10966 tree arg0, arg1, arg2;
10967
10968 if (ignore)
10969 switch (code)
10970 {
10971 case SPARC_BUILTIN_LDFSR:
10972 case SPARC_BUILTIN_STFSR:
10973 case SPARC_BUILTIN_ALIGNADDR:
10974 case SPARC_BUILTIN_WRGSR:
10975 case SPARC_BUILTIN_BMASK:
10976 case SPARC_BUILTIN_CMASK8:
10977 case SPARC_BUILTIN_CMASK16:
10978 case SPARC_BUILTIN_CMASK32:
10979 break;
10980
10981 default:
10982 return build_zero_cst (rtype);
10983 }
10984
10985 switch (code)
10986 {
10987 case SPARC_BUILTIN_FEXPAND:
10988 arg0 = args[0];
10989 STRIP_NOPS (arg0);
10990
10991 if (TREE_CODE (arg0) == VECTOR_CST)
10992 {
10993 tree inner_type = TREE_TYPE (rtype);
10994 tree *n_elts;
10995 unsigned i;
10996
10997 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10998 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10999 n_elts[i] = build_int_cst (inner_type,
11000 TREE_INT_CST_LOW
11001 (VECTOR_CST_ELT (arg0, i)) << 4);
11002 return build_vector (rtype, n_elts);
11003 }
11004 break;
11005
11006 case SPARC_BUILTIN_FMUL8X16:
11007 case SPARC_BUILTIN_FMUL8X16AU:
11008 case SPARC_BUILTIN_FMUL8X16AL:
11009 arg0 = args[0];
11010 arg1 = args[1];
11011 STRIP_NOPS (arg0);
11012 STRIP_NOPS (arg1);
11013
11014 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11015 {
11016 tree inner_type = TREE_TYPE (rtype);
11017 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11018 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11019 return build_vector (rtype, n_elts);
11020 }
11021 break;
11022
11023 case SPARC_BUILTIN_FPMERGE:
11024 arg0 = args[0];
11025 arg1 = args[1];
11026 STRIP_NOPS (arg0);
11027 STRIP_NOPS (arg1);
11028
11029 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11030 {
11031 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11032 unsigned i;
11033 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11034 {
11035 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11036 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11037 }
11038
11039 return build_vector (rtype, n_elts);
11040 }
11041 break;
11042
11043 case SPARC_BUILTIN_PDIST:
11044 case SPARC_BUILTIN_PDISTN:
11045 arg0 = args[0];
11046 arg1 = args[1];
11047 STRIP_NOPS (arg0);
11048 STRIP_NOPS (arg1);
11049 if (code == SPARC_BUILTIN_PDIST)
11050 {
11051 arg2 = args[2];
11052 STRIP_NOPS (arg2);
11053 }
11054 else
11055 arg2 = integer_zero_node;
11056
11057 if (TREE_CODE (arg0) == VECTOR_CST
11058 && TREE_CODE (arg1) == VECTOR_CST
11059 && TREE_CODE (arg2) == INTEGER_CST)
11060 {
11061 bool overflow = false;
11062 widest_int result = wi::to_widest (arg2);
11063 widest_int tmp;
11064 unsigned i;
11065
11066 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11067 {
11068 tree e0 = VECTOR_CST_ELT (arg0, i);
11069 tree e1 = VECTOR_CST_ELT (arg1, i);
11070
11071 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11072
11073 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11074 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11075 if (wi::neg_p (tmp))
11076 tmp = wi::neg (tmp, &neg2_ovf);
11077 else
11078 neg2_ovf = false;
11079 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11080 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11081 }
11082
11083 gcc_assert (!overflow);
11084
11085 return wide_int_to_tree (rtype, result);
11086 }
11087
11088 default:
11089 break;
11090 }
11091
11092 return NULL_TREE;
11093 }
11094 \f
11095 /* ??? This duplicates information provided to the compiler by the
11096 ??? scheduler description. Some day, teach genautomata to output
11097 ??? the latencies and then CSE will just use that. */
11098
11099 static bool
11100 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11101 int opno ATTRIBUTE_UNUSED,
11102 int *total, bool speed ATTRIBUTE_UNUSED)
11103 {
11104 int code = GET_CODE (x);
11105 bool float_mode_p = FLOAT_MODE_P (mode);
11106
11107 switch (code)
11108 {
11109 case CONST_INT:
11110 if (SMALL_INT (x))
11111 *total = 0;
11112 else
11113 *total = 2;
11114 return true;
11115
11116 case CONST_WIDE_INT:
11117 *total = 0;
11118 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11119 *total += 2;
11120 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11121 *total += 2;
11122 return true;
11123
11124 case HIGH:
11125 *total = 2;
11126 return true;
11127
11128 case CONST:
11129 case LABEL_REF:
11130 case SYMBOL_REF:
11131 *total = 4;
11132 return true;
11133
11134 case CONST_DOUBLE:
11135 *total = 8;
11136 return true;
11137
11138 case MEM:
11139 /* If outer-code was a sign or zero extension, a cost
11140 of COSTS_N_INSNS (1) was already added in. This is
11141 why we are subtracting it back out. */
11142 if (outer_code == ZERO_EXTEND)
11143 {
11144 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11145 }
11146 else if (outer_code == SIGN_EXTEND)
11147 {
11148 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11149 }
11150 else if (float_mode_p)
11151 {
11152 *total = sparc_costs->float_load;
11153 }
11154 else
11155 {
11156 *total = sparc_costs->int_load;
11157 }
11158
11159 return true;
11160
11161 case PLUS:
11162 case MINUS:
11163 if (float_mode_p)
11164 *total = sparc_costs->float_plusminus;
11165 else
11166 *total = COSTS_N_INSNS (1);
11167 return false;
11168
11169 case FMA:
11170 {
11171 rtx sub;
11172
11173 gcc_assert (float_mode_p);
11174 *total = sparc_costs->float_mul;
11175
11176 sub = XEXP (x, 0);
11177 if (GET_CODE (sub) == NEG)
11178 sub = XEXP (sub, 0);
11179 *total += rtx_cost (sub, mode, FMA, 0, speed);
11180
11181 sub = XEXP (x, 2);
11182 if (GET_CODE (sub) == NEG)
11183 sub = XEXP (sub, 0);
11184 *total += rtx_cost (sub, mode, FMA, 2, speed);
11185 return true;
11186 }
11187
11188 case MULT:
11189 if (float_mode_p)
11190 *total = sparc_costs->float_mul;
11191 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11192 *total = COSTS_N_INSNS (25);
11193 else
11194 {
11195 int bit_cost;
11196
11197 bit_cost = 0;
11198 if (sparc_costs->int_mul_bit_factor)
11199 {
11200 int nbits;
11201
11202 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11203 {
11204 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11205 for (nbits = 0; value != 0; value &= value - 1)
11206 nbits++;
11207 }
11208 else
11209 nbits = 7;
11210
11211 if (nbits < 3)
11212 nbits = 3;
11213 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11214 bit_cost = COSTS_N_INSNS (bit_cost);
11215 }
11216
11217 if (mode == DImode || !TARGET_HARD_MUL)
11218 *total = sparc_costs->int_mulX + bit_cost;
11219 else
11220 *total = sparc_costs->int_mul + bit_cost;
11221 }
11222 return false;
11223
11224 case ASHIFT:
11225 case ASHIFTRT:
11226 case LSHIFTRT:
11227 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11228 return false;
11229
11230 case DIV:
11231 case UDIV:
11232 case MOD:
11233 case UMOD:
11234 if (float_mode_p)
11235 {
11236 if (mode == DFmode)
11237 *total = sparc_costs->float_div_df;
11238 else
11239 *total = sparc_costs->float_div_sf;
11240 }
11241 else
11242 {
11243 if (mode == DImode)
11244 *total = sparc_costs->int_divX;
11245 else
11246 *total = sparc_costs->int_div;
11247 }
11248 return false;
11249
11250 case NEG:
11251 if (! float_mode_p)
11252 {
11253 *total = COSTS_N_INSNS (1);
11254 return false;
11255 }
11256 /* FALLTHRU */
11257
11258 case ABS:
11259 case FLOAT:
11260 case UNSIGNED_FLOAT:
11261 case FIX:
11262 case UNSIGNED_FIX:
11263 case FLOAT_EXTEND:
11264 case FLOAT_TRUNCATE:
11265 *total = sparc_costs->float_move;
11266 return false;
11267
11268 case SQRT:
11269 if (mode == DFmode)
11270 *total = sparc_costs->float_sqrt_df;
11271 else
11272 *total = sparc_costs->float_sqrt_sf;
11273 return false;
11274
11275 case COMPARE:
11276 if (float_mode_p)
11277 *total = sparc_costs->float_cmp;
11278 else
11279 *total = COSTS_N_INSNS (1);
11280 return false;
11281
11282 case IF_THEN_ELSE:
11283 if (float_mode_p)
11284 *total = sparc_costs->float_cmove;
11285 else
11286 *total = sparc_costs->int_cmove;
11287 return false;
11288
11289 case IOR:
11290 /* Handle the NAND vector patterns. */
11291 if (sparc_vector_mode_supported_p (mode)
11292 && GET_CODE (XEXP (x, 0)) == NOT
11293 && GET_CODE (XEXP (x, 1)) == NOT)
11294 {
11295 *total = COSTS_N_INSNS (1);
11296 return true;
11297 }
11298 else
11299 return false;
11300
11301 default:
11302 return false;
11303 }
11304 }
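/* To illustrate the MULT case above (numbers chosen for illustration):
   with int_mul_bit_factor == 2, a multiply by the constant 0xFF has
   nbits == 8, so an extra bit_cost of COSTS_N_INSNS ((8 - 3) / 2)
   == COSTS_N_INSNS (2) is added on top of int_mul or int_mulX.  */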
11305
11306 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11307
11308 static inline bool
11309 general_or_i64_p (reg_class_t rclass)
11310 {
11311 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11312 }
11313
11314 /* Implement TARGET_REGISTER_MOVE_COST. */
11315
11316 static int
11317 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11318 reg_class_t from, reg_class_t to)
11319 {
11320 bool need_memory = false;
11321
11322 /* This helps postreload CSE to eliminate redundant comparisons. */
11323 if (from == NO_REGS || to == NO_REGS)
11324 return 100;
11325
11326 if (from == FPCC_REGS || to == FPCC_REGS)
11327 need_memory = true;
11328 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11329 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11330 {
11331 if (TARGET_VIS3)
11332 {
11333 int size = GET_MODE_SIZE (mode);
11334 if (size == 8 || size == 4)
11335 {
11336 if (! TARGET_ARCH32 || size == 4)
11337 return 4;
11338 else
11339 return 6;
11340 }
11341 }
11342 need_memory = true;
11343 }
11344
11345 if (need_memory)
11346 {
11347 if (sparc_cpu == PROCESSOR_ULTRASPARC
11348 || sparc_cpu == PROCESSOR_ULTRASPARC3
11349 || sparc_cpu == PROCESSOR_NIAGARA
11350 || sparc_cpu == PROCESSOR_NIAGARA2
11351 || sparc_cpu == PROCESSOR_NIAGARA3
11352 || sparc_cpu == PROCESSOR_NIAGARA4
11353 || sparc_cpu == PROCESSOR_NIAGARA7)
11354 return 12;
11355
11356 return 6;
11357 }
11358
11359 return 2;
11360 }
11361
11362 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11363 This is achieved by means of a manual dynamic stack space allocation in
11364 the current frame. We make the assumption that SEQ doesn't contain any
11365 function calls, with the possible exception of calls to the GOT helper. */
11366
11367 static void
11368 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11369 {
11370 /* We must preserve the lowest 16 words for the register save area. */
11371 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11372 /* We really need only 2 words of fresh stack space. */
11373 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11374
11375 rtx slot
11376 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11377 SPARC_STACK_BIAS + offset));
11378
11379 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11380 emit_insn (gen_rtx_SET (slot, reg));
11381 if (reg2)
11382 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11383 reg2));
11384 emit_insn (seq);
11385 if (reg2)
11386 emit_insn (gen_rtx_SET (reg2,
11387 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11388 emit_insn (gen_rtx_SET (reg, slot));
11389 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11390 }
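/* As a sketch, in 32-bit mode (no stack bias, OFFSET = 64, SIZE = 72)
   the sequence emitted above is roughly:

	add	%sp, -72, %sp
	st	%reg, [%sp+64]
	st	%reg2, [%sp+68]	! if REG2 is given
	<SEQ>
	ld	[%sp+68], %reg2	! if REG2 is given
	ld	[%sp+64], %reg
	add	%sp, 72, %sp  */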
11391
11392 /* Output the assembler code for a thunk function. THUNK_DECL is the
11393 declaration for the thunk function itself, FUNCTION is the decl for
11394 the target function. DELTA is an immediate constant offset to be
11395 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11396 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
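/* In C-like pseudo-code, the thunk emitted below amounts to:

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*this + VCALL_OFFSET);
     goto FUNCTION;  */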
11397
11398 static void
11399 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11400 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11401 tree function)
11402 {
11403 rtx this_rtx, funexp;
11404 rtx_insn *insn;
11405 unsigned int int_arg_first;
11406
11407 reload_completed = 1;
11408 epilogue_completed = 1;
11409
11410 emit_note (NOTE_INSN_PROLOGUE_END);
11411
11412 if (TARGET_FLAT)
11413 {
11414 sparc_leaf_function_p = 1;
11415
11416 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11417 }
11418 else if (flag_delayed_branch)
11419 {
11420 /* We will emit a regular sibcall below, so we need to instruct
11421 output_sibcall that we are in a leaf function. */
11422 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11423
11424 /* This will cause final.c to invoke leaf_renumber_regs so we
11425 must behave as if we were in a not-yet-leafified function. */
11426 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11427 }
11428 else
11429 {
11430 /* We will emit the sibcall manually below, so we will need to
11431 manually spill non-leaf registers. */
11432 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11433
11434 /* We really are in a leaf function. */
11435 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11436 }
11437
11438 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11439 returns a structure, the structure return pointer is there instead. */
11440 if (TARGET_ARCH64
11441 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11442 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11443 else
11444 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11445
11446 /* Add DELTA. When possible use a plain add, otherwise load it into
11447 a register first. */
11448 if (delta)
11449 {
11450 rtx delta_rtx = GEN_INT (delta);
11451
11452 if (! SPARC_SIMM13_P (delta))
11453 {
11454 rtx scratch = gen_rtx_REG (Pmode, 1);
11455 emit_move_insn (scratch, delta_rtx);
11456 delta_rtx = scratch;
11457 }
11458
11459 /* THIS_RTX += DELTA. */
11460 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11461 }
11462
11463 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11464 if (vcall_offset)
11465 {
11466 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11467 rtx scratch = gen_rtx_REG (Pmode, 1);
11468
11469 gcc_assert (vcall_offset < 0);
11470
11471 /* SCRATCH = *THIS_RTX. */
11472 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11473
11474 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11475 may not have any available scratch register at this point. */
11476 if (SPARC_SIMM13_P (vcall_offset))
11477 ;
11478 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11479 else if (! fixed_regs[5]
11480 /* The below sequence is made up of at least 2 insns,
11481 while the default method may need only one. */
11482 && vcall_offset < -8192)
11483 {
11484 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11485 emit_move_insn (scratch2, vcall_offset_rtx);
11486 vcall_offset_rtx = scratch2;
11487 }
11488 else
11489 {
11490 rtx increment = GEN_INT (-4096);
11491
11492 /* VCALL_OFFSET is a negative number whose typical range can be
11493 estimated as -32768..0 in 32-bit mode. In almost all cases
11494 it is therefore cheaper to emit multiple add insns than
11495 spilling and loading the constant into a register (at least
11496 6 insns). */
11497 while (! SPARC_SIMM13_P (vcall_offset))
11498 {
11499 emit_insn (gen_add2_insn (scratch, increment));
11500 vcall_offset += 4096;
11501 }
11502 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11503 }
11504
11505 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11506 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11507 gen_rtx_PLUS (Pmode,
11508 scratch,
11509 vcall_offset_rtx)));
11510
11511 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11512 emit_insn (gen_add2_insn (this_rtx, scratch));
11513 }
11514
11515 /* Generate a tail call to the target function. */
11516 if (! TREE_USED (function))
11517 {
11518 assemble_external (function);
11519 TREE_USED (function) = 1;
11520 }
11521 funexp = XEXP (DECL_RTL (function), 0);
11522
11523 if (flag_delayed_branch)
11524 {
11525 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11526 insn = emit_call_insn (gen_sibcall (funexp));
11527 SIBLING_CALL_P (insn) = 1;
11528 }
11529 else
11530 {
11531 /* The hoops we have to jump through in order to generate a sibcall
11532 without using delay slots... */
11533 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11534
11535 if (flag_pic)
11536 {
11537 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11538 start_sequence ();
11539 load_got_register (); /* clobbers %o7 */
11540 scratch = sparc_legitimize_pic_address (funexp, scratch);
11541 seq = get_insns ();
11542 end_sequence ();
11543 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11544 }
11545 else if (TARGET_ARCH32)
11546 {
11547 emit_insn (gen_rtx_SET (scratch,
11548 gen_rtx_HIGH (SImode, funexp)));
11549 emit_insn (gen_rtx_SET (scratch,
11550 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11551 }
11552 else /* TARGET_ARCH64 */
11553 {
11554 switch (sparc_cmodel)
11555 {
11556 case CM_MEDLOW:
11557 case CM_MEDMID:
11558 /* The destination can serve as a temporary. */
11559 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11560 break;
11561
11562 case CM_MEDANY:
11563 case CM_EMBMEDANY:
11564 /* The destination cannot serve as a temporary. */
11565 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11566 start_sequence ();
11567 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11568 seq = get_insns ();
11569 end_sequence ();
11570 emit_and_preserve (seq, spill_reg, 0);
11571 break;
11572
11573 default:
11574 gcc_unreachable ();
11575 }
11576 }
11577
11578 emit_jump_insn (gen_indirect_jump (scratch));
11579 }
11580
11581 emit_barrier ();
11582
11583 /* Run just enough of rest_of_compilation to get the insns emitted.
11584 There's not really enough bulk here to make other passes such as
11585 instruction scheduling worth while. Note that use_thunk calls
11586 assemble_start_function and assemble_end_function. */
11587 insn = get_insns ();
11588 shorten_branches (insn);
11589 final_start_function (insn, file, 1);
11590 final (insn, file, 1);
11591 final_end_function ();
11592
11593 reload_completed = 0;
11594 epilogue_completed = 0;
11595 }
11596
11597 /* Return true if sparc_output_mi_thunk would be able to output the
11598 assembler code for the thunk function specified by the arguments
11599 it is passed, and false otherwise. */
11600 static bool
11601 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11602 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11603 HOST_WIDE_INT vcall_offset,
11604 const_tree function ATTRIBUTE_UNUSED)
11605 {
11606 /* Bound the loop used in the default method above. */
11607 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11608 }
11609
11610 /* How to allocate a 'struct machine_function'. */
11611
11612 static struct machine_function *
11613 sparc_init_machine_status (void)
11614 {
11615 return ggc_cleared_alloc<machine_function> ();
11616 }
11617
11618 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11619 We need to emit DTP-relative relocations. */
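/* For example, for a 32-bit DTP-relative reference to a TLS symbol foo
   (a placeholder name), this outputs:

	.word	%r_tls_dtpoff32(foo)  */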
11620
11621 static void
11622 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11623 {
11624 switch (size)
11625 {
11626 case 4:
11627 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11628 break;
11629 case 8:
11630 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11631 break;
11632 default:
11633 gcc_unreachable ();
11634 }
11635 output_addr_const (file, x);
11636 fputs (")", file);
11637 }
11638
11639 /* Do whatever processing is required at the end of a file. */
11640
11641 static void
11642 sparc_file_end (void)
11643 {
11644 /* If we need to emit the special GOT helper function, do so now. */
11645 if (got_helper_rtx)
11646 {
11647 const char *name = XSTR (got_helper_rtx, 0);
11648 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11649 #ifdef DWARF2_UNWIND_INFO
11650 bool do_cfi;
11651 #endif
11652
11653 if (USE_HIDDEN_LINKONCE)
11654 {
11655 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11656 get_identifier (name),
11657 build_function_type_list (void_type_node,
11658 NULL_TREE));
11659 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11660 NULL_TREE, void_type_node);
11661 TREE_PUBLIC (decl) = 1;
11662 TREE_STATIC (decl) = 1;
11663 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11664 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11665 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11666 resolve_unique_section (decl, 0, flag_function_sections);
11667 allocate_struct_function (decl, true);
11668 cfun->is_thunk = 1;
11669 current_function_decl = decl;
11670 init_varasm_status ();
11671 assemble_start_function (decl, name);
11672 }
11673 else
11674 {
11675 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11676 switch_to_section (text_section);
11677 if (align > 0)
11678 ASM_OUTPUT_ALIGN (asm_out_file, align);
11679 ASM_OUTPUT_LABEL (asm_out_file, name);
11680 }
11681
11682 #ifdef DWARF2_UNWIND_INFO
11683 do_cfi = dwarf2out_do_cfi_asm ();
11684 if (do_cfi)
11685 fprintf (asm_out_file, "\t.cfi_startproc\n");
11686 #endif
11687 if (flag_delayed_branch)
11688 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11689 reg_name, reg_name);
11690 else
11691 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11692 reg_name, reg_name);
11693 #ifdef DWARF2_UNWIND_INFO
11694 if (do_cfi)
11695 fprintf (asm_out_file, "\t.cfi_endproc\n");
11696 #endif
11697 }
11698
11699 if (NEED_INDICATE_EXEC_STACK)
11700 file_end_indicate_exec_stack ();
11701
11702 #ifdef TARGET_SOLARIS
11703 solaris_file_end ();
11704 #endif
11705 }
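/* With delayed branches enabled and %l7 as the GOT register, the helper
   emitted above looks roughly like this (the name is illustrative):

     __sparc_get_pc_thunk.l7:
	jmp	%o7+8
	 add	%o7, %l7, %l7  */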
11706
11707 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11708 /* Implement TARGET_MANGLE_TYPE. */
11709
11710 static const char *
11711 sparc_mangle_type (const_tree type)
11712 {
11713 if (!TARGET_64BIT
11714 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11715 && TARGET_LONG_DOUBLE_128)
11716 return "g";
11717
11718 /* For all other types, use normal C++ mangling. */
11719 return NULL;
11720 }
11721 #endif
11722
11723 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11724 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks
11725 where bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11726
11727 void
11728 sparc_emit_membar_for_model (enum memmodel model,
11729 int load_store, int before_after)
11730 {
11731 /* Bits for the MEMBAR mmask field. */
11732 const int LoadLoad = 1;
11733 const int StoreLoad = 2;
11734 const int LoadStore = 4;
11735 const int StoreStore = 8;
11736
11737 int mm = 0, implied = 0;
11738
11739 switch (sparc_memory_model)
11740 {
11741 case SMM_SC:
11742 /* Sequential Consistency. All memory transactions are immediately
11743 visible in sequential execution order. No barriers needed. */
11744 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11745 break;
11746
11747 case SMM_TSO:
11748 /* Total Store Ordering: all memory transactions with store semantics
11749 are followed by an implied StoreStore. */
11750 implied |= StoreStore;
11751
11752 /* If we're not looking for a raw barrier (before+after), then atomic
11753 operations get the benefit of being both load and store. */
11754 if (load_store == 3 && before_after == 1)
11755 implied |= StoreLoad;
11756 /* FALLTHRU */
11757
11758 case SMM_PSO:
11759 /* Partial Store Ordering: all memory transactions with load semantics
11760 are followed by an implied LoadLoad | LoadStore. */
11761 implied |= LoadLoad | LoadStore;
11762
11763 /* If we're not looking for a raw barrier (before+after), then atomic
11764 operations get the benefit of being both load and store. */
11765 if (load_store == 3 && before_after == 2)
11766 implied |= StoreLoad | StoreStore;
11767 /* FALLTHRU */
11768
11769 case SMM_RMO:
11770 /* Relaxed Memory Ordering: no implicit bits. */
11771 break;
11772
11773 default:
11774 gcc_unreachable ();
11775 }
11776
11777 if (before_after & 1)
11778 {
11779 if (is_mm_release (model) || is_mm_acq_rel (model)
11780 || is_mm_seq_cst (model))
11781 {
11782 if (load_store & 1)
11783 mm |= LoadLoad | StoreLoad;
11784 if (load_store & 2)
11785 mm |= LoadStore | StoreStore;
11786 }
11787 }
11788 if (before_after & 2)
11789 {
11790 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11791 || is_mm_seq_cst (model))
11792 {
11793 if (load_store & 1)
11794 mm |= LoadLoad | LoadStore;
11795 if (load_store & 2)
11796 mm |= StoreLoad | StoreStore;
11797 }
11798 }
11799
11800 /* Remove the bits implied by the system memory model. */
11801 mm &= ~implied;
11802
11803 /* For raw barriers (before+after), always emit a barrier.
11804 This will become a compile-time barrier if needed. */
11805 if (mm || before_after == 3)
11806 emit_insn (gen_membar (GEN_INT (mm)));
11807 }
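/* A worked example: the barrier preceding a SEQ_CST atomic operation is
   requested as sparc_emit_membar_for_model (model, 3, 1). Under TSO the
   implied set is LoadLoad | StoreLoad | LoadStore | StoreStore, which
   already covers everything the model asks for, so no membar is emitted.
   Under RMO nothing is implied, so a membar with all four mmask bits set
   is emitted instead. */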
11808
11809 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
11810 32-bit compare and swap on the word containing the byte or half-word. */
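/* A sketch of the expansion in C-like pseudo-code, where MEMSI is the
   aligned SImode word containing the value, MASK selects the value's bits
   within it, OFF is its bit offset, and OLDV/NEWV are OLDVAL/NEWVAL
   shifted into place and masked:

     val = *memsi & ~mask;
     loop:
       bool_result = 1;
       res = CAS (memsi, oldv | val, newv | val);
       if (res == (oldv | val))
	 goto done;			// success
       bool_result = 0;
       resv = res & ~mask;
       if (resv != val)
	 { val = resv; goto loop; }	// unrelated bytes changed, retry
     done:
       result = (res & mask) >> off;  */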
11811
11812 static void
11813 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11814 rtx oldval, rtx newval)
11815 {
11816 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11817 rtx addr = gen_reg_rtx (Pmode);
11818 rtx off = gen_reg_rtx (SImode);
11819 rtx oldv = gen_reg_rtx (SImode);
11820 rtx newv = gen_reg_rtx (SImode);
11821 rtx oldvalue = gen_reg_rtx (SImode);
11822 rtx newvalue = gen_reg_rtx (SImode);
11823 rtx res = gen_reg_rtx (SImode);
11824 rtx resv = gen_reg_rtx (SImode);
11825 rtx memsi, val, mask, cc;
11826
11827 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11828
11829 if (Pmode != SImode)
11830 addr1 = gen_lowpart (SImode, addr1);
11831 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11832
11833 memsi = gen_rtx_MEM (SImode, addr);
11834 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11835 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11836
11837 val = copy_to_reg (memsi);
11838
11839 emit_insn (gen_rtx_SET (off,
11840 gen_rtx_XOR (SImode, off,
11841 GEN_INT (GET_MODE (mem) == QImode
11842 ? 3 : 2))));
11843
11844 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11845
11846 if (GET_MODE (mem) == QImode)
11847 mask = force_reg (SImode, GEN_INT (0xff));
11848 else
11849 mask = force_reg (SImode, GEN_INT (0xffff));
11850
11851 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11852
11853 emit_insn (gen_rtx_SET (val,
11854 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11855 val)));
11856
11857 oldval = gen_lowpart (SImode, oldval);
11858 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11859
11860 newval = gen_lowpart_common (SImode, newval);
11861 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11862
11863 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11864
11865 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11866
11867 rtx_code_label *end_label = gen_label_rtx ();
11868 rtx_code_label *loop_label = gen_label_rtx ();
11869 emit_label (loop_label);
11870
11871 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11872
11873 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11874
11875 emit_move_insn (bool_result, const1_rtx);
11876
11877 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11878
11879 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11880
11881 emit_insn (gen_rtx_SET (resv,
11882 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11883 res)));
11884
11885 emit_move_insn (bool_result, const0_rtx);
11886
11887 cc = gen_compare_reg_1 (NE, resv, val);
11888 emit_insn (gen_rtx_SET (val, resv));
11889
11890 /* Use cbranchcc4 to separate the compare and branch! */
11891 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11892 cc, const0_rtx, loop_label));
11893
11894 emit_label (end_label);
11895
11896 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11897
11898 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11899
11900 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11901 }
11902
11903 /* Expand code to perform a compare-and-swap. */
11904
11905 void
11906 sparc_expand_compare_and_swap (rtx operands[])
11907 {
11908 rtx bval, retval, mem, oldval, newval;
11909 machine_mode mode;
11910 enum memmodel model;
11911
11912 bval = operands[0];
11913 retval = operands[1];
11914 mem = operands[2];
11915 oldval = operands[3];
11916 newval = operands[4];
11917 model = (enum memmodel) INTVAL (operands[6]);
11918 mode = GET_MODE (mem);
11919
11920 sparc_emit_membar_for_model (model, 3, 1);
11921
11922 if (reg_overlap_mentioned_p (retval, oldval))
11923 oldval = copy_to_reg (oldval);
11924
11925 if (mode == QImode || mode == HImode)
11926 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11927 else
11928 {
11929 rtx (*gen) (rtx, rtx, rtx, rtx);
11930 rtx x;
11931
11932 if (mode == SImode)
11933 gen = gen_atomic_compare_and_swapsi_1;
11934 else
11935 gen = gen_atomic_compare_and_swapdi_1;
11936 emit_insn (gen (retval, mem, oldval, newval));
11937
11938 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11939 if (x != bval)
11940 convert_move (bval, x, 1);
11941 }
11942
11943 sparc_emit_membar_for_model (model, 3, 2);
11944 }
11945
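/* Subroutine used when expanding vector permutations. Convert SEL, a
   vector of element indices in mode VMODE, into the equivalent byte-index
   form and load it into the GSR mask via the bmask insn, ready for a
   subsequent bshuffle. */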
11946 void
11947 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11948 {
11949 rtx t_1, t_2, t_3;
11950
11951 sel = gen_lowpart (DImode, sel);
11952 switch (vmode)
11953 {
11954 case V2SImode:
11955 /* inp = xxxxxxxAxxxxxxxB */
11956 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11957 NULL_RTX, 1, OPTAB_DIRECT);
11958 /* t_1 = ....xxxxxxxAxxx. */
11959 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11960 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11961 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11962 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11963 /* sel = .......B */
11964 /* t_1 = ...A.... */
11965 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11966 /* sel = ...A...B */
11967 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11968 /* sel = AAAABBBB * 4 */
11969 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11970 /* sel = { A*4, A*4+1, A*4+2, ... } */
11971 break;
11972
11973 case V4HImode:
11974 /* inp = xxxAxxxBxxxCxxxD */
11975 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11976 NULL_RTX, 1, OPTAB_DIRECT);
11977 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11978 NULL_RTX, 1, OPTAB_DIRECT);
11979 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11980 NULL_RTX, 1, OPTAB_DIRECT);
11981 /* t_1 = ..xxxAxxxBxxxCxx */
11982 /* t_2 = ....xxxAxxxBxxxC */
11983 /* t_3 = ......xxxAxxxBxx */
11984 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11985 GEN_INT (0x07),
11986 NULL_RTX, 1, OPTAB_DIRECT);
11987 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11988 GEN_INT (0x0700),
11989 NULL_RTX, 1, OPTAB_DIRECT);
11990 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11991 GEN_INT (0x070000),
11992 NULL_RTX, 1, OPTAB_DIRECT);
11993 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11994 GEN_INT (0x07000000),
11995 NULL_RTX, 1, OPTAB_DIRECT);
11996 /* sel = .......D */
11997 /* t_1 = .....C.. */
11998 /* t_2 = ...B.... */
11999 /* t_3 = .A...... */
12000 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12001 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12002 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12003 /* sel = .A.B.C.D */
12004 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12005 /* sel = AABBCCDD * 2 */
12006 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12007 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12008 break;
12009
12010 case V8QImode:
12011 /* input = xAxBxCxDxExFxGxH */
12012 sel = expand_simple_binop (DImode, AND, sel,
12013 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12014 | 0x0f0f0f0f),
12015 NULL_RTX, 1, OPTAB_DIRECT);
12016 /* sel = .A.B.C.D.E.F.G.H */
12017 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12018 NULL_RTX, 1, OPTAB_DIRECT);
12019 /* t_1 = ..A.B.C.D.E.F.G. */
12020 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12021 NULL_RTX, 1, OPTAB_DIRECT);
12022 /* sel = .AABBCCDDEEFFGGH */
12023 sel = expand_simple_binop (DImode, AND, sel,
12024 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12025 | 0xff00ff),
12026 NULL_RTX, 1, OPTAB_DIRECT);
12027 /* sel = ..AB..CD..EF..GH */
12028 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12029 NULL_RTX, 1, OPTAB_DIRECT);
12030 /* t_1 = ....AB..CD..EF.. */
12031 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12032 NULL_RTX, 1, OPTAB_DIRECT);
12033 /* sel = ..ABABCDCDEFEFGH */
12034 sel = expand_simple_binop (DImode, AND, sel,
12035 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12036 NULL_RTX, 1, OPTAB_DIRECT);
12037 /* sel = ....ABCD....EFGH */
12038 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12039 NULL_RTX, 1, OPTAB_DIRECT);
12040 /* t_1 = ........ABCD.... */
12041 sel = gen_lowpart (SImode, sel);
12042 t_1 = gen_lowpart (SImode, t_1);
12043 break;
12044
12045 default:
12046 gcc_unreachable ();
12047 }
12048
12049 /* Always perform the final addition/merge within the bmask insn. */
12050 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12051 }
12052
12053 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12054
12055 static bool
12056 sparc_frame_pointer_required (void)
12057 {
12058 /* If the stack pointer is dynamically modified in the function, it cannot
12059 serve as the frame pointer. */
12060 if (cfun->calls_alloca)
12061 return true;
12062
12063 /* If the function receives nonlocal gotos, it needs to save the frame
12064 pointer in the nonlocal_goto_save_area object. */
12065 if (cfun->has_nonlocal_label)
12066 return true;
12067
12068 /* In flat mode, that's it. */
12069 if (TARGET_FLAT)
12070 return false;
12071
12072 /* Otherwise, the frame pointer is required if the function isn't leaf. */
12073 return !(crtl->is_leaf && only_leaf_regs_used ());
12074 }
12075
12076 /* The way this is structured, we can't eliminate SFP in favor of SP
12077 if the frame pointer is required: we want to use the SFP->HFP elimination
12078 in that case. But the test in update_eliminables doesn't know we are
12079 assuming below that we only do the former elimination. */
12080
12081 static bool
12082 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12083 {
12084 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12085 }
12086
12087 /* Return the hard frame pointer directly to bypass the stack bias. */
12088
12089 static rtx
12090 sparc_builtin_setjmp_frame_value (void)
12091 {
12092 return hard_frame_pointer_rtx;
12093 }
12094
12095 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12096 they won't be allocated. */
12097
12098 static void
12099 sparc_conditional_register_usage (void)
12100 {
12101 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12102 {
12103 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12104 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12105 }
12106 /* If the user has passed -f{fixed,call-{used,saved}}-g5, then
12107 honor it. */
12108 if (TARGET_ARCH32 && fixed_regs[5])
12109 fixed_regs[5] = 1;
12110 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12111 fixed_regs[5] = 0;
12112 if (! TARGET_V9)
12113 {
12114 int regno;
12115 for (regno = SPARC_FIRST_V9_FP_REG;
12116 regno <= SPARC_LAST_V9_FP_REG;
12117 regno++)
12118 fixed_regs[regno] = 1;
12119 /* %fcc0 is used by v8 and v9. */
12120 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12121 regno <= SPARC_LAST_V9_FCC_REG;
12122 regno++)
12123 fixed_regs[regno] = 1;
12124 }
12125 if (! TARGET_FPU)
12126 {
12127 int regno;
12128 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12129 fixed_regs[regno] = 1;
12130 }
12131 /* If the user has passed -f{fixed,call-{used,saved}}-g2, then
12132 honor it. Likewise with g3 and g4. */
12133 if (fixed_regs[2] == 2)
12134 fixed_regs[2] = ! TARGET_APP_REGS;
12135 if (fixed_regs[3] == 2)
12136 fixed_regs[3] = ! TARGET_APP_REGS;
12137 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12138 fixed_regs[4] = ! TARGET_APP_REGS;
12139 else if (TARGET_CM_EMBMEDANY)
12140 fixed_regs[4] = 1;
12141 else if (fixed_regs[4] == 2)
12142 fixed_regs[4] = 0;
12143 if (TARGET_FLAT)
12144 {
12145 int regno;
12146 /* Disable leaf functions. */
12147 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12148 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12149 leaf_reg_remap [regno] = regno;
12150 }
12151 if (TARGET_VIS)
12152 global_regs[SPARC_GSR_REG] = 1;
12153 }
12154
12155 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12156
12157 - We can't load constants into FP registers.
12158 - We can't load FP constants into integer registers when soft-float,
12159 because there is no soft-float pattern with a r/F constraint.
12160 - We can't load FP constants into integer registers for TFmode unless
12161 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12162 - Try and reload integer constants (symbolic or otherwise) back into
12163 registers directly, rather than having them dumped to memory. */
12164
12165 static reg_class_t
12166 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12167 {
12168 machine_mode mode = GET_MODE (x);
12169 if (CONSTANT_P (x))
12170 {
12171 if (FP_REG_CLASS_P (rclass)
12172 || rclass == GENERAL_OR_FP_REGS
12173 || rclass == GENERAL_OR_EXTRA_FP_REGS
12174 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12175 || (mode == TFmode && ! const_zero_operand (x, mode)))
12176 return NO_REGS;
12177
12178 if (GET_MODE_CLASS (mode) == MODE_INT)
12179 return GENERAL_REGS;
12180
12181 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12182 {
12183 if (! FP_REG_CLASS_P (rclass)
12184 || !(const_zero_operand (x, mode)
12185 || const_all_ones_operand (x, mode)))
12186 return NO_REGS;
12187 }
12188 }
12189
12190 if (TARGET_VIS3
12191 && ! TARGET_ARCH64
12192 && (rclass == EXTRA_FP_REGS
12193 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12194 {
12195 int regno = true_regnum (x);
12196
12197 if (SPARC_INT_REG_P (regno))
12198 return (rclass == EXTRA_FP_REGS
12199 ? FP_REGS : GENERAL_OR_FP_REGS);
12200 }
12201
12202 return rclass;
12203 }
12204
12205 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12206 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12207
12208 const char *
12209 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12210 {
12211 char mulstr[32];
12212
12213 gcc_assert (! TARGET_ARCH64);
12214
12215 if (sparc_check_64 (operands[1], insn) <= 0)
12216 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12217 if (which_alternative == 1)
12218 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12219 if (GET_CODE (operands[2]) == CONST_INT)
12220 {
12221 if (which_alternative == 1)
12222 {
12223 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12224 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12225 output_asm_insn (mulstr, operands);
12226 return "srlx\t%L0, 32, %H0";
12227 }
12228 else
12229 {
12230 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12231 output_asm_insn ("or\t%L1, %3, %3", operands);
12232 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12233 output_asm_insn (mulstr, operands);
12234 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12235 return "mov\t%3, %L0";
12236 }
12237 }
12238 else if (rtx_equal_p (operands[1], operands[2]))
12239 {
12240 if (which_alternative == 1)
12241 {
12242 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12243 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12244 output_asm_insn (mulstr, operands);
12245 return "srlx\t%L0, 32, %H0";
12246 }
12247 else
12248 {
12249 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12250 output_asm_insn ("or\t%L1, %3, %3", operands);
12251 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12252 output_asm_insn (mulstr, operands);
12253 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12254 return "mov\t%3, %L0";
12255 }
12256 }
12257 if (sparc_check_64 (operands[2], insn) <= 0)
12258 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12259 if (which_alternative == 1)
12260 {
12261 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12262 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12263 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12264 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12265 output_asm_insn (mulstr, operands);
12266 return "srlx\t%L0, 32, %H0";
12267 }
12268 else
12269 {
12270 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12271 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12272 output_asm_insn ("or\t%L1, %3, %3", operands);
12273 output_asm_insn ("or\t%L2, %4, %4", operands);
12274 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12275 output_asm_insn (mulstr, operands);
12276 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12277 return "mov\t%3, %L0";
12278 }
12279 }
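/* For instance, with OPCODE "mulx" (shown for illustration), both inputs
   in register pairs (alternative 0) and neither input known to be already
   zero-extended, the emitted sequence is roughly:

	srl	%L1, 0, %L1	! zero-extend the low words
	srl	%L2, 0, %L2
	sllx	%H1, 32, %3	! rebuild the full 64-bit values
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	mulx	%3, %4, %3	! 64-bit multiply
	srlx	%3, 32, %H0	! split the result into the output pair
	mov	%3, %L0  */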
12280
12281 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12282 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12283 and INNER_MODE are the modes describing TARGET. */
12284
12285 static void
12286 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12287 machine_mode inner_mode)
12288 {
12289 rtx t1, final_insn, sel;
12290 int bmask;
12291
12292 t1 = gen_reg_rtx (mode);
12293
12294 elt = convert_modes (SImode, inner_mode, elt, true);
12295 emit_move_insn (gen_lowpart (SImode, t1), elt);
12296
12297 switch (mode)
12298 {
12299 case V2SImode:
12300 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12301 bmask = 0x45674567;
12302 break;
12303 case V4HImode:
12304 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12305 bmask = 0x67676767;
12306 break;
12307 case V8QImode:
12308 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12309 bmask = 0x77777777;
12310 break;
12311 default:
12312 gcc_unreachable ();
12313 }
12314
12315 sel = force_reg (SImode, GEN_INT (bmask));
12316 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12317 emit_insn (final_insn);
12318 }
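/* The bmask constants above pick out the bytes of T1 that hold ELT
   (SPARC is big-endian, so the SImode lowpart lives in bytes 4-7):
   0x77777777 replicates byte 7 for V8QImode, 0x67676767 replicates the
   halfword in bytes 6-7 for V4HImode, and 0x45674567 replicates the
   word in bytes 4-7 for V2SImode. */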
12319
12320 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12321 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12322
12323 static void
12324 vector_init_fpmerge (rtx target, rtx elt)
12325 {
12326 rtx t1, t2, t2_low, t3, t3_low;
12327
12328 t1 = gen_reg_rtx (V4QImode);
12329 elt = convert_modes (SImode, QImode, elt, true);
12330 emit_move_insn (gen_lowpart (SImode, t1), elt);
12331
12332 t2 = gen_reg_rtx (V8QImode);
12333 t2_low = gen_lowpart (V4QImode, t2);
12334 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12335
12336 t3 = gen_reg_rtx (V8QImode);
12337 t3_low = gen_lowpart (V4QImode, t3);
12338 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12339
12340 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12341 }
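/* Each FPMERGE interleaves the bytes of its two 4-byte inputs. Starting
   from T1 = abcd with ELT in byte d, the three merges above successively
   produce t2 = aabbccdd, t3 = ccccdddd and finally target = dddddddd,
   i.e. ELT replicated into all eight bytes. */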
12342
12343 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12344 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12345
12346 static void
12347 vector_init_faligndata (rtx target, rtx elt)
12348 {
12349 rtx t1 = gen_reg_rtx (V4HImode);
12350 int i;
12351
12352 elt = convert_modes (SImode, HImode, elt, true);
12353 emit_move_insn (gen_lowpart (SImode, t1), elt);
12354
12355 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12356 force_reg (SImode, GEN_INT (6)),
12357 const0_rtx));
12358
12359 for (i = 0; i < 4; i++)
12360 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12361 }
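/* With the GSR alignment offset set to 6 above, each FALIGNDATA extracts
   bytes 6..13 of the concatenation T1:TARGET, i.e. the halfword of T1
   holding ELT followed by the first six bytes of TARGET. Each iteration
   thus shifts ELT in from the left, so after four iterations all four
   halfwords of TARGET equal ELT. */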
12362
12363 /* Emit code to initialize TARGET to values for individual fields VALS. */
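/* The strategy, from cheapest to most general: a single constant-vector
   move if all elements are constant; a single scalar move if one element
   spans the whole vector; two word moves if two word-sized elements do;
   a VIS2 bshuffle or VIS fpmerge/faligndata splat for 8-byte vectors
   whose elements are all the same; otherwise spill the elements to a
   stack temporary and load the vector from there. */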
12364
12365 void
12366 sparc_expand_vector_init (rtx target, rtx vals)
12367 {
12368 const machine_mode mode = GET_MODE (target);
12369 const machine_mode inner_mode = GET_MODE_INNER (mode);
12370 const int n_elts = GET_MODE_NUNITS (mode);
12371 int i, n_var = 0;
12372 bool all_same = true;
12373 rtx mem;
12374
12375 for (i = 0; i < n_elts; i++)
12376 {
12377 rtx x = XVECEXP (vals, 0, i);
12378 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12379 n_var++;
12380
12381 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12382 all_same = false;
12383 }
12384
12385 if (n_var == 0)
12386 {
12387 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12388 return;
12389 }
12390
12391 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12392 {
12393 if (GET_MODE_SIZE (inner_mode) == 4)
12394 {
12395 emit_move_insn (gen_lowpart (SImode, target),
12396 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12397 return;
12398 }
12399 else if (GET_MODE_SIZE (inner_mode) == 8)
12400 {
12401 emit_move_insn (gen_lowpart (DImode, target),
12402 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12403 return;
12404 }
12405 }
12406 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12407 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12408 {
12409 emit_move_insn (gen_highpart (word_mode, target),
12410 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12411 emit_move_insn (gen_lowpart (word_mode, target),
12412 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12413 return;
12414 }
12415
12416 if (all_same && GET_MODE_SIZE (mode) == 8)
12417 {
12418 if (TARGET_VIS2)
12419 {
12420 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12421 return;
12422 }
12423 if (mode == V8QImode)
12424 {
12425 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12426 return;
12427 }
12428 if (mode == V4HImode)
12429 {
12430 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12431 return;
12432 }
12433 }
12434
12435 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12436 for (i = 0; i < n_elts; i++)
12437 emit_move_insn (adjust_address_nv (mem, inner_mode,
12438 i * GET_MODE_SIZE (inner_mode)),
12439 XVECEXP (vals, 0, i));
12440 emit_move_insn (target, mem);
12441 }
12442
12443 /* Implement TARGET_SECONDARY_RELOAD. */
12444
12445 static reg_class_t
12446 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12447 machine_mode mode, secondary_reload_info *sri)
12448 {
12449 enum reg_class rclass = (enum reg_class) rclass_i;
12450
12451 sri->icode = CODE_FOR_nothing;
12452 sri->extra_cost = 0;
12453
12454 /* We need a temporary when loading/storing an HImode/QImode value
12455 between memory and the FPU registers. This can happen when combine puts
12456 a paradoxical subreg in a float/fix conversion insn. */
12457 if (FP_REG_CLASS_P (rclass)
12458 && (mode == HImode || mode == QImode)
12459 && (GET_CODE (x) == MEM
12460 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12461 && true_regnum (x) == -1)))
12462 return GENERAL_REGS;
12463
12464 /* On 32-bit we need a temporary when loading/storing a DFmode value
12465 between unaligned memory and the upper FPU registers. */
12466 if (TARGET_ARCH32
12467 && rclass == EXTRA_FP_REGS
12468 && mode == DFmode
12469 && GET_CODE (x) == MEM
12470 && ! mem_min_alignment (x, 8))
12471 return FP_REGS;
12472
12473 if (((TARGET_CM_MEDANY
12474 && symbolic_operand (x, mode))
12475 || (TARGET_CM_EMBMEDANY
12476 && text_segment_operand (x, mode)))
12477 && ! flag_pic)
12478 {
12479 if (in_p)
12480 sri->icode = direct_optab_handler (reload_in_optab, mode);
12481 else
12482 sri->icode = direct_optab_handler (reload_out_optab, mode);
12483 return NO_REGS;
12484 }
12485
12486 if (TARGET_VIS3 && TARGET_ARCH32)
12487 {
12488 int regno = true_regnum (x);
12489
12490 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12491 to move 8-byte values in 4-byte pieces. This only works via
12492 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12493 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12494 an FP_REGS intermediate move. */
12495 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12496 || ((general_or_i64_p (rclass)
12497 || rclass == GENERAL_OR_FP_REGS)
12498 && SPARC_FP_REG_P (regno)))
12499 {
12500 sri->extra_cost = 2;
12501 return FP_REGS;
12502 }
12503 }
12504
12505 return NO_REGS;
12506 }
12507
12508 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12509 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12510
12511 bool
12512 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12513 {
12514 enum rtx_code rc = GET_CODE (operands[1]);
12515 machine_mode cmp_mode;
12516 rtx cc_reg, dst, cmp;
12517
12518 cmp = operands[1];
12519 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12520 return false;
12521
12522 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12523 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12524
12525 cmp_mode = GET_MODE (XEXP (cmp, 0));
12526 rc = GET_CODE (cmp);
12527
12528 dst = operands[0];
12529 if (! rtx_equal_p (operands[2], dst)
12530 && ! rtx_equal_p (operands[3], dst))
12531 {
12532 if (reg_overlap_mentioned_p (dst, cmp))
12533 dst = gen_reg_rtx (mode);
12534
12535 emit_move_insn (dst, operands[3]);
12536 }
12537 else if (operands[2] == dst)
12538 {
12539 operands[2] = operands[3];
12540
12541 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12542 rc = reverse_condition_maybe_unordered (rc);
12543 else
12544 rc = reverse_condition (rc);
12545 }
12546
12547 if (XEXP (cmp, 1) == const0_rtx
12548 && GET_CODE (XEXP (cmp, 0)) == REG
12549 && cmp_mode == DImode
12550 && v9_regcmp_p (rc))
12551 cc_reg = XEXP (cmp, 0);
12552 else
12553 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12554
12555 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12556
12557 emit_insn (gen_rtx_SET (dst,
12558 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12559
12560 if (dst != operands[0])
12561 emit_move_insn (operands[0], dst);
12562
12563 return true;
12564 }
12565
12566 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12567 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12568 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12569 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12570 code to be used for the condition mask. */
12571
12572 void
12573 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12574 {
12575 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12576 enum rtx_code code = GET_CODE (operands[3]);
12577
12578 mask = gen_reg_rtx (Pmode);
12579 cop0 = operands[4];
12580 cop1 = operands[5];
12581 if (code == LT || code == GE)
12582 {
12583 rtx t;
12584
12585 code = swap_condition (code);
12586 t = cop0; cop0 = cop1; cop1 = t;
12587 }
12588
12589 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12590
12591 fcmp = gen_rtx_UNSPEC (Pmode,
12592 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12593 fcode);
12594
12595 cmask = gen_rtx_UNSPEC (DImode,
12596 gen_rtvec (2, mask, gsr),
12597 ccode);
12598
12599 bshuf = gen_rtx_UNSPEC (mode,
12600 gen_rtvec (3, operands[1], operands[2], gsr),
12601 UNSPEC_BSHUFFLE);
12602
12603 emit_insn (gen_rtx_SET (mask, fcmp));
12604 emit_insn (gen_rtx_SET (gsr, cmask));
12605
12606 emit_insn (gen_rtx_SET (operands[0], bshuf));
12607 }
12608
12609 /* On SPARC, any mode that naturally allocates into the float registers
12610 has a natural size of 4; return that here, else UNITS_PER_WORD. */
12611
12612 unsigned int
12613 sparc_regmode_natural_size (machine_mode mode)
12614 {
12615 int size = UNITS_PER_WORD;
12616
12617 if (TARGET_ARCH64)
12618 {
12619 enum mode_class mclass = GET_MODE_CLASS (mode);
12620
12621 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12622 size = 4;
12623 }
12624
12625 return size;
12626 }
12627
12628 /* Return TRUE if it is a good idea to tie two pseudo registers
12629 when one has mode MODE1 and one has mode MODE2.
12630 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12631 for any hard reg, then this must be FALSE for correct output.
12632
12633 For V9 we have to deal with the fact that only the lower 32 floating
12634 point registers are 32-bit addressable. */
12635
12636 bool
12637 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12638 {
12639 enum mode_class mclass1, mclass2;
12640 unsigned short size1, size2;
12641
12642 if (mode1 == mode2)
12643 return true;
12644
12645 mclass1 = GET_MODE_CLASS (mode1);
12646 mclass2 = GET_MODE_CLASS (mode2);
12647 if (mclass1 != mclass2)
12648 return false;
12649
12650 if (! TARGET_V9)
12651 return true;
12652
12653 /* Classes are the same and we are V9 so we have to deal with upper
12654 vs. lower floating point registers. If one of the modes is a
12655 4-byte mode, and the other is not, we have to mark them as not
12656 tieable because only the lower 32 floating point register are
12657 addressable 32-bits at a time.
12658
12659 We can't just test explicitly for SFmode, otherwise we won't
12660 cover the vector mode cases properly. */
12661
12662 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12663 return true;
12664
12665 size1 = GET_MODE_SIZE (mode1);
12666 size2 = GET_MODE_SIZE (mode2);
12667 if ((size1 > 4 && size2 == 4)
12668 || (size2 > 4 && size1 == 4))
12669 return false;
12670
12671 return true;
12672 }
12673
12674 /* Implement TARGET_CSTORE_MODE. */
12675
12676 static machine_mode
12677 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12678 {
12679 return (TARGET_ARCH64 ? DImode : SImode);
12680 }
12681
12682 /* Return the compound expression made of T1 and T2. */
12683
12684 static inline tree
12685 compound_expr (tree t1, tree t2)
12686 {
12687 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12688 }
12689
12690 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12691
12692 static void
12693 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12694 {
12695 if (!TARGET_FPU)
12696 return;
12697
12698 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12699 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12700
12701 /* We generate the equivalent of feholdexcept (&fenv_var):
12702
12703 unsigned int fenv_var;
12704 __builtin_store_fsr (&fenv_var);
12705
12706 unsigned int tmp1_var;
12707 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12708
12709 __builtin_load_fsr (&tmp1_var); */
12710
12711 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12712 TREE_ADDRESSABLE (fenv_var) = 1;
12713 tree fenv_addr = build_fold_addr_expr (fenv_var);
12714 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12715 tree hold_stfsr
12716 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12717 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12718
12719 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12720 TREE_ADDRESSABLE (tmp1_var) = 1;
12721 tree masked_fenv_var
12722 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12723 build_int_cst (unsigned_type_node,
12724 ~(accrued_exception_mask | trap_enable_mask)));
12725 tree hold_mask
12726 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12727 NULL_TREE, NULL_TREE);
12728
12729 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12730 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12731 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12732
12733 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12734
12735 /* We reload the value of tmp1_var to clear the exceptions:
12736
12737 __builtin_load_fsr (&tmp1_var); */
12738
12739 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12740
12741 /* We generate the equivalent of feupdateenv (&fenv_var):
12742
12743 unsigned int tmp2_var;
12744 __builtin_store_fsr (&tmp2_var);
12745
12746 __builtin_load_fsr (&fenv_var);
12747
12748 if (SPARC_LOW_FE_EXCEPT_VALUES)
12749 tmp2_var >>= 5;
12750 __atomic_feraiseexcept ((int) tmp2_var); */
12751
12752 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12753 TREE_ADDRESSABLE (tmp2_var) = 1;
12754 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12755 tree update_stfsr
12756 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12757 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12758
12759 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12760
12761 tree atomic_feraiseexcept
12762 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12763 tree update_call
12764 = build_call_expr (atomic_feraiseexcept, 1,
12765 fold_convert (integer_type_node, tmp2_var));
12766
12767 if (SPARC_LOW_FE_EXCEPT_VALUES)
12768 {
12769 tree shifted_tmp2_var
12770 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12771 build_int_cst (unsigned_type_node, 5));
12772 tree update_shift
12773 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12774 update_call = compound_expr (update_shift, update_call);
12775 }
12776
12777 *update
12778 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12779 }
12780
12781 #include "gt-sparc.h"