1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2016 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "debug.h"
51 #include "common/common-target.h"
52 #include "gimplify.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "context.h"
58 #include "builtins.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Processor costs */
64
65 struct processor_costs {
66 /* Integer load */
67 const int int_load;
68
69 /* Integer signed load */
70 const int int_sload;
71
72 /* Integer zeroed load */
73 const int int_zload;
74
75 /* Float load */
76 const int float_load;
77
78 /* fmov, fneg, fabs */
79 const int float_move;
80
81 /* fadd, fsub */
82 const int float_plusminus;
83
84 /* fcmp */
85 const int float_cmp;
86
87 /* fmov, fmovr */
88 const int float_cmove;
89
90 /* fmul */
91 const int float_mul;
92
93 /* fdivs */
94 const int float_div_sf;
95
96 /* fdivd */
97 const int float_div_df;
98
99 /* fsqrts */
100 const int float_sqrt_sf;
101
102 /* fsqrtd */
103 const int float_sqrt_df;
104
105 /* umul/smul */
106 const int int_mul;
107
108 /* mulX */
109 const int int_mulX;
110
111 /* integer multiply cost for each bit set past the most
112 significant 3, so the formula for multiply cost becomes:
113
114 if (rs1 < 0)
115 highest_bit = highest_clear_bit(rs1);
116 else
117 highest_bit = highest_set_bit(rs1);
118 if (highest_bit < 3)
119 highest_bit = 3;
120 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
121
 122        A value of zero indicates that the multiply cost is fixed,
 123        not variable.  */
124 const int int_mul_bit_factor;
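     /* For instance (an illustrative reading of the formula above, using the
        ultrasparc_costs figures below: int_mul = COSTS_N_INSNS (4),
        int_mul_bit_factor = 2), an operand whose highest set bit is bit 11
        costs COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. COSTS_N_INSNS (4) + 4.  */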
125
126 /* udiv/sdiv */
127 const int int_div;
128
129 /* divX */
130 const int int_divX;
131
132 /* movcc, movr */
133 const int int_cmove;
134
135 /* penalty for shifts, due to scheduling rules etc. */
136 const int shift_penalty;
137 };
138
139 static const
140 struct processor_costs cypress_costs = {
141 COSTS_N_INSNS (2), /* int load */
142 COSTS_N_INSNS (2), /* int signed load */
143 COSTS_N_INSNS (2), /* int zeroed load */
144 COSTS_N_INSNS (2), /* float load */
145 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
146 COSTS_N_INSNS (5), /* fadd, fsub */
147 COSTS_N_INSNS (1), /* fcmp */
148 COSTS_N_INSNS (1), /* fmov, fmovr */
149 COSTS_N_INSNS (7), /* fmul */
150 COSTS_N_INSNS (37), /* fdivs */
151 COSTS_N_INSNS (37), /* fdivd */
152 COSTS_N_INSNS (63), /* fsqrts */
153 COSTS_N_INSNS (63), /* fsqrtd */
154 COSTS_N_INSNS (1), /* imul */
155 COSTS_N_INSNS (1), /* imulX */
156 0, /* imul bit factor */
157 COSTS_N_INSNS (1), /* idiv */
158 COSTS_N_INSNS (1), /* idivX */
159 COSTS_N_INSNS (1), /* movcc/movr */
160 0, /* shift penalty */
161 };
162
163 static const
164 struct processor_costs supersparc_costs = {
165 COSTS_N_INSNS (1), /* int load */
166 COSTS_N_INSNS (1), /* int signed load */
167 COSTS_N_INSNS (1), /* int zeroed load */
168 COSTS_N_INSNS (0), /* float load */
169 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
170 COSTS_N_INSNS (3), /* fadd, fsub */
171 COSTS_N_INSNS (3), /* fcmp */
172 COSTS_N_INSNS (1), /* fmov, fmovr */
173 COSTS_N_INSNS (3), /* fmul */
174 COSTS_N_INSNS (6), /* fdivs */
175 COSTS_N_INSNS (9), /* fdivd */
176 COSTS_N_INSNS (12), /* fsqrts */
177 COSTS_N_INSNS (12), /* fsqrtd */
178 COSTS_N_INSNS (4), /* imul */
179 COSTS_N_INSNS (4), /* imulX */
180 0, /* imul bit factor */
181 COSTS_N_INSNS (4), /* idiv */
182 COSTS_N_INSNS (4), /* idivX */
183 COSTS_N_INSNS (1), /* movcc/movr */
184 1, /* shift penalty */
185 };
186
187 static const
188 struct processor_costs hypersparc_costs = {
189 COSTS_N_INSNS (1), /* int load */
190 COSTS_N_INSNS (1), /* int signed load */
191 COSTS_N_INSNS (1), /* int zeroed load */
192 COSTS_N_INSNS (1), /* float load */
193 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
194 COSTS_N_INSNS (1), /* fadd, fsub */
195 COSTS_N_INSNS (1), /* fcmp */
196 COSTS_N_INSNS (1), /* fmov, fmovr */
197 COSTS_N_INSNS (1), /* fmul */
198 COSTS_N_INSNS (8), /* fdivs */
199 COSTS_N_INSNS (12), /* fdivd */
200 COSTS_N_INSNS (17), /* fsqrts */
201 COSTS_N_INSNS (17), /* fsqrtd */
202 COSTS_N_INSNS (17), /* imul */
203 COSTS_N_INSNS (17), /* imulX */
204 0, /* imul bit factor */
205 COSTS_N_INSNS (17), /* idiv */
206 COSTS_N_INSNS (17), /* idivX */
207 COSTS_N_INSNS (1), /* movcc/movr */
208 0, /* shift penalty */
209 };
210
211 static const
212 struct processor_costs leon_costs = {
213 COSTS_N_INSNS (1), /* int load */
214 COSTS_N_INSNS (1), /* int signed load */
215 COSTS_N_INSNS (1), /* int zeroed load */
216 COSTS_N_INSNS (1), /* float load */
217 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
218 COSTS_N_INSNS (1), /* fadd, fsub */
219 COSTS_N_INSNS (1), /* fcmp */
220 COSTS_N_INSNS (1), /* fmov, fmovr */
221 COSTS_N_INSNS (1), /* fmul */
222 COSTS_N_INSNS (15), /* fdivs */
223 COSTS_N_INSNS (15), /* fdivd */
224 COSTS_N_INSNS (23), /* fsqrts */
225 COSTS_N_INSNS (23), /* fsqrtd */
226 COSTS_N_INSNS (5), /* imul */
227 COSTS_N_INSNS (5), /* imulX */
228 0, /* imul bit factor */
229 COSTS_N_INSNS (5), /* idiv */
230 COSTS_N_INSNS (5), /* idivX */
231 COSTS_N_INSNS (1), /* movcc/movr */
232 0, /* shift penalty */
233 };
234
235 static const
236 struct processor_costs leon3_costs = {
237 COSTS_N_INSNS (1), /* int load */
238 COSTS_N_INSNS (1), /* int signed load */
239 COSTS_N_INSNS (1), /* int zeroed load */
240 COSTS_N_INSNS (1), /* float load */
241 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
242 COSTS_N_INSNS (1), /* fadd, fsub */
243 COSTS_N_INSNS (1), /* fcmp */
244 COSTS_N_INSNS (1), /* fmov, fmovr */
245 COSTS_N_INSNS (1), /* fmul */
246 COSTS_N_INSNS (14), /* fdivs */
247 COSTS_N_INSNS (15), /* fdivd */
248 COSTS_N_INSNS (22), /* fsqrts */
249 COSTS_N_INSNS (23), /* fsqrtd */
250 COSTS_N_INSNS (5), /* imul */
251 COSTS_N_INSNS (5), /* imulX */
252 0, /* imul bit factor */
253 COSTS_N_INSNS (35), /* idiv */
254 COSTS_N_INSNS (35), /* idivX */
255 COSTS_N_INSNS (1), /* movcc/movr */
256 0, /* shift penalty */
257 };
258
259 static const
260 struct processor_costs sparclet_costs = {
261 COSTS_N_INSNS (3), /* int load */
262 COSTS_N_INSNS (3), /* int signed load */
263 COSTS_N_INSNS (1), /* int zeroed load */
264 COSTS_N_INSNS (1), /* float load */
265 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
266 COSTS_N_INSNS (1), /* fadd, fsub */
267 COSTS_N_INSNS (1), /* fcmp */
268 COSTS_N_INSNS (1), /* fmov, fmovr */
269 COSTS_N_INSNS (1), /* fmul */
270 COSTS_N_INSNS (1), /* fdivs */
271 COSTS_N_INSNS (1), /* fdivd */
272 COSTS_N_INSNS (1), /* fsqrts */
273 COSTS_N_INSNS (1), /* fsqrtd */
274 COSTS_N_INSNS (5), /* imul */
275 COSTS_N_INSNS (5), /* imulX */
276 0, /* imul bit factor */
277 COSTS_N_INSNS (5), /* idiv */
278 COSTS_N_INSNS (5), /* idivX */
279 COSTS_N_INSNS (1), /* movcc/movr */
280 0, /* shift penalty */
281 };
282
283 static const
284 struct processor_costs ultrasparc_costs = {
285 COSTS_N_INSNS (2), /* int load */
286 COSTS_N_INSNS (3), /* int signed load */
287 COSTS_N_INSNS (2), /* int zeroed load */
288 COSTS_N_INSNS (2), /* float load */
289 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
290 COSTS_N_INSNS (4), /* fadd, fsub */
291 COSTS_N_INSNS (1), /* fcmp */
292 COSTS_N_INSNS (2), /* fmov, fmovr */
293 COSTS_N_INSNS (4), /* fmul */
294 COSTS_N_INSNS (13), /* fdivs */
295 COSTS_N_INSNS (23), /* fdivd */
296 COSTS_N_INSNS (13), /* fsqrts */
297 COSTS_N_INSNS (23), /* fsqrtd */
298 COSTS_N_INSNS (4), /* imul */
299 COSTS_N_INSNS (4), /* imulX */
300 2, /* imul bit factor */
301 COSTS_N_INSNS (37), /* idiv */
302 COSTS_N_INSNS (68), /* idivX */
303 COSTS_N_INSNS (2), /* movcc/movr */
304 2, /* shift penalty */
305 };
306
307 static const
308 struct processor_costs ultrasparc3_costs = {
309 COSTS_N_INSNS (2), /* int load */
310 COSTS_N_INSNS (3), /* int signed load */
311 COSTS_N_INSNS (3), /* int zeroed load */
312 COSTS_N_INSNS (2), /* float load */
313 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
314 COSTS_N_INSNS (4), /* fadd, fsub */
315 COSTS_N_INSNS (5), /* fcmp */
316 COSTS_N_INSNS (3), /* fmov, fmovr */
317 COSTS_N_INSNS (4), /* fmul */
318 COSTS_N_INSNS (17), /* fdivs */
319 COSTS_N_INSNS (20), /* fdivd */
320 COSTS_N_INSNS (20), /* fsqrts */
321 COSTS_N_INSNS (29), /* fsqrtd */
322 COSTS_N_INSNS (6), /* imul */
323 COSTS_N_INSNS (6), /* imulX */
324 0, /* imul bit factor */
325 COSTS_N_INSNS (40), /* idiv */
326 COSTS_N_INSNS (71), /* idivX */
327 COSTS_N_INSNS (2), /* movcc/movr */
328 0, /* shift penalty */
329 };
330
331 static const
332 struct processor_costs niagara_costs = {
333 COSTS_N_INSNS (3), /* int load */
334 COSTS_N_INSNS (3), /* int signed load */
335 COSTS_N_INSNS (3), /* int zeroed load */
336 COSTS_N_INSNS (9), /* float load */
337 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
338 COSTS_N_INSNS (8), /* fadd, fsub */
339 COSTS_N_INSNS (26), /* fcmp */
340 COSTS_N_INSNS (8), /* fmov, fmovr */
341 COSTS_N_INSNS (29), /* fmul */
342 COSTS_N_INSNS (54), /* fdivs */
343 COSTS_N_INSNS (83), /* fdivd */
344 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
345 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
346 COSTS_N_INSNS (11), /* imul */
347 COSTS_N_INSNS (11), /* imulX */
348 0, /* imul bit factor */
349 COSTS_N_INSNS (72), /* idiv */
350 COSTS_N_INSNS (72), /* idivX */
351 COSTS_N_INSNS (1), /* movcc/movr */
352 0, /* shift penalty */
353 };
354
355 static const
356 struct processor_costs niagara2_costs = {
357 COSTS_N_INSNS (3), /* int load */
358 COSTS_N_INSNS (3), /* int signed load */
359 COSTS_N_INSNS (3), /* int zeroed load */
360 COSTS_N_INSNS (3), /* float load */
361 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
362 COSTS_N_INSNS (6), /* fadd, fsub */
363 COSTS_N_INSNS (6), /* fcmp */
364 COSTS_N_INSNS (6), /* fmov, fmovr */
365 COSTS_N_INSNS (6), /* fmul */
366 COSTS_N_INSNS (19), /* fdivs */
367 COSTS_N_INSNS (33), /* fdivd */
368 COSTS_N_INSNS (19), /* fsqrts */
369 COSTS_N_INSNS (33), /* fsqrtd */
370 COSTS_N_INSNS (5), /* imul */
371 COSTS_N_INSNS (5), /* imulX */
372 0, /* imul bit factor */
373 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
374 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (1), /* movcc/movr */
376 0, /* shift penalty */
377 };
378
379 static const
380 struct processor_costs niagara3_costs = {
381 COSTS_N_INSNS (3), /* int load */
382 COSTS_N_INSNS (3), /* int signed load */
383 COSTS_N_INSNS (3), /* int zeroed load */
384 COSTS_N_INSNS (3), /* float load */
385 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
386 COSTS_N_INSNS (9), /* fadd, fsub */
387 COSTS_N_INSNS (9), /* fcmp */
388 COSTS_N_INSNS (9), /* fmov, fmovr */
389 COSTS_N_INSNS (9), /* fmul */
390 COSTS_N_INSNS (23), /* fdivs */
391 COSTS_N_INSNS (37), /* fdivd */
392 COSTS_N_INSNS (23), /* fsqrts */
393 COSTS_N_INSNS (37), /* fsqrtd */
394 COSTS_N_INSNS (9), /* imul */
395 COSTS_N_INSNS (9), /* imulX */
396 0, /* imul bit factor */
397 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
398 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
399 COSTS_N_INSNS (1), /* movcc/movr */
400 0, /* shift penalty */
401 };
402
403 static const
404 struct processor_costs niagara4_costs = {
405 COSTS_N_INSNS (5), /* int load */
406 COSTS_N_INSNS (5), /* int signed load */
407 COSTS_N_INSNS (5), /* int zeroed load */
408 COSTS_N_INSNS (5), /* float load */
409 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
410 COSTS_N_INSNS (11), /* fadd, fsub */
411 COSTS_N_INSNS (11), /* fcmp */
412 COSTS_N_INSNS (11), /* fmov, fmovr */
413 COSTS_N_INSNS (11), /* fmul */
414 COSTS_N_INSNS (24), /* fdivs */
415 COSTS_N_INSNS (37), /* fdivd */
416 COSTS_N_INSNS (24), /* fsqrts */
417 COSTS_N_INSNS (37), /* fsqrtd */
418 COSTS_N_INSNS (12), /* imul */
419 COSTS_N_INSNS (12), /* imulX */
420 0, /* imul bit factor */
421 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
422 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
423 COSTS_N_INSNS (1), /* movcc/movr */
424 0, /* shift penalty */
425 };
426
427 static const
428 struct processor_costs niagara7_costs = {
429 COSTS_N_INSNS (5), /* int load */
430 COSTS_N_INSNS (5), /* int signed load */
431 COSTS_N_INSNS (5), /* int zeroed load */
432 COSTS_N_INSNS (5), /* float load */
433 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
434 COSTS_N_INSNS (11), /* fadd, fsub */
435 COSTS_N_INSNS (11), /* fcmp */
436 COSTS_N_INSNS (11), /* fmov, fmovr */
437 COSTS_N_INSNS (11), /* fmul */
438 COSTS_N_INSNS (24), /* fdivs */
439 COSTS_N_INSNS (37), /* fdivd */
440 COSTS_N_INSNS (24), /* fsqrts */
441 COSTS_N_INSNS (37), /* fsqrtd */
442 COSTS_N_INSNS (12), /* imul */
443 COSTS_N_INSNS (12), /* imulX */
444 0, /* imul bit factor */
445 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
446 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
447 COSTS_N_INSNS (1), /* movcc/movr */
448 0, /* shift penalty */
449 };
450
451 static const struct processor_costs *sparc_costs = &cypress_costs;
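 /* Note that sparc_costs merely starts out pointing at the Cypress table; it
    is repointed in sparc_option_override below according to the processor
    selected for tuning (see the switch on sparc_cpu there).  */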
452
453 #ifdef HAVE_AS_RELAX_OPTION
454 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
455 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
 456    With sethi/jmp, neither 'as' nor 'ld' has an easy way to verify that
 457    nobody branches between the sethi and the jmp. */
458 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
459 #else
460 #define LEAF_SIBCALL_SLOT_RESERVED_P \
461 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
462 #endif
463
464 /* Vector to say how input registers are mapped to output registers.
465 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
466 eliminate it. You must use -fomit-frame-pointer to get that. */
467 char leaf_reg_remap[] =
468 { 0, 1, 2, 3, 4, 5, 6, 7,
469 -1, -1, -1, -1, -1, -1, 14, -1,
470 -1, -1, -1, -1, -1, -1, -1, -1,
471 8, 9, 10, 11, 12, 13, -1, 15,
472
473 32, 33, 34, 35, 36, 37, 38, 39,
474 40, 41, 42, 43, 44, 45, 46, 47,
475 48, 49, 50, 51, 52, 53, 54, 55,
476 56, 57, 58, 59, 60, 61, 62, 63,
477 64, 65, 66, 67, 68, 69, 70, 71,
478 72, 73, 74, 75, 76, 77, 78, 79,
479 80, 81, 82, 83, 84, 85, 86, 87,
480 88, 89, 90, 91, 92, 93, 94, 95,
481 96, 97, 98, 99, 100, 101, 102};
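 /* For example, reading the table above: in a function given the leaf
    treatment, %i0 (hard register 24) is rewritten to %o0 (hard register 8)
    because no register window is allocated; entries of -1 mark registers
    that may not appear in such a function (cf. sparc_leaf_regs below).  */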
482
483 /* Vector, indexed by hard register number, which contains 1
484 for a register that is allowable in a candidate for leaf
485 function treatment. */
486 char sparc_leaf_regs[] =
487 { 1, 1, 1, 1, 1, 1, 1, 1,
488 0, 0, 0, 0, 0, 0, 1, 0,
489 0, 0, 0, 0, 0, 0, 0, 0,
490 1, 1, 1, 1, 1, 1, 0, 1,
491 1, 1, 1, 1, 1, 1, 1, 1,
492 1, 1, 1, 1, 1, 1, 1, 1,
493 1, 1, 1, 1, 1, 1, 1, 1,
494 1, 1, 1, 1, 1, 1, 1, 1,
495 1, 1, 1, 1, 1, 1, 1, 1,
496 1, 1, 1, 1, 1, 1, 1, 1,
497 1, 1, 1, 1, 1, 1, 1, 1,
498 1, 1, 1, 1, 1, 1, 1, 1,
499 1, 1, 1, 1, 1, 1, 1};
500
501 struct GTY(()) machine_function
502 {
503 /* Size of the frame of the function. */
504 HOST_WIDE_INT frame_size;
505
506 /* Size of the frame of the function minus the register window save area
507 and the outgoing argument area. */
508 HOST_WIDE_INT apparent_frame_size;
509
510 /* Register we pretend the frame pointer is allocated to. Normally, this
511 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
512 record "offset" separately as it may be too big for (reg + disp). */
513 rtx frame_base_reg;
514 HOST_WIDE_INT frame_base_offset;
515
516 /* Number of global or FP registers to be saved (as 4-byte quantities). */
517 int n_global_fp_regs;
518
519 /* True if the current function is leaf and uses only leaf regs,
520 so that the SPARC leaf function optimization can be applied.
521 Private version of crtl->uses_only_leaf_regs, see
522 sparc_expand_prologue for the rationale. */
523 int leaf_function_p;
524
525 /* True if the prologue saves local or in registers. */
526 bool save_local_in_regs_p;
527
528 /* True if the data calculated by sparc_expand_prologue are valid. */
529 bool prologue_data_valid_p;
530 };
531
532 #define sparc_frame_size cfun->machine->frame_size
533 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
534 #define sparc_frame_base_reg cfun->machine->frame_base_reg
535 #define sparc_frame_base_offset cfun->machine->frame_base_offset
536 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
537 #define sparc_leaf_function_p cfun->machine->leaf_function_p
538 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
539 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
540
541 /* 1 if the next opcode is to be specially indented. */
542 int sparc_indent_opcode = 0;
543
544 static void sparc_option_override (void);
545 static void sparc_init_modes (void);
546 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
547 const_tree, bool, bool, int *, int *);
548
549 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
550 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
551
552 static void sparc_emit_set_const32 (rtx, rtx);
553 static void sparc_emit_set_const64 (rtx, rtx);
554 static void sparc_output_addr_vec (rtx);
555 static void sparc_output_addr_diff_vec (rtx);
556 static void sparc_output_deferred_case_vectors (void);
557 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
558 static bool sparc_legitimate_constant_p (machine_mode, rtx);
559 static rtx sparc_builtin_saveregs (void);
560 static int epilogue_renumber (rtx *, int);
561 static bool sparc_assemble_integer (rtx, unsigned int, int);
562 static int set_extends (rtx_insn *);
563 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
564 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
565 #ifdef TARGET_SOLARIS
566 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
567 tree) ATTRIBUTE_UNUSED;
568 #endif
569 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
570 static int sparc_issue_rate (void);
571 static void sparc_sched_init (FILE *, int, int);
572 static int sparc_use_sched_lookahead (void);
573
574 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
575 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
576 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
577 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
578 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
579
580 static bool sparc_function_ok_for_sibcall (tree, tree);
581 static void sparc_init_libfuncs (void);
582 static void sparc_init_builtins (void);
583 static void sparc_fpu_init_builtins (void);
584 static void sparc_vis_init_builtins (void);
585 static tree sparc_builtin_decl (unsigned, bool);
586 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
587 static tree sparc_fold_builtin (tree, int, tree *, bool);
588 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
589 HOST_WIDE_INT, tree);
590 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
591 HOST_WIDE_INT, const_tree);
592 static struct machine_function * sparc_init_machine_status (void);
593 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
594 static rtx sparc_tls_get_addr (void);
595 static rtx sparc_tls_got (void);
596 static int sparc_register_move_cost (machine_mode,
597 reg_class_t, reg_class_t);
598 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
599 static rtx sparc_function_value (const_tree, const_tree, bool);
600 static rtx sparc_libcall_value (machine_mode, const_rtx);
601 static bool sparc_function_value_regno_p (const unsigned int);
602 static rtx sparc_struct_value_rtx (tree, int);
603 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
604 int *, const_tree, int);
605 static bool sparc_return_in_memory (const_tree, const_tree);
606 static bool sparc_strict_argument_naming (cumulative_args_t);
607 static void sparc_va_start (tree, rtx);
608 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
609 static bool sparc_vector_mode_supported_p (machine_mode);
610 static bool sparc_tls_referenced_p (rtx);
611 static rtx sparc_legitimize_tls_address (rtx);
612 static rtx sparc_legitimize_pic_address (rtx, rtx);
613 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
614 static rtx sparc_delegitimize_address (rtx);
615 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
616 static bool sparc_pass_by_reference (cumulative_args_t,
617 machine_mode, const_tree, bool);
618 static void sparc_function_arg_advance (cumulative_args_t,
619 machine_mode, const_tree, bool);
620 static rtx sparc_function_arg_1 (cumulative_args_t,
621 machine_mode, const_tree, bool, bool);
622 static rtx sparc_function_arg (cumulative_args_t,
623 machine_mode, const_tree, bool);
624 static rtx sparc_function_incoming_arg (cumulative_args_t,
625 machine_mode, const_tree, bool);
626 static unsigned int sparc_function_arg_boundary (machine_mode,
627 const_tree);
628 static int sparc_arg_partial_bytes (cumulative_args_t,
629 machine_mode, tree, bool);
630 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
631 static void sparc_file_end (void);
632 static bool sparc_frame_pointer_required (void);
633 static bool sparc_can_eliminate (const int, const int);
634 static rtx sparc_builtin_setjmp_frame_value (void);
635 static void sparc_conditional_register_usage (void);
636 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
637 static const char *sparc_mangle_type (const_tree);
638 #endif
639 static void sparc_trampoline_init (rtx, tree, rtx);
640 static machine_mode sparc_preferred_simd_mode (machine_mode);
641 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
642 static bool sparc_lra_p (void);
643 static bool sparc_print_operand_punct_valid_p (unsigned char);
644 static void sparc_print_operand (FILE *, rtx, int);
645 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
646 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
647 machine_mode,
648 secondary_reload_info *);
649 static machine_mode sparc_cstore_mode (enum insn_code icode);
650 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
651 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
652 static unsigned int sparc_min_arithmetic_precision (void);
653 \f
654 #ifdef SUBTARGET_ATTRIBUTE_TABLE
655 /* Table of valid machine attributes. */
656 static const struct attribute_spec sparc_attribute_table[] =
657 {
658 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
659 do_diagnostic } */
660 SUBTARGET_ATTRIBUTE_TABLE,
661 { NULL, 0, 0, false, false, false, NULL, false }
662 };
663 #endif
664 \f
665 /* Option handling. */
666
667 /* Parsed value. */
668 enum cmodel sparc_cmodel;
669
670 char sparc_hard_reg_printed[8];
671
672 /* Initialize the GCC target structure. */
673
674 /* The default is to use .half rather than .short for aligned HI objects. */
675 #undef TARGET_ASM_ALIGNED_HI_OP
676 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
677
678 #undef TARGET_ASM_UNALIGNED_HI_OP
679 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
680 #undef TARGET_ASM_UNALIGNED_SI_OP
681 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
682 #undef TARGET_ASM_UNALIGNED_DI_OP
683 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
684
685 /* The target hook has to handle DI-mode values. */
686 #undef TARGET_ASM_INTEGER
687 #define TARGET_ASM_INTEGER sparc_assemble_integer
688
689 #undef TARGET_ASM_FUNCTION_PROLOGUE
690 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
691 #undef TARGET_ASM_FUNCTION_EPILOGUE
692 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
693
694 #undef TARGET_SCHED_ADJUST_COST
695 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
696 #undef TARGET_SCHED_ISSUE_RATE
697 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
698 #undef TARGET_SCHED_INIT
699 #define TARGET_SCHED_INIT sparc_sched_init
700 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
701 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
702
703 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
704 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
705
706 #undef TARGET_INIT_LIBFUNCS
707 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
708
709 #undef TARGET_LEGITIMIZE_ADDRESS
710 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
711 #undef TARGET_DELEGITIMIZE_ADDRESS
712 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
713 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
714 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
715
716 #undef TARGET_INIT_BUILTINS
717 #define TARGET_INIT_BUILTINS sparc_init_builtins
718 #undef TARGET_BUILTIN_DECL
719 #define TARGET_BUILTIN_DECL sparc_builtin_decl
720 #undef TARGET_EXPAND_BUILTIN
721 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
722 #undef TARGET_FOLD_BUILTIN
723 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
724
725 #if TARGET_TLS
726 #undef TARGET_HAVE_TLS
727 #define TARGET_HAVE_TLS true
728 #endif
729
730 #undef TARGET_CANNOT_FORCE_CONST_MEM
731 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
732
733 #undef TARGET_ASM_OUTPUT_MI_THUNK
734 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
735 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
736 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
737
738 #undef TARGET_RTX_COSTS
739 #define TARGET_RTX_COSTS sparc_rtx_costs
740 #undef TARGET_ADDRESS_COST
741 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
742 #undef TARGET_REGISTER_MOVE_COST
743 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
744
745 #undef TARGET_PROMOTE_FUNCTION_MODE
746 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
747
748 #undef TARGET_FUNCTION_VALUE
749 #define TARGET_FUNCTION_VALUE sparc_function_value
750 #undef TARGET_LIBCALL_VALUE
751 #define TARGET_LIBCALL_VALUE sparc_libcall_value
752 #undef TARGET_FUNCTION_VALUE_REGNO_P
753 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
754
755 #undef TARGET_STRUCT_VALUE_RTX
756 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
757 #undef TARGET_RETURN_IN_MEMORY
758 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
759 #undef TARGET_MUST_PASS_IN_STACK
760 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
761 #undef TARGET_PASS_BY_REFERENCE
762 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
763 #undef TARGET_ARG_PARTIAL_BYTES
764 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
765 #undef TARGET_FUNCTION_ARG_ADVANCE
766 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
767 #undef TARGET_FUNCTION_ARG
768 #define TARGET_FUNCTION_ARG sparc_function_arg
769 #undef TARGET_FUNCTION_INCOMING_ARG
770 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
771 #undef TARGET_FUNCTION_ARG_BOUNDARY
772 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
773
774 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
775 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
776 #undef TARGET_STRICT_ARGUMENT_NAMING
777 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
778
779 #undef TARGET_EXPAND_BUILTIN_VA_START
780 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
781 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
782 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
783
784 #undef TARGET_VECTOR_MODE_SUPPORTED_P
785 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
786
787 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
788 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
789
790 #ifdef SUBTARGET_INSERT_ATTRIBUTES
791 #undef TARGET_INSERT_ATTRIBUTES
792 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
793 #endif
794
795 #ifdef SUBTARGET_ATTRIBUTE_TABLE
796 #undef TARGET_ATTRIBUTE_TABLE
797 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
798 #endif
799
800 #undef TARGET_OPTION_OVERRIDE
801 #define TARGET_OPTION_OVERRIDE sparc_option_override
802
803 #ifdef TARGET_THREAD_SSP_OFFSET
804 #undef TARGET_STACK_PROTECT_GUARD
805 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
806 #endif
807
808 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
809 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
810 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
811 #endif
812
813 #undef TARGET_ASM_FILE_END
814 #define TARGET_ASM_FILE_END sparc_file_end
815
816 #undef TARGET_FRAME_POINTER_REQUIRED
817 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
818
819 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
820 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
821
822 #undef TARGET_CAN_ELIMINATE
823 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
824
825 #undef TARGET_PREFERRED_RELOAD_CLASS
826 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
827
828 #undef TARGET_SECONDARY_RELOAD
829 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
830
831 #undef TARGET_CONDITIONAL_REGISTER_USAGE
832 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
833
834 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
835 #undef TARGET_MANGLE_TYPE
836 #define TARGET_MANGLE_TYPE sparc_mangle_type
837 #endif
838
839 #undef TARGET_LRA_P
840 #define TARGET_LRA_P sparc_lra_p
841
842 #undef TARGET_LEGITIMATE_ADDRESS_P
843 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
844
845 #undef TARGET_LEGITIMATE_CONSTANT_P
846 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
847
848 #undef TARGET_TRAMPOLINE_INIT
849 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
850
851 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
852 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
853 #undef TARGET_PRINT_OPERAND
854 #define TARGET_PRINT_OPERAND sparc_print_operand
855 #undef TARGET_PRINT_OPERAND_ADDRESS
856 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
857
858 /* The value stored by LDSTUB. */
859 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
860 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
861
862 #undef TARGET_CSTORE_MODE
863 #define TARGET_CSTORE_MODE sparc_cstore_mode
864
865 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
866 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
867
868 #undef TARGET_FIXED_CONDITION_CODE_REGS
869 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
870
871 #undef TARGET_MIN_ARITHMETIC_PRECISION
872 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
873
874 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
875 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
876
877 struct gcc_target targetm = TARGET_INITIALIZER;
878
879 /* Return the memory reference contained in X if any, zero otherwise. */
880
881 static rtx
882 mem_ref (rtx x)
883 {
884 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
885 x = XEXP (x, 0);
886
887 if (MEM_P (x))
888 return x;
889
890 return NULL_RTX;
891 }
892
893 /* We use a machine specific pass to enable workarounds for errata.
894
895 We need to have the (essentially) final form of the insn stream in order
896 to properly detect the various hazards. Therefore, this machine specific
897 pass runs as late as possible. */
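 /* As a (hypothetical) illustration of the AT697F cases handled below: in
    the sequence

        ld    [address], %f3
        faddd %f2, %f4, %f2

    the load writes the odd register %f3 while the following FPOPd reads and
    writes the enclosing double register %f2, so the pass inserts a nop
    between the two instructions.  */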
898
899 static unsigned int
900 sparc_do_work_around_errata (void)
901 {
902 rtx_insn *insn, *next;
903
904 /* Force all instructions to be split into their final form. */
905 split_all_insns_noflow ();
906
907 /* Now look for specific patterns in the insn stream. */
908 for (insn = get_insns (); insn; insn = next)
909 {
910 bool insert_nop = false;
911 rtx set;
912
913 /* Look into the instruction in a delay slot. */
914 if (NONJUMP_INSN_P (insn))
915 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
916 insn = seq->insn (1);
917
918 /* Look for a single-word load into an odd-numbered FP register. */
919 if (sparc_fix_at697f
920 && NONJUMP_INSN_P (insn)
921 && (set = single_set (insn)) != NULL_RTX
922 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
923 && MEM_P (SET_SRC (set))
924 && REG_P (SET_DEST (set))
925 && REGNO (SET_DEST (set)) > 31
926 && REGNO (SET_DEST (set)) % 2 != 0)
927 {
928 /* The wrong dependency is on the enclosing double register. */
929 const unsigned int x = REGNO (SET_DEST (set)) - 1;
930 unsigned int src1, src2, dest;
931 int code;
932
933 next = next_active_insn (insn);
934 if (!next)
935 break;
936 /* If the insn is a branch, then it cannot be problematic. */
937 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
938 continue;
939
940 extract_insn (next);
941 code = INSN_CODE (next);
942
943 switch (code)
944 {
945 case CODE_FOR_adddf3:
946 case CODE_FOR_subdf3:
947 case CODE_FOR_muldf3:
948 case CODE_FOR_divdf3:
949 dest = REGNO (recog_data.operand[0]);
950 src1 = REGNO (recog_data.operand[1]);
951 src2 = REGNO (recog_data.operand[2]);
952 if (src1 != src2)
953 {
954 /* Case [1-4]:
955 ld [address], %fx+1
956 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
957 if ((src1 == x || src2 == x)
958 && (dest == src1 || dest == src2))
959 insert_nop = true;
960 }
961 else
962 {
963 /* Case 5:
964 ld [address], %fx+1
965 FPOPd %fx, %fx, %fx */
966 if (src1 == x
967 && dest == src1
968 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
969 insert_nop = true;
970 }
971 break;
972
973 case CODE_FOR_sqrtdf2:
974 dest = REGNO (recog_data.operand[0]);
975 src1 = REGNO (recog_data.operand[1]);
976 /* Case 6:
977 ld [address], %fx+1
978 fsqrtd %fx, %fx */
979 if (src1 == x && dest == src1)
980 insert_nop = true;
981 break;
982
983 default:
984 break;
985 }
986 }
987
988 /* Look for a single-word load into an integer register. */
989 else if (sparc_fix_ut699
990 && NONJUMP_INSN_P (insn)
991 && (set = single_set (insn)) != NULL_RTX
992 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
993 && mem_ref (SET_SRC (set)) != NULL_RTX
994 && REG_P (SET_DEST (set))
995 && REGNO (SET_DEST (set)) < 32)
996 {
997 /* There is no problem if the second memory access has a data
998 dependency on the first single-cycle load. */
999 rtx x = SET_DEST (set);
1000
1001 next = next_active_insn (insn);
1002 if (!next)
1003 break;
1004 /* If the insn is a branch, then it cannot be problematic. */
1005 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1006 continue;
1007
1008 /* Look for a second memory access to/from an integer register. */
1009 if ((set = single_set (next)) != NULL_RTX)
1010 {
1011 rtx src = SET_SRC (set);
1012 rtx dest = SET_DEST (set);
1013 rtx mem;
1014
1015 /* LDD is affected. */
1016 if ((mem = mem_ref (src)) != NULL_RTX
1017 && REG_P (dest)
1018 && REGNO (dest) < 32
1019 && !reg_mentioned_p (x, XEXP (mem, 0)))
1020 insert_nop = true;
1021
1022 /* STD is *not* affected. */
1023 else if (MEM_P (dest)
1024 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1025 && (src == CONST0_RTX (GET_MODE (dest))
1026 || (REG_P (src)
1027 && REGNO (src) < 32
1028 && REGNO (src) != REGNO (x)))
1029 && !reg_mentioned_p (x, XEXP (dest, 0)))
1030 insert_nop = true;
1031 }
1032 }
1033
1034 /* Look for a single-word load/operation into an FP register. */
1035 else if (sparc_fix_ut699
1036 && NONJUMP_INSN_P (insn)
1037 && (set = single_set (insn)) != NULL_RTX
1038 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1039 && REG_P (SET_DEST (set))
1040 && REGNO (SET_DEST (set)) > 31)
1041 {
1042 /* Number of instructions in the problematic window. */
1043 const int n_insns = 4;
1044 /* The problematic combination is with the sibling FP register. */
1045 const unsigned int x = REGNO (SET_DEST (set));
1046 const unsigned int y = x ^ 1;
1047 rtx_insn *after;
1048 int i;
1049
1050 next = next_active_insn (insn);
1051 if (!next)
1052 break;
1053 /* If the insn is a branch, then it cannot be problematic. */
1054 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1055 continue;
1056
1057 /* Look for a second load/operation into the sibling FP register. */
1058 if (!((set = single_set (next)) != NULL_RTX
1059 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1060 && REG_P (SET_DEST (set))
1061 && REGNO (SET_DEST (set)) == y))
1062 continue;
1063
1064 /* Look for a (possible) store from the FP register in the next N
1065 instructions, but bail out if it is again modified or if there
1066 is a store from the sibling FP register before this store. */
1067 for (after = next, i = 0; i < n_insns; i++)
1068 {
1069 bool branch_p;
1070
1071 after = next_active_insn (after);
1072 if (!after)
1073 break;
1074
1075 /* This is a branch with an empty delay slot. */
1076 if (!NONJUMP_INSN_P (after))
1077 {
1078 if (++i == n_insns)
1079 break;
1080 branch_p = true;
1081 after = NULL;
1082 }
1083 /* This is a branch with a filled delay slot. */
1084 else if (rtx_sequence *seq =
1085 dyn_cast <rtx_sequence *> (PATTERN (after)))
1086 {
1087 if (++i == n_insns)
1088 break;
1089 branch_p = true;
1090 after = seq->insn (1);
1091 }
1092 /* This is a regular instruction. */
1093 else
1094 branch_p = false;
1095
1096 if (after && (set = single_set (after)) != NULL_RTX)
1097 {
1098 const rtx src = SET_SRC (set);
1099 const rtx dest = SET_DEST (set);
1100 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1101
1102 /* If the FP register is again modified before the store,
1103 then the store isn't affected. */
1104 if (REG_P (dest)
1105 && (REGNO (dest) == x
1106 || (REGNO (dest) == y && size == 8)))
1107 break;
1108
1109 if (MEM_P (dest) && REG_P (src))
1110 {
1111 /* If there is a store from the sibling FP register
1112 before the store, then the store is not affected. */
1113 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1114 break;
1115
1116 /* Otherwise, the store is affected. */
1117 if (REGNO (src) == x && size == 4)
1118 {
1119 insert_nop = true;
1120 break;
1121 }
1122 }
1123 }
1124
1125 /* If we have a branch in the first M instructions, then we
1126             cannot see the (M+2)th instruction, so we play it safe. */
1127 if (branch_p && i <= (n_insns - 2))
1128 {
1129 insert_nop = true;
1130 break;
1131 }
1132 }
1133 }
1134
1135 else
1136 next = NEXT_INSN (insn);
1137
1138 if (insert_nop)
1139 emit_insn_before (gen_nop (), next);
1140 }
1141
1142 return 0;
1143 }
1144
1145 namespace {
1146
1147 const pass_data pass_data_work_around_errata =
1148 {
1149 RTL_PASS, /* type */
1150 "errata", /* name */
1151 OPTGROUP_NONE, /* optinfo_flags */
1152 TV_MACH_DEP, /* tv_id */
1153 0, /* properties_required */
1154 0, /* properties_provided */
1155 0, /* properties_destroyed */
1156 0, /* todo_flags_start */
1157 0, /* todo_flags_finish */
1158 };
1159
1160 class pass_work_around_errata : public rtl_opt_pass
1161 {
1162 public:
1163 pass_work_around_errata(gcc::context *ctxt)
1164 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1165 {}
1166
1167 /* opt_pass methods: */
1168 virtual bool gate (function *)
1169 {
1170 /* The only errata we handle are those of the AT697F and UT699. */
1171 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1172 }
1173
1174 virtual unsigned int execute (function *)
1175 {
1176 return sparc_do_work_around_errata ();
1177 }
1178
1179 }; // class pass_work_around_errata
1180
1181 } // anon namespace
1182
1183 rtl_opt_pass *
1184 make_pass_work_around_errata (gcc::context *ctxt)
1185 {
1186 return new pass_work_around_errata (ctxt);
1187 }
1188
1189 /* Helpers for TARGET_DEBUG_OPTIONS. */
1190 static void
1191 dump_target_flag_bits (const int flags)
1192 {
1193 if (flags & MASK_64BIT)
1194 fprintf (stderr, "64BIT ");
1195 if (flags & MASK_APP_REGS)
1196 fprintf (stderr, "APP_REGS ");
1197 if (flags & MASK_FASTER_STRUCTS)
1198 fprintf (stderr, "FASTER_STRUCTS ");
1199 if (flags & MASK_FLAT)
1200 fprintf (stderr, "FLAT ");
1201 if (flags & MASK_FMAF)
1202 fprintf (stderr, "FMAF ");
1203 if (flags & MASK_FPU)
1204 fprintf (stderr, "FPU ");
1205 if (flags & MASK_HARD_QUAD)
1206 fprintf (stderr, "HARD_QUAD ");
1207 if (flags & MASK_POPC)
1208 fprintf (stderr, "POPC ");
1209 if (flags & MASK_PTR64)
1210 fprintf (stderr, "PTR64 ");
1211 if (flags & MASK_STACK_BIAS)
1212 fprintf (stderr, "STACK_BIAS ");
1213 if (flags & MASK_UNALIGNED_DOUBLES)
1214 fprintf (stderr, "UNALIGNED_DOUBLES ");
1215 if (flags & MASK_V8PLUS)
1216 fprintf (stderr, "V8PLUS ");
1217 if (flags & MASK_VIS)
1218 fprintf (stderr, "VIS ");
1219 if (flags & MASK_VIS2)
1220 fprintf (stderr, "VIS2 ");
1221 if (flags & MASK_VIS3)
1222 fprintf (stderr, "VIS3 ");
1223 if (flags & MASK_VIS4)
1224 fprintf (stderr, "VIS4 ");
1225 if (flags & MASK_CBCOND)
1226 fprintf (stderr, "CBCOND ");
1227 if (flags & MASK_DEPRECATED_V8_INSNS)
1228 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1229 if (flags & MASK_SPARCLET)
1230 fprintf (stderr, "SPARCLET ");
1231 if (flags & MASK_SPARCLITE)
1232 fprintf (stderr, "SPARCLITE ");
1233 if (flags & MASK_V8)
1234 fprintf (stderr, "V8 ");
1235 if (flags & MASK_V9)
1236 fprintf (stderr, "V9 ");
1237 }
1238
1239 static void
1240 dump_target_flags (const char *prefix, const int flags)
1241 {
1242 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1243 dump_target_flag_bits (flags);
1244 fprintf(stderr, "]\n");
1245 }
1246
1247 /* Validate and override various options, and do some machine dependent
1248 initialization. */
1249
1250 static void
1251 sparc_option_override (void)
1252 {
1253 static struct code_model {
1254 const char *const name;
1255 const enum cmodel value;
1256 } const cmodels[] = {
1257 { "32", CM_32 },
1258 { "medlow", CM_MEDLOW },
1259 { "medmid", CM_MEDMID },
1260 { "medany", CM_MEDANY },
1261 { "embmedany", CM_EMBMEDANY },
1262 { NULL, (enum cmodel) 0 }
1263 };
1264 const struct code_model *cmodel;
1265 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1266 static struct cpu_default {
1267 const int cpu;
1268 const enum processor_type processor;
1269 } const cpu_default[] = {
1270 /* There must be one entry here for each TARGET_CPU value. */
1271 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1272 { TARGET_CPU_v8, PROCESSOR_V8 },
1273 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1274 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1275 { TARGET_CPU_leon, PROCESSOR_LEON },
1276 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1277 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1278 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1279 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1280 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1281 { TARGET_CPU_v9, PROCESSOR_V9 },
1282 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1283 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1284 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1285 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1286 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1287 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1288 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1289 { -1, PROCESSOR_V7 }
1290 };
1291 const struct cpu_default *def;
1292 /* Table of values for -m{cpu,tune}=. This must match the order of
1293 the enum processor_type in sparc-opts.h. */
1294 static struct cpu_table {
1295 const char *const name;
1296 const int disable;
1297 const int enable;
1298 } const cpu_table[] = {
1299 { "v7", MASK_ISA, 0 },
1300 { "cypress", MASK_ISA, 0 },
1301 { "v8", MASK_ISA, MASK_V8 },
1302 /* TI TMS390Z55 supersparc */
1303 { "supersparc", MASK_ISA, MASK_V8 },
1304 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1305 { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1306 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1307 { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
1308 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1309 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1310 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1311 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1312 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1313 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1314 { "sparclet", MASK_ISA, MASK_SPARCLET },
1315 /* TEMIC sparclet */
1316 { "tsc701", MASK_ISA, MASK_SPARCLET },
1317 { "v9", MASK_ISA, MASK_V9 },
1318 /* UltraSPARC I, II, IIi */
1319 { "ultrasparc", MASK_ISA,
1320 /* Although insns using %y are deprecated, it is a clear win. */
1321 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1322 /* UltraSPARC III */
1323 /* ??? Check if %y issue still holds true. */
1324 { "ultrasparc3", MASK_ISA,
1325 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1326 /* UltraSPARC T1 */
1327 { "niagara", MASK_ISA,
1328 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1329 /* UltraSPARC T2 */
1330 { "niagara2", MASK_ISA,
1331 MASK_V9|MASK_POPC|MASK_VIS2 },
1332 /* UltraSPARC T3 */
1333 { "niagara3", MASK_ISA,
1334 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1335 /* UltraSPARC T4 */
1336 { "niagara4", MASK_ISA,
1337 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1338 /* UltraSPARC M7 */
1339 { "niagara7", MASK_ISA,
1340 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1341 };
1342 const struct cpu_table *cpu;
1343 unsigned int i;
1344 int fpu;
1345
1346 if (sparc_debug_string != NULL)
1347 {
1348 const char *q;
1349 char *p;
1350
1351 p = ASTRDUP (sparc_debug_string);
1352 while ((q = strtok (p, ",")) != NULL)
1353 {
1354 bool invert;
1355 int mask;
1356
1357 p = NULL;
1358 if (*q == '!')
1359 {
1360 invert = true;
1361 q++;
1362 }
1363 else
1364 invert = false;
1365
1366 if (! strcmp (q, "all"))
1367 mask = MASK_DEBUG_ALL;
1368 else if (! strcmp (q, "options"))
1369 mask = MASK_DEBUG_OPTIONS;
1370 else
1371 error ("unknown -mdebug-%s switch", q);
1372
1373 if (invert)
1374 sparc_debug &= ~mask;
1375 else
1376 sparc_debug |= mask;
1377 }
1378 }
1379
1380 if (TARGET_DEBUG_OPTIONS)
1381 {
1382 dump_target_flags("Initial target_flags", target_flags);
1383 dump_target_flags("target_flags_explicit", target_flags_explicit);
1384 }
1385
1386 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1387 SUBTARGET_OVERRIDE_OPTIONS;
1388 #endif
1389
1390 #ifndef SPARC_BI_ARCH
1391 /* Check for unsupported architecture size. */
1392 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1393 error ("%s is not supported by this configuration",
1394 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1395 #endif
1396
1397   /* We force all 64-bit archs to use a 128-bit long double.  */
1398 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1399 {
1400 error ("-mlong-double-64 not allowed with -m64");
1401 target_flags |= MASK_LONG_DOUBLE_128;
1402 }
1403
1404 /* Code model selection. */
1405 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1406
1407 #ifdef SPARC_BI_ARCH
1408 if (TARGET_ARCH32)
1409 sparc_cmodel = CM_32;
1410 #endif
1411
1412 if (sparc_cmodel_string != NULL)
1413 {
1414 if (TARGET_ARCH64)
1415 {
1416 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1417 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1418 break;
1419 if (cmodel->name == NULL)
1420 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1421 else
1422 sparc_cmodel = cmodel->value;
1423 }
1424 else
1425 error ("-mcmodel= is not supported on 32 bit systems");
1426 }
1427
1428 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1429 for (i = 8; i < 16; i++)
1430 if (!call_used_regs [i])
1431 {
1432 error ("-fcall-saved-REG is not supported for out registers");
1433 call_used_regs [i] = 1;
1434 }
1435
1436 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1437
1438 /* Set the default CPU. */
1439 if (!global_options_set.x_sparc_cpu_and_features)
1440 {
1441 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1442 if (def->cpu == TARGET_CPU_DEFAULT)
1443 break;
1444 gcc_assert (def->cpu != -1);
1445 sparc_cpu_and_features = def->processor;
1446 }
1447
1448 if (!global_options_set.x_sparc_cpu)
1449 sparc_cpu = sparc_cpu_and_features;
1450
1451 cpu = &cpu_table[(int) sparc_cpu_and_features];
1452
1453 if (TARGET_DEBUG_OPTIONS)
1454 {
1455 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1456 fprintf (stderr, "sparc_cpu: %s\n",
1457 cpu_table[(int) sparc_cpu].name);
1458 dump_target_flags ("cpu->disable", cpu->disable);
1459 dump_target_flags ("cpu->enable", cpu->enable);
1460 }
1461
1462 target_flags &= ~cpu->disable;
1463 target_flags |= (cpu->enable
1464 #ifndef HAVE_AS_FMAF_HPC_VIS3
1465 & ~(MASK_FMAF | MASK_VIS3)
1466 #endif
1467 #ifndef HAVE_AS_SPARC4
1468 & ~MASK_CBCOND
1469 #endif
1470 #ifndef HAVE_AS_SPARC5_VIS4
1471 & ~(MASK_VIS4 | MASK_SUBXC)
1472 #endif
1473 #ifndef HAVE_AS_LEON
1474 & ~(MASK_LEON | MASK_LEON3)
1475 #endif
1476 );
1477
1478 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1479 the processor default. */
1480 if (target_flags_explicit & MASK_FPU)
1481 target_flags = (target_flags & ~MASK_FPU) | fpu;
1482
1483 /* -mvis2 implies -mvis */
1484 if (TARGET_VIS2)
1485 target_flags |= MASK_VIS;
1486
1487 /* -mvis3 implies -mvis2 and -mvis */
1488 if (TARGET_VIS3)
1489 target_flags |= MASK_VIS2 | MASK_VIS;
1490
1491 /* -mvis4 implies -mvis3, -mvis2 and -mvis */
1492 if (TARGET_VIS4)
1493 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1494
1495 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
1496 disabled. */
1497 if (! TARGET_FPU)
1498 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1499 | MASK_FMAF);
1500
1501 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1502 are available.
1503 -m64 also implies v9. */
1504 if (TARGET_VIS || TARGET_ARCH64)
1505 {
1506 target_flags |= MASK_V9;
1507 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1508 }
1509
1510 /* -mvis also implies -mv8plus on 32-bit */
1511 if (TARGET_VIS && ! TARGET_ARCH64)
1512 target_flags |= MASK_V8PLUS;
1513
1514 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1515 if (TARGET_V9 && TARGET_ARCH32)
1516 target_flags |= MASK_DEPRECATED_V8_INSNS;
1517
1518 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1519 if (! TARGET_V9 || TARGET_ARCH64)
1520 target_flags &= ~MASK_V8PLUS;
1521
1522 /* Don't use stack biasing in 32 bit mode. */
1523 if (TARGET_ARCH32)
1524 target_flags &= ~MASK_STACK_BIAS;
1525
1526 /* Supply a default value for align_functions. */
1527 if (align_functions == 0
1528 && (sparc_cpu == PROCESSOR_ULTRASPARC
1529 || sparc_cpu == PROCESSOR_ULTRASPARC3
1530 || sparc_cpu == PROCESSOR_NIAGARA
1531 || sparc_cpu == PROCESSOR_NIAGARA2
1532 || sparc_cpu == PROCESSOR_NIAGARA3
1533 || sparc_cpu == PROCESSOR_NIAGARA4
1534 || sparc_cpu == PROCESSOR_NIAGARA7))
1535 align_functions = 32;
1536
1537 /* Validate PCC_STRUCT_RETURN. */
1538 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1539 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1540
1541 /* Only use .uaxword when compiling for a 64-bit target. */
1542 if (!TARGET_ARCH64)
1543 targetm.asm_out.unaligned_op.di = NULL;
1544
1545 /* Do various machine dependent initializations. */
1546 sparc_init_modes ();
1547
1548 /* Set up function hooks. */
1549 init_machine_status = sparc_init_machine_status;
1550
1551 switch (sparc_cpu)
1552 {
1553 case PROCESSOR_V7:
1554 case PROCESSOR_CYPRESS:
1555 sparc_costs = &cypress_costs;
1556 break;
1557 case PROCESSOR_V8:
1558 case PROCESSOR_SPARCLITE:
1559 case PROCESSOR_SUPERSPARC:
1560 sparc_costs = &supersparc_costs;
1561 break;
1562 case PROCESSOR_F930:
1563 case PROCESSOR_F934:
1564 case PROCESSOR_HYPERSPARC:
1565 case PROCESSOR_SPARCLITE86X:
1566 sparc_costs = &hypersparc_costs;
1567 break;
1568 case PROCESSOR_LEON:
1569 sparc_costs = &leon_costs;
1570 break;
1571 case PROCESSOR_LEON3:
1572 case PROCESSOR_LEON3V7:
1573 sparc_costs = &leon3_costs;
1574 break;
1575 case PROCESSOR_SPARCLET:
1576 case PROCESSOR_TSC701:
1577 sparc_costs = &sparclet_costs;
1578 break;
1579 case PROCESSOR_V9:
1580 case PROCESSOR_ULTRASPARC:
1581 sparc_costs = &ultrasparc_costs;
1582 break;
1583 case PROCESSOR_ULTRASPARC3:
1584 sparc_costs = &ultrasparc3_costs;
1585 break;
1586 case PROCESSOR_NIAGARA:
1587 sparc_costs = &niagara_costs;
1588 break;
1589 case PROCESSOR_NIAGARA2:
1590 sparc_costs = &niagara2_costs;
1591 break;
1592 case PROCESSOR_NIAGARA3:
1593 sparc_costs = &niagara3_costs;
1594 break;
1595 case PROCESSOR_NIAGARA4:
1596 sparc_costs = &niagara4_costs;
1597 break;
1598 case PROCESSOR_NIAGARA7:
1599 sparc_costs = &niagara7_costs;
1600 break;
1601 case PROCESSOR_NATIVE:
1602 gcc_unreachable ();
1603 };
1604
1605 if (sparc_memory_model == SMM_DEFAULT)
1606 {
1607 /* Choose the memory model for the operating system. */
1608 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1609 if (os_default != SMM_DEFAULT)
1610 sparc_memory_model = os_default;
1611 /* Choose the most relaxed model for the processor. */
1612 else if (TARGET_V9)
1613 sparc_memory_model = SMM_RMO;
1614 else if (TARGET_LEON3)
1615 sparc_memory_model = SMM_TSO;
1616 else if (TARGET_LEON)
1617 sparc_memory_model = SMM_SC;
1618 else if (TARGET_V8)
1619 sparc_memory_model = SMM_PSO;
1620 else
1621 sparc_memory_model = SMM_SC;
1622 }
1623
1624 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1625 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1626 target_flags |= MASK_LONG_DOUBLE_128;
1627 #endif
1628
1629 if (TARGET_DEBUG_OPTIONS)
1630 dump_target_flags ("Final target_flags", target_flags);
1631
1632 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1633 can run at the same time. More important, it is the threshold
1634 defining when additional prefetches will be dropped by the
1635 hardware.
1636
1637 The UltraSPARC-III features a documented prefetch queue with a
1638 size of 8. Additional prefetches issued in the cpu are
1639 dropped.
1640
1641 Niagara processors are different. In these processors prefetches
1642 are handled much like regular loads. The L1 miss buffer is 32
1643 entries, but prefetches start getting affected when 30 entries
1644 become occupied. That occupation could be a mix of regular loads
1645 and prefetches though. And that buffer is shared by all threads.
1646 Once the threshold is reached, if the core is running a single
1647 thread the prefetch will retry. If more than one thread is
1648 running, the prefetch will be dropped.
1649
1650      All this makes it very difficult to determine how many prefetches
1651      can be issued simultaneously, even in a
1652 single-threaded program. Experimental results show that setting
1653 this parameter to 32 works well when the number of threads is not
1654 high. */
1655 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1656 ((sparc_cpu == PROCESSOR_ULTRASPARC
1657 || sparc_cpu == PROCESSOR_NIAGARA
1658 || sparc_cpu == PROCESSOR_NIAGARA2
1659 || sparc_cpu == PROCESSOR_NIAGARA3
1660 || sparc_cpu == PROCESSOR_NIAGARA4)
1661 ? 2
1662 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1663 ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
1664 ? 32 : 3))),
1665 global_options.x_param_values,
1666 global_options_set.x_param_values);
1667
1668 /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
1669 params.def), so no maybe_set_param_value is needed.
1670
1671 The Oracle SPARC Architecture (previously the UltraSPARC
1672 Architecture) specification states that when a PREFETCH[A]
1673 instruction is executed an implementation-specific amount of data
1674 is prefetched, and that it is at least 64 bytes long (aligned to
1675 at least 64 bytes).
1676
1677 However, this is not correct. The M7 (and implementations prior
1678 to that) does not guarantee a 64B prefetch into a cache if the
1679 line size is smaller. A single cache line is all that is ever
1680 prefetched. So for the M7, where the L1D$ has 32B lines and the
1681 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1682 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1683 is a read_n prefetch, which is the only type which allocates to
1684 the L1.) */
1685
1686 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1687 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1688 Niagara processors feature a L1D$ of 16KB. */
1689 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1690 ((sparc_cpu == PROCESSOR_ULTRASPARC
1691 || sparc_cpu == PROCESSOR_ULTRASPARC3
1692 || sparc_cpu == PROCESSOR_NIAGARA
1693 || sparc_cpu == PROCESSOR_NIAGARA2
1694 || sparc_cpu == PROCESSOR_NIAGARA3
1695 || sparc_cpu == PROCESSOR_NIAGARA4
1696 || sparc_cpu == PROCESSOR_NIAGARA7)
1697 ? 16 : 64),
1698 global_options.x_param_values,
1699 global_options_set.x_param_values);
1700
1701
1702 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1703 that 512 is the default in params.def. */
1704 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1705 (sparc_cpu == PROCESSOR_NIAGARA4
1706 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1707 ? 256 : 512)),
1708 global_options.x_param_values,
1709 global_options_set.x_param_values);
1710
1711
1712 /* Disable save slot sharing for call-clobbered registers by default.
1713 The IRA sharing algorithm works on single registers only and this
1714 pessimizes for double floating-point registers. */
1715 if (!global_options_set.x_flag_ira_share_save_slots)
1716 flag_ira_share_save_slots = 0;
1717
1718 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1719 redundant 32-to-64-bit extensions. */
1720 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1721 flag_ree = 0;
1722 }
1723 \f
1724 /* Miscellaneous utilities. */
1725
1726 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1727 or branch on register contents instructions. */
1728
1729 int
1730 v9_regcmp_p (enum rtx_code code)
1731 {
1732 return (code == EQ || code == NE || code == GE || code == LT
1733 || code == LE || code == GT);
1734 }
1735
1736 /* Nonzero if OP is a floating point constant which can
1737 be loaded into an integer register using a single
1738 sethi instruction. */
1739
1740 int
1741 fp_sethi_p (rtx op)
1742 {
1743 if (GET_CODE (op) == CONST_DOUBLE)
1744 {
1745 long i;
1746
1747 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1748 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1749 }
1750
1751 return 0;
1752 }
1753
1754 /* Nonzero if OP is a floating point constant which can
1755 be loaded into an integer register using a single
1756 mov instruction. */
1757
1758 int
1759 fp_mov_p (rtx op)
1760 {
1761 if (GET_CODE (op) == CONST_DOUBLE)
1762 {
1763 long i;
1764
1765 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1766 return SPARC_SIMM13_P (i);
1767 }
1768
1769 return 0;
1770 }
1771
1772 /* Nonzero if OP is a floating point constant which can
1773 be loaded into an integer register using a high/losum
1774 instruction sequence. */
1775
1776 int
1777 fp_high_losum_p (rtx op)
1778 {
1779 /* The constraints calling this should only be in
1780 SFmode move insns, so any constant which cannot
1781 be moved using a single insn will do. */
1782 if (GET_CODE (op) == CONST_DOUBLE)
1783 {
1784 long i;
1785
1786 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1787 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1788 }
1789
1790 return 0;
1791 }
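
/* Illustrative sketch only (kept out of the build): a plain-C restatement of
   the three-way classification that fp_mov_p, fp_sethi_p and fp_high_losum_p
   perform on the 32-bit image of an SFmode constant.  The 13-bit and 10-bit
   cutoffs are assumed to mirror SPARC_SIMM13_P and SPARC_SETHI_P from
   sparc.h, and the example bit patterns are ordinary IEEE-754 single
   encodings; this is a sketch, not the predicates themselves.  */
#if 0
#include <assert.h>
#include <stdint.h>

enum sf_load_kind { SF_MOV, SF_SETHI, SF_HIGH_LOSUM };

static enum sf_load_kind
classify_sf_image (int32_t i)
{
  int simm13_p = (i >= -4096 && i <= 4095);     /* fits a 13-bit signed immediate */
  int sethi_p = ((uint32_t) i & 0x3ff) == 0;    /* low 10 bits clear */

  if (simm13_p)
    return SF_MOV;           /* fp_mov_p would accept it: single mov */
  if (sethi_p)
    return SF_SETHI;         /* fp_sethi_p would accept it: single sethi */
  return SF_HIGH_LOSUM;      /* fp_high_losum_p: needs sethi + or */
}

static void
classify_sf_image_examples (void)
{
  assert (classify_sf_image (0x00000100) == SF_MOV);        /* small image */
  assert (classify_sf_image (0x3f800000) == SF_SETHI);      /* 1.0f */
  assert (classify_sf_image (0x3f800001) == SF_HIGH_LOSUM); /* 1.0f + 1 ulp */
}
#endif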
1792
1793 /* Return true if the address of LABEL can be loaded by means of the
1794 mov{si,di}_pic_label_ref patterns in PIC mode. */
1795
1796 static bool
1797 can_use_mov_pic_label_ref (rtx label)
1798 {
1799 /* VxWorks does not impose a fixed gap between segments; the run-time
1800 gap can be different from the object-file gap. We therefore can't
1801 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1802 are absolutely sure that X is in the same segment as the GOT.
1803 Unfortunately, the flexibility of linker scripts means that we
1804 can't be sure of that in general, so assume that GOT-relative
1805 accesses are never valid on VxWorks. */
1806 if (TARGET_VXWORKS_RTP)
1807 return false;
1808
1809 /* Similarly, if the label is non-local, it might end up being placed
1810 in a different section than the current one; now mov_pic_label_ref
1811 requires the label and the code to be in the same section. */
1812 if (LABEL_REF_NONLOCAL_P (label))
1813 return false;
1814
1815 /* Finally, if we are reordering basic blocks and partitioning into hot
1816 and cold sections, this might happen for any label. */
1817 if (flag_reorder_blocks_and_partition)
1818 return false;
1819
1820 return true;
1821 }
1822
1823 /* Expand a move instruction. Return true if all work is done. */
1824
1825 bool
1826 sparc_expand_move (machine_mode mode, rtx *operands)
1827 {
1828 /* Handle sets of MEM first. */
1829 if (GET_CODE (operands[0]) == MEM)
1830 {
1831 /* 0 is a register (or a pair of registers) on SPARC. */
1832 if (register_or_zero_operand (operands[1], mode))
1833 return false;
1834
1835 if (!reload_in_progress)
1836 {
1837 operands[0] = validize_mem (operands[0]);
1838 operands[1] = force_reg (mode, operands[1]);
1839 }
1840 }
1841
1842 /* Fixup TLS cases. */
1843 if (TARGET_HAVE_TLS
1844 && CONSTANT_P (operands[1])
1845 && sparc_tls_referenced_p (operands [1]))
1846 {
1847 operands[1] = sparc_legitimize_tls_address (operands[1]);
1848 return false;
1849 }
1850
1851 /* Fixup PIC cases. */
1852 if (flag_pic && CONSTANT_P (operands[1]))
1853 {
1854 if (pic_address_needs_scratch (operands[1]))
1855 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1856
1857 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1858 if (GET_CODE (operands[1]) == LABEL_REF
1859 && can_use_mov_pic_label_ref (operands[1]))
1860 {
1861 if (mode == SImode)
1862 {
1863 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1864 return true;
1865 }
1866
1867 if (mode == DImode)
1868 {
1869 gcc_assert (TARGET_ARCH64);
1870 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1871 return true;
1872 }
1873 }
1874
1875 if (symbolic_operand (operands[1], mode))
1876 {
1877 operands[1]
1878 = sparc_legitimize_pic_address (operands[1],
1879 reload_in_progress
1880 ? operands[0] : NULL_RTX);
1881 return false;
1882 }
1883 }
1884
1885 /* If we are trying to toss an integer constant into FP registers,
1886 or loading a FP or vector constant, force it into memory. */
1887 if (CONSTANT_P (operands[1])
1888 && REG_P (operands[0])
1889 && (SPARC_FP_REG_P (REGNO (operands[0]))
1890 || SCALAR_FLOAT_MODE_P (mode)
1891 || VECTOR_MODE_P (mode)))
1892 {
1893 /* emit_group_store will send such bogosity to us when it is
1894 not storing directly into memory. So fix this up to avoid
1895 crashes in output_constant_pool. */
1896 if (operands [1] == const0_rtx)
1897 operands[1] = CONST0_RTX (mode);
1898
1899 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
1900 and we can always do so for the other registers. */
1901 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1902 && (const_zero_operand (operands[1], mode)
1903 || const_all_ones_operand (operands[1], mode)))
1904 return false;
1905
1906 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1907 /* We are able to build any SF constant in integer registers
1908 with at most 2 instructions. */
1909 && (mode == SFmode
1910 /* And any DF constant in integer registers. */
1911 || (mode == DFmode
1912 && ! can_create_pseudo_p ())))
1913 return false;
1914
1915 operands[1] = force_const_mem (mode, operands[1]);
1916 if (!reload_in_progress)
1917 operands[1] = validize_mem (operands[1]);
1918 return false;
1919 }
1920
1921 /* Accept non-constants and valid constants unmodified. */
1922 if (!CONSTANT_P (operands[1])
1923 || GET_CODE (operands[1]) == HIGH
1924 || input_operand (operands[1], mode))
1925 return false;
1926
1927 switch (mode)
1928 {
1929 case QImode:
1930 /* All QImode constants require only one insn, so proceed. */
1931 break;
1932
1933 case HImode:
1934 case SImode:
1935 sparc_emit_set_const32 (operands[0], operands[1]);
1936 return true;
1937
1938 case DImode:
1939 /* input_operand should have filtered out 32-bit mode. */
1940 sparc_emit_set_const64 (operands[0], operands[1]);
1941 return true;
1942
1943 case TImode:
1944 {
1945 rtx high, low;
1946 /* TImode isn't available in 32-bit mode. */
1947 split_double (operands[1], &high, &low);
1948 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1949 high));
1950 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1951 low));
1952 }
1953 return true;
1954
1955 default:
1956 gcc_unreachable ();
1957 }
1958
1959 return false;
1960 }
1961
1962 /* Load OP1, a 32-bit constant, into OP0, a register.
1963 We know it can't be done in one insn when we get
1964 here; the move expander guarantees this. */
1965
1966 static void
1967 sparc_emit_set_const32 (rtx op0, rtx op1)
1968 {
1969 machine_mode mode = GET_MODE (op0);
1970 rtx temp = op0;
1971
1972 if (can_create_pseudo_p ())
1973 temp = gen_reg_rtx (mode);
1974
1975 if (GET_CODE (op1) == CONST_INT)
1976 {
1977 gcc_assert (!small_int_operand (op1, mode)
1978 && !const_high_operand (op1, mode));
1979
1980 /* Emit them as real moves instead of a HIGH/LO_SUM;
1981 this way CSE can see everything and reuse intermediate
1982 values if it wants. */
1983 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1984 & ~(HOST_WIDE_INT) 0x3ff)));
1985
1986 emit_insn (gen_rtx_SET (op0,
1987 gen_rtx_IOR (mode, temp,
1988 GEN_INT (INTVAL (op1) & 0x3ff))));
1989 }
1990 else
1991 {
1992 /* A symbol, emit in the traditional way. */
1993 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1994 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1995 }
1996 }
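
/* Illustrative sketch only (kept out of the build): the split performed by
   sparc_emit_set_const32 on a CONST_INT is simply "upper 22 bits via sethi,
   low 10 bits via or".  The check below restates that identity in plain C
   for one arbitrary example value.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
const32_split_example (void)
{
  uint32_t val = 0x12345678;
  uint32_t sethi_part = val & ~(uint32_t) 0x3ff;   /* 0x12345400, what sethi loads */
  uint32_t or_part = val & 0x3ff;                  /* 0x278, folded in by the or */

  assert ((sethi_part | or_part) == val);
}
#endif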
1997
1998 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1999 If TEMP is nonzero, we are forbidden to use any other scratch
2000 registers. Otherwise, we are allowed to generate them as needed.
2001
2002 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2003 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2004
2005 void
2006 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2007 {
2008 rtx temp1, temp2, temp3, temp4, temp5;
2009 rtx ti_temp = 0;
2010
2011 if (temp && GET_MODE (temp) == TImode)
2012 {
2013 ti_temp = temp;
2014 temp = gen_rtx_REG (DImode, REGNO (temp));
2015 }
2016
2017 /* SPARC-V9 code-model support. */
2018 switch (sparc_cmodel)
2019 {
2020 case CM_MEDLOW:
2021 /* The range spanned by all instructions in the object is less
2022 than 2^31 bytes (2GB) and the distance from any instruction
2023 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2024 than 2^31 bytes (2GB).
2025
2026 The executable must be in the low 4TB of the virtual address
2027 space.
2028
2029 sethi %hi(symbol), %temp1
2030 or %temp1, %lo(symbol), %reg */
2031 if (temp)
2032 temp1 = temp; /* op0 is allowed. */
2033 else
2034 temp1 = gen_reg_rtx (DImode);
2035
2036 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2037 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2038 break;
2039
2040 case CM_MEDMID:
2041 /* The range spanned by all instructions in the object is less
2042 than 2^31 bytes (2GB) and the distance from any instruction
2043 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2044 than 2^31 bytes (2GB).
2045
2046 The executable must be in the low 16TB of the virtual address
2047 space.
2048
2049 sethi %h44(symbol), %temp1
2050 or %temp1, %m44(symbol), %temp2
2051 sllx %temp2, 12, %temp3
2052 or %temp3, %l44(symbol), %reg */
2053 if (temp)
2054 {
2055 temp1 = op0;
2056 temp2 = op0;
2057 temp3 = temp; /* op0 is allowed. */
2058 }
2059 else
2060 {
2061 temp1 = gen_reg_rtx (DImode);
2062 temp2 = gen_reg_rtx (DImode);
2063 temp3 = gen_reg_rtx (DImode);
2064 }
2065
2066 emit_insn (gen_seth44 (temp1, op1));
2067 emit_insn (gen_setm44 (temp2, temp1, op1));
2068 emit_insn (gen_rtx_SET (temp3,
2069 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2070 emit_insn (gen_setl44 (op0, temp3, op1));
2071 break;
2072
2073 case CM_MEDANY:
2074 /* The range spanned by all instructions in the object is less
2075 than 2^31 bytes (2GB) and the distance from any instruction
2076 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2077 than 2^31 bytes (2GB).
2078
2079 The executable can be placed anywhere in the virtual address
2080 space.
2081
2082 sethi %hh(symbol), %temp1
2083 sethi %lm(symbol), %temp2
2084 or %temp1, %hm(symbol), %temp3
2085 sllx %temp3, 32, %temp4
2086 or %temp4, %temp2, %temp5
2087 or %temp5, %lo(symbol), %reg */
2088 if (temp)
2089 {
2090 /* It is possible that one of the registers we got for operands[2]
2091 might coincide with that of operands[0] (which is why we made
2092 it TImode). Pick the other one to use as our scratch. */
2093 if (rtx_equal_p (temp, op0))
2094 {
2095 gcc_assert (ti_temp);
2096 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2097 }
2098 temp1 = op0;
2099 temp2 = temp; /* op0 is _not_ allowed, see above. */
2100 temp3 = op0;
2101 temp4 = op0;
2102 temp5 = op0;
2103 }
2104 else
2105 {
2106 temp1 = gen_reg_rtx (DImode);
2107 temp2 = gen_reg_rtx (DImode);
2108 temp3 = gen_reg_rtx (DImode);
2109 temp4 = gen_reg_rtx (DImode);
2110 temp5 = gen_reg_rtx (DImode);
2111 }
2112
2113 emit_insn (gen_sethh (temp1, op1));
2114 emit_insn (gen_setlm (temp2, op1));
2115 emit_insn (gen_sethm (temp3, temp1, op1));
2116 emit_insn (gen_rtx_SET (temp4,
2117 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2118 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2119 emit_insn (gen_setlo (op0, temp5, op1));
2120 break;
2121
2122 case CM_EMBMEDANY:
2123 /* Old old old backwards compatibility kruft here.
2124 Essentially it is MEDLOW with a fixed 64-bit
2125 virtual base added to all data segment addresses.
2126 Text-segment stuff is computed like MEDANY, we can't
2127 reuse the code above because the relocation knobs
2128 look different.
2129
2130 Data segment: sethi %hi(symbol), %temp1
2131 add %temp1, EMBMEDANY_BASE_REG, %temp2
2132 or %temp2, %lo(symbol), %reg */
2133 if (data_segment_operand (op1, GET_MODE (op1)))
2134 {
2135 if (temp)
2136 {
2137 temp1 = temp; /* op0 is allowed. */
2138 temp2 = op0;
2139 }
2140 else
2141 {
2142 temp1 = gen_reg_rtx (DImode);
2143 temp2 = gen_reg_rtx (DImode);
2144 }
2145
2146 emit_insn (gen_embmedany_sethi (temp1, op1));
2147 emit_insn (gen_embmedany_brsum (temp2, temp1));
2148 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2149 }
2150
2151 /* Text segment: sethi %uhi(symbol), %temp1
2152 sethi %hi(symbol), %temp2
2153 or %temp1, %ulo(symbol), %temp3
2154 sllx %temp3, 32, %temp4
2155 or %temp4, %temp2, %temp5
2156 or %temp5, %lo(symbol), %reg */
2157 else
2158 {
2159 if (temp)
2160 {
2161 /* It is possible that one of the registers we got for operands[2]
2162 might coincide with that of operands[0] (which is why we made
2163 it TImode). Pick the other one to use as our scratch. */
2164 if (rtx_equal_p (temp, op0))
2165 {
2166 gcc_assert (ti_temp);
2167 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2168 }
2169 temp1 = op0;
2170 temp2 = temp; /* op0 is _not_ allowed, see above. */
2171 temp3 = op0;
2172 temp4 = op0;
2173 temp5 = op0;
2174 }
2175 else
2176 {
2177 temp1 = gen_reg_rtx (DImode);
2178 temp2 = gen_reg_rtx (DImode);
2179 temp3 = gen_reg_rtx (DImode);
2180 temp4 = gen_reg_rtx (DImode);
2181 temp5 = gen_reg_rtx (DImode);
2182 }
2183
2184 emit_insn (gen_embmedany_textuhi (temp1, op1));
2185 emit_insn (gen_embmedany_texthi (temp2, op1));
2186 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2187 emit_insn (gen_rtx_SET (temp4,
2188 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2189 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2190 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2191 }
2192 break;
2193
2194 default:
2195 gcc_unreachable ();
2196 }
2197 }
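
/* Illustrative sketch only (kept out of the build): the CM_MEDANY sequence
   above reassembles a full 64-bit address from the four relocation fields
   %hh (bits 63-42), %hm (bits 41-32), %lm (bits 31-10) and %lo (bits 9-0).
   The plain-C check below restates that arithmetic; the field widths are the
   assumed SPARC assembler conventions, not something this file defines.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
medany_reassembly_example (void)
{
  uint64_t addr = 0x123456789abcdef0ULL;

  uint64_t hh = addr >> 42;                 /* sethi %hh(sym), %temp1 */
  uint64_t hm = (addr >> 32) & 0x3ff;       /* or    %temp1, %hm(sym), %temp3 */
  uint64_t lm = (addr >> 10) & 0x3fffff;    /* sethi %lm(sym), %temp2 */
  uint64_t lo = addr & 0x3ff;               /* or    %temp5, %lo(sym), %reg */

  uint64_t rebuilt = (((hh << 10) | hm) << 32)   /* sllx %temp3, 32, %temp4 */
                     + (lm << 10)                /* or %temp4, %temp2, %temp5 */
                     + lo;                       /* fold in the low 10 bits */

  assert (rebuilt == addr);
}
#endif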
2198
2199 /* These avoid problems when cross compiling. If we do not
2200 go through all this hair then the optimizer will see
2201 invalid REG_EQUAL notes or in some cases none at all. */
2202 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2203 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2204 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2205 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2206
2207 /* The optimizer is not allowed to assume anything about exactly
2208 which bits are set for a HIGH; they are unspecified.
2209 Unfortunately this leads to many missed optimizations
2210 during CSE. We mask out the non-HIGH bits so that this matches
2211 a plain movdi, which alleviates this problem. */
2212 static rtx
2213 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2214 {
2215 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2216 }
2217
2218 static rtx
2219 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2220 {
2221 return gen_rtx_SET (dest, GEN_INT (val));
2222 }
2223
2224 static rtx
2225 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2226 {
2227 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2228 }
2229
2230 static rtx
2231 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2232 {
2233 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2234 }
2235
2236 /* Worker routines for 64-bit constant formation on arch64.
2237 One of the key things to do in these emissions is
2238 to create as many temp REGs as possible. This makes it
2239 possible for half-built constants to be reused when
2240 similar values are required later on.
2241 Without doing this, the optimizer cannot see such
2242 opportunities. */
2243
2244 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2245 unsigned HOST_WIDE_INT, int);
2246
2247 static void
2248 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2249 unsigned HOST_WIDE_INT low_bits, int is_neg)
2250 {
2251 unsigned HOST_WIDE_INT high_bits;
2252
2253 if (is_neg)
2254 high_bits = (~low_bits) & 0xffffffff;
2255 else
2256 high_bits = low_bits;
2257
2258 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2259 if (!is_neg)
2260 {
2261 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2262 }
2263 else
2264 {
2265 /* If we are XOR'ing with -1, then we should emit a one's complement
2266 instead. This way the combiner will notice logical operations
2267 such as ANDN later on and substitute. */
2268 if ((low_bits & 0x3ff) == 0x3ff)
2269 {
2270 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2271 }
2272 else
2273 {
2274 emit_insn (gen_rtx_SET (op0,
2275 gen_safe_XOR64 (temp,
2276 (-(HOST_WIDE_INT)0x400
2277 | (low_bits & 0x3ff)))));
2278 }
2279 }
2280 }
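
/* Illustrative sketch only (kept out of the build): for the is_neg case
   above (a constant whose upper 32 bits are all ones), the sethi of the
   complement followed by the xor with (-0x400 | (low_bits & 0x3ff))
   reproduces the sign-extended value.  The check below restates that
   identity in plain C for one sample constant; sethi is modelled as
   zero-extending its 22-bit field to 64 bits.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
const64_quick1_neg_example (void)
{
  uint64_t low_bits = 0x89abcdefULL;                       /* low 32 bits of the constant */
  uint64_t target = 0xffffffff00000000ULL | low_bits;      /* upper half all ones */

  uint64_t high_bits = (~low_bits) & 0xffffffffULL;        /* complement, as in is_neg */
  uint64_t temp = high_bits & ~(uint64_t) 0x3ff;           /* gen_safe_HIGH64: sethi */
  uint64_t xor_imm = (uint64_t) -0x400 | (low_bits & 0x3ff); /* gen_safe_XOR64 operand */

  assert ((temp ^ xor_imm) == target);
}
#endif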
2281
2282 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2283 unsigned HOST_WIDE_INT, int);
2284
2285 static void
2286 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2287 unsigned HOST_WIDE_INT high_bits,
2288 unsigned HOST_WIDE_INT low_immediate,
2289 int shift_count)
2290 {
2291 rtx temp2 = op0;
2292
2293 if ((high_bits & 0xfffffc00) != 0)
2294 {
2295 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2296 if ((high_bits & ~0xfffffc00) != 0)
2297 emit_insn (gen_rtx_SET (op0,
2298 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2299 else
2300 temp2 = temp;
2301 }
2302 else
2303 {
2304 emit_insn (gen_safe_SET64 (temp, high_bits));
2305 temp2 = temp;
2306 }
2307
2308 /* Now shift it up into place. */
2309 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2310 GEN_INT (shift_count))));
2311
2312 /* If there is a low immediate part piece, finish up by
2313 putting that in as well. */
2314 if (low_immediate != 0)
2315 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2316 }
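
/* Illustrative sketch only (kept out of the build): the quick2 sequence
   above builds the upper bits with sethi/or (or a single mov), shifts them
   into place and then or-s in a small positive immediate.  The plain-C
   check below restates that composition for the usual shift count of 32.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
const64_quick2_example (void)
{
  uint64_t high_bits = 0x12345678ULL;   /* built by sethi + or */
  uint64_t low_imm = 0x123ULL;          /* small positive simm13 */
  uint64_t target = (high_bits << 32) | low_imm;

  uint64_t temp2 = (high_bits & ~(uint64_t) 0x3ff)   /* sethi %hi(high_bits) */
                   | (high_bits & 0x3ff);            /* or %lo(high_bits) */
  uint64_t op0 = temp2 << 32;                        /* sllx %reg, 32, %reg */
  op0 |= low_imm;                                    /* or %reg, low_imm, %reg */

  assert (op0 == target);
}
#endif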
2317
2318 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2319 unsigned HOST_WIDE_INT);
2320
2321 /* Full 64-bit constant decomposition. Even though this is the
2322 'worst' case, we still optimize a few things away. */
2323 static void
2324 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2325 unsigned HOST_WIDE_INT high_bits,
2326 unsigned HOST_WIDE_INT low_bits)
2327 {
2328 rtx sub_temp = op0;
2329
2330 if (can_create_pseudo_p ())
2331 sub_temp = gen_reg_rtx (DImode);
2332
2333 if ((high_bits & 0xfffffc00) != 0)
2334 {
2335 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2336 if ((high_bits & ~0xfffffc00) != 0)
2337 emit_insn (gen_rtx_SET (sub_temp,
2338 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2339 else
2340 sub_temp = temp;
2341 }
2342 else
2343 {
2344 emit_insn (gen_safe_SET64 (temp, high_bits));
2345 sub_temp = temp;
2346 }
2347
2348 if (can_create_pseudo_p ())
2349 {
2350 rtx temp2 = gen_reg_rtx (DImode);
2351 rtx temp3 = gen_reg_rtx (DImode);
2352 rtx temp4 = gen_reg_rtx (DImode);
2353
2354 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2355 GEN_INT (32))));
2356
2357 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2358 if ((low_bits & ~0xfffffc00) != 0)
2359 {
2360 emit_insn (gen_rtx_SET (temp3,
2361 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2362 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2363 }
2364 else
2365 {
2366 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2367 }
2368 }
2369 else
2370 {
2371 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2372 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2373 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2374 int to_shift = 12;
2375
2376 /* We are in the middle of reload, so this is really
2377 painful. However we do still make an attempt to
2378 avoid emitting truly stupid code. */
2379 if (low1 != const0_rtx)
2380 {
2381 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2382 GEN_INT (to_shift))));
2383 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2384 sub_temp = op0;
2385 to_shift = 12;
2386 }
2387 else
2388 {
2389 to_shift += 12;
2390 }
2391 if (low2 != const0_rtx)
2392 {
2393 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2394 GEN_INT (to_shift))));
2395 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2396 sub_temp = op0;
2397 to_shift = 8;
2398 }
2399 else
2400 {
2401 to_shift += 8;
2402 }
2403 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2404 GEN_INT (to_shift))));
2405 if (low3 != const0_rtx)
2406 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2407 /* phew... */
2408 }
2409 }
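
/* Illustrative sketch only (kept out of the build): the reload path above
   rebuilds the low 32 bits in three chunks of 12, 12 and 8 bits, interleaved
   with left shifts of the partial result.  The plain-C check below restates
   that decomposition for one sample constant in which all three chunks are
   nonzero, so no shift folding takes place.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
const64_longway_reload_example (void)
{
  uint64_t high_bits = 0x00000123ULL;
  uint64_t low_bits = 0x89abcdefULL;
  uint64_t target = (high_bits << 32) | low_bits;

  uint64_t low1 = (low_bits >> 20) & 0xfff;   /* bits 31..20 */
  uint64_t low2 = (low_bits >> 8) & 0xfff;    /* bits 19..8 */
  uint64_t low3 = low_bits & 0xff;            /* bits 7..0 */

  uint64_t op0 = high_bits;                   /* sub_temp after sethi/or */
  op0 = (op0 << 12) | low1;                   /* sllx 12; or low1 */
  op0 = (op0 << 12) | low2;                   /* sllx 12; or low2 */
  op0 = (op0 << 8) | low3;                    /* sllx 8;  or low3 */

  assert (op0 == target);
}
#endif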
2410
2411 /* Analyze a 64-bit constant for certain properties. */
2412 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2413 unsigned HOST_WIDE_INT,
2414 int *, int *, int *);
2415
2416 static void
2417 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2418 unsigned HOST_WIDE_INT low_bits,
2419 int *hbsp, int *lbsp, int *abbasp)
2420 {
2421 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2422 int i;
2423
2424 lowest_bit_set = highest_bit_set = -1;
2425 i = 0;
2426 do
2427 {
2428 if ((lowest_bit_set == -1)
2429 && ((low_bits >> i) & 1))
2430 lowest_bit_set = i;
2431 if ((highest_bit_set == -1)
2432 && ((high_bits >> (32 - i - 1)) & 1))
2433 highest_bit_set = (64 - i - 1);
2434 }
2435 while (++i < 32
2436 && ((highest_bit_set == -1)
2437 || (lowest_bit_set == -1)));
2438 if (i == 32)
2439 {
2440 i = 0;
2441 do
2442 {
2443 if ((lowest_bit_set == -1)
2444 && ((high_bits >> i) & 1))
2445 lowest_bit_set = i + 32;
2446 if ((highest_bit_set == -1)
2447 && ((low_bits >> (32 - i - 1)) & 1))
2448 highest_bit_set = 32 - i - 1;
2449 }
2450 while (++i < 32
2451 && ((highest_bit_set == -1)
2452 || (lowest_bit_set == -1)));
2453 }
2454 /* If there are no bits set this should have gone out
2455 as one instruction! */
2456 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2457 all_bits_between_are_set = 1;
2458 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2459 {
2460 if (i < 32)
2461 {
2462 if ((low_bits & (1 << i)) != 0)
2463 continue;
2464 }
2465 else
2466 {
2467 if ((high_bits & (1 << (i - 32))) != 0)
2468 continue;
2469 }
2470 all_bits_between_are_set = 0;
2471 break;
2472 }
2473 *hbsp = highest_bit_set;
2474 *lbsp = lowest_bit_set;
2475 *abbasp = all_bits_between_are_set;
2476 }
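
/* Illustrative sketch only (kept out of the build): what
   analyze_64bit_constant computes, expressed over a single uint64_t instead
   of two 32-bit halves, is the index of the lowest and highest set bit plus
   a flag saying whether every bit in between is set.  A sketch of the same
   computation in plain C, with one worked example.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
analyze_64bit_constant_sketch (uint64_t val, int *lbsp, int *hbsp, int *abbasp)
{
  int lowest = -1, highest = -1, all_between = 1, i;

  assert (val != 0);   /* mirrors the gcc_assert in the function above */

  for (i = 0; i < 64; i++)
    if ((val >> i) & 1)
      {
        if (lowest == -1)
          lowest = i;
        highest = i;
      }

  for (i = lowest; i <= highest; i++)
    if (((val >> i) & 1) == 0)
      all_between = 0;

  *lbsp = lowest;
  *hbsp = highest;
  *abbasp = all_between;
}

static void
analyze_64bit_constant_sketch_example (void)
{
  int lb, hb, abba;

  /* A contiguous run of set bits covering bits 36..43.  */
  analyze_64bit_constant_sketch (0x00000ff000000000ULL, &lb, &hb, &abba);
  assert (lb == 36 && hb == 43 && abba == 1);
}
#endif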
2477
2478 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2479
2480 static int
2481 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2482 unsigned HOST_WIDE_INT low_bits)
2483 {
2484 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2485
2486 if (high_bits == 0
2487 || high_bits == 0xffffffff)
2488 return 1;
2489
2490 analyze_64bit_constant (high_bits, low_bits,
2491 &highest_bit_set, &lowest_bit_set,
2492 &all_bits_between_are_set);
2493
2494 if ((highest_bit_set == 63
2495 || lowest_bit_set == 0)
2496 && all_bits_between_are_set != 0)
2497 return 1;
2498
2499 if ((highest_bit_set - lowest_bit_set) < 21)
2500 return 1;
2501
2502 return 0;
2503 }
2504
2505 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2506 unsigned HOST_WIDE_INT,
2507 int, int);
2508
2509 static unsigned HOST_WIDE_INT
2510 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2511 unsigned HOST_WIDE_INT low_bits,
2512 int lowest_bit_set, int shift)
2513 {
2514 HOST_WIDE_INT hi, lo;
2515
2516 if (lowest_bit_set < 32)
2517 {
2518 lo = (low_bits >> lowest_bit_set) << shift;
2519 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2520 }
2521 else
2522 {
2523 lo = 0;
2524 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2525 }
2526 gcc_assert (! (hi & lo));
2527 return (hi | lo);
2528 }
2529
2530 /* Here we are sure to be arch64 and this is an integer constant
2531 being loaded into a register. Emit the most efficient
2532 insn sequence possible. Detection of all the 1-insn cases
2533 has been done already. */
2534 static void
2535 sparc_emit_set_const64 (rtx op0, rtx op1)
2536 {
2537 unsigned HOST_WIDE_INT high_bits, low_bits;
2538 int lowest_bit_set, highest_bit_set;
2539 int all_bits_between_are_set;
2540 rtx temp = 0;
2541
2542 /* Sanity check that we know what we are working with. */
2543 gcc_assert (TARGET_ARCH64
2544 && (GET_CODE (op0) == SUBREG
2545 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2546
2547 if (! can_create_pseudo_p ())
2548 temp = op0;
2549
2550 if (GET_CODE (op1) != CONST_INT)
2551 {
2552 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2553 return;
2554 }
2555
2556 if (! temp)
2557 temp = gen_reg_rtx (DImode);
2558
2559 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2560 low_bits = (INTVAL (op1) & 0xffffffff);
2561
2562 /* low_bits bits 0 --> 31
2563 high_bits bits 32 --> 63 */
2564
2565 analyze_64bit_constant (high_bits, low_bits,
2566 &highest_bit_set, &lowest_bit_set,
2567 &all_bits_between_are_set);
2568
2569 /* First try for a 2-insn sequence. */
2570
2571 /* These situations are preferred because the optimizer can
2572 * do more things with them:
2573 * 1) mov -1, %reg
2574 * sllx %reg, shift, %reg
2575 * 2) mov -1, %reg
2576 * srlx %reg, shift, %reg
2577 * 3) mov some_small_const, %reg
2578 * sllx %reg, shift, %reg
2579 */
2580 if (((highest_bit_set == 63
2581 || lowest_bit_set == 0)
2582 && all_bits_between_are_set != 0)
2583 || ((highest_bit_set - lowest_bit_set) < 12))
2584 {
2585 HOST_WIDE_INT the_const = -1;
2586 int shift = lowest_bit_set;
2587
2588 if ((highest_bit_set != 63
2589 && lowest_bit_set != 0)
2590 || all_bits_between_are_set == 0)
2591 {
2592 the_const =
2593 create_simple_focus_bits (high_bits, low_bits,
2594 lowest_bit_set, 0);
2595 }
2596 else if (lowest_bit_set == 0)
2597 shift = -(63 - highest_bit_set);
2598
2599 gcc_assert (SPARC_SIMM13_P (the_const));
2600 gcc_assert (shift != 0);
2601
2602 emit_insn (gen_safe_SET64 (temp, the_const));
2603 if (shift > 0)
2604 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2605 GEN_INT (shift))));
2606 else if (shift < 0)
2607 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2608 GEN_INT (-shift))));
2609 return;
2610 }
2611
2612 /* Now a range of 22 or fewer bits set somewhere.
2613 * 1) sethi %hi(focus_bits), %reg
2614 * sllx %reg, shift, %reg
2615 * 2) sethi %hi(focus_bits), %reg
2616 * srlx %reg, shift, %reg
2617 */
2618 if ((highest_bit_set - lowest_bit_set) < 21)
2619 {
2620 unsigned HOST_WIDE_INT focus_bits =
2621 create_simple_focus_bits (high_bits, low_bits,
2622 lowest_bit_set, 10);
2623
2624 gcc_assert (SPARC_SETHI_P (focus_bits));
2625 gcc_assert (lowest_bit_set != 10);
2626
2627 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2628
2629 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2630 if (lowest_bit_set < 10)
2631 emit_insn (gen_rtx_SET (op0,
2632 gen_rtx_LSHIFTRT (DImode, temp,
2633 GEN_INT (10 - lowest_bit_set))));
2634 else if (lowest_bit_set > 10)
2635 emit_insn (gen_rtx_SET (op0,
2636 gen_rtx_ASHIFT (DImode, temp,
2637 GEN_INT (lowest_bit_set - 10))));
2638 return;
2639 }
2640
2641 /* 1) sethi %hi(low_bits), %reg
2642 * or %reg, %lo(low_bits), %reg
2643 * 2) sethi %hi(~low_bits), %reg
2644 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2645 */
2646 if (high_bits == 0
2647 || high_bits == 0xffffffff)
2648 {
2649 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2650 (high_bits == 0xffffffff));
2651 return;
2652 }
2653
2654 /* Now, try 3-insn sequences. */
2655
2656 /* 1) sethi %hi(high_bits), %reg
2657 * or %reg, %lo(high_bits), %reg
2658 * sllx %reg, 32, %reg
2659 */
2660 if (low_bits == 0)
2661 {
2662 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2663 return;
2664 }
2665
2666 /* We may be able to do something quick
2667 when the constant is negated, so try that. */
2668 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2669 (~low_bits) & 0xfffffc00))
2670 {
2671 /* NOTE: The trailing bits get XOR'd so we need the
2672 non-negated bits, not the negated ones. */
2673 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2674
2675 if ((((~high_bits) & 0xffffffff) == 0
2676 && ((~low_bits) & 0x80000000) == 0)
2677 || (((~high_bits) & 0xffffffff) == 0xffffffff
2678 && ((~low_bits) & 0x80000000) != 0))
2679 {
2680 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2681
2682 if ((SPARC_SETHI_P (fast_int)
2683 && (~high_bits & 0xffffffff) == 0)
2684 || SPARC_SIMM13_P (fast_int))
2685 emit_insn (gen_safe_SET64 (temp, fast_int));
2686 else
2687 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2688 }
2689 else
2690 {
2691 rtx negated_const;
2692 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2693 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2694 sparc_emit_set_const64 (temp, negated_const);
2695 }
2696
2697 /* If we are XOR'ing with -1, then we should emit a one's complement
2698 instead. This way the combiner will notice logical operations
2699 such as ANDN later on and substitute. */
2700 if (trailing_bits == 0x3ff)
2701 {
2702 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2703 }
2704 else
2705 {
2706 emit_insn (gen_rtx_SET (op0,
2707 gen_safe_XOR64 (temp,
2708 (-0x400 | trailing_bits))));
2709 }
2710 return;
2711 }
2712
2713 /* 1) sethi %hi(xxx), %reg
2714 * or %reg, %lo(xxx), %reg
2715 * sllx %reg, yyy, %reg
2716 *
2717 * ??? This is just a generalized version of the low_bits==0
2718 * thing above, FIXME...
2719 */
2720 if ((highest_bit_set - lowest_bit_set) < 32)
2721 {
2722 unsigned HOST_WIDE_INT focus_bits =
2723 create_simple_focus_bits (high_bits, low_bits,
2724 lowest_bit_set, 0);
2725
2726 /* We can't get here in this state. */
2727 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2728
2729 /* So what we know is that the set bits straddle the
2730 middle of the 64-bit word. */
2731 sparc_emit_set_const64_quick2 (op0, temp,
2732 focus_bits, 0,
2733 lowest_bit_set);
2734 return;
2735 }
2736
2737 /* 1) sethi %hi(high_bits), %reg
2738 * or %reg, %lo(high_bits), %reg
2739 * sllx %reg, 32, %reg
2740 * or %reg, low_bits, %reg
2741 */
2742 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2743 {
2744 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2745 return;
2746 }
2747
2748 /* The easiest way when all else fails, is full decomposition. */
2749 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2750 }
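
/* Illustrative sketch only (kept out of the build): the 2-insn "focus bits"
   case in sparc_emit_set_const64 handles any constant whose set bits form a
   window of at most 21 bits.  The window is normalised so that sethi can
   load it (low 10 bits clear), then a single shift slides it back into
   place.  The plain-C check below restates that for a 16-bit run starting
   at bit 36.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
const64_focus_bits_example (void)
{
  uint64_t value = 0xffffULL << 36;   /* 16-bit run, lowest set bit = 36 */
  int lowest_bit_set = 36;

  /* create_simple_focus_bits (..., lowest_bit_set, 10).  */
  uint64_t focus_bits = (value >> lowest_bit_set) << 10;   /* 0x3fffc00, sethi-able */

  /* lowest_bit_set > 10, so shift left by the difference.  */
  uint64_t op0 = focus_bits << (lowest_bit_set - 10);      /* sllx %reg, 26, %reg */

  assert ((focus_bits & 0x3ff) == 0);   /* fits the sethi field */
  assert (op0 == value);
}
#endif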
2751
2752 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2753
2754 static bool
2755 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2756 {
2757 *p1 = SPARC_ICC_REG;
2758 *p2 = SPARC_FCC_REG;
2759 return true;
2760 }
2761
2762 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2763
2764 static unsigned int
2765 sparc_min_arithmetic_precision (void)
2766 {
2767 return 32;
2768 }
2769
2770 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2771 return the mode to be used for the comparison. For floating-point,
2772 CCFP[E]mode is used. CCNZmode should be used when the first operand
2773 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2774 processing is needed. */
2775
2776 machine_mode
2777 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2778 {
2779 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2780 {
2781 switch (op)
2782 {
2783 case EQ:
2784 case NE:
2785 case UNORDERED:
2786 case ORDERED:
2787 case UNLT:
2788 case UNLE:
2789 case UNGT:
2790 case UNGE:
2791 case UNEQ:
2792 case LTGT:
2793 return CCFPmode;
2794
2795 case LT:
2796 case LE:
2797 case GT:
2798 case GE:
2799 return CCFPEmode;
2800
2801 default:
2802 gcc_unreachable ();
2803 }
2804 }
2805 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2806 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2807 && y == const0_rtx)
2808 {
2809 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2810 return CCXNZmode;
2811 else
2812 return CCNZmode;
2813 }
2814 else
2815 {
2816 /* This is for the cmp<mode>_sne pattern. */
2817 if (GET_CODE (x) == NOT && y == constm1_rtx)
2818 {
2819 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2820 return CCXCmode;
2821 else
2822 return CCCmode;
2823 }
2824
2825 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
2826 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
2827 {
2828 if (GET_CODE (y) == UNSPEC
2829 && (XINT (y, 1) == UNSPEC_ADDV
2830 || XINT (y, 1) == UNSPEC_SUBV
2831 || XINT (y, 1) == UNSPEC_NEGV))
2832 return CCVmode;
2833 else
2834 return CCCmode;
2835 }
2836
2837 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2838 return CCXmode;
2839 else
2840 return CCmode;
2841 }
2842 }
2843
2844 /* Emit the compare insn and return the CC reg for a CODE comparison
2845 with operands X and Y. */
2846
2847 static rtx
2848 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2849 {
2850 machine_mode mode;
2851 rtx cc_reg;
2852
2853 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2854 return x;
2855
2856 mode = SELECT_CC_MODE (code, x, y);
2857
2858 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2859 fcc regs (cse can't tell they're really call clobbered regs and will
2860 remove a duplicate comparison even if there is an intervening function
2861 call - it will then try to reload the cc reg via an int reg which is why
2862 we need the movcc patterns). It is possible to provide the movcc
2863 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2864 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2865 to tell cse that CCFPE mode registers (even pseudos) are call
2866 clobbered. */
2867
2868 /* ??? This is an experiment. Rather than making changes to cse which may
2869 or may not be easy/clean, we do our own cse. This is possible because
2870 we will generate hard registers. Cse knows they're call clobbered (it
2871 doesn't know the same thing about pseudos). If we guess wrong, no big
2872 deal, but if we win, great! */
2873
2874 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2875 #if 1 /* experiment */
2876 {
2877 int reg;
2878 /* We cycle through the registers to ensure they're all exercised. */
2879 static int next_fcc_reg = 0;
2880 /* Previous x,y for each fcc reg. */
2881 static rtx prev_args[4][2];
2882
2883 /* Scan prev_args for x,y. */
2884 for (reg = 0; reg < 4; reg++)
2885 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2886 break;
2887 if (reg == 4)
2888 {
2889 reg = next_fcc_reg;
2890 prev_args[reg][0] = x;
2891 prev_args[reg][1] = y;
2892 next_fcc_reg = (next_fcc_reg + 1) & 3;
2893 }
2894 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2895 }
2896 #else
2897 cc_reg = gen_reg_rtx (mode);
2898 #endif /* ! experiment */
2899 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2900 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2901 else
2902 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2903
2904 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2905 will only result in an unrecognizable insn so there is no point in asserting. */
2906 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2907
2908 return cc_reg;
2909 }
2910
2911
2912 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2913
2914 rtx
2915 gen_compare_reg (rtx cmp)
2916 {
2917 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2918 }
2919
2920 /* This function is used for v9 only.
2921 DEST is the target of the Scc insn.
2922 CODE is the code for an Scc's comparison.
2923 X and Y are the values we compare.
2924
2925 This function is needed to turn
2926
2927 (set (reg:SI 110)
2928 (gt (reg:CCX 100 %icc)
2929 (const_int 0)))
2930 into
2931 (set (reg:SI 110)
2932 (gt:DI (reg:CCX 100 %icc)
2933 (const_int 0)))
2934
2935 IE: The instruction recognizer needs to see the mode of the comparison to
2936 find the right instruction. We could use "gt:DI" right in the
2937 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2938
2939 static int
2940 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2941 {
2942 if (! TARGET_ARCH64
2943 && (GET_MODE (x) == DImode
2944 || GET_MODE (dest) == DImode))
2945 return 0;
2946
2947 /* Try to use the movrCC insns. */
2948 if (TARGET_ARCH64
2949 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2950 && y == const0_rtx
2951 && v9_regcmp_p (compare_code))
2952 {
2953 rtx op0 = x;
2954 rtx temp;
2955
2956 /* Special case for op0 != 0. This can be done with one instruction if
2957 dest == x. */
2958
2959 if (compare_code == NE
2960 && GET_MODE (dest) == DImode
2961 && rtx_equal_p (op0, dest))
2962 {
2963 emit_insn (gen_rtx_SET (dest,
2964 gen_rtx_IF_THEN_ELSE (DImode,
2965 gen_rtx_fmt_ee (compare_code, DImode,
2966 op0, const0_rtx),
2967 const1_rtx,
2968 dest)));
2969 return 1;
2970 }
2971
2972 if (reg_overlap_mentioned_p (dest, op0))
2973 {
2974 /* Handle the case where dest == x.
2975 We "early clobber" the result. */
2976 op0 = gen_reg_rtx (GET_MODE (x));
2977 emit_move_insn (op0, x);
2978 }
2979
2980 emit_insn (gen_rtx_SET (dest, const0_rtx));
2981 if (GET_MODE (op0) != DImode)
2982 {
2983 temp = gen_reg_rtx (DImode);
2984 convert_move (temp, op0, 0);
2985 }
2986 else
2987 temp = op0;
2988 emit_insn (gen_rtx_SET (dest,
2989 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2990 gen_rtx_fmt_ee (compare_code, DImode,
2991 temp, const0_rtx),
2992 const1_rtx,
2993 dest)));
2994 return 1;
2995 }
2996 else
2997 {
2998 x = gen_compare_reg_1 (compare_code, x, y);
2999 y = const0_rtx;
3000
3001 emit_insn (gen_rtx_SET (dest, const0_rtx));
3002 emit_insn (gen_rtx_SET (dest,
3003 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3004 gen_rtx_fmt_ee (compare_code,
3005 GET_MODE (x), x, y),
3006 const1_rtx, dest)));
3007 return 1;
3008 }
3009 }
3010
3011
3012 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3013 without jumps using the addx/subx instructions. */
3014
3015 bool
3016 emit_scc_insn (rtx operands[])
3017 {
3018 rtx tem, x, y;
3019 enum rtx_code code;
3020 machine_mode mode;
3021
3022 /* The quad-word fp compare library routines all return nonzero to indicate
3023 true, which is different from the equivalent libgcc routines, so we must
3024 handle them specially here. */
3025 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3026 {
3027 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3028 GET_CODE (operands[1]));
3029 operands[2] = XEXP (operands[1], 0);
3030 operands[3] = XEXP (operands[1], 1);
3031 }
3032
3033 code = GET_CODE (operands[1]);
3034 x = operands[2];
3035 y = operands[3];
3036 mode = GET_MODE (x);
3037
3038 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3039 more applications). The exception to this is "reg != 0" which can
3040 be done in one instruction on v9 (so we do it). */
3041 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3042 {
3043 if (y != const0_rtx)
3044 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3045
3046 rtx pat = gen_rtx_SET (operands[0],
3047 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3048 x, const0_rtx));
3049
3050 /* If we can use addx/subx or addxc, add a clobber for CC. */
3051 if (mode == SImode || (code == NE && TARGET_VIS3))
3052 {
3053 rtx clobber
3054 = gen_rtx_CLOBBER (VOIDmode,
3055 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3056 SPARC_ICC_REG));
3057 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3058 }
3059
3060 emit_insn (pat);
3061 return true;
3062 }
3063
3064 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3065 if (TARGET_ARCH64
3066 && mode == DImode
3067 && !((code == LTU || code == GTU) && TARGET_VIS3)
3068 && gen_v9_scc (operands[0], code, x, y))
3069 return true;
3070
3071 /* We can do LTU and GEU using the addx/subx instructions too. And
3072 for GTU/LEU, if both operands are registers, swap them and fall
3073 back to the easy case. */
3074 if (code == GTU || code == LEU)
3075 {
3076 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3077 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3078 {
3079 tem = x;
3080 x = y;
3081 y = tem;
3082 code = swap_condition (code);
3083 }
3084 }
3085
3086 if (code == LTU || code == GEU)
3087 {
3088 emit_insn (gen_rtx_SET (operands[0],
3089 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3090 gen_compare_reg_1 (code, x, y),
3091 const0_rtx)));
3092 return true;
3093 }
3094
3095 /* All the possibilities to use addx/subx-based sequences have been
3096 exhausted, so try for a 3-instruction sequence using v9 conditional
3097 moves. */
3098 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3099 return true;
3100
3101 /* Nope, do branches. */
3102 return false;
3103 }
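
/* Illustrative sketch only (kept out of the build): the seq/sne path above
   relies on the identity (x == y) <=> ((x ^ y) == 0), so a general equality
   test reduces to the "reg ==/!= 0" form that the addx/subx (or v9 movr)
   sequences handle.  The plain-C check below restates that reduction.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
scc_eq_via_xor (uint32_t x, uint32_t y)
{
  return (x ^ y) == 0;   /* what the forced XOR plus the "== 0" test computes */
}

static void
scc_eq_via_xor_example (void)
{
  assert (scc_eq_via_xor (42, 42) == 1);
  assert (scc_eq_via_xor (42, 43) == 0);
}
#endif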
3104
3105 /* Emit a conditional jump insn for the v9 architecture using comparison code
3106 CODE and jump target LABEL.
3107 This function exists to take advantage of the v9 brxx insns. */
3108
3109 static void
3110 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3111 {
3112 emit_jump_insn (gen_rtx_SET (pc_rtx,
3113 gen_rtx_IF_THEN_ELSE (VOIDmode,
3114 gen_rtx_fmt_ee (code, GET_MODE (op0),
3115 op0, const0_rtx),
3116 gen_rtx_LABEL_REF (VOIDmode, label),
3117 pc_rtx)));
3118 }
3119
3120 /* Emit a conditional jump insn for the UA2011 architecture using
3121 comparison code CODE and jump target LABEL. This function exists
3122 to take advantage of the UA2011 Compare and Branch insns. */
3123
3124 static void
3125 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3126 {
3127 rtx if_then_else;
3128
3129 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3130 gen_rtx_fmt_ee(code, GET_MODE(op0),
3131 op0, op1),
3132 gen_rtx_LABEL_REF (VOIDmode, label),
3133 pc_rtx);
3134
3135 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3136 }
3137
3138 void
3139 emit_conditional_branch_insn (rtx operands[])
3140 {
3141 /* The quad-word fp compare library routines all return nonzero to indicate
3142 true, which is different from the equivalent libgcc routines, so we must
3143 handle them specially here. */
3144 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3145 {
3146 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3147 GET_CODE (operands[0]));
3148 operands[1] = XEXP (operands[0], 0);
3149 operands[2] = XEXP (operands[0], 1);
3150 }
3151
3152 /* If we can tell early on that the comparison is against a constant
3153 that won't fit in the 5-bit signed immediate field of a cbcond,
3154 use one of the other v9 conditional branch sequences. */
3155 if (TARGET_CBCOND
3156 && GET_CODE (operands[1]) == REG
3157 && (GET_MODE (operands[1]) == SImode
3158 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3159 && (GET_CODE (operands[2]) != CONST_INT
3160 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3161 {
3162 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3163 return;
3164 }
3165
3166 if (TARGET_ARCH64 && operands[2] == const0_rtx
3167 && GET_CODE (operands[1]) == REG
3168 && GET_MODE (operands[1]) == DImode)
3169 {
3170 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3171 return;
3172 }
3173
3174 operands[1] = gen_compare_reg (operands[0]);
3175 operands[2] = const0_rtx;
3176 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3177 operands[1], operands[2]);
3178 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3179 operands[3]));
3180 }
3181
3182
3183 /* Generate a DFmode part of a hard TFmode register.
3184 REG is the TFmode hard register, LOW is 1 for the
3185 low 64 bits of the register and 0 otherwise.
3186 */
3187 rtx
3188 gen_df_reg (rtx reg, int low)
3189 {
3190 int regno = REGNO (reg);
3191
3192 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3193 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3194 return gen_rtx_REG (DFmode, regno);
3195 }
3196 \f
3197 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3198 Unlike normal calls, TFmode operands are passed by reference. It is
3199 assumed that no more than 3 operands are required. */
3200
3201 static void
3202 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3203 {
3204 rtx ret_slot = NULL, arg[3], func_sym;
3205 int i;
3206
3207 /* We only expect to be called for conversions, unary, and binary ops. */
3208 gcc_assert (nargs == 2 || nargs == 3);
3209
3210 for (i = 0; i < nargs; ++i)
3211 {
3212 rtx this_arg = operands[i];
3213 rtx this_slot;
3214
3215 /* TFmode arguments and return values are passed by reference. */
3216 if (GET_MODE (this_arg) == TFmode)
3217 {
3218 int force_stack_temp;
3219
3220 force_stack_temp = 0;
3221 if (TARGET_BUGGY_QP_LIB && i == 0)
3222 force_stack_temp = 1;
3223
3224 if (GET_CODE (this_arg) == MEM
3225 && ! force_stack_temp)
3226 {
3227 tree expr = MEM_EXPR (this_arg);
3228 if (expr)
3229 mark_addressable (expr);
3230 this_arg = XEXP (this_arg, 0);
3231 }
3232 else if (CONSTANT_P (this_arg)
3233 && ! force_stack_temp)
3234 {
3235 this_slot = force_const_mem (TFmode, this_arg);
3236 this_arg = XEXP (this_slot, 0);
3237 }
3238 else
3239 {
3240 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3241
3242 /* Operand 0 is the return value. We'll copy it out later. */
3243 if (i > 0)
3244 emit_move_insn (this_slot, this_arg);
3245 else
3246 ret_slot = this_slot;
3247
3248 this_arg = XEXP (this_slot, 0);
3249 }
3250 }
3251
3252 arg[i] = this_arg;
3253 }
3254
3255 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3256
3257 if (GET_MODE (operands[0]) == TFmode)
3258 {
3259 if (nargs == 2)
3260 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3261 arg[0], GET_MODE (arg[0]),
3262 arg[1], GET_MODE (arg[1]));
3263 else
3264 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3265 arg[0], GET_MODE (arg[0]),
3266 arg[1], GET_MODE (arg[1]),
3267 arg[2], GET_MODE (arg[2]));
3268
3269 if (ret_slot)
3270 emit_move_insn (operands[0], ret_slot);
3271 }
3272 else
3273 {
3274 rtx ret;
3275
3276 gcc_assert (nargs == 2);
3277
3278 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3279 GET_MODE (operands[0]), 1,
3280 arg[1], GET_MODE (arg[1]));
3281
3282 if (ret != operands[0])
3283 emit_move_insn (operands[0], ret);
3284 }
3285 }
3286
3287 /* Expand soft-float TFmode calls to sparc abi routines. */
3288
3289 static void
3290 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3291 {
3292 const char *func;
3293
3294 switch (code)
3295 {
3296 case PLUS:
3297 func = "_Qp_add";
3298 break;
3299 case MINUS:
3300 func = "_Qp_sub";
3301 break;
3302 case MULT:
3303 func = "_Qp_mul";
3304 break;
3305 case DIV:
3306 func = "_Qp_div";
3307 break;
3308 default:
3309 gcc_unreachable ();
3310 }
3311
3312 emit_soft_tfmode_libcall (func, 3, operands);
3313 }
3314
3315 static void
3316 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3317 {
3318 const char *func;
3319
3320 gcc_assert (code == SQRT);
3321 func = "_Qp_sqrt";
3322
3323 emit_soft_tfmode_libcall (func, 2, operands);
3324 }
3325
3326 static void
3327 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3328 {
3329 const char *func;
3330
3331 switch (code)
3332 {
3333 case FLOAT_EXTEND:
3334 switch (GET_MODE (operands[1]))
3335 {
3336 case SFmode:
3337 func = "_Qp_stoq";
3338 break;
3339 case DFmode:
3340 func = "_Qp_dtoq";
3341 break;
3342 default:
3343 gcc_unreachable ();
3344 }
3345 break;
3346
3347 case FLOAT_TRUNCATE:
3348 switch (GET_MODE (operands[0]))
3349 {
3350 case SFmode:
3351 func = "_Qp_qtos";
3352 break;
3353 case DFmode:
3354 func = "_Qp_qtod";
3355 break;
3356 default:
3357 gcc_unreachable ();
3358 }
3359 break;
3360
3361 case FLOAT:
3362 switch (GET_MODE (operands[1]))
3363 {
3364 case SImode:
3365 func = "_Qp_itoq";
3366 if (TARGET_ARCH64)
3367 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3368 break;
3369 case DImode:
3370 func = "_Qp_xtoq";
3371 break;
3372 default:
3373 gcc_unreachable ();
3374 }
3375 break;
3376
3377 case UNSIGNED_FLOAT:
3378 switch (GET_MODE (operands[1]))
3379 {
3380 case SImode:
3381 func = "_Qp_uitoq";
3382 if (TARGET_ARCH64)
3383 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3384 break;
3385 case DImode:
3386 func = "_Qp_uxtoq";
3387 break;
3388 default:
3389 gcc_unreachable ();
3390 }
3391 break;
3392
3393 case FIX:
3394 switch (GET_MODE (operands[0]))
3395 {
3396 case SImode:
3397 func = "_Qp_qtoi";
3398 break;
3399 case DImode:
3400 func = "_Qp_qtox";
3401 break;
3402 default:
3403 gcc_unreachable ();
3404 }
3405 break;
3406
3407 case UNSIGNED_FIX:
3408 switch (GET_MODE (operands[0]))
3409 {
3410 case SImode:
3411 func = "_Qp_qtoui";
3412 break;
3413 case DImode:
3414 func = "_Qp_qtoux";
3415 break;
3416 default:
3417 gcc_unreachable ();
3418 }
3419 break;
3420
3421 default:
3422 gcc_unreachable ();
3423 }
3424
3425 emit_soft_tfmode_libcall (func, 2, operands);
3426 }
3427
3428 /* Expand a hard-float tfmode operation. All arguments must be in
3429 registers. */
3430
3431 static void
3432 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3433 {
3434 rtx op, dest;
3435
3436 if (GET_RTX_CLASS (code) == RTX_UNARY)
3437 {
3438 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3439 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3440 }
3441 else
3442 {
3443 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3444 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3445 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3446 operands[1], operands[2]);
3447 }
3448
3449 if (register_operand (operands[0], VOIDmode))
3450 dest = operands[0];
3451 else
3452 dest = gen_reg_rtx (GET_MODE (operands[0]));
3453
3454 emit_insn (gen_rtx_SET (dest, op));
3455
3456 if (dest != operands[0])
3457 emit_move_insn (operands[0], dest);
3458 }
3459
3460 void
3461 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3462 {
3463 if (TARGET_HARD_QUAD)
3464 emit_hard_tfmode_operation (code, operands);
3465 else
3466 emit_soft_tfmode_binop (code, operands);
3467 }
3468
3469 void
3470 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3471 {
3472 if (TARGET_HARD_QUAD)
3473 emit_hard_tfmode_operation (code, operands);
3474 else
3475 emit_soft_tfmode_unop (code, operands);
3476 }
3477
3478 void
3479 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3480 {
3481 if (TARGET_HARD_QUAD)
3482 emit_hard_tfmode_operation (code, operands);
3483 else
3484 emit_soft_tfmode_cvt (code, operands);
3485 }
3486 \f
3487 /* Return nonzero if a branch/jump/call instruction will be emitting
3488 a nop into its delay slot. */
3489
3490 int
3491 empty_delay_slot (rtx_insn *insn)
3492 {
3493 rtx seq;
3494
3495 /* If no previous instruction (should not happen), return true. */
3496 if (PREV_INSN (insn) == NULL)
3497 return 1;
3498
3499 seq = NEXT_INSN (PREV_INSN (insn));
3500 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3501 return 0;
3502
3503 return 1;
3504 }
3505
3506 /* Return nonzero if we should emit a nop after a cbcond instruction.
3507 The cbcond instruction does not have a delay slot, however there is
3508 a severe performance penalty if a control transfer appears right
3509 after a cbcond. Therefore we emit a nop when we detect this
3510 situation. */
3511
3512 int
3513 emit_cbcond_nop (rtx_insn *insn)
3514 {
3515 rtx next = next_active_insn (insn);
3516
3517 if (!next)
3518 return 1;
3519
3520 if (NONJUMP_INSN_P (next)
3521 && GET_CODE (PATTERN (next)) == SEQUENCE)
3522 next = XVECEXP (PATTERN (next), 0, 0);
3523 else if (CALL_P (next)
3524 && GET_CODE (PATTERN (next)) == PARALLEL)
3525 {
3526 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3527
3528 if (GET_CODE (delay) == RETURN)
3529 {
3530 /* It's a sibling call. Do not emit the nop if we're going
3531 to emit something other than the jump itself as the first
3532 instruction of the sibcall sequence. */
3533 if (sparc_leaf_function_p || TARGET_FLAT)
3534 return 0;
3535 }
3536 }
3537
3538 if (NONJUMP_INSN_P (next))
3539 return 0;
3540
3541 return 1;
3542 }
3543
3544 /* Return nonzero if TRIAL can go into the call delay slot. */
3545
3546 int
3547 eligible_for_call_delay (rtx_insn *trial)
3548 {
3549 rtx pat;
3550
3551 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3552 return 0;
3553
3554 /* Binutils allows
3555 call __tls_get_addr, %tgd_call (foo)
3556 add %l7, %o0, %o0, %tgd_add (foo)
3557 while Sun as/ld does not. */
3558 if (TARGET_GNU_TLS || !TARGET_TLS)
3559 return 1;
3560
3561 pat = PATTERN (trial);
3562
3563 /* We must reject tgd_add{32|64}, i.e.
3564 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3565 and tldm_add{32|64}, i.e.
3566 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3567 for Sun as/ld. */
3568 if (GET_CODE (pat) == SET
3569 && GET_CODE (SET_SRC (pat)) == PLUS)
3570 {
3571 rtx unspec = XEXP (SET_SRC (pat), 1);
3572
3573 if (GET_CODE (unspec) == UNSPEC
3574 && (XINT (unspec, 1) == UNSPEC_TLSGD
3575 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3576 return 0;
3577 }
3578
3579 return 1;
3580 }
3581
3582 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3583 instruction. RETURN_P is true if the v9 variant 'return' is to be
3584 considered in the test too.
3585
3586 TRIAL must be a SET whose destination is a REG appropriate for the
3587 'restore' instruction or, if RETURN_P is true, for the 'return'
3588 instruction. */
3589
3590 static int
3591 eligible_for_restore_insn (rtx trial, bool return_p)
3592 {
3593 rtx pat = PATTERN (trial);
3594 rtx src = SET_SRC (pat);
3595 bool src_is_freg = false;
3596 rtx src_reg;
3597
3598 /* Since we can now do moves between float and integer registers when
3599 VIS3 is enabled, we have to catch this case. We can allow such
3600 moves when doing a 'return' however. */
3601 src_reg = src;
3602 if (GET_CODE (src_reg) == SUBREG)
3603 src_reg = SUBREG_REG (src_reg);
3604 if (GET_CODE (src_reg) == REG
3605 && SPARC_FP_REG_P (REGNO (src_reg)))
3606 src_is_freg = true;
3607
3608 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3609 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3610 && arith_operand (src, GET_MODE (src))
3611 && ! src_is_freg)
3612 {
3613 if (TARGET_ARCH64)
3614 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3615 else
3616 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3617 }
3618
3619 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3620 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3621 && arith_double_operand (src, GET_MODE (src))
3622 && ! src_is_freg)
3623 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3624
3625 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3626 else if (! TARGET_FPU && register_operand (src, SFmode))
3627 return 1;
3628
3629 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3630 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3631 return 1;
3632
3633 /* If we have the 'return' instruction, anything that does not use
3634 local or output registers and can go into a delay slot wins. */
3635 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3636 return 1;
3637
3638 /* The 'restore src1,src2,dest' pattern for SImode. */
3639 else if (GET_CODE (src) == PLUS
3640 && register_operand (XEXP (src, 0), SImode)
3641 && arith_operand (XEXP (src, 1), SImode))
3642 return 1;
3643
3644 /* The 'restore src1,src2,dest' pattern for DImode. */
3645 else if (GET_CODE (src) == PLUS
3646 && register_operand (XEXP (src, 0), DImode)
3647 && arith_double_operand (XEXP (src, 1), DImode))
3648 return 1;
3649
3650 /* The 'restore src1,%lo(src2),dest' pattern. */
3651 else if (GET_CODE (src) == LO_SUM
3652 && ! TARGET_CM_MEDMID
3653 && ((register_operand (XEXP (src, 0), SImode)
3654 && immediate_operand (XEXP (src, 1), SImode))
3655 || (TARGET_ARCH64
3656 && register_operand (XEXP (src, 0), DImode)
3657 && immediate_operand (XEXP (src, 1), DImode))))
3658 return 1;
3659
3660 /* The 'restore src,src,dest' pattern, i.e. a left shift by 1 done as src + src. */
3661 else if (GET_CODE (src) == ASHIFT
3662 && (register_operand (XEXP (src, 0), SImode)
3663 || register_operand (XEXP (src, 0), DImode))
3664 && XEXP (src, 1) == const1_rtx)
3665 return 1;
3666
3667 return 0;
3668 }
3669
3670 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3671
3672 int
3673 eligible_for_return_delay (rtx_insn *trial)
3674 {
3675 int regno;
3676 rtx pat;
3677
3678 /* If the function uses __builtin_eh_return, the eh_return machinery
3679 occupies the delay slot. */
3680 if (crtl->calls_eh_return)
3681 return 0;
3682
3683 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3684 return 0;
3685
3686 /* In the case of a leaf or flat function, anything can go into the slot. */
3687 if (sparc_leaf_function_p || TARGET_FLAT)
3688 return 1;
3689
3690 if (!NONJUMP_INSN_P (trial))
3691 return 0;
3692
3693 pat = PATTERN (trial);
3694 if (GET_CODE (pat) == PARALLEL)
3695 {
3696 int i;
3697
3698 if (! TARGET_V9)
3699 return 0;
3700 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3701 {
3702 rtx expr = XVECEXP (pat, 0, i);
3703 if (GET_CODE (expr) != SET)
3704 return 0;
3705 if (GET_CODE (SET_DEST (expr)) != REG)
3706 return 0;
3707 regno = REGNO (SET_DEST (expr));
3708 if (regno >= 8 && regno < 24)
3709 return 0;
3710 }
3711 return !epilogue_renumber (&pat, 1);
3712 }
3713
3714 if (GET_CODE (pat) != SET)
3715 return 0;
3716
3717 if (GET_CODE (SET_DEST (pat)) != REG)
3718 return 0;
3719
3720 regno = REGNO (SET_DEST (pat));
3721
3722 /* Otherwise, only operations which can be done in tandem with
3723 a `restore' or `return' insn can go into the delay slot. */
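/* Registers 8 to 23 are the %o and %l registers, whose contents do not
   survive the register window restore. */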
3724 if (regno >= 8 && regno < 24)
3725 return 0;
3726
3727 /* If this instruction sets up a floating-point register and we have a return
3728 instruction, it can probably go in. But restore will not work
3729 with FP_REGS. */
3730 if (! SPARC_INT_REG_P (regno))
3731 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3732
3733 return eligible_for_restore_insn (trial, true);
3734 }
3735
3736 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3737
3738 int
3739 eligible_for_sibcall_delay (rtx_insn *trial)
3740 {
3741 rtx pat;
3742
3743 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3744 return 0;
3745
3746 if (!NONJUMP_INSN_P (trial))
3747 return 0;
3748
3749 pat = PATTERN (trial);
3750
3751 if (sparc_leaf_function_p || TARGET_FLAT)
3752 {
3753 /* If the tail call is done using the call instruction,
3754 we have to restore %o7 in the delay slot. */
3755 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3756 return 0;
3757
3758 /* %g1 is used to build the function address */
3759 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3760 return 0;
3761
3762 return 1;
3763 }
3764
3765 if (GET_CODE (pat) != SET)
3766 return 0;
3767
3768 /* Otherwise, only operations which can be done in tandem with
3769 a `restore' insn can go into the delay slot. */
3770 if (GET_CODE (SET_DEST (pat)) != REG
3771 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3772 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3773 return 0;
3774
3775 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3776 in most cases. */
3777 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3778 return 0;
3779
3780 return eligible_for_restore_insn (trial, false);
3781 }
3782 \f
3783 /* Determine if it's legal to put X into the constant pool. This
3784 is not possible if X contains the address of a symbol that is
3785 not constant (TLS) or not known at final link time (PIC). */
3786
3787 static bool
3788 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3789 {
3790 switch (GET_CODE (x))
3791 {
3792 case CONST_INT:
3793 case CONST_WIDE_INT:
3794 case CONST_DOUBLE:
3795 case CONST_VECTOR:
3796 /* Accept all non-symbolic constants. */
3797 return false;
3798
3799 case LABEL_REF:
3800 /* Labels are OK iff we are non-PIC. */
3801 return flag_pic != 0;
3802
3803 case SYMBOL_REF:
3804 /* 'Naked' TLS symbol references are never OK,
3805 non-TLS symbols are OK iff we are non-PIC. */
3806 if (SYMBOL_REF_TLS_MODEL (x))
3807 return true;
3808 else
3809 return flag_pic != 0;
3810
3811 case CONST:
3812 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3813 case PLUS:
3814 case MINUS:
3815 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3816 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3817 case UNSPEC:
3818 return true;
3819 default:
3820 gcc_unreachable ();
3821 }
3822 }
3823 \f
3824 /* Global Offset Table support. */
3825 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3826 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3827
3828 /* Return the SYMBOL_REF for the Global Offset Table. */
3829
3830 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3831
3832 static rtx
3833 sparc_got (void)
3834 {
3835 if (!sparc_got_symbol)
3836 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3837
3838 return sparc_got_symbol;
3839 }
3840
3841 /* Ensure that we are not using patterns that are not OK with PIC. */
3842
3843 int
3844 check_pic (int i)
3845 {
3846 rtx op;
3847
3848 switch (flag_pic)
3849 {
3850 case 1:
3851 op = recog_data.operand[i];
3852 gcc_assert (GET_CODE (op) != SYMBOL_REF
3853 && (GET_CODE (op) != CONST
3854 || (GET_CODE (XEXP (op, 0)) == MINUS
3855 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3856 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3857 /* fallthrough */
3858 case 2:
3859 default:
3860 return 1;
3861 }
3862 }
3863
3864 /* Return true if X is an address which needs a temporary register when
3865 reloaded while generating PIC code. */
3866
3867 int
3868 pic_address_needs_scratch (rtx x)
3869 {
3870 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3871 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3873 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3874 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3875 return 1;
3876
3877 return 0;
3878 }
3879
3880 /* Determine if a given RTX is a valid constant. We already know this
3881 satisfies CONSTANT_P. */
3882
3883 static bool
3884 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3885 {
3886 switch (GET_CODE (x))
3887 {
3888 case CONST:
3889 case SYMBOL_REF:
3890 if (sparc_tls_referenced_p (x))
3891 return false;
3892 break;
3893
3894 case CONST_DOUBLE:
3895 /* Floating point constants are generally not ok.
3896 The only exceptions are 0.0 and all-ones in VIS. */
3897 if (TARGET_VIS
3898 && SCALAR_FLOAT_MODE_P (mode)
3899 && (const_zero_operand (x, mode)
3900 || const_all_ones_operand (x, mode)))
3901 return true;
3902
3903 return false;
3904
3905 case CONST_VECTOR:
3906 /* Vector constants are generally not ok.
3907 The only exceptions are 0 and -1 in VIS. */
3908 if (TARGET_VIS
3909 && (const_zero_operand (x, mode)
3910 || const_all_ones_operand (x, mode)))
3911 return true;
3912
3913 return false;
3914
3915 default:
3916 break;
3917 }
3918
3919 return true;
3920 }
3921
3922 /* Determine if a given RTX is a valid constant address. */
3923
3924 bool
3925 constant_address_p (rtx x)
3926 {
3927 switch (GET_CODE (x))
3928 {
3929 case LABEL_REF:
3930 case CONST_INT:
3931 case HIGH:
3932 return true;
3933
3934 case CONST:
3935 if (flag_pic && pic_address_needs_scratch (x))
3936 return false;
3937 return sparc_legitimate_constant_p (Pmode, x);
3938
3939 case SYMBOL_REF:
3940 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3941
3942 default:
3943 return false;
3944 }
3945 }
3946
3947 /* Nonzero if the constant value X is a legitimate general operand
3948 when generating PIC code. It is given that flag_pic is on and
3949 that X satisfies CONSTANT_P. */
3950
3951 bool
3952 legitimate_pic_operand_p (rtx x)
3953 {
3954 if (pic_address_needs_scratch (x))
3955 return false;
3956 if (sparc_tls_referenced_p (x))
3957 return false;
3958 return true;
3959 }
3960
3961 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3962 (CONST_INT_P (X) \
3963 && INTVAL (X) >= -0x1000 \
3964 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3965
3966 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3967 (CONST_INT_P (X) \
3968 && INTVAL (X) >= -0x1000 \
3969 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
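/* Both limits keep the whole access within the signed 13-bit immediate field
   of SPARC memory instructions; the OLO10 bound is tighter because the %lo()
   part of the address may already occupy up to 10 bits. */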
3970
3971 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3972
3973 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3974 ordinarily. This changes a bit when generating PIC. */
3975
3976 static bool
3977 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3978 {
3979 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3980
3981 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3982 rs1 = addr;
3983 else if (GET_CODE (addr) == PLUS)
3984 {
3985 rs1 = XEXP (addr, 0);
3986 rs2 = XEXP (addr, 1);
3987
3988 /* Canonicalize. REG comes first; if there are no regs,
3989 LO_SUM comes first. */
3990 if (!REG_P (rs1)
3991 && GET_CODE (rs1) != SUBREG
3992 && (REG_P (rs2)
3993 || GET_CODE (rs2) == SUBREG
3994 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3995 {
3996 rs1 = XEXP (addr, 1);
3997 rs2 = XEXP (addr, 0);
3998 }
3999
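/* With -fpic, the GOT is assumed small enough that %l7 plus a symbolic
   operand can be used directly as an address. */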
4000 if ((flag_pic == 1
4001 && rs1 == pic_offset_table_rtx
4002 && !REG_P (rs2)
4003 && GET_CODE (rs2) != SUBREG
4004 && GET_CODE (rs2) != LO_SUM
4005 && GET_CODE (rs2) != MEM
4006 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4007 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4008 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4009 || ((REG_P (rs1)
4010 || GET_CODE (rs1) == SUBREG)
4011 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4012 {
4013 imm1 = rs2;
4014 rs2 = NULL;
4015 }
4016 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4017 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4018 {
4019 /* We prohibit REG + REG for TFmode when there are no quad move insns
4020 and we consequently need to split. We do this because REG+REG
4021 is not an offsettable address. If we get the situation in reload
4022 where source and destination of a movtf pattern are both MEMs with
4023 REG+REG address, then only one of them gets converted to an
4024 offsettable address. */
4025 if (mode == TFmode
4026 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4027 return 0;
4028
4029 /* Likewise for TImode, but in all cases. */
4030 if (mode == TImode)
4031 return 0;
4032
4033 /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
4034 optimizing, because mem_min_alignment is then likely to be zero
4035 after reload and the forced split would lack a matching splitter
4036 pattern. */
4037 if (TARGET_ARCH32 && !optimize
4038 && (mode == DFmode || mode == DImode))
4039 return 0;
4040 }
4041 else if (USE_AS_OFFSETABLE_LO10
4042 && GET_CODE (rs1) == LO_SUM
4043 && TARGET_ARCH64
4044 && ! TARGET_CM_MEDMID
4045 && RTX_OK_FOR_OLO10_P (rs2, mode))
4046 {
4047 rs2 = NULL;
4048 imm1 = XEXP (rs1, 1);
4049 rs1 = XEXP (rs1, 0);
4050 if (!CONSTANT_P (imm1)
4051 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4052 return 0;
4053 }
4054 }
4055 else if (GET_CODE (addr) == LO_SUM)
4056 {
4057 rs1 = XEXP (addr, 0);
4058 imm1 = XEXP (addr, 1);
4059
4060 if (!CONSTANT_P (imm1)
4061 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4062 return 0;
4063
4064 /* We can't allow TFmode in 32-bit mode, because an offset greater
4065 than the alignment (8) may cause the LO_SUM to overflow. */
4066 if (mode == TFmode && TARGET_ARCH32)
4067 return 0;
4068
4069 /* During reload, accept the HIGH+LO_SUM construct generated by
4070 sparc_legitimize_reload_address. */
4071 if (reload_in_progress
4072 && GET_CODE (rs1) == HIGH
4073 && XEXP (rs1, 0) == imm1)
4074 return 1;
4075 }
4076 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4077 return 1;
4078 else
4079 return 0;
4080
4081 if (GET_CODE (rs1) == SUBREG)
4082 rs1 = SUBREG_REG (rs1);
4083 if (!REG_P (rs1))
4084 return 0;
4085
4086 if (rs2)
4087 {
4088 if (GET_CODE (rs2) == SUBREG)
4089 rs2 = SUBREG_REG (rs2);
4090 if (!REG_P (rs2))
4091 return 0;
4092 }
4093
4094 if (strict)
4095 {
4096 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4097 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4098 return 0;
4099 }
4100 else
4101 {
4102 if ((! SPARC_INT_REG_P (REGNO (rs1))
4103 && REGNO (rs1) != FRAME_POINTER_REGNUM
4104 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4105 || (rs2
4106 && (! SPARC_INT_REG_P (REGNO (rs2))
4107 && REGNO (rs2) != FRAME_POINTER_REGNUM
4108 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4109 return 0;
4110 }
4111 return 1;
4112 }
4113
4114 /* Return the SYMBOL_REF for the tls_get_addr function. */
4115
4116 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4117
4118 static rtx
4119 sparc_tls_get_addr (void)
4120 {
4121 if (!sparc_tls_symbol)
4122 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4123
4124 return sparc_tls_symbol;
4125 }
4126
4127 /* Return the Global Offset Table to be used in TLS mode. */
4128
4129 static rtx
4130 sparc_tls_got (void)
4131 {
4132 /* In PIC mode, this is just the PIC offset table. */
4133 if (flag_pic)
4134 {
4135 crtl->uses_pic_offset_table = 1;
4136 return pic_offset_table_rtx;
4137 }
4138
4139 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4140 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4141 if (TARGET_SUN_TLS && TARGET_ARCH32)
4142 {
4143 load_got_register ();
4144 return global_offset_table_rtx;
4145 }
4146
4147 /* In all other cases, we load a new pseudo with the GOT symbol. */
4148 return copy_to_reg (sparc_got ());
4149 }
4150
4151 /* Return true if X contains a thread-local symbol. */
4152
4153 static bool
4154 sparc_tls_referenced_p (rtx x)
4155 {
4156 if (!TARGET_HAVE_TLS)
4157 return false;
4158
4159 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4160 x = XEXP (XEXP (x, 0), 0);
4161
4162 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4163 return true;
4164
4165 /* That's all we handle in sparc_legitimize_tls_address for now. */
4166 return false;
4167 }
4168
4169 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4170 this (thread-local) address. */
4171
4172 static rtx
4173 sparc_legitimize_tls_address (rtx addr)
4174 {
4175 rtx temp1, temp2, temp3, ret, o0, got;
4176 rtx_insn *insn;
4177
4178 gcc_assert (can_create_pseudo_p ());
4179
4180 if (GET_CODE (addr) == SYMBOL_REF)
4181 switch (SYMBOL_REF_TLS_MODEL (addr))
4182 {
4183 case TLS_MODEL_GLOBAL_DYNAMIC:
4184 start_sequence ();
4185 temp1 = gen_reg_rtx (SImode);
4186 temp2 = gen_reg_rtx (SImode);
4187 ret = gen_reg_rtx (Pmode);
4188 o0 = gen_rtx_REG (Pmode, 8);
4189 got = sparc_tls_got ();
4190 emit_insn (gen_tgd_hi22 (temp1, addr));
4191 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4192 if (TARGET_ARCH32)
4193 {
4194 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4195 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4196 addr, const1_rtx));
4197 }
4198 else
4199 {
4200 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4201 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4202 addr, const1_rtx));
4203 }
4204 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4205 insn = get_insns ();
4206 end_sequence ();
4207 emit_libcall_block (insn, ret, o0, addr);
4208 break;
4209
4210 case TLS_MODEL_LOCAL_DYNAMIC:
4211 start_sequence ();
4212 temp1 = gen_reg_rtx (SImode);
4213 temp2 = gen_reg_rtx (SImode);
4214 temp3 = gen_reg_rtx (Pmode);
4215 ret = gen_reg_rtx (Pmode);
4216 o0 = gen_rtx_REG (Pmode, 8);
4217 got = sparc_tls_got ();
4218 emit_insn (gen_tldm_hi22 (temp1));
4219 emit_insn (gen_tldm_lo10 (temp2, temp1));
4220 if (TARGET_ARCH32)
4221 {
4222 emit_insn (gen_tldm_add32 (o0, got, temp2));
4223 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4224 const1_rtx));
4225 }
4226 else
4227 {
4228 emit_insn (gen_tldm_add64 (o0, got, temp2));
4229 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4230 const1_rtx));
4231 }
4232 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4233 insn = get_insns ();
4234 end_sequence ();
4235 emit_libcall_block (insn, temp3, o0,
4236 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4237 UNSPEC_TLSLD_BASE));
4238 temp1 = gen_reg_rtx (SImode);
4239 temp2 = gen_reg_rtx (SImode);
4240 emit_insn (gen_tldo_hix22 (temp1, addr));
4241 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4242 if (TARGET_ARCH32)
4243 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4244 else
4245 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4246 break;
4247
4248 case TLS_MODEL_INITIAL_EXEC:
4249 temp1 = gen_reg_rtx (SImode);
4250 temp2 = gen_reg_rtx (SImode);
4251 temp3 = gen_reg_rtx (Pmode);
4252 got = sparc_tls_got ();
4253 emit_insn (gen_tie_hi22 (temp1, addr));
4254 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4255 if (TARGET_ARCH32)
4256 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4257 else
4258 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
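/* temp3 now holds the offset of the variable from the thread pointer (%g7). */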
4259 if (TARGET_SUN_TLS)
4260 {
4261 ret = gen_reg_rtx (Pmode);
4262 if (TARGET_ARCH32)
4263 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4264 temp3, addr));
4265 else
4266 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4267 temp3, addr));
4268 }
4269 else
4270 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4271 break;
4272
4273 case TLS_MODEL_LOCAL_EXEC:
4274 temp1 = gen_reg_rtx (Pmode);
4275 temp2 = gen_reg_rtx (Pmode);
4276 if (TARGET_ARCH32)
4277 {
4278 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4279 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4280 }
4281 else
4282 {
4283 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4284 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4285 }
4286 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4287 break;
4288
4289 default:
4290 gcc_unreachable ();
4291 }
4292
4293 else if (GET_CODE (addr) == CONST)
4294 {
4295 rtx base, offset;
4296
4297 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4298
4299 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4300 offset = XEXP (XEXP (addr, 0), 1);
4301
4302 base = force_operand (base, NULL_RTX);
4303 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4304 offset = force_reg (Pmode, offset);
4305 ret = gen_rtx_PLUS (Pmode, base, offset);
4306 }
4307
4308 else
4309 gcc_unreachable (); /* for now ... */
4310
4311 return ret;
4312 }
4313
4314 /* Legitimize PIC addresses. If the address is already position-independent,
4315 we return ORIG. Newly generated position-independent addresses go into a
4316 reg. This is REG if nonzero, otherwise we allocate register(s) as
4317 necessary. */
4318
4319 static rtx
4320 sparc_legitimize_pic_address (rtx orig, rtx reg)
4321 {
4322 bool gotdata_op = false;
4323
4324 if (GET_CODE (orig) == SYMBOL_REF
4325 /* See the comment in sparc_expand_move. */
4326 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4327 {
4328 rtx pic_ref, address;
4329 rtx_insn *insn;
4330
4331 if (reg == 0)
4332 {
4333 gcc_assert (can_create_pseudo_p ());
4334 reg = gen_reg_rtx (Pmode);
4335 }
4336
4337 if (flag_pic == 2)
4338 {
4339 /* If not during reload, allocate another temp reg here for loading
4340 in the address, so that these instructions can be optimized
4341 properly. */
4342 rtx temp_reg = (! can_create_pseudo_p ()
4343 ? reg : gen_reg_rtx (Pmode));
4344
4345 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4346 won't get confused into thinking that these two instructions
4347 are loading in the true address of the symbol. If in the
4348 future a PIC rtx exists, that should be used instead. */
4349 if (TARGET_ARCH64)
4350 {
4351 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4352 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4353 }
4354 else
4355 {
4356 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4357 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4358 }
4359 address = temp_reg;
4360 gotdata_op = true;
4361 }
4362 else
4363 address = orig;
4364
4365 crtl->uses_pic_offset_table = 1;
4366 if (gotdata_op)
4367 {
4368 if (TARGET_ARCH64)
4369 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4370 pic_offset_table_rtx,
4371 address, orig));
4372 else
4373 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4374 pic_offset_table_rtx,
4375 address, orig));
4376 }
4377 else
4378 {
4379 pic_ref
4380 = gen_const_mem (Pmode,
4381 gen_rtx_PLUS (Pmode,
4382 pic_offset_table_rtx, address));
4383 insn = emit_move_insn (reg, pic_ref);
4384 }
4385
4386 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4387 by the loop optimizer. */
4388 set_unique_reg_note (insn, REG_EQUAL, orig);
4389 return reg;
4390 }
4391 else if (GET_CODE (orig) == CONST)
4392 {
4393 rtx base, offset;
4394
4395 if (GET_CODE (XEXP (orig, 0)) == PLUS
4396 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4397 return orig;
4398
4399 if (reg == 0)
4400 {
4401 gcc_assert (can_create_pseudo_p ());
4402 reg = gen_reg_rtx (Pmode);
4403 }
4404
4405 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4406 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4407 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4408 base == reg ? NULL_RTX : reg);
4409
4410 if (GET_CODE (offset) == CONST_INT)
4411 {
4412 if (SMALL_INT (offset))
4413 return plus_constant (Pmode, base, INTVAL (offset));
4414 else if (can_create_pseudo_p ())
4415 offset = force_reg (Pmode, offset);
4416 else
4417 /* If we reach here, then something is seriously wrong. */
4418 gcc_unreachable ();
4419 }
4420 return gen_rtx_PLUS (Pmode, base, offset);
4421 }
4422 else if (GET_CODE (orig) == LABEL_REF)
4423 /* ??? We ought to be checking that the register is live instead, in case
4424 it is eliminated. */
4425 crtl->uses_pic_offset_table = 1;
4426
4427 return orig;
4428 }
4429
4430 /* Try machine-dependent ways of modifying an illegitimate address X
4431 to be legitimate. If we find one, return the new, valid address.
4432
4433 OLDX is the address as it was before break_out_memory_refs was called.
4434 In some cases it is useful to look at this to decide what needs to be done.
4435
4436 MODE is the mode of the operand pointed to by X.
4437
4438 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4439
4440 static rtx
4441 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4442 machine_mode mode)
4443 {
4444 rtx orig_x = x;
4445
4446 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4447 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4448 force_operand (XEXP (x, 0), NULL_RTX));
4449 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4450 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4451 force_operand (XEXP (x, 1), NULL_RTX));
4452 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4453 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4454 XEXP (x, 1));
4455 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4456 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4457 force_operand (XEXP (x, 1), NULL_RTX));
4458
4459 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4460 return x;
4461
4462 if (sparc_tls_referenced_p (x))
4463 x = sparc_legitimize_tls_address (x);
4464 else if (flag_pic)
4465 x = sparc_legitimize_pic_address (x, NULL_RTX);
4466 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4467 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4468 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4469 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4470 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4471 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4472 else if (GET_CODE (x) == SYMBOL_REF
4473 || GET_CODE (x) == CONST
4474 || GET_CODE (x) == LABEL_REF)
4475 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4476
4477 return x;
4478 }
4479
4480 /* Delegitimize an address that was legitimized by the above function. */
4481
4482 static rtx
4483 sparc_delegitimize_address (rtx x)
4484 {
4485 x = delegitimize_mem_from_attrs (x);
4486
4487 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4488 switch (XINT (XEXP (x, 1), 1))
4489 {
4490 case UNSPEC_MOVE_PIC:
4491 case UNSPEC_TLSLE:
4492 x = XVECEXP (XEXP (x, 1), 0, 0);
4493 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4494 break;
4495 default:
4496 break;
4497 }
4498
4499 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4500 if (GET_CODE (x) == MINUS
4501 && REG_P (XEXP (x, 0))
4502 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4503 && GET_CODE (XEXP (x, 1)) == LO_SUM
4504 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4505 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4506 {
4507 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4508 gcc_assert (GET_CODE (x) == LABEL_REF);
4509 }
4510
4511 return x;
4512 }
4513
4514 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4515 replace the input X, or the original X if no replacement is called for.
4516 The output parameter *WIN is 1 if the calling macro should goto WIN,
4517 0 if it should not.
4518
4519 For SPARC, we wish to handle addresses by splitting them into
4520 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4521 This cuts the number of extra insns by one.
4522
4523 Do nothing when generating PIC code and the address is a symbolic
4524 operand or requires a scratch register. */
4525
4526 rtx
4527 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4528 int opnum, int type,
4529 int ind_levels ATTRIBUTE_UNUSED, int *win)
4530 {
4531 /* Decompose SImode constants into HIGH+LO_SUM. */
4532 if (CONSTANT_P (x)
4533 && (mode != TFmode || TARGET_ARCH64)
4534 && GET_MODE (x) == SImode
4535 && GET_CODE (x) != LO_SUM
4536 && GET_CODE (x) != HIGH
4537 && sparc_cmodel <= CM_MEDLOW
4538 && !(flag_pic
4539 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4540 {
4541 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4542 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4543 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4544 opnum, (enum reload_type)type);
4545 *win = 1;
4546 return x;
4547 }
4548
4549 /* We have to recognize what we have already generated above. */
4550 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4551 {
4552 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4553 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4554 opnum, (enum reload_type)type);
4555 *win = 1;
4556 return x;
4557 }
4558
4559 *win = 0;
4560 return x;
4561 }
4562
4563 /* Return true if ADDR (a legitimate address expression)
4564 has an effect that depends on the machine mode it is used for.
4565
4566 In PIC mode,
4567
4568 (mem:HI [%l7+a])
4569
4570 is not equivalent to
4571
4572 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4573
4574 because [%l7+a+1] is interpreted as the address of (a+1). */
4575
4576
4577 static bool
4578 sparc_mode_dependent_address_p (const_rtx addr,
4579 addr_space_t as ATTRIBUTE_UNUSED)
4580 {
4581 if (flag_pic && GET_CODE (addr) == PLUS)
4582 {
4583 rtx op0 = XEXP (addr, 0);
4584 rtx op1 = XEXP (addr, 1);
4585 if (op0 == pic_offset_table_rtx
4586 && symbolic_operand (op1, VOIDmode))
4587 return true;
4588 }
4589
4590 return false;
4591 }
4592
4593 #ifdef HAVE_GAS_HIDDEN
4594 # define USE_HIDDEN_LINKONCE 1
4595 #else
4596 # define USE_HIDDEN_LINKONCE 0
4597 #endif
4598
4599 static void
4600 get_pc_thunk_name (char name[32], unsigned int regno)
4601 {
4602 const char *reg_name = reg_names[regno];
4603
4604 /* Skip the leading '%' as that cannot be used in a
4605 symbol name. */
4606 reg_name += 1;
4607
4608 if (USE_HIDDEN_LINKONCE)
4609 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4610 else
4611 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4612 }
4613
4614 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4615
4616 static rtx
4617 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4618 {
4619 int orig_flag_pic = flag_pic;
4620 rtx insn;
4621
4622 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4623 flag_pic = 0;
4624 if (TARGET_ARCH64)
4625 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4626 else
4627 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4628 flag_pic = orig_flag_pic;
4629
4630 return insn;
4631 }
4632
4633 /* Emit code to load the GOT register. */
4634
4635 void
4636 load_got_register (void)
4637 {
4638 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4639 if (!global_offset_table_rtx)
4640 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4641
4642 if (TARGET_VXWORKS_RTP)
4643 emit_insn (gen_vxworks_load_got ());
4644 else
4645 {
4646 /* The GOT symbol is subject to a PC-relative relocation so we need a
4647 helper function to add the PC value and thus get the final value. */
4648 if (!got_helper_rtx)
4649 {
4650 char name[32];
4651 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4652 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4653 }
4654
4655 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4656 got_helper_rtx,
4657 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4658 }
4659
4660 /* Need to emit this whether or not we obey regdecls,
4661 since setjmp/longjmp can cause life info to screw up.
4662 ??? In the case where we don't obey regdecls, this is not sufficient
4663 since we may not fall out the bottom. */
4664 emit_use (global_offset_table_rtx);
4665 }
4666
4667 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4668 address of the call target. */
4669
4670 void
4671 sparc_emit_call_insn (rtx pat, rtx addr)
4672 {
4673 rtx_insn *insn;
4674
4675 insn = emit_call_insn (pat);
4676
4677 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4678 if (TARGET_VXWORKS_RTP
4679 && flag_pic
4680 && GET_CODE (addr) == SYMBOL_REF
4681 && (SYMBOL_REF_DECL (addr)
4682 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4683 : !SYMBOL_REF_LOCAL_P (addr)))
4684 {
4685 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4686 crtl->uses_pic_offset_table = 1;
4687 }
4688 }
4689 \f
4690 /* Return 1 if RTX is a MEM which is known to be aligned to at
4691 least a DESIRED byte boundary. */
4692
4693 int
4694 mem_min_alignment (rtx mem, int desired)
4695 {
4696 rtx addr, base, offset;
4697
4698 /* If it's not a MEM we can't accept it. */
4699 if (GET_CODE (mem) != MEM)
4700 return 0;
4701
4702 /* Obviously... */
4703 if (!TARGET_UNALIGNED_DOUBLES
4704 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4705 return 1;
4706
4707 /* ??? The rest of the function predates MEM_ALIGN so
4708 there is probably a bit of redundancy. */
4709 addr = XEXP (mem, 0);
4710 base = offset = NULL_RTX;
4711 if (GET_CODE (addr) == PLUS)
4712 {
4713 if (GET_CODE (XEXP (addr, 0)) == REG)
4714 {
4715 base = XEXP (addr, 0);
4716
4717 /* What we are saying here is that if the base
4718 REG is aligned properly, the compiler will make
4719 sure any REG based index upon it will be so
4720 as well. */
4721 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4722 offset = XEXP (addr, 1);
4723 else
4724 offset = const0_rtx;
4725 }
4726 }
4727 else if (GET_CODE (addr) == REG)
4728 {
4729 base = addr;
4730 offset = const0_rtx;
4731 }
4732
4733 if (base != NULL_RTX)
4734 {
4735 int regno = REGNO (base);
4736
4737 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4738 {
4739 /* Check if the compiler has recorded some information
4740 about the alignment of the base REG. If reload has
4741 completed, we already matched with proper alignments.
4742 If not running global_alloc, reload might give us
4743 an unaligned pointer to the local stack, though. */
4744 if (((cfun != 0
4745 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4746 || (optimize && reload_completed))
4747 && (INTVAL (offset) & (desired - 1)) == 0)
4748 return 1;
4749 }
4750 else
4751 {
4752 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4753 return 1;
4754 }
4755 }
4756 else if (! TARGET_UNALIGNED_DOUBLES
4757 || CONSTANT_P (addr)
4758 || GET_CODE (addr) == LO_SUM)
4759 {
4760 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4761 is true, in which case we can only assume that an access is aligned if
4762 it is to a constant address, or the address involves a LO_SUM. */
4763 return 1;
4764 }
4765
4766 /* An obviously unaligned address. */
4767 return 0;
4768 }
4769
4770 \f
4771 /* Vectors to keep interesting information about registers where it can easily
4772 be obtained. We used to use the actual mode value as the bit number, but there
4773 are more than 32 modes now. Instead we use two tables: one indexed by
4774 hard register number, and one indexed by mode. */
4775
4776 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4777 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4778 mapped into one sparc_mode_class mode. */
4779
4780 enum sparc_mode_class {
4781 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4782 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4783 CC_MODE, CCFP_MODE
4784 };
4785
4786 /* Modes for single-word and smaller quantities. */
4787 #define S_MODES \
4788 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4789
4790 /* Modes for double-word and smaller quantities. */
4791 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4792
4793 /* Modes for quad-word and smaller quantities. */
4794 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4795
4796 /* Modes for 8-word and smaller quantities. */
4797 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4798
4799 /* Modes for single-float quantities. */
4800 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4801
4802 /* Modes for double-float and smaller quantities. */
4803 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4804
4805 /* Modes for quad-float and smaller quantities. */
4806 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4807
4808 /* Modes for quad-float pairs and smaller quantities. */
4809 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4810
4811 /* Modes for double-float only quantities. */
4812 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4813
4814 /* Modes for quad-float and double-float only quantities. */
4815 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4816
4817 /* Modes for quad-float pairs and double-float only quantities. */
4818 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4819
4820 /* Modes for condition codes. */
4821 #define CC_MODES (1 << (int) CC_MODE)
4822 #define CCFP_MODES (1 << (int) CCFP_MODE)
4823
4824 /* Value is 1 if register/mode pair is acceptable on sparc.
4825
4826 The funny mixture of D and T modes is because integer operations
4827 do not specially operate on tetra quantities, so non-quad-aligned
4828 registers can hold quadword quantities (except %o4 and %i4 because
4829 they cross fixed registers).
4830
4831 ??? Note that, despite the settings, non-double-aligned parameter
4832 registers can hold double-word quantities in 32-bit mode. */
4833
4834 /* This points to either the 32 bit or the 64 bit version. */
4835 const int *hard_regno_mode_classes;
4836
4837 static const int hard_32bit_mode_classes[] = {
4838 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4839 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4840 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4841 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4842
4843 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4844 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4845 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4846 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4847
4848 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4849 and none can hold SFmode/SImode values. */
4850 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4851 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4852 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4853 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4854
4855 /* %fcc[0123] */
4856 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4857
4858 /* %icc, %sfp, %gsr */
4859 CC_MODES, 0, D_MODES
4860 };
4861
4862 static const int hard_64bit_mode_classes[] = {
4863 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4864 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4865 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4866 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4867
4868 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4869 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4870 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4871 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4872
4873 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4874 and none can hold SFmode/SImode values. */
4875 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4876 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4877 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4878 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4879
4880 /* %fcc[0123] */
4881 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4882
4883 /* %icc, %sfp, %gsr */
4884 CC_MODES, 0, D_MODES
4885 };
4886
4887 int sparc_mode_class [NUM_MACHINE_MODES];
4888
4889 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4890
4891 static void
4892 sparc_init_modes (void)
4893 {
4894 int i;
4895
4896 for (i = 0; i < NUM_MACHINE_MODES; i++)
4897 {
4898 machine_mode m = (machine_mode) i;
4899 unsigned int size = GET_MODE_SIZE (m);
4900
4901 switch (GET_MODE_CLASS (m))
4902 {
4903 case MODE_INT:
4904 case MODE_PARTIAL_INT:
4905 case MODE_COMPLEX_INT:
4906 if (size < 4)
4907 sparc_mode_class[i] = 1 << (int) H_MODE;
4908 else if (size == 4)
4909 sparc_mode_class[i] = 1 << (int) S_MODE;
4910 else if (size == 8)
4911 sparc_mode_class[i] = 1 << (int) D_MODE;
4912 else if (size == 16)
4913 sparc_mode_class[i] = 1 << (int) T_MODE;
4914 else if (size == 32)
4915 sparc_mode_class[i] = 1 << (int) O_MODE;
4916 else
4917 sparc_mode_class[i] = 0;
4918 break;
4919 case MODE_VECTOR_INT:
4920 if (size == 4)
4921 sparc_mode_class[i] = 1 << (int) SF_MODE;
4922 else if (size == 8)
4923 sparc_mode_class[i] = 1 << (int) DF_MODE;
4924 else
4925 sparc_mode_class[i] = 0;
4926 break;
4927 case MODE_FLOAT:
4928 case MODE_COMPLEX_FLOAT:
4929 if (size == 4)
4930 sparc_mode_class[i] = 1 << (int) SF_MODE;
4931 else if (size == 8)
4932 sparc_mode_class[i] = 1 << (int) DF_MODE;
4933 else if (size == 16)
4934 sparc_mode_class[i] = 1 << (int) TF_MODE;
4935 else if (size == 32)
4936 sparc_mode_class[i] = 1 << (int) OF_MODE;
4937 else
4938 sparc_mode_class[i] = 0;
4939 break;
4940 case MODE_CC:
4941 if (m == CCFPmode || m == CCFPEmode)
4942 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4943 else
4944 sparc_mode_class[i] = 1 << (int) CC_MODE;
4945 break;
4946 default:
4947 sparc_mode_class[i] = 0;
4948 break;
4949 }
4950 }
4951
4952 if (TARGET_ARCH64)
4953 hard_regno_mode_classes = hard_64bit_mode_classes;
4954 else
4955 hard_regno_mode_classes = hard_32bit_mode_classes;
4956
4957 /* Initialize the array used by REGNO_REG_CLASS. */
4958 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4959 {
4960 if (i < 16 && TARGET_V8PLUS)
4961 sparc_regno_reg_class[i] = I64_REGS;
4962 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4963 sparc_regno_reg_class[i] = GENERAL_REGS;
4964 else if (i < 64)
4965 sparc_regno_reg_class[i] = FP_REGS;
4966 else if (i < 96)
4967 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4968 else if (i < 100)
4969 sparc_regno_reg_class[i] = FPCC_REGS;
4970 else
4971 sparc_regno_reg_class[i] = NO_REGS;
4972 }
4973 }
4974 \f
4975 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4976
4977 static inline bool
4978 save_global_or_fp_reg_p (unsigned int regno,
4979 int leaf_function ATTRIBUTE_UNUSED)
4980 {
4981 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4982 }
4983
4984 /* Return whether the return address register (%i7) is needed. */
4985
4986 static inline bool
4987 return_addr_reg_needed_p (int leaf_function)
4988 {
4989 /* If it is live, for example because of __builtin_return_address (0). */
4990 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4991 return true;
4992
4993 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4994 if (!leaf_function
4995 /* Loading the GOT register clobbers %o7. */
4996 || crtl->uses_pic_offset_table
4997 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4998 return true;
4999
5000 return false;
5001 }
5002
5003 /* Return whether REGNO, a local or in register, must be saved/restored. */
5004
5005 static bool
5006 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5007 {
5008 /* General case: call-saved registers live at some point. */
5009 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5010 return true;
5011
5012 /* Frame pointer register (%fp) if needed. */
5013 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5014 return true;
5015
5016 /* Return address register (%i7) if needed. */
5017 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5018 return true;
5019
5020 /* GOT register (%l7) if needed. */
5021 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5022 return true;
5023
5024 /* If the function accesses prior frames, the frame pointer and the return
5025 address of the previous frame must be saved on the stack. */
5026 if (crtl->accesses_prior_frames
5027 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5028 return true;
5029
5030 return false;
5031 }
5032
5033 /* Compute the frame size required by the function. This function is called
5034 during the reload pass and also by sparc_expand_prologue. */
5035
5036 HOST_WIDE_INT
5037 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5038 {
5039 HOST_WIDE_INT frame_size, apparent_frame_size;
5040 int args_size, n_global_fp_regs = 0;
5041 bool save_local_in_regs_p = false;
5042 unsigned int i;
5043
5044 /* If the function allocates dynamic stack space, the dynamic offset is
5045 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5046 if (leaf_function && !cfun->calls_alloca)
5047 args_size = 0;
5048 else
5049 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5050
5051 /* Calculate space needed for global registers. */
5052 if (TARGET_ARCH64)
5053 {
5054 for (i = 0; i < 8; i++)
5055 if (save_global_or_fp_reg_p (i, 0))
5056 n_global_fp_regs += 2;
5057 }
5058 else
5059 {
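/* In 32-bit mode, registers are saved in even/odd pairs with double-word
   accesses, so reserve two words whenever either register of a pair is live. */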
5060 for (i = 0; i < 8; i += 2)
5061 if (save_global_or_fp_reg_p (i, 0)
5062 || save_global_or_fp_reg_p (i + 1, 0))
5063 n_global_fp_regs += 2;
5064 }
5065
5066 /* In the flat window model, find out which local and in registers need to
5067 be saved. We don't reserve space in the current frame for them as they
5068 will be spilled into the register window save area of the caller's frame.
5069 However, as soon as we use this register window save area, we must create
5070 that of the current frame to make it the live one. */
5071 if (TARGET_FLAT)
5072 for (i = 16; i < 32; i++)
5073 if (save_local_or_in_reg_p (i, leaf_function))
5074 {
5075 save_local_in_regs_p = true;
5076 break;
5077 }
5078
5079 /* Calculate space needed for FP registers. */
5080 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5081 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5082 n_global_fp_regs += 2;
5083
5084 if (size == 0
5085 && n_global_fp_regs == 0
5086 && args_size == 0
5087 && !save_local_in_regs_p)
5088 frame_size = apparent_frame_size = 0;
5089 else
5090 {
5091 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5092 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5093 apparent_frame_size += n_global_fp_regs * 4;
5094
5095 /* We need to add the size of the outgoing argument area. */
5096 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5097
5098 /* And that of the register window save area. */
5099 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5100
5101 /* Finally, bump to the appropriate alignment. */
5102 frame_size = SPARC_STACK_ALIGN (frame_size);
5103 }
5104
5105 /* Set up values for use in prologue and epilogue. */
5106 sparc_frame_size = frame_size;
5107 sparc_apparent_frame_size = apparent_frame_size;
5108 sparc_n_global_fp_regs = n_global_fp_regs;
5109 sparc_save_local_in_regs_p = save_local_in_regs_p;
5110
5111 return frame_size;
5112 }
5113
5114 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5115
5116 int
5117 sparc_initial_elimination_offset (int to)
5118 {
5119 int offset;
5120
5121 if (to == STACK_POINTER_REGNUM)
5122 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5123 else
5124 offset = 0;
5125
5126 offset += SPARC_STACK_BIAS;
5127 return offset;
5128 }
5129
5130 /* Output any necessary .register pseudo-ops. */
5131
5132 void
5133 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5134 {
5135 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5136 int i;
5137
5138 if (TARGET_ARCH32)
5139 return;
5140
5141 /* Check if %g[2367] were used without
5142 .register being printed for them already. */
5143 for (i = 2; i < 8; i++)
5144 {
5145 if (df_regs_ever_live_p (i)
5146 && ! sparc_hard_reg_printed [i])
5147 {
5148 sparc_hard_reg_printed [i] = 1;
5149 /* %g7 is used as TLS base register, use #ignore
5150 for it instead of #scratch. */
5151 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5152 i == 7 ? "ignore" : "scratch");
5153 }
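/* Skip %g4 and %g5. */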
5154 if (i == 3) i = 5;
5155 }
5156 #endif
5157 }
5158
5159 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5160
5161 #if PROBE_INTERVAL > 4096
5162 #error Cannot use indexed addressing mode for stack probing
5163 #endif
5164
5165 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5166 inclusive. These are offsets from the current stack pointer.
5167
5168 Note that we don't use the REG+REG addressing mode for the probes because
5169 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5170 so the advantage of having a single code path for both modes wins here. */
5171
5172 static void
5173 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5174 {
5175 rtx g1 = gen_rtx_REG (Pmode, 1);
5176
5177 /* See if we have a constant small number of probes to generate. If so,
5178 that's the easy case. */
5179 if (size <= PROBE_INTERVAL)
5180 {
5181 emit_move_insn (g1, GEN_INT (first));
5182 emit_insn (gen_rtx_SET (g1,
5183 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5184 emit_stack_probe (plus_constant (Pmode, g1, -size));
5185 }
5186
5187 /* The run-time loop is made up of 9 insns in the generic case while the
5188 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5189 else if (size <= 4 * PROBE_INTERVAL)
5190 {
5191 HOST_WIDE_INT i;
5192
5193 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5194 emit_insn (gen_rtx_SET (g1,
5195 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5196 emit_stack_probe (g1);
5197
5198 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5199 it exceeds SIZE. If only two probes are needed, this will not
5200 generate any code. Then probe at FIRST + SIZE. */
5201 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5202 {
5203 emit_insn (gen_rtx_SET (g1,
5204 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5205 emit_stack_probe (g1);
5206 }
5207
5208 emit_stack_probe (plus_constant (Pmode, g1,
5209 (i - PROBE_INTERVAL) - size));
5210 }
5211
5212 /* Otherwise, do the same as above, but in a loop. Note that we must be
5213 extra careful with variables wrapping around because we might be at
5214 the very top (or the very bottom) of the address space and we have
5215 to be able to handle this case properly; in particular, we use an
5216 equality test for the loop condition. */
5217 else
5218 {
5219 HOST_WIDE_INT rounded_size;
5220 rtx g4 = gen_rtx_REG (Pmode, 4);
5221
5222 emit_move_insn (g1, GEN_INT (first));
5223
5224
5225 /* Step 1: round SIZE to the previous multiple of the interval. */
5226
5227 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5228 emit_move_insn (g4, GEN_INT (rounded_size));
5229
5230
5231 /* Step 2: compute initial and final value of the loop counter. */
5232
5233 /* TEST_ADDR = SP + FIRST. */
5234 emit_insn (gen_rtx_SET (g1,
5235 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5236
5237 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5238 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5239
5240
5241 /* Step 3: the loop
5242
5243 while (TEST_ADDR != LAST_ADDR)
5244 {
5245 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5246 probe at TEST_ADDR
5247 }
5248
5249 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5250 until it is equal to ROUNDED_SIZE. */
5251
5252 if (TARGET_ARCH64)
5253 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5254 else
5255 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5256
5257
5258 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5259 that SIZE is equal to ROUNDED_SIZE. */
5260
5261 if (size != rounded_size)
5262 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5263 }
5264
5265 /* Make sure nothing is scheduled before we are done. */
5266 emit_insn (gen_blockage ());
5267 }
5268
5269 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5270 absolute addresses. */
5271
5272 const char *
5273 output_probe_stack_range (rtx reg1, rtx reg2)
5274 {
5275 static int labelno = 0;
5276 char loop_lab[32];
5277 rtx xops[2];
5278
5279 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5280
5281 /* Loop. */
5282 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5283
5284 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5285 xops[0] = reg1;
5286 xops[1] = GEN_INT (-PROBE_INTERVAL);
5287 output_asm_insn ("add\t%0, %1, %0", xops);
5288
5289 /* Test if TEST_ADDR == LAST_ADDR. */
5290 xops[1] = reg2;
5291 output_asm_insn ("cmp\t%0, %1", xops);
5292
5293 /* Probe at TEST_ADDR and branch. */
5294 if (TARGET_ARCH64)
5295 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5296 else
5297 fputs ("\tbne\t", asm_out_file);
5298 assemble_name_raw (asm_out_file, loop_lab);
5299 fputc ('\n', asm_out_file);
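/* The store below ends up in the delay slot of the branch, so the probe
   is performed on every iteration. */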
5300 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5301 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5302
5303 return "";
5304 }
5305
5306 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5307 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5308 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5309 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5310 the action to be performed if it returns false. Return the new offset. */
5311
5312 typedef bool (*sorr_pred_t) (unsigned int, int);
5313 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5314
5315 static int
5316 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5317 int offset, int leaf_function, sorr_pred_t save_p,
5318 sorr_act_t action_true, sorr_act_t action_false)
5319 {
5320 unsigned int i;
5321 rtx mem;
5322 rtx_insn *insn;
5323
5324 if (TARGET_ARCH64 && high <= 32)
5325 {
5326 int fp_offset = -1;
5327
5328 for (i = low; i < high; i++)
5329 {
5330 if (save_p (i, leaf_function))
5331 {
5332 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5333 base, offset));
5334 if (action_true == SORR_SAVE)
5335 {
5336 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5337 RTX_FRAME_RELATED_P (insn) = 1;
5338 }
5339 else /* action_true == SORR_RESTORE */
5340 {
5341 /* The frame pointer must be restored last since its old
5342 value may be used as base address for the frame. This
5343 is problematic in 64-bit mode only because of the lack
5344 of a double-word load instruction. */
5345 if (i == HARD_FRAME_POINTER_REGNUM)
5346 fp_offset = offset;
5347 else
5348 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5349 }
5350 offset += 8;
5351 }
5352 else if (action_false == SORR_ADVANCE)
5353 offset += 8;
5354 }
5355
5356 if (fp_offset >= 0)
5357 {
5358 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5359 emit_move_insn (hard_frame_pointer_rtx, mem);
5360 }
5361 }
5362 else
5363 {
5364 for (i = low; i < high; i += 2)
5365 {
5366 bool reg0 = save_p (i, leaf_function);
5367 bool reg1 = save_p (i + 1, leaf_function);
5368 machine_mode mode;
5369 int regno;
5370
5371 if (reg0 && reg1)
5372 {
5373 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5374 regno = i;
5375 }
5376 else if (reg0)
5377 {
5378 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5379 regno = i;
5380 }
5381 else if (reg1)
5382 {
5383 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5384 regno = i + 1;
5385 offset += 4;
5386 }
5387 else
5388 {
5389 if (action_false == SORR_ADVANCE)
5390 offset += 8;
5391 continue;
5392 }
5393
5394 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5395 if (action_true == SORR_SAVE)
5396 {
5397 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5398 RTX_FRAME_RELATED_P (insn) = 1;
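/* The double-word store saves two adjacent registers at once; describe it
   to the DWARF unwind machinery as two separate word-sized saves. */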
5399 if (mode == DImode)
5400 {
5401 rtx set1, set2;
5402 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5403 offset));
5404 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5405 RTX_FRAME_RELATED_P (set1) = 1;
5406 mem
5407 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5408 offset + 4));
5409 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5410 RTX_FRAME_RELATED_P (set2) = 1;
5411 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5412 gen_rtx_PARALLEL (VOIDmode,
5413 gen_rtvec (2, set1, set2)));
5414 }
5415 }
5416 else /* action_true == SORR_RESTORE */
5417 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5418
5419 /* Bump and round down to double word
5420 in case we already bumped by 4. */
5421 offset = ROUND_DOWN (offset + 8, 8);
5422 }
5423 }
5424
5425 return offset;
5426 }
5427
5428 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5429
5430 static rtx
5431 emit_adjust_base_to_offset (rtx base, int offset)
5432 {
5433 /* ??? This might be optimized a little as %g1 might already have a
5434 value close enough that a single add insn will do. */
5435 /* ??? Although, all of this is probably only a temporary fix because
5436 if %g1 can hold a function result, then sparc_expand_epilogue will
5437 lose (the result will be clobbered). */
5438 rtx new_base = gen_rtx_REG (Pmode, 1);
5439 emit_move_insn (new_base, GEN_INT (offset));
5440 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5441 return new_base;
5442 }
5443
5444 /* Emit code to save/restore call-saved global and FP registers. */
5445
5446 static void
5447 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5448 {
5449 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5450 {
5451 base = emit_adjust_base_to_offset (base, offset);
5452 offset = 0;
5453 }
5454
5455 offset
5456 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5457 save_global_or_fp_reg_p, action, SORR_NONE);
5458 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5459 save_global_or_fp_reg_p, action, SORR_NONE);
5460 }
5461
5462 /* Emit code to save/restore call-saved local and in registers. */
5463
5464 static void
5465 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5466 {
5467 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5468 {
5469 base = emit_adjust_base_to_offset (base, offset);
5470 offset = 0;
5471 }
5472
5473 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5474 save_local_or_in_reg_p, action, SORR_ADVANCE);
5475 }
5476
5477 /* Emit a window_save insn. */
5478
5479 static rtx_insn *
5480 emit_window_save (rtx increment)
5481 {
5482 rtx_insn *insn = emit_insn (gen_window_save (increment));
5483 RTX_FRAME_RELATED_P (insn) = 1;
5484
5485 /* The incoming return address (%o7) is saved in %i7. */
5486 add_reg_note (insn, REG_CFA_REGISTER,
5487 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5488 gen_rtx_REG (Pmode,
5489 INCOMING_RETURN_ADDR_REGNUM)));
5490
5491 /* The window save event. */
5492 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5493
5494 /* The CFA is %fp, the hard frame pointer. */
5495 add_reg_note (insn, REG_CFA_DEF_CFA,
5496 plus_constant (Pmode, hard_frame_pointer_rtx,
5497 INCOMING_FRAME_SP_OFFSET));
5498
5499 return insn;
5500 }
5501
5502 /* Generate an increment for the stack pointer. */
5503
5504 static rtx
5505 gen_stack_pointer_inc (rtx increment)
5506 {
5507 return gen_rtx_SET (stack_pointer_rtx,
5508 gen_rtx_PLUS (Pmode,
5509 stack_pointer_rtx,
5510 increment));
5511 }
5512
5513 /* Expand the function prologue. The prologue is responsible for reserving
5514 storage for the frame, saving the call-saved registers and loading the
5515 GOT register if needed. */
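/* Purely illustrative sketch (not part of the original sources): assuming a
   non-flat ABI and no stack checking, the frame allocation emitted below
   typically looks like one of the following, where SIZE is the frame size
   computed by sparc_compute_frame_size:

       add  %sp, -SIZE, %sp               ! leaf function, SIZE <= 4096
       save %sp, -SIZE, %sp               ! non-leaf function, SIZE <= 4096

       save %sp, -4096, %sp               ! non-leaf, 4096 < SIZE <= 8192
       add  %sp, -(SIZE-4096), %sp

       <move -SIZE into %g1>              ! SIZE > 8192: the constant is
       save %sp, %g1, %sp                 !  materialized in %g1 first

   The exact instructions depend on the constant and on the target flags;
   this only mirrors the three cases handled in the function below.  */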
5516
5517 void
5518 sparc_expand_prologue (void)
5519 {
5520 HOST_WIDE_INT size;
5521 rtx_insn *insn;
5522
5523 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5524 on the final value of the flag means deferring the prologue/epilogue
5525 expansion until just before the second scheduling pass, which is too
5526 late to emit multiple epilogues or return insns.
5527
5528 Of course we are making the assumption that the value of the flag
5529 will not change between now and its final value. Of the three parts
5530      of the formula, only the last one can reasonably vary.  Let's take a
5531      closer look, after assuming that the first two are true
5532      (otherwise the last value is effectively silenced).
5533
5534 If only_leaf_regs_used returns false, the global predicate will also
5535 be false so the actual frame size calculated below will be positive.
5536 As a consequence, the save_register_window insn will be emitted in
5537 the instruction stream; now this insn explicitly references %fp
5538 which is not a leaf register so only_leaf_regs_used will always
5539 return false subsequently.
5540
5541 If only_leaf_regs_used returns true, we hope that the subsequent
5542 optimization passes won't cause non-leaf registers to pop up. For
5543 example, the regrename pass has special provisions to not rename to
5544 non-leaf registers in a leaf function. */
5545 sparc_leaf_function_p
5546 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5547
5548 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5549
5550 if (flag_stack_usage_info)
5551 current_function_static_stack_size = size;
5552
5553 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5554 {
5555 if (crtl->is_leaf && !cfun->calls_alloca)
5556 {
5557 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5558 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5559 size - STACK_CHECK_PROTECT);
5560 }
5561 else if (size > 0)
5562 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5563 }
5564
5565 if (size == 0)
5566 ; /* do nothing. */
5567 else if (sparc_leaf_function_p)
5568 {
5569 rtx size_int_rtx = GEN_INT (-size);
5570
5571 if (size <= 4096)
5572 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5573 else if (size <= 8192)
5574 {
5575 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5576 RTX_FRAME_RELATED_P (insn) = 1;
5577
5578 /* %sp is still the CFA register. */
5579 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5580 }
5581 else
5582 {
5583 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5584 emit_move_insn (size_rtx, size_int_rtx);
5585 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5586 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5587 gen_stack_pointer_inc (size_int_rtx));
5588 }
5589
5590 RTX_FRAME_RELATED_P (insn) = 1;
5591 }
5592 else
5593 {
5594 rtx size_int_rtx = GEN_INT (-size);
5595
5596 if (size <= 4096)
5597 emit_window_save (size_int_rtx);
5598 else if (size <= 8192)
5599 {
5600 emit_window_save (GEN_INT (-4096));
5601
5602 /* %sp is not the CFA register anymore. */
5603 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5604
5605 /* Make sure no %fp-based store is issued until after the frame is
5606 established. The offset between the frame pointer and the stack
5607 pointer is calculated relative to the value of the stack pointer
5608 at the end of the function prologue, and moving instructions that
5609 access the stack via the frame pointer between the instructions
5610 that decrement the stack pointer could result in accessing the
5611 register window save area, which is volatile. */
5612 emit_insn (gen_frame_blockage ());
5613 }
5614 else
5615 {
5616 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5617 emit_move_insn (size_rtx, size_int_rtx);
5618 emit_window_save (size_rtx);
5619 }
5620 }
5621
5622 if (sparc_leaf_function_p)
5623 {
5624 sparc_frame_base_reg = stack_pointer_rtx;
5625 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5626 }
5627 else
5628 {
5629 sparc_frame_base_reg = hard_frame_pointer_rtx;
5630 sparc_frame_base_offset = SPARC_STACK_BIAS;
5631 }
5632
5633 if (sparc_n_global_fp_regs > 0)
5634 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5635 sparc_frame_base_offset
5636 - sparc_apparent_frame_size,
5637 SORR_SAVE);
5638
5639 /* Load the GOT register if needed. */
5640 if (crtl->uses_pic_offset_table)
5641 load_got_register ();
5642
5643 /* Advertise that the data calculated just above are now valid. */
5644 sparc_prologue_data_valid_p = true;
5645 }
5646
5647 /* Expand the function prologue in the flat (-mflat) register window model.
5648    Like sparc_expand_prologue, it reserves storage for the frame, saves the
5649    call-saved registers and loads the GOT register if needed.  */
5650
5651 void
5652 sparc_flat_expand_prologue (void)
5653 {
5654 HOST_WIDE_INT size;
5655 rtx_insn *insn;
5656
5657 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5658
5659 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5660
5661 if (flag_stack_usage_info)
5662 current_function_static_stack_size = size;
5663
5664 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5665 {
5666 if (crtl->is_leaf && !cfun->calls_alloca)
5667 {
5668 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5669 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5670 size - STACK_CHECK_PROTECT);
5671 }
5672 else if (size > 0)
5673 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5674 }
5675
5676 if (sparc_save_local_in_regs_p)
5677 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5678 SORR_SAVE);
5679
5680 if (size == 0)
5681 ; /* do nothing. */
5682 else
5683 {
5684 rtx size_int_rtx, size_rtx;
5685
5686 size_rtx = size_int_rtx = GEN_INT (-size);
5687
5688 /* We establish the frame (i.e. decrement the stack pointer) first, even
5689 if we use a frame pointer, because we cannot clobber any call-saved
5690 registers, including the frame pointer, if we haven't created a new
5691 register save area, for the sake of compatibility with the ABI. */
5692 if (size <= 4096)
5693 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5694 else if (size <= 8192 && !frame_pointer_needed)
5695 {
5696 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5697 RTX_FRAME_RELATED_P (insn) = 1;
5698 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5699 }
5700 else
5701 {
5702 size_rtx = gen_rtx_REG (Pmode, 1);
5703 emit_move_insn (size_rtx, size_int_rtx);
5704 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5705 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5706 gen_stack_pointer_inc (size_int_rtx));
5707 }
5708 RTX_FRAME_RELATED_P (insn) = 1;
5709
5710 /* Ensure nothing is scheduled until after the frame is established. */
5711 emit_insn (gen_blockage ());
5712
5713 if (frame_pointer_needed)
5714 {
5715 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5716 gen_rtx_MINUS (Pmode,
5717 stack_pointer_rtx,
5718 size_rtx)));
5719 RTX_FRAME_RELATED_P (insn) = 1;
5720
5721 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5722 gen_rtx_SET (hard_frame_pointer_rtx,
5723 plus_constant (Pmode, stack_pointer_rtx,
5724 size)));
5725 }
5726
5727 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5728 {
5729 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5730 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5731
5732 insn = emit_move_insn (i7, o7);
5733 RTX_FRAME_RELATED_P (insn) = 1;
5734
5735 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5736
5737 /* Prevent this instruction from ever being considered dead,
5738 even if this function has no epilogue. */
5739 emit_use (i7);
5740 }
5741 }
5742
5743 if (frame_pointer_needed)
5744 {
5745 sparc_frame_base_reg = hard_frame_pointer_rtx;
5746 sparc_frame_base_offset = SPARC_STACK_BIAS;
5747 }
5748 else
5749 {
5750 sparc_frame_base_reg = stack_pointer_rtx;
5751 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5752 }
5753
5754 if (sparc_n_global_fp_regs > 0)
5755 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5756 sparc_frame_base_offset
5757 - sparc_apparent_frame_size,
5758 SORR_SAVE);
5759
5760 /* Load the GOT register if needed. */
5761 if (crtl->uses_pic_offset_table)
5762 load_got_register ();
5763
5764 /* Advertise that the data calculated just above are now valid. */
5765 sparc_prologue_data_valid_p = true;
5766 }
5767
5768 /* This function generates the assembly code for function entry, which boils
5769 down to emitting the necessary .register directives. */
5770
5771 static void
5772 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5773 {
5774 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5775 if (!TARGET_FLAT)
5776 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5777
5778 sparc_output_scratch_registers (file);
5779 }
5780
5781 /* Expand the function epilogue, either normal or part of a sibcall.
5782 We emit all the instructions except the return or the call. */
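/* Illustrative sketch (not part of the original sources): for a leaf
   function the deallocation emitted below mirrors the prologue cases,
   roughly

       add %sp, SIZE, %sp                              ! SIZE <= 4096
       add %sp, 4096, %sp ; add %sp, SIZE-4096, %sp    ! 4096 < SIZE <= 8192
       <move SIZE into %g1> ; add %sp, %g1, %sp        ! SIZE > 8192

   Non-leaf functions do not need this: their frame is torn down by the
   `restore' emitted along with the return (see output_return below).  */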
5783
5784 void
5785 sparc_expand_epilogue (bool for_eh)
5786 {
5787 HOST_WIDE_INT size = sparc_frame_size;
5788
5789 if (sparc_n_global_fp_regs > 0)
5790 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5791 sparc_frame_base_offset
5792 - sparc_apparent_frame_size,
5793 SORR_RESTORE);
5794
5795 if (size == 0 || for_eh)
5796 ; /* do nothing. */
5797 else if (sparc_leaf_function_p)
5798 {
5799 if (size <= 4096)
5800 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5801 else if (size <= 8192)
5802 {
5803 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5804 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5805 }
5806 else
5807 {
5808 rtx reg = gen_rtx_REG (Pmode, 1);
5809 emit_move_insn (reg, GEN_INT (size));
5810 emit_insn (gen_stack_pointer_inc (reg));
5811 }
5812 }
5813 }
5814
5815 /* Expand the function epilogue, either normal or part of a sibcall.
5816 We emit all the instructions except the return or the call. */
5817
5818 void
5819 sparc_flat_expand_epilogue (bool for_eh)
5820 {
5821 HOST_WIDE_INT size = sparc_frame_size;
5822
5823 if (sparc_n_global_fp_regs > 0)
5824 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5825 sparc_frame_base_offset
5826 - sparc_apparent_frame_size,
5827 SORR_RESTORE);
5828
5829   /* If we have a frame pointer, we need both to restore it before the
5830      frame is destroyed and to use its current value while destroying the
5831      frame.  Since we don't have an atomic way to do that in the flat window
5832      model, we save the current value into a temporary register (%g1).  */
5833 if (frame_pointer_needed && !for_eh)
5834 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5835
5836 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5837 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5838 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5839
5840 if (sparc_save_local_in_regs_p)
5841 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5842 sparc_frame_base_offset,
5843 SORR_RESTORE);
5844
5845 if (size == 0 || for_eh)
5846 ; /* do nothing. */
5847 else if (frame_pointer_needed)
5848 {
5849 /* Make sure the frame is destroyed after everything else is done. */
5850 emit_insn (gen_blockage ());
5851
5852 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5853 }
5854 else
5855 {
5856 /* Likewise. */
5857 emit_insn (gen_blockage ());
5858
5859 if (size <= 4096)
5860 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5861 else if (size <= 8192)
5862 {
5863 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5864 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5865 }
5866 else
5867 {
5868 rtx reg = gen_rtx_REG (Pmode, 1);
5869 emit_move_insn (reg, GEN_INT (size));
5870 emit_insn (gen_stack_pointer_inc (reg));
5871 }
5872 }
5873 }
5874
5875 /* Return true if it is appropriate to emit `return' instructions in the
5876 body of a function. */
5877
5878 bool
5879 sparc_can_use_return_insn_p (void)
5880 {
5881   return sparc_prologue_data_valid_p
5882 	 && sparc_n_global_fp_regs == 0
5883 	 && (TARGET_FLAT
5884 	     ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5885 	     : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5886 }
5887
5888 /* This function generates the assembly code for function exit. */
5889
5890 static void
5891 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5892 {
5893 /* If the last two instructions of a function are "call foo; dslot;"
5894 the return address might point to the first instruction in the next
5895 function and we have to output a dummy nop for the sake of sane
5896 backtraces in such cases. This is pointless for sibling calls since
5897 the return address is explicitly adjusted. */
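  /* Hypothetical illustration: if a function ends with

         call  foo
          nop               ! delay slot, last instruction of the function

     then the return address %o7+8 used by foo's return lands on the first
     instruction of the *next* function, so a trailing nop is emitted below
     to keep it inside the current one.  */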
5898
5899 rtx_insn *insn = get_last_insn ();
5900
5901 rtx last_real_insn = prev_real_insn (insn);
5902 if (last_real_insn
5903 && NONJUMP_INSN_P (last_real_insn)
5904 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5905 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5906
5907 if (last_real_insn
5908 && CALL_P (last_real_insn)
5909 && !SIBLING_CALL_P (last_real_insn))
5910 fputs("\tnop\n", file);
5911
5912 sparc_output_deferred_case_vectors ();
5913 }
5914
5915 /* Output a 'restore' instruction. */
5916
5917 static void
5918 output_restore (rtx pat)
5919 {
5920 rtx operands[3];
5921
5922 if (! pat)
5923 {
5924 fputs ("\t restore\n", asm_out_file);
5925 return;
5926 }
5927
5928 gcc_assert (GET_CODE (pat) == SET);
5929
5930 operands[0] = SET_DEST (pat);
5931 pat = SET_SRC (pat);
5932
5933 switch (GET_CODE (pat))
5934 {
5935 case PLUS:
5936 operands[1] = XEXP (pat, 0);
5937 operands[2] = XEXP (pat, 1);
5938 output_asm_insn (" restore %r1, %2, %Y0", operands);
5939 break;
5940 case LO_SUM:
5941 operands[1] = XEXP (pat, 0);
5942 operands[2] = XEXP (pat, 1);
5943 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5944 break;
5945 case ASHIFT:
5946 operands[1] = XEXP (pat, 0);
5947 gcc_assert (XEXP (pat, 1) == const1_rtx);
5948 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5949 break;
5950 default:
5951 operands[1] = pat;
5952 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5953 break;
5954 }
5955 }
5956
5957 /* Output a return. */
5958
5959 const char *
5960 output_return (rtx_insn *insn)
5961 {
5962 if (crtl->calls_eh_return)
5963 {
5964 /* If the function uses __builtin_eh_return, the eh_return
5965 machinery occupies the delay slot. */
5966 gcc_assert (!final_sequence);
5967
5968 if (flag_delayed_branch)
5969 {
5970 if (!TARGET_FLAT && TARGET_V9)
5971 fputs ("\treturn\t%i7+8\n", asm_out_file);
5972 else
5973 {
5974 if (!TARGET_FLAT)
5975 fputs ("\trestore\n", asm_out_file);
5976
5977 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5978 }
5979
5980 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5981 }
5982 else
5983 {
5984 if (!TARGET_FLAT)
5985 fputs ("\trestore\n", asm_out_file);
5986
5987 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5988 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5989 }
5990 }
5991 else if (sparc_leaf_function_p || TARGET_FLAT)
5992 {
5993 /* This is a leaf or flat function so we don't have to bother restoring
5994 the register window, which frees us from dealing with the convoluted
5995 semantics of restore/return. We simply output the jump to the
5996 return address and the insn in the delay slot (if any). */
5997
5998 return "jmp\t%%o7+%)%#";
5999 }
6000 else
6001 {
6002 /* This is a regular function so we have to restore the register window.
6003 We may have a pending insn for the delay slot, which will be either
6004 combined with the 'restore' instruction or put in the delay slot of
6005 the 'return' instruction. */
6006
6007 if (final_sequence)
6008 {
6009 rtx delay, pat;
6010
6011 delay = NEXT_INSN (insn);
6012 gcc_assert (delay);
6013
6014 pat = PATTERN (delay);
6015
6016 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6017 {
6018 epilogue_renumber (&pat, 0);
6019 return "return\t%%i7+%)%#";
6020 }
6021 else
6022 {
6023 output_asm_insn ("jmp\t%%i7+%)", NULL);
6024 output_restore (pat);
6025 PATTERN (delay) = gen_blockage ();
6026 INSN_CODE (delay) = -1;
6027 }
6028 }
6029 else
6030 {
6031 /* The delay slot is empty. */
6032 if (TARGET_V9)
6033 return "return\t%%i7+%)\n\t nop";
6034 else if (flag_delayed_branch)
6035 return "jmp\t%%i7+%)\n\t restore";
6036 else
6037 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6038 }
6039 }
6040
6041 return "";
6042 }
6043
6044 /* Output a sibling call. */
6045
6046 const char *
6047 output_sibcall (rtx_insn *insn, rtx call_operand)
6048 {
6049 rtx operands[1];
6050
6051 gcc_assert (flag_delayed_branch);
6052
6053 operands[0] = call_operand;
6054
6055 if (sparc_leaf_function_p || TARGET_FLAT)
6056 {
6057 /* This is a leaf or flat function so we don't have to bother restoring
6058 the register window. We simply output the jump to the function and
6059 the insn in the delay slot (if any). */
6060
6061 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6062
6063 if (final_sequence)
6064 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6065 operands);
6066 else
6067 	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6068 	   it into a branch if possible.  */
6069 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6070 operands);
6071 }
6072 else
6073 {
6074 /* This is a regular function so we have to restore the register window.
6075 We may have a pending insn for the delay slot, which will be combined
6076 with the 'restore' instruction. */
6077
6078 output_asm_insn ("call\t%a0, 0", operands);
6079
6080 if (final_sequence)
6081 {
6082 rtx_insn *delay = NEXT_INSN (insn);
6083 gcc_assert (delay);
6084
6085 output_restore (PATTERN (delay));
6086
6087 PATTERN (delay) = gen_blockage ();
6088 INSN_CODE (delay) = -1;
6089 }
6090 else
6091 output_restore (NULL_RTX);
6092 }
6093
6094 return "";
6095 }
6096 \f
6097 /* Functions for handling argument passing.
6098
6099 For 32-bit, the first 6 args are normally in registers and the rest are
6100 pushed. Any arg that starts within the first 6 words is at least
6101 partially passed in a register unless its data type forbids.
6102
6103 For 64-bit, the argument registers are laid out as an array of 16 elements
6104 and arguments are added sequentially. The first 6 int args and up to the
6105 first 16 fp args (depending on size) are passed in regs.
6106
6107 Slot Stack Integral Float Float in structure Double Long Double
6108 ---- ----- -------- ----- ------------------ ------ -----------
6109 15 [SP+248] %f31 %f30,%f31 %d30
6110 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6111 13 [SP+232] %f27 %f26,%f27 %d26
6112 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6113 11 [SP+216] %f23 %f22,%f23 %d22
6114 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6115 9 [SP+200] %f19 %f18,%f19 %d18
6116 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6117 7 [SP+184] %f15 %f14,%f15 %d14
6118 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6119 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6120 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6121 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6122 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6123 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6124 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6125
6126 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6127
6128 Integral arguments are always passed as 64-bit quantities appropriately
6129 extended.
6130
6131 Passing of floating point values is handled as follows.
6132 If a prototype is in scope:
6133 If the value is in a named argument (i.e. not a stdarg function or a
6134 value not part of the `...') then the value is passed in the appropriate
6135 fp reg.
6136 If the value is part of the `...' and is passed in one of the first 6
6137 slots then the value is passed in the appropriate int reg.
6138 If the value is part of the `...' and is not passed in one of the first 6
6139 slots then the value is passed in memory.
6140 If a prototype is not in scope:
6141 If the value is one of the first 6 arguments the value is passed in the
6142 appropriate integer reg and the appropriate fp reg.
6143 If the value is not one of the first 6 arguments the value is passed in
6144 the appropriate fp reg and in memory.
6145
6146
6147 Summary of the calling conventions implemented by GCC on the SPARC:
6148
6149 32-bit ABI:
6150 size argument return value
6151
6152 small integer <4 int. reg. int. reg.
6153 word 4 int. reg. int. reg.
6154 double word 8 int. reg. int. reg.
6155
6156 _Complex small integer <8 int. reg. int. reg.
6157 _Complex word 8 int. reg. int. reg.
6158 _Complex double word 16 memory int. reg.
6159
6160 vector integer <=8 int. reg. FP reg.
6161 vector integer >8 memory memory
6162
6163 float 4 int. reg. FP reg.
6164 double 8 int. reg. FP reg.
6165 long double 16 memory memory
6166
6167 _Complex float 8 memory FP reg.
6168 _Complex double 16 memory FP reg.
6169 _Complex long double 32 memory FP reg.
6170
6171 vector float any memory memory
6172
6173 aggregate any memory memory
6174
6175
6176
6177 64-bit ABI:
6178 size argument return value
6179
6180 small integer <8 int. reg. int. reg.
6181 word 8 int. reg. int. reg.
6182 double word 16 int. reg. int. reg.
6183
6184 _Complex small integer <16 int. reg. int. reg.
6185 _Complex word 16 int. reg. int. reg.
6186 _Complex double word 32 memory int. reg.
6187
6188 vector integer <=16 FP reg. FP reg.
6189 vector integer 16<s<=32 memory FP reg.
6190 vector integer >32 memory memory
6191
6192 float 4 FP reg. FP reg.
6193 double 8 FP reg. FP reg.
6194 long double 16 FP reg. FP reg.
6195
6196 _Complex float 8 FP reg. FP reg.
6197 _Complex double 16 FP reg. FP reg.
6198 _Complex long double 32 memory FP reg.
6199
6200 vector float <=16 FP reg. FP reg.
6201 vector float 16<s<=32 memory FP reg.
6202 vector float >32 memory memory
6203
6204 aggregate <=16 reg. reg.
6205 aggregate 16<s<=32 memory reg.
6206 aggregate >32 memory memory
6207
6208
6209
6210 Note #1: complex floating-point types follow the extended SPARC ABIs as
6211 implemented by the Sun compiler.
6212
6213 Note #2: integral vector types follow the scalar floating-point types
6214 conventions to match what is implemented by the Sun VIS SDK.
6215
6216 Note #3: floating-point vector types follow the aggregate types
6217 conventions. */
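
/* Worked example (for illustration only, assuming the 64-bit ABI, TARGET_FPU
   and a prototype in scope):

     struct pt { float x, y; };
     extern double f (int a, double b, struct pt s);

   'a' is an integral argument, so it occupies slot 0 and is passed
   sign-extended in %o0; 'b' is a named double in slot 1, so it is passed in
   %d2 (see the table above); 's' is an 8-byte structure containing only
   floats, so it follows the "Float in structure" column, i.e. %f4/%f5
   (slot 2).  Under the 32-bit ABI the same call would pass 'a' in %o0,
   'b' in the %o1/%o2 pair and 's' by reference.  */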
6218
6219
6220 /* Maximum number of int regs for args. */
6221 #define SPARC_INT_ARG_MAX 6
6222 /* Maximum number of fp regs for args. */
6223 #define SPARC_FP_ARG_MAX 16
6224 /* Number of words (partially) occupied for a given size in units. */
6225 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
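/* For instance, with UNITS_PER_WORD == 8 (64-bit), CEIL_NWORDS (8) == 1 and
   CEIL_NWORDS (12) == 2; with UNITS_PER_WORD == 4 (32-bit),
   CEIL_NWORDS (12) == 3.  (Illustrative values only.)  */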
6226
6227 /* Handle the INIT_CUMULATIVE_ARGS macro.
6228 Initialize a variable CUM of type CUMULATIVE_ARGS
6229 for a call to a function whose data type is FNTYPE.
6230 For a library call, FNTYPE is 0. */
6231
6232 void
6233 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6234 {
6235 cum->words = 0;
6236 cum->prototype_p = fntype && prototype_p (fntype);
6237 cum->libcall_p = !fntype;
6238 }
6239
6240 /* Handle promotion of pointer and integer arguments. */
6241
6242 static machine_mode
6243 sparc_promote_function_mode (const_tree type, machine_mode mode,
6244 int *punsignedp, const_tree, int)
6245 {
6246 if (type && POINTER_TYPE_P (type))
6247 {
6248 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6249 return Pmode;
6250 }
6251
6252 /* Integral arguments are passed as full words, as per the ABI. */
6253 if (GET_MODE_CLASS (mode) == MODE_INT
6254 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6255 return word_mode;
6256
6257 return mode;
6258 }
6259
6260 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6261
6262 static bool
6263 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6264 {
6265   return TARGET_ARCH64;
6266 }
6267
6268 /* Traverse the record TYPE recursively and call FUNC on its fields.
6269 NAMED is true if this is for a named parameter. DATA is passed
6270 to FUNC for each field. OFFSET is the starting position and
6271 PACKED is true if we are inside a packed record. */
6272
6273 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6274 static void
6275 traverse_record_type (const_tree type, bool named, T *data,
6276 HOST_WIDE_INT offset = 0, bool packed = false)
6277 {
6278 /* The ABI obviously doesn't specify how packed structures are passed.
6279 These are passed in integer regs if possible, otherwise memory. */
6280 if (!packed)
6281 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6282 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6283 {
6284 packed = true;
6285 break;
6286 }
6287
6288 /* Walk the real fields, but skip those with no size or a zero size.
6289 ??? Fields with variable offset are handled as having zero offset. */
6290 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6291 if (TREE_CODE (field) == FIELD_DECL)
6292 {
6293 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6294 continue;
6295
6296 HOST_WIDE_INT bitpos = offset;
6297 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6298 bitpos += int_bit_position (field);
6299
6300 tree field_type = TREE_TYPE (field);
6301 if (TREE_CODE (field_type) == RECORD_TYPE)
6302 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6303 packed);
6304 else
6305 {
6306 const bool fp_type
6307 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6308 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6309 data);
6310 }
6311 }
6312 }
6313
6314 /* Handle recursive register classifying for structure layout. */
6315
6316 typedef struct
6317 {
6318 bool fp_regs; /* true if field eligible to FP registers. */
6319 bool fp_regs_in_first_word; /* true if such field in first word. */
6320 } classify_data_t;
6321
6322 /* A subroutine of function_arg_slotno. Classify the field. */
6323
6324 inline void
6325 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6326 classify_data_t *data)
6327 {
6328 if (fp)
6329 {
6330 data->fp_regs = true;
6331 if (bitpos < BITS_PER_WORD)
6332 data->fp_regs_in_first_word = true;
6333 }
6334 }
6335
6336 /* Compute the slot number to pass an argument in.
6337 Return the slot number or -1 if passing on the stack.
6338
6339 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6340 the preceding args and about the function being called.
6341 MODE is the argument's machine mode.
6342 TYPE is the data type of the argument (as a tree).
6343 This is null for libcalls where that information may
6344 not be available.
6345 NAMED is nonzero if this argument is a named parameter
6346 (otherwise it is an extra parameter matching an ellipsis).
6347 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6348 *PREGNO records the register number to use if scalar type.
6349 *PPADDING records the amount of padding needed in words. */
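
/* Hedged example of the 16-byte alignment handling below (64-bit only,
   assuming __int128 maps to TImode with 128-bit alignment):

     void g (long a, __int128 b);

   'a' takes slot 0 (%o0).  'b' requires 16-byte alignment, so slot 1 is
   skipped (*PPADDING = 1) and 'b' occupies slots 2-3, i.e. the register
   pair starting at %o2.  */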
6350
6351 static int
6352 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6353 const_tree type, bool named, bool incoming,
6354 int *pregno, int *ppadding)
6355 {
6356 int regbase = (incoming
6357 ? SPARC_INCOMING_INT_ARG_FIRST
6358 : SPARC_OUTGOING_INT_ARG_FIRST);
6359 int slotno = cum->words;
6360 enum mode_class mclass;
6361 int regno;
6362
6363 *ppadding = 0;
6364
6365 if (type && TREE_ADDRESSABLE (type))
6366 return -1;
6367
6368 if (TARGET_ARCH32
6369 && mode == BLKmode
6370 && type
6371 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6372 return -1;
6373
6374 /* For SPARC64, objects requiring 16-byte alignment get it. */
6375 if (TARGET_ARCH64
6376 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6377 && (slotno & 1) != 0)
6378 slotno++, *ppadding = 1;
6379
6380 mclass = GET_MODE_CLASS (mode);
6381 if (type && TREE_CODE (type) == VECTOR_TYPE)
6382 {
6383 /* Vector types deserve special treatment because they are
6384 polymorphic wrt their mode, depending upon whether VIS
6385 instructions are enabled. */
6386 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6387 {
6388 /* The SPARC port defines no floating-point vector modes. */
6389 gcc_assert (mode == BLKmode);
6390 }
6391 else
6392 {
6393 /* Integral vector types should either have a vector
6394 mode or an integral mode, because we are guaranteed
6395 by pass_by_reference that their size is not greater
6396 than 16 bytes and TImode is 16-byte wide. */
6397 gcc_assert (mode != BLKmode);
6398
6399 /* Vector integers are handled like floats according to
6400 the Sun VIS SDK. */
6401 mclass = MODE_FLOAT;
6402 }
6403 }
6404
6405 switch (mclass)
6406 {
6407 case MODE_FLOAT:
6408 case MODE_COMPLEX_FLOAT:
6409 case MODE_VECTOR_INT:
6410 if (TARGET_ARCH64 && TARGET_FPU && named)
6411 {
6412 /* If all arg slots are filled, then must pass on stack. */
6413 if (slotno >= SPARC_FP_ARG_MAX)
6414 return -1;
6415
6416 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6417 /* Arguments filling only one single FP register are
6418 right-justified in the outer double FP register. */
6419 if (GET_MODE_SIZE (mode) <= 4)
6420 regno++;
6421 break;
6422 }
6423 /* fallthrough */
6424
6425 case MODE_INT:
6426 case MODE_COMPLEX_INT:
6427 /* If all arg slots are filled, then must pass on stack. */
6428 if (slotno >= SPARC_INT_ARG_MAX)
6429 return -1;
6430
6431 regno = regbase + slotno;
6432 break;
6433
6434 case MODE_RANDOM:
6435 if (mode == VOIDmode)
6436 /* MODE is VOIDmode when generating the actual call. */
6437 return -1;
6438
6439 gcc_assert (mode == BLKmode);
6440
6441 if (TARGET_ARCH32
6442 || !type
6443 || (TREE_CODE (type) != RECORD_TYPE
6444 && TREE_CODE (type) != VECTOR_TYPE))
6445 {
6446 /* If all arg slots are filled, then must pass on stack. */
6447 if (slotno >= SPARC_INT_ARG_MAX)
6448 return -1;
6449
6450 regno = regbase + slotno;
6451 }
6452 else /* TARGET_ARCH64 && type */
6453 {
6454 /* If all arg slots are filled, then must pass on stack. */
6455 if (slotno >= SPARC_FP_ARG_MAX)
6456 return -1;
6457
6458 if (TREE_CODE (type) == RECORD_TYPE)
6459 {
6460 classify_data_t data = { false, false };
6461 traverse_record_type<classify_data_t, classify_registers>
6462 (type, named, &data);
6463
6464 if (data.fp_regs)
6465 {
6466 /* If all FP slots are filled except for the last one and
6467 there is no FP field in the first word, then must pass
6468 on stack. */
6469 if (slotno >= SPARC_FP_ARG_MAX - 1
6470 && !data.fp_regs_in_first_word)
6471 return -1;
6472 }
6473 else
6474 {
6475 /* If all int slots are filled, then must pass on stack. */
6476 if (slotno >= SPARC_INT_ARG_MAX)
6477 return -1;
6478 }
6479 }
6480
6481 /* PREGNO isn't set since both int and FP regs can be used. */
6482 return slotno;
6483 }
6484 break;
6485
6486 default :
6487 gcc_unreachable ();
6488 }
6489
6490 *pregno = regno;
6491 return slotno;
6492 }
6493
6494 /* Handle recursive register counting/assigning for structure layout. */
6495
6496 typedef struct
6497 {
6498 int slotno; /* slot number of the argument. */
6499 int regbase; /* regno of the base register. */
6500 int intoffset; /* offset of the first pending integer field. */
6501 int nregs; /* number of words passed in registers. */
6502 bool stack; /* true if part of the argument is on the stack. */
6503 rtx ret; /* return expression being built. */
6504 } assign_data_t;
6505
6506 /* A subroutine of function_arg_record_value. Compute the number of integer
6507 registers to be assigned between PARMS->intoffset and BITPOS. Return
6508 true if at least one integer register is assigned or false otherwise. */
6509
6510 static bool
6511 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6512 {
6513 if (data->intoffset < 0)
6514 return false;
6515
6516 const int intoffset = data->intoffset;
6517 data->intoffset = -1;
6518
6519 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6520 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6521 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6522 int nregs = (endbit - startbit) / BITS_PER_WORD;
6523
6524 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6525 {
6526 nregs = SPARC_INT_ARG_MAX - this_slotno;
6527
6528 /* We need to pass this field (partly) on the stack. */
6529 data->stack = 1;
6530 }
6531
6532 if (nregs <= 0)
6533 return false;
6534
6535 *pnregs = nregs;
6536 return true;
6537 }
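
/* Worked example (illustrative, BITS_PER_WORD == 64): with intoffset == 0
   and BITPOS == 96, startbit is 0, endbit rounds up to 128 and two integer
   registers are requested; had this_slotno been SPARC_INT_ARG_MAX - 1, nregs
   would have been clamped to 1 and data->stack set.  */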
6538
6539 /* A subroutine of function_arg_record_value. Compute the number and the mode
6540 of the FP registers to be assigned for FIELD. Return true if at least one
6541 FP register is assigned or false otherwise. */
6542
6543 static bool
6544 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6545 assign_data_t *data,
6546 int *pnregs, machine_mode *pmode)
6547 {
6548 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6549 machine_mode mode = DECL_MODE (field);
6550 int nregs, nslots;
6551
6552 /* Slots are counted as words while regs are counted as having the size of
6553 the (inner) mode. */
6554 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6555 {
6556 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6557 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6558 }
6559 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6560 {
6561 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6562 nregs = 2;
6563 }
6564 else
6565 nregs = 1;
6566
6567 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6568
6569 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6570 {
6571 nslots = SPARC_FP_ARG_MAX - this_slotno;
6572 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6573
6574 /* We need to pass this field (partly) on the stack. */
6575 data->stack = 1;
6576
6577 if (nregs <= 0)
6578 return false;
6579 }
6580
6581 *pnregs = nregs;
6582 *pmode = mode;
6583 return true;
6584 }
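
/* Worked example (illustrative, 64-bit): for a field of type
   'float __attribute__ ((vector_size (16)))', which has BLKmode here, the
   inner mode is SFmode and nregs is 4; nslots = CEIL_NWORDS (4 * 4) = 2, so
   the field consumes two argument slots but four single FP registers.  */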
6585
6586 /* A subroutine of function_arg_record_value. Count the number of registers
6587 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6588
6589 inline void
6590 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6591 assign_data_t *data)
6592 {
6593 if (fp)
6594 {
6595 int nregs;
6596 machine_mode mode;
6597
6598 if (compute_int_layout (bitpos, data, &nregs))
6599 data->nregs += nregs;
6600
6601 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6602 data->nregs += nregs;
6603 }
6604 else
6605 {
6606 if (data->intoffset < 0)
6607 data->intoffset = bitpos;
6608 }
6609 }
6610
6611 /* A subroutine of function_arg_record_value. Assign the bits of the
6612 structure between PARMS->intoffset and BITPOS to integer registers. */
6613
6614 static void
6615 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6616 {
6617 int intoffset = data->intoffset;
6618 machine_mode mode;
6619 int nregs;
6620
6621 if (!compute_int_layout (bitpos, data, &nregs))
6622 return;
6623
6624   /* If this is the trailing part of a word, only load that much into
6625      the register.  Otherwise load the whole register.  Note that in
6626      the latter case we may pick up unwanted bits.  It's not a problem
6627      at the moment, but we may wish to revisit this.  */
6628 if (intoffset % BITS_PER_WORD != 0)
6629 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6630 MODE_INT);
6631 else
6632 mode = word_mode;
6633
6634 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6635 unsigned int regno = data->regbase + this_slotno;
6636 intoffset /= BITS_PER_UNIT;
6637
6638 do
6639 {
6640 rtx reg = gen_rtx_REG (mode, regno);
6641 XVECEXP (data->ret, 0, data->stack + data->nregs)
6642 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6643 data->nregs += 1;
6644 mode = word_mode;
6645 regno += 1;
6646 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6647 }
6648 while (--nregs > 0);
6649 }
6650
6651 /* A subroutine of function_arg_record_value. Assign FIELD at position
6652 BITPOS to FP registers. */
6653
6654 static void
6655 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6656 assign_data_t *data)
6657 {
6658 int nregs;
6659 machine_mode mode;
6660
6661 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6662 return;
6663
6664 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6665 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6666 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6667 regno++;
6668 int pos = bitpos / BITS_PER_UNIT;
6669
6670 do
6671 {
6672 rtx reg = gen_rtx_REG (mode, regno);
6673 XVECEXP (data->ret, 0, data->stack + data->nregs)
6674 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6675 data->nregs += 1;
6676 regno += GET_MODE_SIZE (mode) / 4;
6677 pos += GET_MODE_SIZE (mode);
6678 }
6679 while (--nregs > 0);
6680 }
6681
6682 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6683 the structure between PARMS->intoffset and BITPOS to registers. */
6684
6685 inline void
6686 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6687 assign_data_t *data)
6688 {
6689 if (fp)
6690 {
6691 assign_int_registers (bitpos, data);
6692
6693 assign_fp_registers (field, bitpos, data);
6694 }
6695 else
6696 {
6697 if (data->intoffset < 0)
6698 data->intoffset = bitpos;
6699 }
6700 }
6701
6702 /* Used by function_arg and sparc_function_value_1 to implement the complex
6703 conventions of the 64-bit ABI for passing and returning structures.
6704 Return an expression valid as a return value for the FUNCTION_ARG
6705 and TARGET_FUNCTION_VALUE.
6706
6707 TYPE is the data type of the argument (as a tree).
6708 This is null for libcalls where that information may
6709 not be available.
6710 MODE is the argument's machine mode.
6711 SLOTNO is the index number of the argument's slot in the parameter array.
6712 NAMED is true if this argument is a named parameter
6713 (otherwise it is an extra parameter matching an ellipsis).
6714 REGBASE is the regno of the base register for the parameter array. */
6715
6716 static rtx
6717 function_arg_record_value (const_tree type, machine_mode mode,
6718 int slotno, bool named, int regbase)
6719 {
6720 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6721 assign_data_t data;
6722 int nregs;
6723
6724 data.slotno = slotno;
6725 data.regbase = regbase;
6726
6727 /* Count how many registers we need. */
6728 data.nregs = 0;
6729 data.intoffset = 0;
6730 data.stack = false;
6731 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6732
6733 /* Take into account pending integer fields. */
6734 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6735 data.nregs += nregs;
6736
6737 /* Allocate the vector and handle some annoying special cases. */
6738 nregs = data.nregs;
6739
6740 if (nregs == 0)
6741 {
6742 /* ??? Empty structure has no value? Duh? */
6743 if (typesize <= 0)
6744 {
6745 	  /* Though there's nothing really to store, return a word register
6746 	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
6747 	     leads to breakage because there are zero bytes to
6748 	     load.  */
6749 return gen_rtx_REG (mode, regbase);
6750 }
6751
6752 /* ??? C++ has structures with no fields, and yet a size. Give up
6753 for now and pass everything back in integer registers. */
6754 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6755 if (nregs + slotno > SPARC_INT_ARG_MAX)
6756 nregs = SPARC_INT_ARG_MAX - slotno;
6757 }
6758
6759 gcc_assert (nregs > 0);
6760
6761 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6762
6763 /* If at least one field must be passed on the stack, generate
6764 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6765 also be passed on the stack. We can't do much better because the
6766 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6767 of structures for which the fields passed exclusively in registers
6768 are not at the beginning of the structure. */
6769 if (data.stack)
6770 XVECEXP (data.ret, 0, 0)
6771 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6772
6773 /* Assign the registers. */
6774 data.nregs = 0;
6775 data.intoffset = 0;
6776 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6777
6778 /* Assign pending integer fields. */
6779 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6780
6781 gcc_assert (data.nregs == nregs);
6782
6783 return data.ret;
6784 }
6785
6786 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6787 of the 64-bit ABI for passing and returning unions.
6788 Return an expression valid as a return value for the FUNCTION_ARG
6789 and TARGET_FUNCTION_VALUE.
6790
6791 SIZE is the size in bytes of the union.
6792 MODE is the argument's machine mode.
6793 REGNO is the hard register the union will be passed in. */
6794
6795 static rtx
6796 function_arg_union_value (int size, machine_mode mode, int slotno,
6797 int regno)
6798 {
6799 int nwords = CEIL_NWORDS (size), i;
6800 rtx regs;
6801
6802 /* See comment in previous function for empty structures. */
6803 if (nwords == 0)
6804 return gen_rtx_REG (mode, regno);
6805
6806 if (slotno == SPARC_INT_ARG_MAX - 1)
6807 nwords = 1;
6808
6809 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6810
6811 for (i = 0; i < nwords; i++)
6812 {
6813 /* Unions are passed left-justified. */
6814 XVECEXP (regs, 0, i)
6815 = gen_rtx_EXPR_LIST (VOIDmode,
6816 gen_rtx_REG (word_mode, regno),
6817 GEN_INT (UNITS_PER_WORD * i));
6818 regno++;
6819 }
6820
6821 return regs;
6822 }
6823
6824 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6825 for passing and returning BLKmode vectors.
6826 Return an expression valid as a return value for the FUNCTION_ARG
6827 and TARGET_FUNCTION_VALUE.
6828
6829 SIZE is the size in bytes of the vector.
6830 REGNO is the FP hard register the vector will be passed in. */
6831
6832 static rtx
6833 function_arg_vector_value (int size, int regno)
6834 {
6835 const int nregs = MAX (1, size / 8);
6836 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6837
6838 if (size < 8)
6839 XVECEXP (regs, 0, 0)
6840 = gen_rtx_EXPR_LIST (VOIDmode,
6841 gen_rtx_REG (SImode, regno),
6842 const0_rtx);
6843 else
6844 for (int i = 0; i < nregs; i++)
6845 XVECEXP (regs, 0, i)
6846 = gen_rtx_EXPR_LIST (VOIDmode,
6847 gen_rtx_REG (DImode, regno + 2*i),
6848 GEN_INT (i*8));
6849
6850 return regs;
6851 }
6852
6853 /* Determine where to put an argument to a function.
6854 Value is zero to push the argument on the stack,
6855 or a hard register in which to store the argument.
6856
6857 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6858 the preceding args and about the function being called.
6859 MODE is the argument's machine mode.
6860 TYPE is the data type of the argument (as a tree).
6861 This is null for libcalls where that information may
6862 not be available.
6863 NAMED is true if this argument is a named parameter
6864 (otherwise it is an extra parameter matching an ellipsis).
6865 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6866 TARGET_FUNCTION_INCOMING_ARG. */
6867
6868 static rtx
6869 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6870 const_tree type, bool named, bool incoming)
6871 {
6872 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6873
6874 int regbase = (incoming
6875 ? SPARC_INCOMING_INT_ARG_FIRST
6876 : SPARC_OUTGOING_INT_ARG_FIRST);
6877 int slotno, regno, padding;
6878 enum mode_class mclass = GET_MODE_CLASS (mode);
6879
6880 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6881 &regno, &padding);
6882 if (slotno == -1)
6883 return 0;
6884
6885 /* Vector types deserve special treatment because they are polymorphic wrt
6886 their mode, depending upon whether VIS instructions are enabled. */
6887 if (type && TREE_CODE (type) == VECTOR_TYPE)
6888 {
6889 HOST_WIDE_INT size = int_size_in_bytes (type);
6890 gcc_assert ((TARGET_ARCH32 && size <= 8)
6891 || (TARGET_ARCH64 && size <= 16));
6892
6893 if (mode == BLKmode)
6894 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6895
6896 mclass = MODE_FLOAT;
6897 }
6898
6899 if (TARGET_ARCH32)
6900 return gen_rtx_REG (mode, regno);
6901
6902 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6903 and are promoted to registers if possible. */
6904 if (type && TREE_CODE (type) == RECORD_TYPE)
6905 {
6906 HOST_WIDE_INT size = int_size_in_bytes (type);
6907 gcc_assert (size <= 16);
6908
6909 return function_arg_record_value (type, mode, slotno, named, regbase);
6910 }
6911
6912 /* Unions up to 16 bytes in size are passed in integer registers. */
6913 else if (type && TREE_CODE (type) == UNION_TYPE)
6914 {
6915 HOST_WIDE_INT size = int_size_in_bytes (type);
6916 gcc_assert (size <= 16);
6917
6918 return function_arg_union_value (size, mode, slotno, regno);
6919 }
6920
6921 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6922 but also have the slot allocated for them.
6923 If no prototype is in scope fp values in register slots get passed
6924 in two places, either fp regs and int regs or fp regs and memory. */
6925 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6926 && SPARC_FP_REG_P (regno))
6927 {
6928 rtx reg = gen_rtx_REG (mode, regno);
6929 if (cum->prototype_p || cum->libcall_p)
6930 return reg;
6931 else
6932 {
6933 rtx v0, v1;
6934
6935 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6936 {
6937 int intreg;
6938
6939 /* On incoming, we don't need to know that the value
6940 is passed in %f0 and %i0, and it confuses other parts
6941 causing needless spillage even on the simplest cases. */
6942 if (incoming)
6943 return reg;
6944
6945 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6946 + (regno - SPARC_FP_ARG_FIRST) / 2);
6947
6948 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6949 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6950 const0_rtx);
6951 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6952 }
6953 else
6954 {
6955 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6956 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6957 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6958 }
6959 }
6960 }
6961
6962 /* All other aggregate types are passed in an integer register in a mode
6963 corresponding to the size of the type. */
6964 else if (type && AGGREGATE_TYPE_P (type))
6965 {
6966 HOST_WIDE_INT size = int_size_in_bytes (type);
6967 gcc_assert (size <= 16);
6968
6969 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6970 }
6971
6972 return gen_rtx_REG (mode, regno);
6973 }
6974
6975 /* Handle the TARGET_FUNCTION_ARG target hook. */
6976
6977 static rtx
6978 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6979 const_tree type, bool named)
6980 {
6981 return sparc_function_arg_1 (cum, mode, type, named, false);
6982 }
6983
6984 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6985
6986 static rtx
6987 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6988 const_tree type, bool named)
6989 {
6990 return sparc_function_arg_1 (cum, mode, type, named, true);
6991 }
6992
6993 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6994
6995 static unsigned int
6996 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6997 {
6998 return ((TARGET_ARCH64
6999 && (GET_MODE_ALIGNMENT (mode) == 128
7000 || (type && TYPE_ALIGN (type) == 128)))
7001 ? 128
7002 : PARM_BOUNDARY);
7003 }
7004
7005 /* For an arg passed partly in registers and partly in memory,
7006 this is the number of bytes of registers used.
7007 For args passed entirely in registers or entirely in memory, zero.
7008
7009 Any arg that starts in the first 6 regs but won't entirely fit in them
7010 needs partial registers on v8. On v9, structures with integer
7011 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7012 values that begin in the last fp reg [where "last fp reg" varies with the
7013 mode] will be split between that reg and memory. */
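
/* Illustrative example (32-bit ABI): a 'double' argument starting in slot 5
   occupies slots 5 and 6, but only slot 5 is a register (%o5), so this hook
   returns (SPARC_INT_ARG_MAX - 5) * UNITS_PER_WORD = 4 bytes passed in
   registers; the remaining 4 bytes go on the stack.  */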
7014
7015 static int
7016 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7017 tree type, bool named)
7018 {
7019 int slotno, regno, padding;
7020
7021   /* We pass false for incoming here; it doesn't matter.  */
7022 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7023 false, &regno, &padding);
7024
7025 if (slotno == -1)
7026 return 0;
7027
7028 if (TARGET_ARCH32)
7029 {
7030 if ((slotno + (mode == BLKmode
7031 ? CEIL_NWORDS (int_size_in_bytes (type))
7032 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7033 > SPARC_INT_ARG_MAX)
7034 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7035 }
7036 else
7037 {
7038 /* We are guaranteed by pass_by_reference that the size of the
7039 argument is not greater than 16 bytes, so we only need to return
7040 one word if the argument is partially passed in registers. */
7041
7042 if (type && AGGREGATE_TYPE_P (type))
7043 {
7044 int size = int_size_in_bytes (type);
7045
7046 if (size > UNITS_PER_WORD
7047 && (slotno == SPARC_INT_ARG_MAX - 1
7048 || slotno == SPARC_FP_ARG_MAX - 1))
7049 return UNITS_PER_WORD;
7050 }
7051 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7052 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7053 && ! (TARGET_FPU && named)))
7054 {
7055 /* The complex types are passed as packed types. */
7056 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7057 && slotno == SPARC_INT_ARG_MAX - 1)
7058 return UNITS_PER_WORD;
7059 }
7060 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7061 {
7062 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7063 > SPARC_FP_ARG_MAX)
7064 return UNITS_PER_WORD;
7065 }
7066 }
7067
7068 return 0;
7069 }
7070
7071 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7072 Specify whether to pass the argument by reference. */
7073
7074 static bool
7075 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7076 machine_mode mode, const_tree type,
7077 bool named ATTRIBUTE_UNUSED)
7078 {
7079 if (TARGET_ARCH32)
7080 /* Original SPARC 32-bit ABI says that structures and unions,
7081 and quad-precision floats are passed by reference. For Pascal,
7082 also pass arrays by reference. All other base types are passed
7083 in registers.
7084
7085 Extended ABI (as implemented by the Sun compiler) says that all
7086 complex floats are passed by reference. Pass complex integers
7087 in registers up to 8 bytes. More generally, enforce the 2-word
7088 cap for passing arguments in registers.
7089
7090 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7091 integers are passed like floats of the same size, that is in
7092 registers up to 8 bytes. Pass all vector floats by reference
7093 like structure and unions. */
7094 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7095 || mode == SCmode
7096 /* Catch CDImode, TFmode, DCmode and TCmode. */
7097 || GET_MODE_SIZE (mode) > 8
7098 || (type
7099 && TREE_CODE (type) == VECTOR_TYPE
7100 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7101 else
7102 /* Original SPARC 64-bit ABI says that structures and unions
7103 smaller than 16 bytes are passed in registers, as well as
7104 all other base types.
7105
7106 Extended ABI (as implemented by the Sun compiler) says that
7107 complex floats are passed in registers up to 16 bytes. Pass
7108 all complex integers in registers up to 16 bytes. More generally,
7109 enforce the 2-word cap for passing arguments in registers.
7110
7111 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7112 integers are passed like floats of the same size, that is in
7113 registers (up to 16 bytes). Pass all vector floats like structure
7114 and unions. */
7115 return ((type
7116 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7117 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7118 /* Catch CTImode and TCmode. */
7119 || GET_MODE_SIZE (mode) > 16);
7120 }
7121
7122 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7123 Update the data in CUM to advance over an argument
7124 of mode MODE and data type TYPE.
7125 TYPE is null for libcalls where that information may not be available. */
7126
7127 static void
7128 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7129 const_tree type, bool named)
7130 {
7131 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7132 int regno, padding;
7133
7134   /* We pass false for incoming here; it doesn't matter.  */
7135 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7136
7137   /* If the argument requires leading padding, add it.  */
7138 cum->words += padding;
7139
7140 if (TARGET_ARCH32)
7141 cum->words += (mode == BLKmode
7142 ? CEIL_NWORDS (int_size_in_bytes (type))
7143 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7144 else
7145 {
7146 if (type && AGGREGATE_TYPE_P (type))
7147 {
7148 int size = int_size_in_bytes (type);
7149
7150 if (size <= 8)
7151 ++cum->words;
7152 else if (size <= 16)
7153 cum->words += 2;
7154 else /* passed by reference */
7155 ++cum->words;
7156 }
7157 else
7158 cum->words += (mode == BLKmode
7159 ? CEIL_NWORDS (int_size_in_bytes (type))
7160 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7161 }
7162 }
7163
7164 /* Handle the FUNCTION_ARG_PADDING macro.
7165    For the 64-bit ABI, structs are always stored left-justified in their
7166    argument slot.  */
7167
7168 enum direction
7169 function_arg_padding (machine_mode mode, const_tree type)
7170 {
7171 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7172 return upward;
7173
7174 /* Fall back to the default. */
7175 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7176 }
7177
7178 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7179 Specify whether to return the return value in memory. */
7180
7181 static bool
7182 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7183 {
7184 if (TARGET_ARCH32)
7185 /* Original SPARC 32-bit ABI says that structures and unions,
7186 and quad-precision floats are returned in memory. All other
7187 base types are returned in registers.
7188
7189 Extended ABI (as implemented by the Sun compiler) says that
7190 all complex floats are returned in registers (8 FP registers
7191 at most for '_Complex long double'). Return all complex integers
7192 in registers (4 at most for '_Complex long long').
7193
7194 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7195 integers are returned like floats of the same size, that is in
7196 registers up to 8 bytes and in memory otherwise. Return all
7197 vector floats in memory like structure and unions; note that
7198 they always have BLKmode like the latter. */
7199 return (TYPE_MODE (type) == BLKmode
7200 || TYPE_MODE (type) == TFmode
7201 || (TREE_CODE (type) == VECTOR_TYPE
7202 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7203 else
7204 /* Original SPARC 64-bit ABI says that structures and unions
7205 smaller than 32 bytes are returned in registers, as well as
7206 all other base types.
7207
7208 Extended ABI (as implemented by the Sun compiler) says that all
7209 complex floats are returned in registers (8 FP registers at most
7210 for '_Complex long double'). Return all complex integers in
7211 registers (4 at most for '_Complex TItype').
7212
7213 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7214 integers are returned like floats of the same size, that is in
7215 registers. Return all vector floats like structure and unions;
7216 note that they always have BLKmode like the latter. */
7217 return (TYPE_MODE (type) == BLKmode
7218 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7219 }
7220
7221 /* Handle the TARGET_STRUCT_VALUE target hook.
7222 Return where to find the structure return value address. */
7223
7224 static rtx
7225 sparc_struct_value_rtx (tree fndecl, int incoming)
7226 {
7227 if (TARGET_ARCH64)
7228 return 0;
7229 else
7230 {
7231 rtx mem;
7232
7233 if (incoming)
7234 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7235 STRUCT_VALUE_OFFSET));
7236 else
7237 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7238 STRUCT_VALUE_OFFSET));
7239
7240 /* Only follow the SPARC ABI for fixed-size structure returns.
7241 Variable-size structure returns are handled per the normal
7242 procedures in GCC. This is enabled by -mstd-struct-return. */
7243 if (incoming == 2
7244 && sparc_std_struct_return
7245 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7246 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7247 {
7248 /* We must check and adjust the return address, since the caller
7249 may or may not have really provided the return object. */
7250 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7251 rtx scratch = gen_reg_rtx (SImode);
7252 rtx_code_label *endlab = gen_label_rtx ();
7253
7254 /* Calculate the return object size. */
7255 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7256 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7257 /* Construct a temporary return value. */
7258 rtx temp_val
7259 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7260
7261 /* Implement SPARC 32-bit psABI callee return struct checking:
7262
7263 Fetch the instruction where we will return to and see if
7264 it's an unimp instruction (the most significant 10 bits
7265 will be zero). */
7266 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7267 plus_constant (Pmode,
7268 ret_reg, 8)));
7269 /* Assume the size is valid and pre-adjust. */
7270 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7271 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7272 0, endlab);
7273 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7274 /* Write the address of the memory pointed to by temp_val into
7275 the memory pointed to by mem. */
7276 emit_move_insn (mem, XEXP (temp_val, 0));
7277 emit_label (endlab);
7278 }
7279
7280 return mem;
7281 }
7282 }
7283
7284 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7285 For v9, function return values are subject to the same rules as arguments,
7286 except that up to 32 bytes may be returned in registers. */
7287
7288 static rtx
7289 sparc_function_value_1 (const_tree type, machine_mode mode,
7290 bool outgoing)
7291 {
7292 /* Beware that the two values are swapped here wrt function_arg. */
7293 int regbase = (outgoing
7294 ? SPARC_INCOMING_INT_ARG_FIRST
7295 : SPARC_OUTGOING_INT_ARG_FIRST);
7296 enum mode_class mclass = GET_MODE_CLASS (mode);
7297 int regno;
7298
7299 /* Vector types deserve special treatment because they are polymorphic wrt
7300 their mode, depending upon whether VIS instructions are enabled. */
7301 if (type && TREE_CODE (type) == VECTOR_TYPE)
7302 {
7303 HOST_WIDE_INT size = int_size_in_bytes (type);
7304 gcc_assert ((TARGET_ARCH32 && size <= 8)
7305 || (TARGET_ARCH64 && size <= 32));
7306
7307 if (mode == BLKmode)
7308 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7309
7310 mclass = MODE_FLOAT;
7311 }
7312
7313 if (TARGET_ARCH64 && type)
7314 {
7315 /* Structures up to 32 bytes in size are returned in registers. */
7316 if (TREE_CODE (type) == RECORD_TYPE)
7317 {
7318 HOST_WIDE_INT size = int_size_in_bytes (type);
7319 gcc_assert (size <= 32);
7320
7321 return function_arg_record_value (type, mode, 0, 1, regbase);
7322 }
7323
7324 /* Unions up to 32 bytes in size are returned in integer registers. */
7325 else if (TREE_CODE (type) == UNION_TYPE)
7326 {
7327 HOST_WIDE_INT size = int_size_in_bytes (type);
7328 gcc_assert (size <= 32);
7329
7330 return function_arg_union_value (size, mode, 0, regbase);
7331 }
7332
7333 /* Objects that require it are returned in FP registers. */
7334 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7335 ;
7336
7337 /* All other aggregate types are returned in an integer register in a
7338 mode corresponding to the size of the type. */
7339 else if (AGGREGATE_TYPE_P (type))
7340 {
7341 /* All other aggregate types are passed in an integer register
7342 in a mode corresponding to the size of the type. */
7343 HOST_WIDE_INT size = int_size_in_bytes (type);
7344 gcc_assert (size <= 32);
7345
7346 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7347
7348 /* ??? We probably should have made the same ABI change in
7349 3.4.0 as the one we made for unions. The latter was
7350 required by the SCD though, while the former is not
7351 specified, so we favored compatibility and efficiency.
7352
7353 Now we're stuck for aggregates larger than 16 bytes,
7354 because OImode vanished in the meantime. Let's not
7355 try to be unduly clever, and simply follow the ABI
7356 for unions in that case. */
7357 if (mode == BLKmode)
7358 return function_arg_union_value (size, mode, 0, regbase);
7359 else
7360 mclass = MODE_INT;
7361 }
7362
7363 /* We should only have pointer and integer types at this point. This
7364 must match sparc_promote_function_mode. */
7365 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7366 mode = word_mode;
7367 }
7368
7369 /* We should only have pointer and integer types at this point, except with
7370 -freg-struct-return. This must match sparc_promote_function_mode. */
7371 else if (TARGET_ARCH32
7372 && !(type && AGGREGATE_TYPE_P (type))
7373 && mclass == MODE_INT
7374 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7375 mode = word_mode;
7376
7377 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7378 regno = SPARC_FP_ARG_FIRST;
7379 else
7380 regno = regbase;
7381
7382 return gen_rtx_REG (mode, regno);
7383 }
7384
7385 /* Handle TARGET_FUNCTION_VALUE.
7386 On the SPARC, the value is found in the first "output" register, but the
7387 called function leaves it in the first "input" register. */
7388
7389 static rtx
7390 sparc_function_value (const_tree valtype,
7391 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7392 bool outgoing)
7393 {
7394 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7395 }
7396
7397 /* Handle TARGET_LIBCALL_VALUE. */
7398
7399 static rtx
7400 sparc_libcall_value (machine_mode mode,
7401 const_rtx fun ATTRIBUTE_UNUSED)
7402 {
7403 return sparc_function_value_1 (NULL_TREE, mode, false);
7404 }
7405
7406 /* Handle FUNCTION_VALUE_REGNO_P.
7407 On the SPARC, the first "output" reg is used for integer values, and the
7408 first floating point register is used for floating point values. */
7409
7410 static bool
7411 sparc_function_value_regno_p (const unsigned int regno)
7412 {
7413 return (regno == 8 || (TARGET_FPU && regno == 32));
7414 }
7415
7416 /* Do what is necessary for `va_start'. We look at the current function
7417 to determine if stdarg or varargs is used and return the address of
7418 the first unnamed parameter. */
7419
7420 static rtx
7421 sparc_builtin_saveregs (void)
7422 {
7423 int first_reg = crtl->args.info.words;
7424 rtx address;
7425 int regno;
7426
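/* Dump every integer argument register that was not used for named
   parameters into its reserved slot in the parameter save area, so
   that va_arg can later walk all arguments in memory.  */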
7427 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7428 emit_move_insn (gen_rtx_MEM (word_mode,
7429 gen_rtx_PLUS (Pmode,
7430 frame_pointer_rtx,
7431 GEN_INT (FIRST_PARM_OFFSET (0)
7432 + (UNITS_PER_WORD
7433 * regno)))),
7434 gen_rtx_REG (word_mode,
7435 SPARC_INCOMING_INT_ARG_FIRST + regno));
7436
7437 address = gen_rtx_PLUS (Pmode,
7438 frame_pointer_rtx,
7439 GEN_INT (FIRST_PARM_OFFSET (0)
7440 + UNITS_PER_WORD * first_reg));
7441
7442 return address;
7443 }
7444
7445 /* Implement `va_start' for stdarg. */
7446
7447 static void
7448 sparc_va_start (tree valist, rtx nextarg)
7449 {
7450 nextarg = expand_builtin_saveregs ();
7451 std_expand_builtin_va_start (valist, nextarg);
7452 }
7453
7454 /* Implement `va_arg' for stdarg. */
7455
7456 static tree
7457 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7458 gimple_seq *post_p)
7459 {
7460 HOST_WIDE_INT size, rsize, align;
7461 tree addr, incr;
7462 bool indirect;
7463 tree ptrtype = build_pointer_type (type);
7464
7465 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7466 {
7467 indirect = true;
7468 size = rsize = UNITS_PER_WORD;
7469 align = 0;
7470 }
7471 else
7472 {
7473 indirect = false;
7474 size = int_size_in_bytes (type);
7475 rsize = ROUND_UP (size, UNITS_PER_WORD);
7476 align = 0;
7477
7478 if (TARGET_ARCH64)
7479 {
7480 /* For SPARC64, objects requiring 16-byte alignment get it. */
7481 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7482 align = 2 * UNITS_PER_WORD;
7483
7484 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7485 are left-justified in their slots. */
7486 if (AGGREGATE_TYPE_P (type))
7487 {
7488 if (size == 0)
7489 size = rsize = UNITS_PER_WORD;
7490 else
7491 size = rsize;
7492 }
7493 }
7494 }
7495
7496 incr = valist;
7497 if (align)
7498 {
7499 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7500 incr = fold_convert (sizetype, incr);
7501 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7502 size_int (-align));
7503 incr = fold_convert (ptr_type_node, incr);
7504 }
7505
7506 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7507 addr = incr;
7508
7509 if (BYTES_BIG_ENDIAN && size < rsize)
7510 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7511
7512 if (indirect)
7513 {
7514 addr = fold_convert (build_pointer_type (ptrtype), addr);
7515 addr = build_va_arg_indirect_ref (addr);
7516 }
7517
7518 /* If the address isn't aligned properly for the type, we need a temporary.
7519 FIXME: This is inefficient; usually we can do this in registers. */
7520 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7521 {
7522 tree tmp = create_tmp_var (type, "va_arg_tmp");
7523 tree dest_addr = build_fold_addr_expr (tmp);
7524 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7525 3, dest_addr, addr, size_int (rsize));
7526 TREE_ADDRESSABLE (tmp) = 1;
7527 gimplify_and_add (copy, pre_p);
7528 addr = dest_addr;
7529 }
7530
7531 else
7532 addr = fold_convert (ptrtype, addr);
7533
7534 incr = fold_build_pointer_plus_hwi (incr, rsize);
7535 gimplify_assign (valist, incr, post_p);
7536
7537 return build_va_arg_indirect_ref (addr);
7538 }
7539 \f
7540 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7541 Specify whether the vector mode is supported by the hardware. */
7542
7543 static bool
7544 sparc_vector_mode_supported_p (machine_mode mode)
7545 {
7546 return TARGET_VIS && VECTOR_MODE_P (mode);
7547 }
7548 \f
7549 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7550
7551 static machine_mode
7552 sparc_preferred_simd_mode (machine_mode mode)
7553 {
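/* For example, with VIS enabled a loop over 8-bit elements would be
   vectorized using V8QImode (eight bytes per vector); without VIS we
   fall back to word_mode, i.e. no vectorization.  */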
7554 if (TARGET_VIS)
7555 switch (mode)
7556 {
7557 case SImode:
7558 return V2SImode;
7559 case HImode:
7560 return V4HImode;
7561 case QImode:
7562 return V8QImode;
7563
7564 default:;
7565 }
7566
7567 return word_mode;
7568 }
7569 \f
7570 /* Return the string to output an unconditional branch to LABEL, which is
7571 the operand number of the label.
7572
7573 DEST is the destination insn (i.e. the label), INSN is the source. */
7574
7575 const char *
7576 output_ubranch (rtx dest, rtx_insn *insn)
7577 {
7578 static char string[64];
7579 bool v9_form = false;
7580 int delta;
7581 char *p;
7582
7583 /* Even if we are trying to use cbcond for this, evaluate
7584 whether we can use V9 branches as our backup plan. */
7585
7586 delta = 5000000;
7587 if (INSN_ADDRESSES_SET_P ())
7588 delta = (INSN_ADDRESSES (INSN_UID (dest))
7589 - INSN_ADDRESSES (INSN_UID (insn)));
7590
7591 /* Leave some instructions for "slop". */
7592 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7593 v9_form = true;
7594
7595 if (TARGET_CBCOND)
7596 {
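/* The unconditional branch is synthesized as a compare-and-branch
   whose condition is always true: cwbe %g0, %g0, <label>.  cbcond has
   a much shorter reach than a regular branch, hence the far/backup
   handling below.  */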
7597 bool emit_nop = emit_cbcond_nop (insn);
7598 bool far = false;
7599 const char *rval;
7600
7601 if (delta < -500 || delta > 500)
7602 far = true;
7603
7604 if (far)
7605 {
7606 if (v9_form)
7607 rval = "ba,a,pt\t%%xcc, %l0";
7608 else
7609 rval = "b,a\t%l0";
7610 }
7611 else
7612 {
7613 if (emit_nop)
7614 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7615 else
7616 rval = "cwbe\t%%g0, %%g0, %l0";
7617 }
7618 return rval;
7619 }
7620
7621 if (v9_form)
7622 strcpy (string, "ba%*,pt\t%%xcc, ");
7623 else
7624 strcpy (string, "b%*\t");
7625
7626 p = strchr (string, '\0');
7627 *p++ = '%';
7628 *p++ = 'l';
7629 *p++ = '0';
7630 *p++ = '%';
7631 *p++ = '(';
7632 *p = '\0';
7633
7634 return string;
7635 }
7636
7637 /* Return the string to output a conditional branch to LABEL, which is
7638 the operand number of the label. OP is the conditional expression.
7639 XEXP (OP, 0) is assumed to be a condition code register (integer or
7640 floating point) and its mode specifies what kind of comparison we made.
7641
7642 DEST is the destination insn (i.e. the label), INSN is the source.
7643
7644 REVERSED is nonzero if we should reverse the sense of the comparison.
7645
7646 ANNUL is nonzero if we should generate an annulling branch. */
7647
7648 const char *
7649 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7650 rtx_insn *insn)
7651 {
7652 static char string[64];
7653 enum rtx_code code = GET_CODE (op);
7654 rtx cc_reg = XEXP (op, 0);
7655 machine_mode mode = GET_MODE (cc_reg);
7656 const char *labelno, *branch;
7657 int spaces = 8, far;
7658 char *p;
7659
7660 /* v9 branches are limited to +-1MB. If it is too far away,
7661 change
7662
7663 bne,pt %xcc, .LC30
7664
7665 to
7666
7667 be,pn %xcc, .+12
7668 nop
7669 ba .LC30
7670
7671 and
7672
7673 fbne,a,pn %fcc2, .LC29
7674
7675 to
7676
7677 fbe,pt %fcc2, .+16
7678 nop
7679 ba .LC29 */
7680
7681 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7682 if (reversed ^ far)
7683 {
7684 /* Reversal of FP compares takes care -- an ordered compare
7685 becomes an unordered compare and vice versa. */
7686 if (mode == CCFPmode || mode == CCFPEmode)
7687 code = reverse_condition_maybe_unordered (code);
7688 else
7689 code = reverse_condition (code);
7690 }
7691
7692 /* Start by writing the branch condition. */
7693 if (mode == CCFPmode || mode == CCFPEmode)
7694 {
7695 switch (code)
7696 {
7697 case NE:
7698 branch = "fbne";
7699 break;
7700 case EQ:
7701 branch = "fbe";
7702 break;
7703 case GE:
7704 branch = "fbge";
7705 break;
7706 case GT:
7707 branch = "fbg";
7708 break;
7709 case LE:
7710 branch = "fble";
7711 break;
7712 case LT:
7713 branch = "fbl";
7714 break;
7715 case UNORDERED:
7716 branch = "fbu";
7717 break;
7718 case ORDERED:
7719 branch = "fbo";
7720 break;
7721 case UNGT:
7722 branch = "fbug";
7723 break;
7724 case UNLT:
7725 branch = "fbul";
7726 break;
7727 case UNEQ:
7728 branch = "fbue";
7729 break;
7730 case UNGE:
7731 branch = "fbuge";
7732 break;
7733 case UNLE:
7734 branch = "fbule";
7735 break;
7736 case LTGT:
7737 branch = "fblg";
7738 break;
7739 default:
7740 gcc_unreachable ();
7741 }
7742
7743 /* ??? !v9: FP branches cannot be preceded by another floating point
7744 insn. Because there is currently no concept of pre-delay slots,
7745 we can fix this only by always emitting a nop before a floating
7746 point branch. */
7747
7748 string[0] = '\0';
7749 if (! TARGET_V9)
7750 strcpy (string, "nop\n\t");
7751 strcat (string, branch);
7752 }
7753 else
7754 {
7755 switch (code)
7756 {
7757 case NE:
7758 if (mode == CCVmode || mode == CCXVmode)
7759 branch = "bvs";
7760 else
7761 branch = "bne";
7762 break;
7763 case EQ:
7764 if (mode == CCVmode || mode == CCXVmode)
7765 branch = "bvc";
7766 else
7767 branch = "be";
7768 break;
7769 case GE:
7770 if (mode == CCNZmode || mode == CCXNZmode)
7771 branch = "bpos";
7772 else
7773 branch = "bge";
7774 break;
7775 case GT:
7776 branch = "bg";
7777 break;
7778 case LE:
7779 branch = "ble";
7780 break;
7781 case LT:
7782 if (mode == CCNZmode || mode == CCXNZmode)
7783 branch = "bneg";
7784 else
7785 branch = "bl";
7786 break;
7787 case GEU:
7788 branch = "bgeu";
7789 break;
7790 case GTU:
7791 branch = "bgu";
7792 break;
7793 case LEU:
7794 branch = "bleu";
7795 break;
7796 case LTU:
7797 branch = "blu";
7798 break;
7799 default:
7800 gcc_unreachable ();
7801 }
7802 strcpy (string, branch);
7803 }
7804 spaces -= strlen (branch);
7805 p = strchr (string, '\0');
7806
7807 /* Now add the annulling, the label, and a possible noop. */
7808 if (annul && ! far)
7809 {
7810 strcpy (p, ",a");
7811 p += 2;
7812 spaces -= 2;
7813 }
7814
7815 if (TARGET_V9)
7816 {
7817 rtx note;
7818 int v8 = 0;
7819
7820 if (! far && insn && INSN_ADDRESSES_SET_P ())
7821 {
7822 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7823 - INSN_ADDRESSES (INSN_UID (insn)));
7824 /* Leave some instructions for "slop". */
7825 if (delta < -260000 || delta >= 260000)
7826 v8 = 1;
7827 }
7828
7829 switch (mode)
7830 {
7831 case CCmode:
7832 case CCNZmode:
7833 case CCCmode:
7834 case CCVmode:
7835 labelno = "%%icc, ";
7836 if (v8)
7837 labelno = "";
7838 break;
7839 case CCXmode:
7840 case CCXNZmode:
7841 case CCXCmode:
7842 case CCXVmode:
7843 labelno = "%%xcc, ";
7844 gcc_assert (!v8);
7845 break;
7846 case CCFPmode:
7847 case CCFPEmode:
7848 {
7849 static char v9_fcc_labelno[] = "%%fccX, ";
7850 /* Set the char indicating the number of the fcc reg to use. */
7851 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7852 labelno = v9_fcc_labelno;
7853 if (v8)
7854 {
7855 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7856 labelno = "";
7857 }
7858 }
7859 break;
7860 default:
7861 gcc_unreachable ();
7862 }
7863
7864 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7865 {
7866 strcpy (p,
7867 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7868 ? ",pt" : ",pn");
7869 p += 3;
7870 spaces -= 3;
7871 }
7872 }
7873 else
7874 labelno = "";
7875
7876 if (spaces > 0)
7877 *p++ = '\t';
7878 else
7879 *p++ = ' ';
7880 strcpy (p, labelno);
7881 p = strchr (p, '\0');
7882 if (far)
7883 {
7884 strcpy (p, ".+12\n\t nop\n\tb\t");
7885 /* Skip the next insn if requested or
7886 if we know that it will be a nop. */
7887 if (annul || ! final_sequence)
7888 p[3] = '6';
7889 p += 14;
7890 }
7891 *p++ = '%';
7892 *p++ = 'l';
7893 *p++ = label + '0';
7894 *p++ = '%';
7895 *p++ = '#';
7896 *p = '\0';
7897
7898 return string;
7899 }
7900
7901 /* Emit a library call comparison between floating point X and Y.
7902 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7903 Return the new operator to be used in the comparison sequence.
7904
7905 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7906 values as arguments instead of the TFmode registers themselves,
7907 which is why we cannot call emit_float_lib_cmp. */
7908
7909 rtx
7910 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7911 {
7912 const char *qpfunc;
7913 rtx slot0, slot1, result, tem, tem2, libfunc;
7914 machine_mode mode;
7915 enum rtx_code new_comparison;
7916
7917 switch (comparison)
7918 {
7919 case EQ:
7920 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7921 break;
7922
7923 case NE:
7924 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7925 break;
7926
7927 case GT:
7928 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7929 break;
7930
7931 case GE:
7932 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7933 break;
7934
7935 case LT:
7936 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7937 break;
7938
7939 case LE:
7940 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7941 break;
7942
7943 case ORDERED:
7944 case UNORDERED:
7945 case UNGT:
7946 case UNLT:
7947 case UNEQ:
7948 case UNGE:
7949 case UNLE:
7950 case LTGT:
7951 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7952 break;
7953
7954 default:
7955 gcc_unreachable ();
7956 }
7957
7958 if (TARGET_ARCH64)
7959 {
7960 if (MEM_P (x))
7961 {
7962 tree expr = MEM_EXPR (x);
7963 if (expr)
7964 mark_addressable (expr);
7965 slot0 = x;
7966 }
7967 else
7968 {
7969 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7970 emit_move_insn (slot0, x);
7971 }
7972
7973 if (MEM_P (y))
7974 {
7975 tree expr = MEM_EXPR (y);
7976 if (expr)
7977 mark_addressable (expr);
7978 slot1 = y;
7979 }
7980 else
7981 {
7982 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7983 emit_move_insn (slot1, y);
7984 }
7985
7986 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7987 emit_library_call (libfunc, LCT_NORMAL,
7988 DImode, 2,
7989 XEXP (slot0, 0), Pmode,
7990 XEXP (slot1, 0), Pmode);
7991 mode = DImode;
7992 }
7993 else
7994 {
7995 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7996 emit_library_call (libfunc, LCT_NORMAL,
7997 SImode, 2,
7998 x, TFmode, y, TFmode);
7999 mode = SImode;
8000 }
8001
8002
8003 /* Immediately move the result of the libcall into a pseudo
8004 register so reload doesn't clobber the value if it needs
8005 the return register for a spill reg. */
8006 result = gen_reg_rtx (mode);
8007 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8008
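/* For the *_cmp functions, the code below relies on the usual Sun
   encoding of the result: 0 = equal, 1 = less, 2 = greater and
   3 = unordered.  */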
8009 switch (comparison)
8010 {
8011 default:
8012 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8013 case ORDERED:
8014 case UNORDERED:
8015 new_comparison = (comparison == UNORDERED ? EQ : NE);
8016 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8017 case UNGT:
8018 case UNGE:
8019 new_comparison = (comparison == UNGT ? GT : NE);
8020 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8021 case UNLE:
8022 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8023 case UNLT:
8024 tem = gen_reg_rtx (mode);
8025 if (TARGET_ARCH32)
8026 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8027 else
8028 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8029 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8030 case UNEQ:
8031 case LTGT:
8032 tem = gen_reg_rtx (mode);
8033 if (TARGET_ARCH32)
8034 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8035 else
8036 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8037 tem2 = gen_reg_rtx (mode);
8038 if (TARGET_ARCH32)
8039 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8040 else
8041 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8042 new_comparison = (comparison == UNEQ ? EQ : NE);
8043 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8044 }
8045
8046 gcc_unreachable ();
8047 }
8048
8049 /* Generate an unsigned DImode to FP conversion. This is the same code
8050 optabs would emit if we didn't have TFmode patterns. */
8051
8052 void
8053 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8054 {
8055 rtx i0, i1, f0, in, out;
8056
8057 out = operands[0];
8058 in = force_reg (DImode, operands[1]);
8059 rtx_code_label *neglab = gen_label_rtx ();
8060 rtx_code_label *donelab = gen_label_rtx ();
8061 i0 = gen_reg_rtx (DImode);
8062 i1 = gen_reg_rtx (DImode);
8063 f0 = gen_reg_rtx (mode);
8064
8065 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8066
8067 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8068 emit_jump_insn (gen_jump (donelab));
8069 emit_barrier ();
8070
8071 emit_label (neglab);
8072
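/* IN has its most significant bit set, so it cannot be converted as a
   signed value directly.  Halve it, folding the discarded low bit back
   in so the final rounding is unaffected, convert, then double the
   result.  */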
8073 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8074 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8075 emit_insn (gen_iordi3 (i0, i0, i1));
8076 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8077 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8078
8079 emit_label (donelab);
8080 }
8081
8082 /* Generate an FP to unsigned DImode conversion. This is the same code
8083 optabs would emit if we didn't have TFmode patterns. */
8084
8085 void
8086 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8087 {
8088 rtx i0, i1, f0, in, out, limit;
8089
8090 out = operands[0];
8091 in = force_reg (mode, operands[1]);
8092 rtx_code_label *neglab = gen_label_rtx ();
8093 rtx_code_label *donelab = gen_label_rtx ();
8094 i0 = gen_reg_rtx (DImode);
8095 i1 = gen_reg_rtx (DImode);
8096 limit = gen_reg_rtx (mode);
8097 f0 = gen_reg_rtx (mode);
8098
8099 emit_move_insn (limit,
8100 const_double_from_real_value (
8101 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8102 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8103
8104 emit_insn (gen_rtx_SET (out,
8105 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8106 emit_jump_insn (gen_jump (donelab));
8107 emit_barrier ();
8108
8109 emit_label (neglab);
8110
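/* IN is at least 2^63, which would overflow a signed conversion.
   Subtract 2^63, convert the difference, then restore the top bit by
   XOR-ing the result with 1 << 63.  */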
8111 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8112 emit_insn (gen_rtx_SET (i0,
8113 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8114 emit_insn (gen_movdi (i1, const1_rtx));
8115 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8116 emit_insn (gen_xordi3 (out, i0, i1));
8117
8118 emit_label (donelab);
8119 }
8120
8121 /* Return the string to output a compare and branch instruction to DEST.
8122 DEST is the destination insn (i.e. the label), INSN is the source,
8123 and OP is the conditional expression. */
8124
8125 const char *
8126 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8127 {
8128 machine_mode mode = GET_MODE (XEXP (op, 0));
8129 enum rtx_code code = GET_CODE (op);
8130 const char *cond_str, *tmpl;
8131 int far, emit_nop, len;
8132 static char string[64];
8133 char size_char;
8134
8135 /* Compare and Branch is limited to +-2KB. If it is too far away,
8136 change
8137
8138 cxbne X, Y, .LC30
8139
8140 to
8141
8142 cxbe X, Y, .+16
8143 nop
8144 ba,pt xcc, .LC30
8145 nop */
8146
8147 len = get_attr_length (insn);
8148
8149 far = len == 4;
8150 emit_nop = len == 2;
8151
8152 if (far)
8153 code = reverse_condition (code);
8154
8155 size_char = ((mode == SImode) ? 'w' : 'x');
8156
8157 switch (code)
8158 {
8159 case NE:
8160 cond_str = "ne";
8161 break;
8162
8163 case EQ:
8164 cond_str = "e";
8165 break;
8166
8167 case GE:
8168 cond_str = "ge";
8169 break;
8170
8171 case GT:
8172 cond_str = "g";
8173 break;
8174
8175 case LE:
8176 cond_str = "le";
8177 break;
8178
8179 case LT:
8180 cond_str = "l";
8181 break;
8182
8183 case GEU:
8184 cond_str = "cc";
8185 break;
8186
8187 case GTU:
8188 cond_str = "gu";
8189 break;
8190
8191 case LEU:
8192 cond_str = "leu";
8193 break;
8194
8195 case LTU:
8196 cond_str = "cs";
8197 break;
8198
8199 default:
8200 gcc_unreachable ();
8201 }
8202
8203 if (far)
8204 {
8205 int veryfar = 1, delta;
8206
8207 if (INSN_ADDRESSES_SET_P ())
8208 {
8209 delta = (INSN_ADDRESSES (INSN_UID (dest))
8210 - INSN_ADDRESSES (INSN_UID (insn)));
8211 /* Leave some instructions for "slop". */
8212 if (delta >= -260000 && delta < 260000)
8213 veryfar = 0;
8214 }
8215
8216 if (veryfar)
8217 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8218 else
8219 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8220 }
8221 else
8222 {
8223 if (emit_nop)
8224 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8225 else
8226 tmpl = "c%cb%s\t%%1, %%2, %%3";
8227 }
8228
8229 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8230
8231 return string;
8232 }
8233
8234 /* Return the string to output a conditional branch to LABEL, testing
8235 register REG. LABEL is the operand number of the label; REG is the
8236 operand number of the reg. OP is the conditional expression. The mode
8237 of REG says what kind of comparison we made.
8238
8239 DEST is the destination insn (i.e. the label), INSN is the source.
8240
8241 REVERSED is nonzero if we should reverse the sense of the comparison.
8242
8243 ANNUL is nonzero if we should generate an annulling branch. */
8244
8245 const char *
8246 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8247 int annul, rtx_insn *insn)
8248 {
8249 static char string[64];
8250 enum rtx_code code = GET_CODE (op);
8251 machine_mode mode = GET_MODE (XEXP (op, 0));
8252 rtx note;
8253 int far;
8254 char *p;
8255
8256 /* Branches on registers are limited to +-128KB. If it is too far away,
8257 change
8258
8259 brnz,pt %g1, .LC30
8260
8261 to
8262
8263 brz,pn %g1, .+12
8264 nop
8265 ba,pt %xcc, .LC30
8266
8267 and
8268
8269 brgez,a,pn %o1, .LC29
8270
8271 to
8272
8273 brlz,pt %o1, .+16
8274 nop
8275 ba,pt %xcc, .LC29 */
8276
8277 far = get_attr_length (insn) >= 3;
8278
8279 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8280 if (reversed ^ far)
8281 code = reverse_condition (code);
8282
8283 /* Only 64 bit versions of these instructions exist. */
8284 gcc_assert (mode == DImode);
8285
8286 /* Start by writing the branch condition. */
8287
8288 switch (code)
8289 {
8290 case NE:
8291 strcpy (string, "brnz");
8292 break;
8293
8294 case EQ:
8295 strcpy (string, "brz");
8296 break;
8297
8298 case GE:
8299 strcpy (string, "brgez");
8300 break;
8301
8302 case LT:
8303 strcpy (string, "brlz");
8304 break;
8305
8306 case LE:
8307 strcpy (string, "brlez");
8308 break;
8309
8310 case GT:
8311 strcpy (string, "brgz");
8312 break;
8313
8314 default:
8315 gcc_unreachable ();
8316 }
8317
8318 p = strchr (string, '\0');
8319
8320 /* Now add the annulling, reg, label, and nop. */
8321 if (annul && ! far)
8322 {
8323 strcpy (p, ",a");
8324 p += 2;
8325 }
8326
8327 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8328 {
8329 strcpy (p,
8330 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8331 ? ",pt" : ",pn");
8332 p += 3;
8333 }
8334
8335 *p = p < string + 8 ? '\t' : ' ';
8336 p++;
8337 *p++ = '%';
8338 *p++ = '0' + reg;
8339 *p++ = ',';
8340 *p++ = ' ';
8341 if (far)
8342 {
8343 int veryfar = 1, delta;
8344
8345 if (INSN_ADDRESSES_SET_P ())
8346 {
8347 delta = (INSN_ADDRESSES (INSN_UID (dest))
8348 - INSN_ADDRESSES (INSN_UID (insn)));
8349 /* Leave some instructions for "slop". */
8350 if (delta >= -260000 && delta < 260000)
8351 veryfar = 0;
8352 }
8353
8354 strcpy (p, ".+12\n\t nop\n\t");
8355 /* Skip the next insn if requested or
8356 if we know that it will be a nop. */
8357 if (annul || ! final_sequence)
8358 p[3] = '6';
8359 p += 12;
8360 if (veryfar)
8361 {
8362 strcpy (p, "b\t");
8363 p += 2;
8364 }
8365 else
8366 {
8367 strcpy (p, "ba,pt\t%%xcc, ");
8368 p += 13;
8369 }
8370 }
8371 *p++ = '%';
8372 *p++ = 'l';
8373 *p++ = '0' + label;
8374 *p++ = '%';
8375 *p++ = '#';
8376 *p = '\0';
8377
8378 return string;
8379 }
8380
8381 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8382 Such instructions cannot be used in the delay slot of a return insn on v9.
8383 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8384 counterparts. */
8385
8386 static int
8387 epilogue_renumber (register rtx *where, int test)
8388 {
8389 register const char *fmt;
8390 register int i;
8391 register enum rtx_code code;
8392
8393 if (*where == 0)
8394 return 0;
8395
8396 code = GET_CODE (*where);
8397
8398 switch (code)
8399 {
8400 case REG:
8401 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8402 return 1;
8403 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8404 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
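/* E.g. %i0 (hard reg 24) is rewritten as %o0 (hard reg 8): OUTGOING_REGNO
   maps each input register to the output register it becomes once the
   register window has been restored by the return.  */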
8405 /* fallthrough */
8406 case SCRATCH:
8407 case CC0:
8408 case PC:
8409 case CONST_INT:
8410 case CONST_WIDE_INT:
8411 case CONST_DOUBLE:
8412 return 0;
8413
8414 /* Do not replace the frame pointer with the stack pointer because
8415 it can cause the delayed instruction to load below the stack.
8416 This occurs when instructions like:
8417
8418 (set (reg/i:SI 24 %i0)
8419 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8420 (const_int -20 [0xffffffec])) 0))
8421
8422 are in the return delay slot. */
8423 case PLUS:
8424 if (GET_CODE (XEXP (*where, 0)) == REG
8425 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8426 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8427 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8428 return 1;
8429 break;
8430
8431 case MEM:
8432 if (SPARC_STACK_BIAS
8433 && GET_CODE (XEXP (*where, 0)) == REG
8434 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8435 return 1;
8436 break;
8437
8438 default:
8439 break;
8440 }
8441
8442 fmt = GET_RTX_FORMAT (code);
8443
8444 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8445 {
8446 if (fmt[i] == 'E')
8447 {
8448 register int j;
8449 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8450 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8451 return 1;
8452 }
8453 else if (fmt[i] == 'e'
8454 && epilogue_renumber (&(XEXP (*where, i)), test))
8455 return 1;
8456 }
8457 return 0;
8458 }
8459 \f
8460 /* Leaf functions and non-leaf functions have different needs. */
8461
8462 static const int
8463 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8464
8465 static const int
8466 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8467
8468 static const int *const reg_alloc_orders[] = {
8469 reg_leaf_alloc_order,
8470 reg_nonleaf_alloc_order};
8471
8472 void
8473 order_regs_for_local_alloc (void)
8474 {
8475 static int last_order_nonleaf = 1;
8476
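/* Hard register 15 is %o7, the register that a call clobbers with the
   return address, so its liveness is used here as a proxy for "this
   function is not a leaf".  */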
8477 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8478 {
8479 last_order_nonleaf = !last_order_nonleaf;
8480 memcpy ((char *) reg_alloc_order,
8481 (const char *) reg_alloc_orders[last_order_nonleaf],
8482 FIRST_PSEUDO_REGISTER * sizeof (int));
8483 }
8484 }
8485 \f
8486 /* Return 1 if REG and MEM are legitimate enough to allow the various
8487 mem<-->reg splits to be run. */
8488
8489 int
8490 sparc_splitdi_legitimate (rtx reg, rtx mem)
8491 {
8492 /* Punt if we are here by mistake. */
8493 gcc_assert (reload_completed);
8494
8495 /* We must have an offsettable memory reference. */
8496 if (! offsettable_memref_p (mem))
8497 return 0;
8498
8499 /* If we have legitimate args for ldd/std, we do not want
8500 the split to happen. */
8501 if ((REGNO (reg) % 2) == 0
8502 && mem_min_alignment (mem, 8))
8503 return 0;
8504
8505 /* Success. */
8506 return 1;
8507 }
8508
8509 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8510
8511 int
8512 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8513 {
8514 int regno1, regno2;
8515
8516 if (GET_CODE (reg1) == SUBREG)
8517 reg1 = SUBREG_REG (reg1);
8518 if (GET_CODE (reg1) != REG)
8519 return 0;
8520 regno1 = REGNO (reg1);
8521
8522 if (GET_CODE (reg2) == SUBREG)
8523 reg2 = SUBREG_REG (reg2);
8524 if (GET_CODE (reg2) != REG)
8525 return 0;
8526 regno2 = REGNO (reg2);
8527
8528 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8529 return 1;
8530
8531 if (TARGET_VIS3)
8532 {
8533 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8534 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8535 return 1;
8536 }
8537
8538 return 0;
8539 }
8540
8541 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8542 This makes them candidates for using ldd and std insns.
8543
8544 Note reg1 and reg2 *must* be hard registers. */
8545
8546 int
8547 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8548 {
8549 /* We might have been passed a SUBREG. */
8550 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8551 return 0;
8552
8553 if (REGNO (reg1) % 2 != 0)
8554 return 0;
8555
8556 /* Integer ldd is deprecated in SPARC V9. */
8557 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8558 return 0;
8559
8560 return (REGNO (reg1) == REGNO (reg2) - 1);
8561 }
8562
8563 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8564 an ldd or std insn.
8565
8566 This can only happen when addr1 and addr2, the addresses in mem1
8567 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8568 addr1 must also be aligned on a 64-bit boundary.
8569
8570 Also, if dependent_reg_rtx is not null, it should not be used to
8571 compute the address for mem1, i.e. we cannot optimize a sequence
8572 like:
8573 ld [%o0], %o0
8574 ld [%o0 + 4], %o1
8575 to
8576 ldd [%o0], %o0
8577 nor:
8578 ld [%g3 + 4], %g3
8579 ld [%g3], %g2
8580 to
8581 ldd [%g3], %g2
8582
8583 But, note that the transformation from:
8584 ld [%g2 + 4], %g3
8585 ld [%g2], %g2
8586 to
8587 ldd [%g2], %g2
8588 is perfectly fine. Thus, the peephole2 patterns always pass us
8589 the destination register of the first load, never the second one.
8590
8591 For stores we don't have a similar problem, so dependent_reg_rtx is
8592 NULL_RTX. */
8593
8594 int
8595 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8596 {
8597 rtx addr1, addr2;
8598 unsigned int reg1;
8599 HOST_WIDE_INT offset1;
8600
8601 /* The mems cannot be volatile. */
8602 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8603 return 0;
8604
8605 /* MEM1 should be aligned on a 64-bit boundary. */
8606 if (MEM_ALIGN (mem1) < 64)
8607 return 0;
8608
8609 addr1 = XEXP (mem1, 0);
8610 addr2 = XEXP (mem2, 0);
8611
8612 /* Extract a register number and offset (if used) from the first addr. */
8613 if (GET_CODE (addr1) == PLUS)
8614 {
8615 /* If not a REG, return zero. */
8616 if (GET_CODE (XEXP (addr1, 0)) != REG)
8617 return 0;
8618 else
8619 {
8620 reg1 = REGNO (XEXP (addr1, 0));
8621 /* The offset must be constant! */
8622 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8623 return 0;
8624 offset1 = INTVAL (XEXP (addr1, 1));
8625 }
8626 }
8627 else if (GET_CODE (addr1) != REG)
8628 return 0;
8629 else
8630 {
8631 reg1 = REGNO (addr1);
8632 /* This was a simple (mem (reg)) expression. Offset is 0. */
8633 offset1 = 0;
8634 }
8635
8636 /* Make sure the second address is of the form (plus (reg) (const_int)). */
8637 if (GET_CODE (addr2) != PLUS)
8638 return 0;
8639
8640 if (GET_CODE (XEXP (addr2, 0)) != REG
8641 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8642 return 0;
8643
8644 if (reg1 != REGNO (XEXP (addr2, 0)))
8645 return 0;
8646
8647 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8648 return 0;
8649
8650 /* The first offset must be evenly divisible by 8 to ensure the
8651 address is 64 bit aligned. */
8652 if (offset1 % 8 != 0)
8653 return 0;
8654
8655 /* The offset for the second addr must be 4 more than the first addr. */
8656 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8657 return 0;
8658
8659 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8660 instructions. */
8661 return 1;
8662 }
8663
8664 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8665
8666 rtx
8667 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8668 {
8669 rtx x = widen_memory_access (mem1, mode, 0);
8670 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8671 return x;
8672 }
8673
8674 /* Return 1 if reg is a pseudo, or is the first register in
8675 a hard register pair. This makes it suitable for use in
8676 ldd and std insns. */
8677
8678 int
8679 register_ok_for_ldd (rtx reg)
8680 {
8681 /* We might have been passed a SUBREG. */
8682 if (!REG_P (reg))
8683 return 0;
8684
8685 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8686 return (REGNO (reg) % 2 == 0);
8687
8688 return 1;
8689 }
8690
8691 /* Return 1 if OP, a MEM, has an address which is known to be
8692 aligned to an 8-byte boundary. */
8693
8694 int
8695 memory_ok_for_ldd (rtx op)
8696 {
8697 /* In 64-bit mode, we assume that the address is word-aligned. */
8698 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8699 return 0;
8700
8701 if (! can_create_pseudo_p ()
8702 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8703 return 0;
8704
8705 return 1;
8706 }
8707 \f
8708 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8709
8710 static bool
8711 sparc_print_operand_punct_valid_p (unsigned char code)
8712 {
8713 if (code == '#'
8714 || code == '*'
8715 || code == '('
8716 || code == ')'
8717 || code == '_'
8718 || code == '&')
8719 return true;
8720
8721 return false;
8722 }
8723
8724 /* Implement TARGET_PRINT_OPERAND.
8725 Print operand X (an rtx) in assembler syntax to file FILE.
8726 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8727 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8728
8729 static void
8730 sparc_print_operand (FILE *file, rtx x, int code)
8731 {
8732 const char *s;
8733
8734 switch (code)
8735 {
8736 case '#':
8737 /* Output an insn in a delay slot. */
8738 if (final_sequence)
8739 sparc_indent_opcode = 1;
8740 else
8741 fputs ("\n\t nop", file);
8742 return;
8743 case '*':
8744 /* Output an annul flag if there's nothing for the delay slot and we
8745 are optimizing. This is always used with '(' below.
8746 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8747 this is a dbx bug. So, we only do this when optimizing.
8748 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8749 Always emit a nop in case the next instruction is a branch. */
8750 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8751 fputs (",a", file);
8752 return;
8753 case '(':
8754 /* Output a 'nop' if there's nothing for the delay slot and we are
8755 not optimizing. This is always used with '*' above. */
8756 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8757 fputs ("\n\t nop", file);
8758 else if (final_sequence)
8759 sparc_indent_opcode = 1;
8760 return;
8761 case ')':
8762 /* Output the right displacement from the saved PC on function return.
8763 The caller may have placed an "unimp" insn immediately after the call
8764 so we have to account for it. This insn is used in the 32-bit ABI
8765 when calling a function that returns a non-zero-sized structure. The
8766 64-bit ABI doesn't have it. Be careful to have this test be the same
8767 as that for the call. The exception is when sparc_std_struct_return
8768 is enabled, the psABI is followed exactly and the adjustment is made
8769 by the code in sparc_struct_value_rtx. The call emitted is the same
8770 when sparc_std_struct_return is enabled. */
8771 if (!TARGET_ARCH64
8772 && cfun->returns_struct
8773 && !sparc_std_struct_return
8774 && DECL_SIZE (DECL_RESULT (current_function_decl))
8775 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8776 == INTEGER_CST
8777 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8778 fputs ("12", file);
8779 else
8780 fputc ('8', file);
8781 return;
8782 case '_':
8783 /* Output the Embedded Medium/Anywhere code model base register. */
8784 fputs (EMBMEDANY_BASE_REG, file);
8785 return;
8786 case '&':
8787 /* Print some local dynamic TLS name. */
8788 if (const char *name = get_some_local_dynamic_name ())
8789 assemble_name (file, name);
8790 else
8791 output_operand_lossage ("'%%&' used without any "
8792 "local dynamic TLS references");
8793 return;
8794
8795 case 'Y':
8796 /* Adjust the operand to take into account a RESTORE operation. */
8797 if (GET_CODE (x) == CONST_INT)
8798 break;
8799 else if (GET_CODE (x) != REG)
8800 output_operand_lossage ("invalid %%Y operand");
8801 else if (REGNO (x) < 8)
8802 fputs (reg_names[REGNO (x)], file);
8803 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8804 fputs (reg_names[REGNO (x)-16], file);
8805 else
8806 output_operand_lossage ("invalid %%Y operand");
8807 return;
8808 case 'L':
8809 /* Print out the low order register name of a register pair. */
8810 if (WORDS_BIG_ENDIAN)
8811 fputs (reg_names[REGNO (x)+1], file);
8812 else
8813 fputs (reg_names[REGNO (x)], file);
8814 return;
8815 case 'H':
8816 /* Print out the high order register name of a register pair. */
8817 if (WORDS_BIG_ENDIAN)
8818 fputs (reg_names[REGNO (x)], file);
8819 else
8820 fputs (reg_names[REGNO (x)+1], file);
8821 return;
8822 case 'R':
8823 /* Print out the second register name of a register pair or quad.
8824 I.e., R (%o0) => %o1. */
8825 fputs (reg_names[REGNO (x)+1], file);
8826 return;
8827 case 'S':
8828 /* Print out the third register name of a register quad.
8829 I.e., S (%o0) => %o2. */
8830 fputs (reg_names[REGNO (x)+2], file);
8831 return;
8832 case 'T':
8833 /* Print out the fourth register name of a register quad.
8834 I.e., T (%o0) => %o3. */
8835 fputs (reg_names[REGNO (x)+3], file);
8836 return;
8837 case 'x':
8838 /* Print a condition code register. */
8839 if (REGNO (x) == SPARC_ICC_REG)
8840 {
8841 switch (GET_MODE (x))
8842 {
8843 case CCmode:
8844 case CCNZmode:
8845 case CCCmode:
8846 case CCVmode:
8847 s = "%icc";
8848 break;
8849 case CCXmode:
8850 case CCXNZmode:
8851 case CCXCmode:
8852 case CCXVmode:
8853 s = "%xcc";
8854 break;
8855 default:
8856 gcc_unreachable ();
8857 }
8858 fputs (s, file);
8859 }
8860 else
8861 /* %fccN register */
8862 fputs (reg_names[REGNO (x)], file);
8863 return;
8864 case 'm':
8865 /* Print the operand's address only. */
8866 output_address (GET_MODE (x), XEXP (x, 0));
8867 return;
8868 case 'r':
8869 /* In this case we need a register. Use %g0 if the
8870 operand is const0_rtx. */
8871 if (x == const0_rtx
8872 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8873 {
8874 fputs ("%g0", file);
8875 return;
8876 }
8877 else
8878 break;
8879
8880 case 'A':
8881 switch (GET_CODE (x))
8882 {
8883 case IOR:
8884 s = "or";
8885 break;
8886 case AND:
8887 s = "and";
8888 break;
8889 case XOR:
8890 s = "xor";
8891 break;
8892 default:
8893 output_operand_lossage ("invalid %%A operand");
8894 s = "";
8895 break;
8896 }
8897 fputs (s, file);
8898 return;
8899
8900 case 'B':
8901 switch (GET_CODE (x))
8902 {
8903 case IOR:
8904 s = "orn";
8905 break;
8906 case AND:
8907 s = "andn";
8908 break;
8909 case XOR:
8910 s = "xnor";
8911 break;
8912 default:
8913 output_operand_lossage ("invalid %%B operand");
8914 s = "";
8915 break;
8916 }
8917 fputs (s, file);
8918 return;
8919
8920 /* This is used by the conditional move instructions. */
8921 case 'C':
8922 {
8923 machine_mode mode = GET_MODE (XEXP (x, 0));
8924 switch (GET_CODE (x))
8925 {
8926 case NE:
8927 if (mode == CCVmode || mode == CCXVmode)
8928 s = "vs";
8929 else
8930 s = "ne";
8931 break;
8932 case EQ:
8933 if (mode == CCVmode || mode == CCXVmode)
8934 s = "vc";
8935 else
8936 s = "e";
8937 break;
8938 case GE:
8939 if (mode == CCNZmode || mode == CCXNZmode)
8940 s = "pos";
8941 else
8942 s = "ge";
8943 break;
8944 case GT:
8945 s = "g";
8946 break;
8947 case LE:
8948 s = "le";
8949 break;
8950 case LT:
8951 if (mode == CCNZmode || mode == CCXNZmode)
8952 s = "neg";
8953 else
8954 s = "l";
8955 break;
8956 case GEU:
8957 s = "geu";
8958 break;
8959 case GTU:
8960 s = "gu";
8961 break;
8962 case LEU:
8963 s = "leu";
8964 break;
8965 case LTU:
8966 s = "lu";
8967 break;
8968 case LTGT:
8969 s = "lg";
8970 break;
8971 case UNORDERED:
8972 s = "u";
8973 break;
8974 case ORDERED:
8975 s = "o";
8976 break;
8977 case UNLT:
8978 s = "ul";
8979 break;
8980 case UNLE:
8981 s = "ule";
8982 break;
8983 case UNGT:
8984 s = "ug";
8985 break;
8986 case UNGE:
8987 s = "uge";
8988 break;
8989 case UNEQ:
8990 s = "ue";
8991 break;
8992 default:
8993 output_operand_lossage ("invalid %%C operand");
8994 s = "";
8995 break;
8996 }
8997 fputs (s, file);
8998 return;
8999 }
9000
9001 /* This is used by the movr instruction pattern. */
9002 case 'D':
9003 {
9004 switch (GET_CODE (x))
9005 {
9006 case NE:
9007 s = "ne";
9008 break;
9009 case EQ:
9010 s = "e";
9011 break;
9012 case GE:
9013 s = "gez";
9014 break;
9015 case LT:
9016 s = "lz";
9017 break;
9018 case LE:
9019 s = "lez";
9020 break;
9021 case GT:
9022 s = "gz";
9023 break;
9024 default:
9025 output_operand_lossage ("invalid %%D operand");
9026 s = "";
9027 break;
9028 }
9029 fputs (s, file);
9030 return;
9031 }
9032
9033 case 'b':
9034 {
9035 /* Print a sign-extended character. */
9036 int i = trunc_int_for_mode (INTVAL (x), QImode);
9037 fprintf (file, "%d", i);
9038 return;
9039 }
9040
9041 case 'f':
9042 /* Operand must be a MEM; write its address. */
9043 if (GET_CODE (x) != MEM)
9044 output_operand_lossage ("invalid %%f operand");
9045 output_address (GET_MODE (x), XEXP (x, 0));
9046 return;
9047
9048 case 's':
9049 {
9050 /* Print a sign-extended 32-bit value. */
9051 HOST_WIDE_INT i;
9052 if (GET_CODE(x) == CONST_INT)
9053 i = INTVAL (x);
9054 else
9055 {
9056 output_operand_lossage ("invalid %%s operand");
9057 return;
9058 }
9059 i = trunc_int_for_mode (i, SImode);
9060 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9061 return;
9062 }
9063
9064 case 0:
9065 /* Do nothing special. */
9066 break;
9067
9068 default:
9069 /* Undocumented flag. */
9070 output_operand_lossage ("invalid operand output code");
9071 }
9072
9073 if (GET_CODE (x) == REG)
9074 fputs (reg_names[REGNO (x)], file);
9075 else if (GET_CODE (x) == MEM)
9076 {
9077 fputc ('[', file);
9078 /* Poor Sun assembler doesn't understand absolute addressing. */
9079 if (CONSTANT_P (XEXP (x, 0)))
9080 fputs ("%g0+", file);
9081 output_address (GET_MODE (x), XEXP (x, 0));
9082 fputc (']', file);
9083 }
9084 else if (GET_CODE (x) == HIGH)
9085 {
9086 fputs ("%hi(", file);
9087 output_addr_const (file, XEXP (x, 0));
9088 fputc (')', file);
9089 }
9090 else if (GET_CODE (x) == LO_SUM)
9091 {
9092 sparc_print_operand (file, XEXP (x, 0), 0);
9093 if (TARGET_CM_MEDMID)
9094 fputs ("+%l44(", file);
9095 else
9096 fputs ("+%lo(", file);
9097 output_addr_const (file, XEXP (x, 1));
9098 fputc (')', file);
9099 }
9100 else if (GET_CODE (x) == CONST_DOUBLE)
9101 output_operand_lossage ("floating-point constant not a valid immediate operand");
9102 else
9103 output_addr_const (file, x);
9104 }
9105
9106 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9107
9108 static void
9109 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9110 {
9111 register rtx base, index = 0;
9112 int offset = 0;
9113 register rtx addr = x;
9114
9115 if (REG_P (addr))
9116 fputs (reg_names[REGNO (addr)], file);
9117 else if (GET_CODE (addr) == PLUS)
9118 {
9119 if (CONST_INT_P (XEXP (addr, 0)))
9120 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9121 else if (CONST_INT_P (XEXP (addr, 1)))
9122 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9123 else
9124 base = XEXP (addr, 0), index = XEXP (addr, 1);
9125 if (GET_CODE (base) == LO_SUM)
9126 {
9127 gcc_assert (USE_AS_OFFSETABLE_LO10
9128 && TARGET_ARCH64
9129 && ! TARGET_CM_MEDMID);
9130 output_operand (XEXP (base, 0), 0);
9131 fputs ("+%lo(", file);
9132 output_address (VOIDmode, XEXP (base, 1));
9133 fprintf (file, ")+%d", offset);
9134 }
9135 else
9136 {
9137 fputs (reg_names[REGNO (base)], file);
9138 if (index == 0)
9139 fprintf (file, "%+d", offset);
9140 else if (REG_P (index))
9141 fprintf (file, "+%s", reg_names[REGNO (index)]);
9142 else if (GET_CODE (index) == SYMBOL_REF
9143 || GET_CODE (index) == LABEL_REF
9144 || GET_CODE (index) == CONST)
9145 fputc ('+', file), output_addr_const (file, index);
9146 else gcc_unreachable ();
9147 }
9148 }
9149 else if (GET_CODE (addr) == MINUS
9150 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9151 {
9152 output_addr_const (file, XEXP (addr, 0));
9153 fputs ("-(", file);
9154 output_addr_const (file, XEXP (addr, 1));
9155 fputs ("-.)", file);
9156 }
9157 else if (GET_CODE (addr) == LO_SUM)
9158 {
9159 output_operand (XEXP (addr, 0), 0);
9160 if (TARGET_CM_MEDMID)
9161 fputs ("+%l44(", file);
9162 else
9163 fputs ("+%lo(", file);
9164 output_address (VOIDmode, XEXP (addr, 1));
9165 fputc (')', file);
9166 }
9167 else if (flag_pic
9168 && GET_CODE (addr) == CONST
9169 && GET_CODE (XEXP (addr, 0)) == MINUS
9170 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9171 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9172 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9173 {
9174 addr = XEXP (addr, 0);
9175 output_addr_const (file, XEXP (addr, 0));
9176 /* Group the args of the second CONST in parentheses. */
9177 fputs ("-(", file);
9178 /* Skip past the second CONST--it does nothing for us. */
9179 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9180 /* Close the parenthesis. */
9181 fputc (')', file);
9182 }
9183 else
9184 {
9185 output_addr_const (file, addr);
9186 }
9187 }
9188 \f
9189 /* Target hook for assembling integer objects. The sparc version has
9190 special handling for aligned DI-mode objects. */
9191
9192 static bool
9193 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9194 {
9195 /* ??? We only output .xword's for symbols and only then in environments
9196 where the assembler can handle them. */
9197 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9198 {
9199 if (TARGET_V9)
9200 {
9201 assemble_integer_with_op ("\t.xword\t", x);
9202 return true;
9203 }
9204 else
9205 {
9206 assemble_aligned_integer (4, const0_rtx);
9207 assemble_aligned_integer (4, x);
9208 return true;
9209 }
9210 }
9211 return default_assemble_integer (x, size, aligned_p);
9212 }
9213 \f
9214 /* Return the value of a code used in the .proc pseudo-op that says
9215 what kind of result this function returns. For non-C types, we pick
9216 the closest C type. */
9217
9218 #ifndef SHORT_TYPE_SIZE
9219 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9220 #endif
9221
9222 #ifndef INT_TYPE_SIZE
9223 #define INT_TYPE_SIZE BITS_PER_WORD
9224 #endif
9225
9226 #ifndef LONG_TYPE_SIZE
9227 #define LONG_TYPE_SIZE BITS_PER_WORD
9228 #endif
9229
9230 #ifndef LONG_LONG_TYPE_SIZE
9231 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9232 #endif
9233
9234 #ifndef FLOAT_TYPE_SIZE
9235 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9236 #endif
9237
9238 #ifndef DOUBLE_TYPE_SIZE
9239 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9240 #endif
9241
9242 #ifndef LONG_DOUBLE_TYPE_SIZE
9243 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9244 #endif
9245
9246 unsigned long
9247 sparc_type_code (register tree type)
9248 {
9249 register unsigned long qualifiers = 0;
9250 register unsigned shift;
9251
9252 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9253 setting more, since some assemblers will give an error for this. Also,
9254 we must be careful to avoid shifts of 32 bits or more to avoid getting
9255 unpredictable results. */
9256
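/* For example, assuming a 32-bit signed int, a type like 'int *' picks
   up 1 << 6 for the POINTER_TYPE level and then returns code 4 for the
   underlying INTEGER_TYPE, giving 0x44.  */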
9257 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9258 {
9259 switch (TREE_CODE (type))
9260 {
9261 case ERROR_MARK:
9262 return qualifiers;
9263
9264 case ARRAY_TYPE:
9265 qualifiers |= (3 << shift);
9266 break;
9267
9268 case FUNCTION_TYPE:
9269 case METHOD_TYPE:
9270 qualifiers |= (2 << shift);
9271 break;
9272
9273 case POINTER_TYPE:
9274 case REFERENCE_TYPE:
9275 case OFFSET_TYPE:
9276 qualifiers |= (1 << shift);
9277 break;
9278
9279 case RECORD_TYPE:
9280 return (qualifiers | 8);
9281
9282 case UNION_TYPE:
9283 case QUAL_UNION_TYPE:
9284 return (qualifiers | 9);
9285
9286 case ENUMERAL_TYPE:
9287 return (qualifiers | 10);
9288
9289 case VOID_TYPE:
9290 return (qualifiers | 16);
9291
9292 case INTEGER_TYPE:
9293 /* If this is a range type, consider it to be the underlying
9294 type. */
9295 if (TREE_TYPE (type) != 0)
9296 break;
9297
9298 /* Carefully distinguish all the standard types of C,
9299 without messing up if the language is not C. We do this by
9300 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9301 look at both the names and the above fields, but that's redundant.
9302 Any type whose size is between two C types will be considered
9303 to be the wider of the two types. Also, we do not have a
9304 special code to use for "long long", so anything wider than
9305 long is treated the same. Note that we can't distinguish
9306 between "int" and "long" in this code if they are the same
9307 size, but that's fine, since neither can the assembler. */
9308
9309 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9310 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9311
9312 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9313 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9314
9315 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9316 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9317
9318 else
9319 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9320
9321 case REAL_TYPE:
9322 /* If this is a range type, consider it to be the underlying
9323 type. */
9324 if (TREE_TYPE (type) != 0)
9325 break;
9326
9327 /* Carefully distinguish all the standard types of C,
9328 without messing up if the language is not C. */
9329
9330 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9331 return (qualifiers | 6);
9332
9333 else
9334 return (qualifiers | 7);
9335
9336 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9337 /* ??? We need to distinguish between double and float complex types,
9338 but I don't know how yet because I can't reach this code from
9339 existing front-ends. */
9340 return (qualifiers | 7); /* Who knows? */
9341
9342 case VECTOR_TYPE:
9343 case BOOLEAN_TYPE: /* Boolean truth value type. */
9344 case LANG_TYPE:
9345 case NULLPTR_TYPE:
9346 return qualifiers;
9347
9348 default:
9349 gcc_unreachable (); /* Not a type! */
9350 }
9351 }
9352
9353 return qualifiers;
9354 }
9355 \f
9356 /* Nested function support. */
9357
9358 /* Emit RTL insns to initialize the variable parts of a trampoline.
9359 FNADDR is an RTX for the address of the function's pure code.
9360 CXT is an RTX for the static chain value for the function.
9361
9362 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9363 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9364 (to store insns). This is a bit excessive. Perhaps a different
9365 mechanism would be better here.
9366
9367 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9368
9369 static void
9370 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9371 {
9372 /* SPARC 32-bit trampoline:
9373
9374 sethi %hi(fn), %g1
9375 sethi %hi(static), %g2
9376 jmp %g1+%lo(fn)
9377 or %g2, %lo(static), %g2
9378
9379 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9380 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9381 */
9382
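/* The magic constants below are the instruction templates with zeroed
   immediate fields: 0x03000000 is "sethi 0, %g1", 0x05000000 is
   "sethi 0, %g2", 0x81c06000 is "jmpl %g1 + 0, %g0" and 0x8410a000 is
   "or %g2, 0, %g2"; the shifts and masks OR in %hi() and %lo() of
   FNADDR and CXT respectively.  */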
9383 emit_move_insn
9384 (adjust_address (m_tramp, SImode, 0),
9385 expand_binop (SImode, ior_optab,
9386 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9387 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9388 NULL_RTX, 1, OPTAB_DIRECT));
9389
9390 emit_move_insn
9391 (adjust_address (m_tramp, SImode, 4),
9392 expand_binop (SImode, ior_optab,
9393 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9394 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9395 NULL_RTX, 1, OPTAB_DIRECT));
9396
9397 emit_move_insn
9398 (adjust_address (m_tramp, SImode, 8),
9399 expand_binop (SImode, ior_optab,
9400 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9401 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9402 NULL_RTX, 1, OPTAB_DIRECT));
9403
9404 emit_move_insn
9405 (adjust_address (m_tramp, SImode, 12),
9406 expand_binop (SImode, ior_optab,
9407 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9408 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9409 NULL_RTX, 1, OPTAB_DIRECT));
9410
9411 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9412 aligned on a 16 byte boundary so one flush clears it all. */
9413 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9414 if (sparc_cpu != PROCESSOR_ULTRASPARC
9415 && sparc_cpu != PROCESSOR_ULTRASPARC3
9416 && sparc_cpu != PROCESSOR_NIAGARA
9417 && sparc_cpu != PROCESSOR_NIAGARA2
9418 && sparc_cpu != PROCESSOR_NIAGARA3
9419 && sparc_cpu != PROCESSOR_NIAGARA4
9420 && sparc_cpu != PROCESSOR_NIAGARA7)
9421 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9422
9423 /* Call __enable_execute_stack after writing onto the stack to make sure
9424 the stack address is accessible. */
9425 #ifdef HAVE_ENABLE_EXECUTE_STACK
9426 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9427 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9428 #endif
9429
9430 }
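
/* Purely illustrative sketch of what the four stores above produce, assuming
   a hypothetical function address fn = 0x12345678: the word at offset 0 is
   (fn >> 10) | 0x03000000 = 0x03048d15, i.e. "sethi %hi(fn), %g1", and the
   word at offset 8 is (fn & 0x3ff) | 0x81c06000 = 0x81c06278, i.e.
   "jmp %g1+%lo(fn)".  The words at offsets 4 and 12 encode the static chain
   the same way with the %g2 opcodes 0x05000000 and 0x8410a000.  */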
9431
9432 /* The 64-bit version is simpler because it makes more sense to load the
9433 values as "immediate" data out of the trampoline. It's also easier since
9434 we can read the PC without clobbering a register. */
9435
9436 static void
9437 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9438 {
9439 /* SPARC 64-bit trampoline:
9440
9441 rd %pc, %g1
9442 ldx [%g1+24], %g5
9443 jmp %g5
9444 ldx [%g1+16], %g5
9445 +16 bytes data
9446 */
9447
9448 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9449 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9450 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9451 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9452 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9453 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9454 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9455 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9456 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9457 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9458 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9459
9460 if (sparc_cpu != PROCESSOR_ULTRASPARC
9461 && sparc_cpu != PROCESSOR_ULTRASPARC3
9462 && sparc_cpu != PROCESSOR_NIAGARA
9463 && sparc_cpu != PROCESSOR_NIAGARA2
9464 && sparc_cpu != PROCESSOR_NIAGARA3
9465 && sparc_cpu != PROCESSOR_NIAGARA4
9466 && sparc_cpu != PROCESSOR_NIAGARA7)
9467 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9468
9469 /* Call __enable_execute_stack after writing onto the stack to make sure
9470 the stack address is accessible. */
9471 #ifdef HAVE_ENABLE_EXECUTE_STACK
9472 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9473 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9474 #endif
9475 }
9476
9477 /* Worker for TARGET_TRAMPOLINE_INIT. */
9478
9479 static void
9480 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9481 {
9482 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9483 cxt = force_reg (Pmode, cxt);
9484 if (TARGET_ARCH64)
9485 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9486 else
9487 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9488 }
9489 \f
9490 /* Adjust the cost of a scheduling dependency. Return the new cost of
9491 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
9492
9493 static int
9494 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9495 int cost)
9496 {
9497 enum attr_type insn_type;
9498
9499 if (recog_memoized (insn) < 0)
9500 return cost;
9501
9502 insn_type = get_attr_type (insn);
9503
9504 if (dep_type == 0)
9505 {
9506 /* Data dependency; DEP_INSN writes a register that INSN reads some
9507 cycles later. */
9508
9509 /* if a load, then the dependence must be on the memory address;
9510 add an extra "cycle". Note that the cost could be two cycles
9511 if the reg was written late in an instruction group; we cannot tell
9512 here. */
9513 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9514 return cost + 3;
9515
9516 /* Get the delay only if the address of the store is the dependence. */
9517 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9518 {
9519 rtx pat = PATTERN (insn);
9520 rtx dep_pat = PATTERN (dep_insn);
9521
9522 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9523 return cost; /* This should not happen! */
9524
9525 /* The dependency between the two instructions was on the data that
9526 is being stored. Assume that this implies that the address of the
9527 store is not dependent. */
9528 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9529 return cost;
9530
9531 return cost + 3; /* An approximation. */
9532 }
9533
9534 /* A shift instruction cannot receive its data from an instruction
9535 in the same cycle; add a one cycle penalty. */
9536 if (insn_type == TYPE_SHIFT)
9537 return cost + 3; /* Split before cascade into shift. */
9538 }
9539 else
9540 {
9541 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9542 INSN writes some cycles later. */
9543
9544 /* These are only significant for the fpu unit; writing a fp reg before
9545 the fpu has finished with it stalls the processor. */
9546
9547 /* Reusing an integer register causes no problems. */
9548 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9549 return 0;
9550 }
9551
9552 return cost;
9553 }
9554
9555 static int
9556 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9557 int cost)
9558 {
9559 enum attr_type insn_type, dep_type;
9560 rtx pat = PATTERN (insn);
9561 rtx dep_pat = PATTERN (dep_insn);
9562
9563 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9564 return cost;
9565
9566 insn_type = get_attr_type (insn);
9567 dep_type = get_attr_type (dep_insn);
9568
9569 switch (dtype)
9570 {
9571 case 0:
9572 /* Data dependency; DEP_INSN writes a register that INSN reads some
9573 cycles later. */
9574
9575 switch (insn_type)
9576 {
9577 case TYPE_STORE:
9578 case TYPE_FPSTORE:
9579 /* Get the delay iff the address of the store is the dependence. */
9580 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9581 return cost;
9582
9583 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9584 return cost;
9585 return cost + 3;
9586
9587 case TYPE_LOAD:
9588 case TYPE_SLOAD:
9589 case TYPE_FPLOAD:
9590 /* If a load, then the dependence must be on the memory address. If
9591 the addresses aren't equal, then it might be a false dependency. */
9592 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9593 {
9594 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9595 || GET_CODE (SET_DEST (dep_pat)) != MEM
9596 || GET_CODE (SET_SRC (pat)) != MEM
9597 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9598 XEXP (SET_SRC (pat), 0)))
9599 return cost + 2;
9600
9601 return cost + 8;
9602 }
9603 break;
9604
9605 case TYPE_BRANCH:
9606 /* Compare to branch latency is 0. There is no benefit from
9607 separating compare and branch. */
9608 if (dep_type == TYPE_COMPARE)
9609 return 0;
9610 /* Floating point compare to branch latency is less than
9611 compare to conditional move. */
9612 if (dep_type == TYPE_FPCMP)
9613 return cost - 1;
9614 break;
9615 default:
9616 break;
9617 }
9618 break;
9619
9620 case REG_DEP_ANTI:
9621 /* Anti-dependencies only penalize the fpu unit. */
9622 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9623 return 0;
9624 break;
9625
9626 default:
9627 break;
9628 }
9629
9630 return cost;
9631 }
9632
9633 static int
9634 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9635 unsigned int)
9636 {
9637 switch (sparc_cpu)
9638 {
9639 case PROCESSOR_SUPERSPARC:
9640 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9641 break;
9642 case PROCESSOR_HYPERSPARC:
9643 case PROCESSOR_SPARCLITE86X:
9644 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9645 break;
9646 default:
9647 break;
9648 }
9649 return cost;
9650 }
9651
9652 static void
9653 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9654 int sched_verbose ATTRIBUTE_UNUSED,
9655 int max_ready ATTRIBUTE_UNUSED)
9656 {}
9657
9658 static int
9659 sparc_use_sched_lookahead (void)
9660 {
9661 if (sparc_cpu == PROCESSOR_NIAGARA
9662 || sparc_cpu == PROCESSOR_NIAGARA2
9663 || sparc_cpu == PROCESSOR_NIAGARA3)
9664 return 0;
9665 if (sparc_cpu == PROCESSOR_NIAGARA4
9666 || sparc_cpu == PROCESSOR_NIAGARA7)
9667 return 2;
9668 if (sparc_cpu == PROCESSOR_ULTRASPARC
9669 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9670 return 4;
9671 if ((1 << sparc_cpu) &
9672 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9673 (1 << PROCESSOR_SPARCLITE86X)))
9674 return 3;
9675 return 0;
9676 }
9677
9678 static int
9679 sparc_issue_rate (void)
9680 {
9681 switch (sparc_cpu)
9682 {
9683 case PROCESSOR_NIAGARA:
9684 case PROCESSOR_NIAGARA2:
9685 case PROCESSOR_NIAGARA3:
9686 default:
9687 return 1;
9688 case PROCESSOR_NIAGARA4:
9689 case PROCESSOR_NIAGARA7:
9690 case PROCESSOR_V9:
9691 /* Assume V9 processors are capable of at least dual-issue. */
9692 return 2;
9693 case PROCESSOR_SUPERSPARC:
9694 return 3;
9695 case PROCESSOR_HYPERSPARC:
9696 case PROCESSOR_SPARCLITE86X:
9697 return 2;
9698 case PROCESSOR_ULTRASPARC:
9699 case PROCESSOR_ULTRASPARC3:
9700 return 4;
9701 }
9702 }
9703
9704 static int
9705 set_extends (rtx_insn *insn)
9706 {
9707 register rtx pat = PATTERN (insn);
9708
9709 switch (GET_CODE (SET_SRC (pat)))
9710 {
9711 /* Load and some shift instructions zero extend. */
9712 case MEM:
9713 case ZERO_EXTEND:
9714 /* sethi clears the high bits. */
9715 case HIGH:
9716 /* LO_SUM is used with sethi; sethi clears the high
9717 bits and the values used with lo_sum are positive. */
9718 case LO_SUM:
9719 /* Store flag stores 0 or 1. */
9720 case LT: case LTU:
9721 case GT: case GTU:
9722 case LE: case LEU:
9723 case GE: case GEU:
9724 case EQ:
9725 case NE:
9726 return 1;
9727 case AND:
9728 {
9729 rtx op0 = XEXP (SET_SRC (pat), 0);
9730 rtx op1 = XEXP (SET_SRC (pat), 1);
9731 if (GET_CODE (op1) == CONST_INT)
9732 return INTVAL (op1) >= 0;
9733 if (GET_CODE (op0) != REG)
9734 return 0;
9735 if (sparc_check_64 (op0, insn) == 1)
9736 return 1;
9737 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9738 }
9739 case IOR:
9740 case XOR:
9741 {
9742 rtx op0 = XEXP (SET_SRC (pat), 0);
9743 rtx op1 = XEXP (SET_SRC (pat), 1);
9744 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9745 return 0;
9746 if (GET_CODE (op1) == CONST_INT)
9747 return INTVAL (op1) >= 0;
9748 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9749 }
9750 case LSHIFTRT:
9751 return GET_MODE (SET_SRC (pat)) == SImode;
9752 /* Positive integers leave the high bits zero. */
9753 case CONST_INT:
9754 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9755 case ASHIFTRT:
9756 case SIGN_EXTEND:
9757 return - (GET_MODE (SET_SRC (pat)) == SImode);
9758 case REG:
9759 return sparc_check_64 (SET_SRC (pat), insn);
9760 default:
9761 return 0;
9762 }
9763 }
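
/* A few illustrative readings of the cases above (not exhaustive): a SET from
   a MEM counts as zero-extending because SImode loads are emitted as
   zero-extending loads; a SIGN_EXTEND or ASHIFTRT whose result mode is SImode
   yields -1 to flag a sign-extended value; anything unrecognized falls
   through to the conservative answer 0.  */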
9764
9765 /* We _ought_ to have only one kind per function, but... */
9766 static GTY(()) rtx sparc_addr_diff_list;
9767 static GTY(()) rtx sparc_addr_list;
9768
9769 void
9770 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9771 {
9772 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9773 if (diff)
9774 sparc_addr_diff_list
9775 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9776 else
9777 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9778 }
9779
9780 static void
9781 sparc_output_addr_vec (rtx vec)
9782 {
9783 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9784 int idx, vlen = XVECLEN (body, 0);
9785
9786 #ifdef ASM_OUTPUT_ADDR_VEC_START
9787 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9788 #endif
9789
9790 #ifdef ASM_OUTPUT_CASE_LABEL
9791 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9792 NEXT_INSN (lab));
9793 #else
9794 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9795 #endif
9796
9797 for (idx = 0; idx < vlen; idx++)
9798 {
9799 ASM_OUTPUT_ADDR_VEC_ELT
9800 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9801 }
9802
9803 #ifdef ASM_OUTPUT_ADDR_VEC_END
9804 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9805 #endif
9806 }
9807
9808 static void
9809 sparc_output_addr_diff_vec (rtx vec)
9810 {
9811 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9812 rtx base = XEXP (XEXP (body, 0), 0);
9813 int idx, vlen = XVECLEN (body, 1);
9814
9815 #ifdef ASM_OUTPUT_ADDR_VEC_START
9816 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9817 #endif
9818
9819 #ifdef ASM_OUTPUT_CASE_LABEL
9820 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9821 NEXT_INSN (lab));
9822 #else
9823 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9824 #endif
9825
9826 for (idx = 0; idx < vlen; idx++)
9827 {
9828 ASM_OUTPUT_ADDR_DIFF_ELT
9829 (asm_out_file,
9830 body,
9831 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9832 CODE_LABEL_NUMBER (base));
9833 }
9834
9835 #ifdef ASM_OUTPUT_ADDR_VEC_END
9836 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9837 #endif
9838 }
9839
9840 static void
9841 sparc_output_deferred_case_vectors (void)
9842 {
9843 rtx t;
9844 int align;
9845
9846 if (sparc_addr_list == NULL_RTX
9847 && sparc_addr_diff_list == NULL_RTX)
9848 return;
9849
9850 /* Align to cache line in the function's code section. */
9851 switch_to_section (current_function_section ());
9852
9853 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9854 if (align > 0)
9855 ASM_OUTPUT_ALIGN (asm_out_file, align);
9856
9857 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9858 sparc_output_addr_vec (XEXP (t, 0));
9859 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9860 sparc_output_addr_diff_vec (XEXP (t, 0));
9861
9862 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9863 }
9864
9865 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9866 unknown. Return 1 if the high bits are zero, -1 if the register is
9867 sign extended. */
9868 int
9869 sparc_check_64 (rtx x, rtx_insn *insn)
9870 {
9871 /* If a register is set only once it is safe to ignore insns this
9872 code does not know how to handle. The loop will either recognize
9873 the single set and return the correct value or fail to recognize
9874 it and return 0. */
9875 int set_once = 0;
9876 rtx y = x;
9877
9878 gcc_assert (GET_CODE (x) == REG);
9879
9880 if (GET_MODE (x) == DImode)
9881 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9882
9883 if (flag_expensive_optimizations
9884 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9885 set_once = 1;
9886
9887 if (insn == 0)
9888 {
9889 if (set_once)
9890 insn = get_last_insn_anywhere ();
9891 else
9892 return 0;
9893 }
9894
9895 while ((insn = PREV_INSN (insn)))
9896 {
9897 switch (GET_CODE (insn))
9898 {
9899 case JUMP_INSN:
9900 case NOTE:
9901 break;
9902 case CODE_LABEL:
9903 case CALL_INSN:
9904 default:
9905 if (! set_once)
9906 return 0;
9907 break;
9908 case INSN:
9909 {
9910 rtx pat = PATTERN (insn);
9911 if (GET_CODE (pat) != SET)
9912 return 0;
9913 if (rtx_equal_p (x, SET_DEST (pat)))
9914 return set_extends (insn);
9915 if (y && rtx_equal_p (y, SET_DEST (pat)))
9916 return set_extends (insn);
9917 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9918 return 0;
9919 }
9920 }
9921 }
9922 return 0;
9923 }
9924
9925 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9926 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9927
9928 const char *
9929 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9930 {
9931 static char asm_code[60];
9932
9933 /* The scratch register is only required when the destination
9934 register is not a 64-bit global or out register. */
9935 if (which_alternative != 2)
9936 operands[3] = operands[0];
9937
9938 /* We can only shift by constants <= 63. */
9939 if (GET_CODE (operands[2]) == CONST_INT)
9940 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9941
9942 if (GET_CODE (operands[1]) == CONST_INT)
9943 {
9944 output_asm_insn ("mov\t%1, %3", operands);
9945 }
9946 else
9947 {
9948 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9949 if (sparc_check_64 (operands[1], insn) <= 0)
9950 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9951 output_asm_insn ("or\t%L1, %3, %3", operands);
9952 }
9953
9954 strcpy (asm_code, opcode);
9955
9956 if (which_alternative != 2)
9957 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9958 else
9959 return
9960 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9961 }
9962 \f
9963 /* Output rtl to increment the profiler label LABELNO
9964 for profiling a function entry. */
9965
9966 void
9967 sparc_profile_hook (int labelno)
9968 {
9969 char buf[32];
9970 rtx lab, fun;
9971
9972 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9973 if (NO_PROFILE_COUNTERS)
9974 {
9975 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9976 }
9977 else
9978 {
9979 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9980 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9981 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9982 }
9983 }
9984 \f
9985 #ifdef TARGET_SOLARIS
9986 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9987
9988 static void
9989 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9990 tree decl ATTRIBUTE_UNUSED)
9991 {
9992 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9993 {
9994 solaris_elf_asm_comdat_section (name, flags, decl);
9995 return;
9996 }
9997
9998 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9999
10000 if (!(flags & SECTION_DEBUG))
10001 fputs (",#alloc", asm_out_file);
10002 if (flags & SECTION_WRITE)
10003 fputs (",#write", asm_out_file);
10004 if (flags & SECTION_TLS)
10005 fputs (",#tls", asm_out_file);
10006 if (flags & SECTION_CODE)
10007 fputs (",#execinstr", asm_out_file);
10008
10009 if (flags & SECTION_NOTYPE)
10010 ;
10011 else if (flags & SECTION_BSS)
10012 fputs (",#nobits", asm_out_file);
10013 else
10014 fputs (",#progbits", asm_out_file);
10015
10016 fputc ('\n', asm_out_file);
10017 }
10018 #endif /* TARGET_SOLARIS */
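
/* As a rough illustration (hypothetical section name, not actual compiler
   output), the Solaris hook above emits a writable data section with
   SECTION_WRITE set as

       .section        ".mydata",#alloc,#write,#progbits

   whereas a SECTION_DEBUG section merely omits the #alloc flag.  */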
10019
10020 /* We do not allow indirect calls to be optimized into sibling calls.
10021
10022 We cannot use sibling calls when delayed branches are disabled
10023 because they will likely require the call delay slot to be filled.
10024
10025 Also, on SPARC 32-bit we cannot emit a sibling call when the
10026 current function returns a structure. This is because the "unimp
10027 after call" convention would cause the callee to return to the
10028 wrong place. The generic code already disallows cases where the
10029 function being called returns a structure.
10030
10031 It may seem strange how this last case could occur. Usually there
10032 is code after the call which jumps to epilogue code which dumps the
10033 return value into the struct return area. That ought to invalidate
10034 the sibling call, right? Well, in the C++ case we can end up passing
10035 the pointer to the struct return area to a constructor (which returns
10036 void) and then nothing else happens. Such a sibling call would look
10037 valid without the added check here.
10038
10039 VxWorks PIC PLT entries require the global pointer to be initialized
10040 on entry. We therefore can't emit sibling calls to them. */
10041 static bool
10042 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10043 {
10044 return (decl
10045 && flag_delayed_branch
10046 && (TARGET_ARCH64 || ! cfun->returns_struct)
10047 && !(TARGET_VXWORKS_RTP
10048 && flag_pic
10049 && !targetm.binds_local_p (decl)));
10050 }
10051 \f
10052 /* libfunc renaming. */
10053
10054 static void
10055 sparc_init_libfuncs (void)
10056 {
10057 if (TARGET_ARCH32)
10058 {
10059 /* Use the subroutines that Sun's library provides for integer
10060 multiply and divide. The `*' prevents an underscore from
10061 being prepended by the compiler. .umul is a little faster
10062 than .mul. */
10063 set_optab_libfunc (smul_optab, SImode, "*.umul");
10064 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10065 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10066 set_optab_libfunc (smod_optab, SImode, "*.rem");
10067 set_optab_libfunc (umod_optab, SImode, "*.urem");
10068
10069 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10070 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10071 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10072 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10073 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10074 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10075
10076 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10077 is because with soft-float, the SFmode and DFmode sqrt
10078 instructions will be absent, and the compiler will notice and
10079 try to use the TFmode sqrt instruction for calls to the
10080 builtin function sqrt, but this fails. */
10081 if (TARGET_FPU)
10082 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10083
10084 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10085 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10086 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10087 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10088 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10089 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10090
10091 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10092 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10093 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10094 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10095
10096 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10097 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10098 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10099 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10100
10101 if (DITF_CONVERSION_LIBFUNCS)
10102 {
10103 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10104 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10105 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10106 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10107 }
10108
10109 if (SUN_CONVERSION_LIBFUNCS)
10110 {
10111 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10112 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10113 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10114 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10115 }
10116 }
10117 if (TARGET_ARCH64)
10118 {
10119 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10120 do not exist in the library. Make sure the compiler does not
10121 emit calls to them by accident. (It should always use the
10122 hardware instructions.) */
10123 set_optab_libfunc (smul_optab, SImode, 0);
10124 set_optab_libfunc (sdiv_optab, SImode, 0);
10125 set_optab_libfunc (udiv_optab, SImode, 0);
10126 set_optab_libfunc (smod_optab, SImode, 0);
10127 set_optab_libfunc (umod_optab, SImode, 0);
10128
10129 if (SUN_INTEGER_MULTIPLY_64)
10130 {
10131 set_optab_libfunc (smul_optab, DImode, "__mul64");
10132 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10133 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10134 set_optab_libfunc (smod_optab, DImode, "__rem64");
10135 set_optab_libfunc (umod_optab, DImode, "__urem64");
10136 }
10137
10138 if (SUN_CONVERSION_LIBFUNCS)
10139 {
10140 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10141 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10142 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10143 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10144 }
10145 }
10146 }
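
/* As an illustration (assuming a target where SUN_CONVERSION_LIBFUNCS is
   nonzero), a 32-bit float to "long long" conversion that cannot be
   open-coded expands into a call to the __ftoll routine registered above;
   likewise __dtoull covers the unsigned conversion from double.  */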
10147 \f
10148 /* SPARC builtins. */
10149 enum sparc_builtins
10150 {
10151 /* FPU builtins. */
10152 SPARC_BUILTIN_LDFSR,
10153 SPARC_BUILTIN_STFSR,
10154
10155 /* VIS 1.0 builtins. */
10156 SPARC_BUILTIN_FPACK16,
10157 SPARC_BUILTIN_FPACK32,
10158 SPARC_BUILTIN_FPACKFIX,
10159 SPARC_BUILTIN_FEXPAND,
10160 SPARC_BUILTIN_FPMERGE,
10161 SPARC_BUILTIN_FMUL8X16,
10162 SPARC_BUILTIN_FMUL8X16AU,
10163 SPARC_BUILTIN_FMUL8X16AL,
10164 SPARC_BUILTIN_FMUL8SUX16,
10165 SPARC_BUILTIN_FMUL8ULX16,
10166 SPARC_BUILTIN_FMULD8SUX16,
10167 SPARC_BUILTIN_FMULD8ULX16,
10168 SPARC_BUILTIN_FALIGNDATAV4HI,
10169 SPARC_BUILTIN_FALIGNDATAV8QI,
10170 SPARC_BUILTIN_FALIGNDATAV2SI,
10171 SPARC_BUILTIN_FALIGNDATADI,
10172 SPARC_BUILTIN_WRGSR,
10173 SPARC_BUILTIN_RDGSR,
10174 SPARC_BUILTIN_ALIGNADDR,
10175 SPARC_BUILTIN_ALIGNADDRL,
10176 SPARC_BUILTIN_PDIST,
10177 SPARC_BUILTIN_EDGE8,
10178 SPARC_BUILTIN_EDGE8L,
10179 SPARC_BUILTIN_EDGE16,
10180 SPARC_BUILTIN_EDGE16L,
10181 SPARC_BUILTIN_EDGE32,
10182 SPARC_BUILTIN_EDGE32L,
10183 SPARC_BUILTIN_FCMPLE16,
10184 SPARC_BUILTIN_FCMPLE32,
10185 SPARC_BUILTIN_FCMPNE16,
10186 SPARC_BUILTIN_FCMPNE32,
10187 SPARC_BUILTIN_FCMPGT16,
10188 SPARC_BUILTIN_FCMPGT32,
10189 SPARC_BUILTIN_FCMPEQ16,
10190 SPARC_BUILTIN_FCMPEQ32,
10191 SPARC_BUILTIN_FPADD16,
10192 SPARC_BUILTIN_FPADD16S,
10193 SPARC_BUILTIN_FPADD32,
10194 SPARC_BUILTIN_FPADD32S,
10195 SPARC_BUILTIN_FPSUB16,
10196 SPARC_BUILTIN_FPSUB16S,
10197 SPARC_BUILTIN_FPSUB32,
10198 SPARC_BUILTIN_FPSUB32S,
10199 SPARC_BUILTIN_ARRAY8,
10200 SPARC_BUILTIN_ARRAY16,
10201 SPARC_BUILTIN_ARRAY32,
10202
10203 /* VIS 2.0 builtins. */
10204 SPARC_BUILTIN_EDGE8N,
10205 SPARC_BUILTIN_EDGE8LN,
10206 SPARC_BUILTIN_EDGE16N,
10207 SPARC_BUILTIN_EDGE16LN,
10208 SPARC_BUILTIN_EDGE32N,
10209 SPARC_BUILTIN_EDGE32LN,
10210 SPARC_BUILTIN_BMASK,
10211 SPARC_BUILTIN_BSHUFFLEV4HI,
10212 SPARC_BUILTIN_BSHUFFLEV8QI,
10213 SPARC_BUILTIN_BSHUFFLEV2SI,
10214 SPARC_BUILTIN_BSHUFFLEDI,
10215
10216 /* VIS 3.0 builtins. */
10217 SPARC_BUILTIN_CMASK8,
10218 SPARC_BUILTIN_CMASK16,
10219 SPARC_BUILTIN_CMASK32,
10220 SPARC_BUILTIN_FCHKSM16,
10221 SPARC_BUILTIN_FSLL16,
10222 SPARC_BUILTIN_FSLAS16,
10223 SPARC_BUILTIN_FSRL16,
10224 SPARC_BUILTIN_FSRA16,
10225 SPARC_BUILTIN_FSLL32,
10226 SPARC_BUILTIN_FSLAS32,
10227 SPARC_BUILTIN_FSRL32,
10228 SPARC_BUILTIN_FSRA32,
10229 SPARC_BUILTIN_PDISTN,
10230 SPARC_BUILTIN_FMEAN16,
10231 SPARC_BUILTIN_FPADD64,
10232 SPARC_BUILTIN_FPSUB64,
10233 SPARC_BUILTIN_FPADDS16,
10234 SPARC_BUILTIN_FPADDS16S,
10235 SPARC_BUILTIN_FPSUBS16,
10236 SPARC_BUILTIN_FPSUBS16S,
10237 SPARC_BUILTIN_FPADDS32,
10238 SPARC_BUILTIN_FPADDS32S,
10239 SPARC_BUILTIN_FPSUBS32,
10240 SPARC_BUILTIN_FPSUBS32S,
10241 SPARC_BUILTIN_FUCMPLE8,
10242 SPARC_BUILTIN_FUCMPNE8,
10243 SPARC_BUILTIN_FUCMPGT8,
10244 SPARC_BUILTIN_FUCMPEQ8,
10245 SPARC_BUILTIN_FHADDS,
10246 SPARC_BUILTIN_FHADDD,
10247 SPARC_BUILTIN_FHSUBS,
10248 SPARC_BUILTIN_FHSUBD,
10249 SPARC_BUILTIN_FNHADDS,
10250 SPARC_BUILTIN_FNHADDD,
10251 SPARC_BUILTIN_UMULXHI,
10252 SPARC_BUILTIN_XMULX,
10253 SPARC_BUILTIN_XMULXHI,
10254
10255 /* VIS 4.0 builtins. */
10256 SPARC_BUILTIN_FPADD8,
10257 SPARC_BUILTIN_FPADDS8,
10258 SPARC_BUILTIN_FPADDUS8,
10259 SPARC_BUILTIN_FPADDUS16,
10260 SPARC_BUILTIN_FPCMPLE8,
10261 SPARC_BUILTIN_FPCMPGT8,
10262 SPARC_BUILTIN_FPCMPULE16,
10263 SPARC_BUILTIN_FPCMPUGT16,
10264 SPARC_BUILTIN_FPCMPULE32,
10265 SPARC_BUILTIN_FPCMPUGT32,
10266 SPARC_BUILTIN_FPMAX8,
10267 SPARC_BUILTIN_FPMAX16,
10268 SPARC_BUILTIN_FPMAX32,
10269 SPARC_BUILTIN_FPMAXU8,
10270 SPARC_BUILTIN_FPMAXU16,
10271 SPARC_BUILTIN_FPMAXU32,
10272 SPARC_BUILTIN_FPMIN8,
10273 SPARC_BUILTIN_FPMIN16,
10274 SPARC_BUILTIN_FPMIN32,
10275 SPARC_BUILTIN_FPMINU8,
10276 SPARC_BUILTIN_FPMINU16,
10277 SPARC_BUILTIN_FPMINU32,
10278 SPARC_BUILTIN_FPSUB8,
10279 SPARC_BUILTIN_FPSUBS8,
10280 SPARC_BUILTIN_FPSUBUS8,
10281 SPARC_BUILTIN_FPSUBUS16,
10282
10283 SPARC_BUILTIN_MAX
10284 };
10285
10286 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10287 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10288
10289 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10290 function decl or NULL_TREE if the builtin was not added. */
10291
10292 static tree
10293 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10294 tree type)
10295 {
10296 tree t
10297 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10298
10299 if (t)
10300 {
10301 sparc_builtins[code] = t;
10302 sparc_builtins_icode[code] = icode;
10303 }
10304
10305 return t;
10306 }
10307
10308 /* Likewise, but also marks the function as "const". */
10309
10310 static tree
10311 def_builtin_const (const char *name, enum insn_code icode,
10312 enum sparc_builtins code, tree type)
10313 {
10314 tree t = def_builtin (name, icode, code, type);
10315
10316 if (t)
10317 TREE_READONLY (t) = 1;
10318
10319 return t;
10320 }
10321
10322 /* Implement the TARGET_INIT_BUILTINS target hook.
10323 Create builtin functions for special SPARC instructions. */
10324
10325 static void
10326 sparc_init_builtins (void)
10327 {
10328 if (TARGET_FPU)
10329 sparc_fpu_init_builtins ();
10330
10331 if (TARGET_VIS)
10332 sparc_vis_init_builtins ();
10333 }
10334
10335 /* Create builtin functions for FPU instructions. */
10336
10337 static void
10338 sparc_fpu_init_builtins (void)
10339 {
10340 tree ftype
10341 = build_function_type_list (void_type_node,
10342 build_pointer_type (unsigned_type_node), 0);
10343 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10344 SPARC_BUILTIN_LDFSR, ftype);
10345 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10346 SPARC_BUILTIN_STFSR, ftype);
10347 }
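
/* Minimal usage sketch (user code, not part of this file): both builtins take
   a pointer to an unsigned int, so saving and later restoring the FSR looks
   roughly like

       unsigned int fsr;
       __builtin_store_fsr (&fsr);     (copies %fsr to memory)
       __builtin_load_fsr (&fsr);      (copies it back into %fsr)

   using the names registered just above.  */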
10348
10349 /* Create builtin functions for VIS instructions. */
10350
10351 static void
10352 sparc_vis_init_builtins (void)
10353 {
10354 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10355 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10356 tree v4hi = build_vector_type (intHI_type_node, 4);
10357 tree v2hi = build_vector_type (intHI_type_node, 2);
10358 tree v2si = build_vector_type (intSI_type_node, 2);
10359 tree v1si = build_vector_type (intSI_type_node, 1);
10360
10361 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10362 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10363 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10364 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10365 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10366 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10367 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10368 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10369 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10370 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10371 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10372 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10373 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10374 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10375 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10376 v8qi, v8qi,
10377 intDI_type_node, 0);
10378 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10379 v8qi, v8qi, 0);
10380 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10381 v8qi, v8qi, 0);
10382 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10383 intDI_type_node,
10384 intDI_type_node, 0);
10385 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10386 intSI_type_node,
10387 intSI_type_node, 0);
10388 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10389 ptr_type_node,
10390 intSI_type_node, 0);
10391 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10392 ptr_type_node,
10393 intDI_type_node, 0);
10394 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10395 ptr_type_node,
10396 ptr_type_node, 0);
10397 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10398 ptr_type_node,
10399 ptr_type_node, 0);
10400 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10401 v4hi, v4hi, 0);
10402 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10403 v2si, v2si, 0);
10404 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10405 v4hi, v4hi, 0);
10406 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10407 v2si, v2si, 0);
10408 tree void_ftype_di = build_function_type_list (void_type_node,
10409 intDI_type_node, 0);
10410 tree di_ftype_void = build_function_type_list (intDI_type_node,
10411 void_type_node, 0);
10412 tree void_ftype_si = build_function_type_list (void_type_node,
10413 intSI_type_node, 0);
10414 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10415 float_type_node,
10416 float_type_node, 0);
10417 tree df_ftype_df_df = build_function_type_list (double_type_node,
10418 double_type_node,
10419 double_type_node, 0);
10420
10421 /* Packing and expanding vectors. */
10422 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10423 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10424 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10425 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10426 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10427 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10428 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10429 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10430 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10431 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10432
10433 /* Multiplications. */
10434 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10435 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10436 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10437 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10438 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10439 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10440 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10441 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10442 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10443 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10444 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10445 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10446 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10447 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10448
10449 /* Data aligning. */
10450 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10451 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10452 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10453 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10454 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10455 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10456 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10457 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10458
10459 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10460 SPARC_BUILTIN_WRGSR, void_ftype_di);
10461 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10462 SPARC_BUILTIN_RDGSR, di_ftype_void);
10463
10464 if (TARGET_ARCH64)
10465 {
10466 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10467 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10468 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10469 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10470 }
10471 else
10472 {
10473 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10474 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10475 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10476 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10477 }
10478
10479 /* Pixel distance. */
10480 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10481 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10482
10483 /* Edge handling. */
10484 if (TARGET_ARCH64)
10485 {
10486 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10487 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10488 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10489 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10490 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10491 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10492 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10493 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10494 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10495 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10496 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10497 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10498 }
10499 else
10500 {
10501 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10502 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10503 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10504 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10505 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10506 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10507 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10508 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10509 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10510 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10511 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10512 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10513 }
10514
10515 /* Pixel compare. */
10516 if (TARGET_ARCH64)
10517 {
10518 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10519 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10520 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10521 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10522 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10523 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10524 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10525 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10526 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10527 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10528 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10529 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10530 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10531 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10532 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10533 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10534 }
10535 else
10536 {
10537 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10538 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10539 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10540 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10541 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10542 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10543 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10544 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10545 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10546 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10547 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10548 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10549 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10550 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10551 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10552 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10553 }
10554
10555 /* Addition and subtraction. */
10556 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10557 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10558 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10559 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10560 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10561 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10562 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10563 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10564 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10565 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10566 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10567 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10568 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10569 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10570 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10571 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10572
10573 /* Three-dimensional array addressing. */
10574 if (TARGET_ARCH64)
10575 {
10576 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10577 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10578 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10579 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10580 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10581 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10582 }
10583 else
10584 {
10585 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10586 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10587 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10588 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10589 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10590 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10591 }
10592
10593 if (TARGET_VIS2)
10594 {
10595 /* Edge handling. */
10596 if (TARGET_ARCH64)
10597 {
10598 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10599 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10600 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10601 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10602 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10603 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10604 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10605 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10606 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10607 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10608 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10609 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10610 }
10611 else
10612 {
10613 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10614 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10615 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10616 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10617 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10618 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10619 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10620 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10621 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10622 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10623 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10624 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10625 }
10626
10627 /* Byte mask and shuffle. */
10628 if (TARGET_ARCH64)
10629 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10630 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10631 else
10632 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10633 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10634 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10635 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10636 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10637 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10638 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10639 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10640 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10641 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10642 }
10643
10644 if (TARGET_VIS3)
10645 {
10646 if (TARGET_ARCH64)
10647 {
10648 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10649 SPARC_BUILTIN_CMASK8, void_ftype_di);
10650 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10651 SPARC_BUILTIN_CMASK16, void_ftype_di);
10652 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10653 SPARC_BUILTIN_CMASK32, void_ftype_di);
10654 }
10655 else
10656 {
10657 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10658 SPARC_BUILTIN_CMASK8, void_ftype_si);
10659 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10660 SPARC_BUILTIN_CMASK16, void_ftype_si);
10661 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10662 SPARC_BUILTIN_CMASK32, void_ftype_si);
10663 }
10664
10665 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10666 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10667
10668 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10669 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10670 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10671 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10672 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10673 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10674 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10675 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10676 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10677 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10678 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10679 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10680 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10681 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10682 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10683 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10684
10685 if (TARGET_ARCH64)
10686 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10687 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10688 else
10689 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10690 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10691
10692 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10693 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10694 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10695 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10696 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10697 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10698
10699 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10700 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10701 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10702 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10703 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10704 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10705 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10706 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10707 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10708 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10709 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10710 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10711 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10712 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10713 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10714 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10715
10716 if (TARGET_ARCH64)
10717 {
10718 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10719 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10720 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10721 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10722 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10723 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10724 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10725 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10726 }
10727 else
10728 {
10729 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10730 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10731 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10732 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10733 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10734 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10735 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10736 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10737 }
10738
10739 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10740 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10741 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10742 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10743 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10744 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10745 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10746 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10747 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10748 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10749 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10750 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10751
10752 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10753 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10754 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10755 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10756 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10757 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10758 }
10759
10760 if (TARGET_VIS4)
10761 {
10762 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10763 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10764 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10765 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10766 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10767 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10768 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10769 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10770
10771
10772 if (TARGET_ARCH64)
10773 {
10774 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10775 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10776 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10777 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10778 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10779 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10780 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10781 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10782 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10783 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10784 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10785 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10786 }
10787 else
10788 {
10789 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10790 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10791 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10792 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10793 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10794 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10795 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10796 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10797 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10798 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10799 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10800 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10801 }
10802
10803 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10804 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10805 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10806 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10807 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10808 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10809 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10810 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10811 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10812 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10813 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10814 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10815 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10816 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10817 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10818 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10819 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10820 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10821 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10822 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10823 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10824 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10825 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10826 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10827 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10828 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10829 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10830 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10831 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10832 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10833 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10834 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10835 }
10836 }
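
/* For illustration, user code typically reaches these builtins through the
   documented VIS vector typedefs, e.g.

       typedef short v4hi __attribute__ ((vector_size (8)));
       v4hi add_pixels (v4hi a, v4hi b)
       {
         return __builtin_vis_fpadd16 (a, b);
       }

   which compiles down to a single fpadd16 when VIS is enabled.  */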
10837
10838 /* Implement TARGET_BUILTIN_DECL hook. */
10839
10840 static tree
10841 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10842 {
10843 if (code >= SPARC_BUILTIN_MAX)
10844 return error_mark_node;
10845
10846 return sparc_builtins[code];
10847 }
10848
10849 /* Implement TARGET_EXPAND_BUILTIN hook. */
10850
10851 static rtx
10852 sparc_expand_builtin (tree exp, rtx target,
10853 rtx subtarget ATTRIBUTE_UNUSED,
10854 machine_mode tmode ATTRIBUTE_UNUSED,
10855 int ignore ATTRIBUTE_UNUSED)
10856 {
10857 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10858 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10859 enum insn_code icode = sparc_builtins_icode[code];
10860 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10861 call_expr_arg_iterator iter;
10862 int arg_count = 0;
10863 rtx pat, op[4];
10864 tree arg;
10865
10866 if (nonvoid)
10867 {
10868 machine_mode tmode = insn_data[icode].operand[0].mode;
10869 if (!target
10870 || GET_MODE (target) != tmode
10871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10872 op[0] = gen_reg_rtx (tmode);
10873 else
10874 op[0] = target;
10875 }
10876
10877 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10878 {
10879 const struct insn_operand_data *insn_op;
10880 int idx;
10881
10882 if (arg == error_mark_node)
10883 return NULL_RTX;
10884
10885 arg_count++;
10886 idx = arg_count - !nonvoid;
10887 insn_op = &insn_data[icode].operand[idx];
10888 op[arg_count] = expand_normal (arg);
10889
10890 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10891 {
10892 if (!address_operand (op[arg_count], SImode))
10893 {
10894 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10895 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10896 }
10897 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10898 }
10899
10900 else if (insn_op->mode == V1DImode
10901 && GET_MODE (op[arg_count]) == DImode)
10902 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10903
10904 else if (insn_op->mode == V1SImode
10905 && GET_MODE (op[arg_count]) == SImode)
10906 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10907
10908 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10909 insn_op->mode))
10910 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10911 }
10912
10913 switch (arg_count)
10914 {
10915 case 0:
10916 pat = GEN_FCN (icode) (op[0]);
10917 break;
10918 case 1:
10919 if (nonvoid)
10920 pat = GEN_FCN (icode) (op[0], op[1]);
10921 else
10922 pat = GEN_FCN (icode) (op[1]);
10923 break;
10924 case 2:
10925 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10926 break;
10927 case 3:
10928 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10929 break;
10930 default:
10931 gcc_unreachable ();
10932 }
10933
10934 if (!pat)
10935 return NULL_RTX;
10936
10937 emit_insn (pat);
10938
10939 return (nonvoid ? op[0] : const0_rtx);
10940 }
10941
10942 /* Return the upper 16 bits of the 8x16 multiplication. */
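/* For example (illustration only): e8 = 100 and e16 = 300 give
   (100 * 300 + 128) / 256 = 117, i.e. the 8x16 product scaled down
   by 256 and rounded to nearest. */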
10943
10944 static int
10945 sparc_vis_mul8x16 (int e8, int e16)
10946 {
10947 return (e8 * e16 + 128) / 256;
10948 }
10949
10950 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10951 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10952
10953 static void
10954 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10955 tree inner_type, tree cst0, tree cst1)
10956 {
10957 unsigned i, num = VECTOR_CST_NELTS (cst0);
10958 int scale;
10959
10960 switch (fncode)
10961 {
10962 case SPARC_BUILTIN_FMUL8X16:
10963 for (i = 0; i < num; ++i)
10964 {
10965 int val
10966 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10967 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10968 n_elts[i] = build_int_cst (inner_type, val);
10969 }
10970 break;
10971
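/* The AU and AL variants scale every element of CST0 by a single
   16-bit element of CST1: element 0 for AU, element 1 for AL. */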
10972 case SPARC_BUILTIN_FMUL8X16AU:
10973 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10974
10975 for (i = 0; i < num; ++i)
10976 {
10977 int val
10978 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10979 scale);
10980 n_elts[i] = build_int_cst (inner_type, val);
10981 }
10982 break;
10983
10984 case SPARC_BUILTIN_FMUL8X16AL:
10985 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10986
10987 for (i = 0; i < num; ++i)
10988 {
10989 int val
10990 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10991 scale);
10992 n_elts[i] = build_int_cst (inner_type, val);
10993 }
10994 break;
10995
10996 default:
10997 gcc_unreachable ();
10998 }
10999 }
11000
11001 /* Implement TARGET_FOLD_BUILTIN hook.
11002
11003 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11004 result of the function call is ignored. NULL_TREE is returned if the
11005 function could not be folded. */
11006
11007 static tree
11008 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11009 tree *args, bool ignore)
11010 {
11011 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11012 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11013 tree arg0, arg1, arg2;
11014
11015 if (ignore)
11016 switch (code)
11017 {
11018 case SPARC_BUILTIN_LDFSR:
11019 case SPARC_BUILTIN_STFSR:
11020 case SPARC_BUILTIN_ALIGNADDR:
11021 case SPARC_BUILTIN_WRGSR:
11022 case SPARC_BUILTIN_BMASK:
11023 case SPARC_BUILTIN_CMASK8:
11024 case SPARC_BUILTIN_CMASK16:
11025 case SPARC_BUILTIN_CMASK32:
11026 break;
11027
11028 default:
11029 return build_zero_cst (rtype);
11030 }
11031
11032 switch (code)
11033 {
11034 case SPARC_BUILTIN_FEXPAND:
11035 arg0 = args[0];
11036 STRIP_NOPS (arg0);
11037
11038 if (TREE_CODE (arg0) == VECTOR_CST)
11039 {
11040 tree inner_type = TREE_TYPE (rtype);
11041 tree *n_elts;
11042 unsigned i;
11043
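/* fexpand widens each unsigned 8-bit element into a 16-bit fixed-point
   value; the constant fold below mirrors that by shifting each element
   left by 4. */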
11044 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11045 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11046 n_elts[i] = build_int_cst (inner_type,
11047 TREE_INT_CST_LOW
11048 (VECTOR_CST_ELT (arg0, i)) << 4);
11049 return build_vector (rtype, n_elts);
11050 }
11051 break;
11052
11053 case SPARC_BUILTIN_FMUL8X16:
11054 case SPARC_BUILTIN_FMUL8X16AU:
11055 case SPARC_BUILTIN_FMUL8X16AL:
11056 arg0 = args[0];
11057 arg1 = args[1];
11058 STRIP_NOPS (arg0);
11059 STRIP_NOPS (arg1);
11060
11061 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11062 {
11063 tree inner_type = TREE_TYPE (rtype);
11064 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11065 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11066 return build_vector (rtype, n_elts);
11067 }
11068 break;
11069
11070 case SPARC_BUILTIN_FPMERGE:
11071 arg0 = args[0];
11072 arg1 = args[1];
11073 STRIP_NOPS (arg0);
11074 STRIP_NOPS (arg1);
11075
11076 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11077 {
11078 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11079 unsigned i;
11080 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11081 {
11082 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11083 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11084 }
11085
11086 return build_vector (rtype, n_elts);
11087 }
11088 break;
11089
11090 case SPARC_BUILTIN_PDIST:
11091 case SPARC_BUILTIN_PDISTN:
11092 arg0 = args[0];
11093 arg1 = args[1];
11094 STRIP_NOPS (arg0);
11095 STRIP_NOPS (arg1);
11096 if (code == SPARC_BUILTIN_PDIST)
11097 {
11098 arg2 = args[2];
11099 STRIP_NOPS (arg2);
11100 }
11101 else
11102 arg2 = integer_zero_node;
11103
11104 if (TREE_CODE (arg0) == VECTOR_CST
11105 && TREE_CODE (arg1) == VECTOR_CST
11106 && TREE_CODE (arg2) == INTEGER_CST)
11107 {
11108 bool overflow = false;
11109 widest_int result = wi::to_widest (arg2);
11110 widest_int tmp;
11111 unsigned i;
11112
11113 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11114 {
11115 tree e0 = VECTOR_CST_ELT (arg0, i);
11116 tree e1 = VECTOR_CST_ELT (arg1, i);
11117
11118 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11119
11120 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11121 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11122 if (wi::neg_p (tmp))
11123 tmp = wi::neg (tmp, &neg2_ovf);
11124 else
11125 neg2_ovf = false;
11126 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11127 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11128 }
11129
11130 gcc_assert (!overflow);
11131
11132 return wide_int_to_tree (rtype, result);
11133 }
11134
11135 default:
11136 break;
11137 }
11138
11139 return NULL_TREE;
11140 }
11141 \f
11142 /* ??? This duplicates information provided to the compiler by the
11143 ??? scheduler description. Some day, teach genautomata to output
11144 ??? the latencies and then CSE will just use that. */
11145
11146 static bool
11147 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11148 int opno ATTRIBUTE_UNUSED,
11149 int *total, bool speed ATTRIBUTE_UNUSED)
11150 {
11151 int code = GET_CODE (x);
11152 bool float_mode_p = FLOAT_MODE_P (mode);
11153
11154 switch (code)
11155 {
11156 case CONST_INT:
11157 if (SMALL_INT (x))
11158 *total = 0;
11159 else
11160 *total = 2;
11161 return true;
11162
11163 case CONST_WIDE_INT:
11164 *total = 0;
11165 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11166 *total += 2;
11167 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11168 *total += 2;
11169 return true;
11170
11171 case HIGH:
11172 *total = 2;
11173 return true;
11174
11175 case CONST:
11176 case LABEL_REF:
11177 case SYMBOL_REF:
11178 *total = 4;
11179 return true;
11180
11181 case CONST_DOUBLE:
11182 *total = 8;
11183 return true;
11184
11185 case MEM:
11186 /* If outer-code was a sign or zero extension, a cost
11187 of COSTS_N_INSNS (1) was already added in. This is
11188 why we are subtracting it back out. */
11189 if (outer_code == ZERO_EXTEND)
11190 {
11191 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11192 }
11193 else if (outer_code == SIGN_EXTEND)
11194 {
11195 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11196 }
11197 else if (float_mode_p)
11198 {
11199 *total = sparc_costs->float_load;
11200 }
11201 else
11202 {
11203 *total = sparc_costs->int_load;
11204 }
11205
11206 return true;
11207
11208 case PLUS:
11209 case MINUS:
11210 if (float_mode_p)
11211 *total = sparc_costs->float_plusminus;
11212 else
11213 *total = COSTS_N_INSNS (1);
11214 return false;
11215
11216 case FMA:
11217 {
11218 rtx sub;
11219
11220 gcc_assert (float_mode_p);
11221 *total = sparc_costs->float_mul;
11222
11223 sub = XEXP (x, 0);
11224 if (GET_CODE (sub) == NEG)
11225 sub = XEXP (sub, 0);
11226 *total += rtx_cost (sub, mode, FMA, 0, speed);
11227
11228 sub = XEXP (x, 2);
11229 if (GET_CODE (sub) == NEG)
11230 sub = XEXP (sub, 0);
11231 *total += rtx_cost (sub, mode, FMA, 2, speed);
11232 return true;
11233 }
11234
11235 case MULT:
11236 if (float_mode_p)
11237 *total = sparc_costs->float_mul;
11238 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11239 *total = COSTS_N_INSNS (25);
11240 else
11241 {
11242 int bit_cost;
11243
11244 bit_cost = 0;
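/* On CPUs whose multiply latency depends on the operand values
   (int_mul_bit_factor != 0), charge extra for the set bits of a constant
   multiplier beyond the first 3; a non-constant operand is assumed to
   have 7 set bits. E.g. (illustration only) 9 set bits with
   int_mul_bit_factor == 2 add COSTS_N_INSNS ((9 - 3) / 2) to the base
   multiply cost. */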
11245 if (sparc_costs->int_mul_bit_factor)
11246 {
11247 int nbits;
11248
11249 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11250 {
11251 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11252 for (nbits = 0; value != 0; value &= value - 1)
11253 nbits++;
11254 }
11255 else
11256 nbits = 7;
11257
11258 if (nbits < 3)
11259 nbits = 3;
11260 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11261 bit_cost = COSTS_N_INSNS (bit_cost);
11262 }
11263
11264 if (mode == DImode || !TARGET_HARD_MUL)
11265 *total = sparc_costs->int_mulX + bit_cost;
11266 else
11267 *total = sparc_costs->int_mul + bit_cost;
11268 }
11269 return false;
11270
11271 case ASHIFT:
11272 case ASHIFTRT:
11273 case LSHIFTRT:
11274 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11275 return false;
11276
11277 case DIV:
11278 case UDIV:
11279 case MOD:
11280 case UMOD:
11281 if (float_mode_p)
11282 {
11283 if (mode == DFmode)
11284 *total = sparc_costs->float_div_df;
11285 else
11286 *total = sparc_costs->float_div_sf;
11287 }
11288 else
11289 {
11290 if (mode == DImode)
11291 *total = sparc_costs->int_divX;
11292 else
11293 *total = sparc_costs->int_div;
11294 }
11295 return false;
11296
11297 case NEG:
11298 if (! float_mode_p)
11299 {
11300 *total = COSTS_N_INSNS (1);
11301 return false;
11302 }
11303 /* FALLTHRU */
11304
11305 case ABS:
11306 case FLOAT:
11307 case UNSIGNED_FLOAT:
11308 case FIX:
11309 case UNSIGNED_FIX:
11310 case FLOAT_EXTEND:
11311 case FLOAT_TRUNCATE:
11312 *total = sparc_costs->float_move;
11313 return false;
11314
11315 case SQRT:
11316 if (mode == DFmode)
11317 *total = sparc_costs->float_sqrt_df;
11318 else
11319 *total = sparc_costs->float_sqrt_sf;
11320 return false;
11321
11322 case COMPARE:
11323 if (float_mode_p)
11324 *total = sparc_costs->float_cmp;
11325 else
11326 *total = COSTS_N_INSNS (1);
11327 return false;
11328
11329 case IF_THEN_ELSE:
11330 if (float_mode_p)
11331 *total = sparc_costs->float_cmove;
11332 else
11333 *total = sparc_costs->int_cmove;
11334 return false;
11335
11336 case IOR:
11337 /* Handle the NAND vector patterns. */
11338 if (sparc_vector_mode_supported_p (mode)
11339 && GET_CODE (XEXP (x, 0)) == NOT
11340 && GET_CODE (XEXP (x, 1)) == NOT)
11341 {
11342 *total = COSTS_N_INSNS (1);
11343 return true;
11344 }
11345 else
11346 return false;
11347
11348 default:
11349 return false;
11350 }
11351 }
11352
11353 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11354
11355 static inline bool
11356 general_or_i64_p (reg_class_t rclass)
11357 {
11358 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11359 }
11360
11361 /* Implement TARGET_REGISTER_MOVE_COST. */
11362
11363 static int
11364 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11365 reg_class_t from, reg_class_t to)
11366 {
11367 bool need_memory = false;
11368
11369 /* This helps postreload CSE to eliminate redundant comparisons. */
11370 if (from == NO_REGS || to == NO_REGS)
11371 return 100;
11372
11373 if (from == FPCC_REGS || to == FPCC_REGS)
11374 need_memory = true;
11375 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11376 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11377 {
11378 if (TARGET_VIS3)
11379 {
11380 int size = GET_MODE_SIZE (mode);
11381 if (size == 8 || size == 4)
11382 {
11383 if (! TARGET_ARCH32 || size == 4)
11384 return 4;
11385 else
11386 return 6;
11387 }
11388 }
11389 need_memory = true;
11390 }
11391
11392 if (need_memory)
11393 {
11394 if (sparc_cpu == PROCESSOR_ULTRASPARC
11395 || sparc_cpu == PROCESSOR_ULTRASPARC3
11396 || sparc_cpu == PROCESSOR_NIAGARA
11397 || sparc_cpu == PROCESSOR_NIAGARA2
11398 || sparc_cpu == PROCESSOR_NIAGARA3
11399 || sparc_cpu == PROCESSOR_NIAGARA4
11400 || sparc_cpu == PROCESSOR_NIAGARA7)
11401 return 12;
11402
11403 return 6;
11404 }
11405
11406 return 2;
11407 }
11408
11409 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11410 This is achieved by means of a manual dynamic stack space allocation in
11411 the current frame. We make the assumption that SEQ doesn't contain any
11412 function calls, with the possible exception of calls to the GOT helper. */
11413
11414 static void
11415 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11416 {
11417 /* We must preserve the lowest 16 words for the register save area. */
11418 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11419 /* We really need only 2 words of fresh stack space. */
11420 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11421
11422 rtx slot
11423 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11424 SPARC_STACK_BIAS + offset));
11425
11426 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11427 emit_insn (gen_rtx_SET (slot, reg));
11428 if (reg2)
11429 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11430 reg2));
11431 emit_insn (seq);
11432 if (reg2)
11433 emit_insn (gen_rtx_SET (reg2,
11434 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11435 emit_insn (gen_rtx_SET (reg, slot));
11436 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11437 }
11438
11439 /* Output the assembler code for a thunk function. THUNK_DECL is the
11440 declaration for the thunk function itself, FUNCTION is the decl for
11441 the target function. DELTA is an immediate constant offset to be
11442 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11443 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11444
11445 static void
11446 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11447 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11448 tree function)
11449 {
11450 rtx this_rtx, funexp;
11451 rtx_insn *insn;
11452 unsigned int int_arg_first;
11453
11454 reload_completed = 1;
11455 epilogue_completed = 1;
11456
11457 emit_note (NOTE_INSN_PROLOGUE_END);
11458
11459 if (TARGET_FLAT)
11460 {
11461 sparc_leaf_function_p = 1;
11462
11463 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11464 }
11465 else if (flag_delayed_branch)
11466 {
11467 /* We will emit a regular sibcall below, so we need to instruct
11468 output_sibcall that we are in a leaf function. */
11469 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11470
11471 /* This will cause final.c to invoke leaf_renumber_regs so we
11472 must behave as if we were in a not-yet-leafified function. */
11473 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11474 }
11475 else
11476 {
11477 /* We will emit the sibcall manually below, so we will need to
11478 manually spill non-leaf registers. */
11479 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11480
11481 /* We really are in a leaf function. */
11482 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11483 }
11484
11485 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11486 returns a structure, the structure return pointer is there instead. */
11487 if (TARGET_ARCH64
11488 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11489 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11490 else
11491 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11492
11493 /* Add DELTA. When possible use a plain add, otherwise load it into
11494 a register first. */
11495 if (delta)
11496 {
11497 rtx delta_rtx = GEN_INT (delta);
11498
11499 if (! SPARC_SIMM13_P (delta))
11500 {
11501 rtx scratch = gen_rtx_REG (Pmode, 1);
11502 emit_move_insn (scratch, delta_rtx);
11503 delta_rtx = scratch;
11504 }
11505
11506 /* THIS_RTX += DELTA. */
11507 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11508 }
11509
11510 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11511 if (vcall_offset)
11512 {
11513 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11514 rtx scratch = gen_rtx_REG (Pmode, 1);
11515
11516 gcc_assert (vcall_offset < 0);
11517
11518 /* SCRATCH = *THIS_RTX. */
11519 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11520
11521 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11522 may not have any available scratch register at this point. */
11523 if (SPARC_SIMM13_P (vcall_offset))
11524 ;
11525 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11526 else if (! fixed_regs[5]
11527 /* The below sequence is made up of at least 2 insns,
11528 while the default method may need only one. */
11529 && vcall_offset < -8192)
11530 {
11531 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11532 emit_move_insn (scratch2, vcall_offset_rtx);
11533 vcall_offset_rtx = scratch2;
11534 }
11535 else
11536 {
11537 rtx increment = GEN_INT (-4096);
11538
11539 /* VCALL_OFFSET is a negative number whose typical range can be
11540 estimated as -32768..0 in 32-bit mode. In almost all cases
11541 it is therefore cheaper to emit multiple add insns than
11542 spilling and loading the constant into a register (at least
11543 6 insns). */
11544 while (! SPARC_SIMM13_P (vcall_offset))
11545 {
11546 emit_insn (gen_add2_insn (scratch, increment));
11547 vcall_offset += 4096;
11548 }
11549 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11550 }
11551
11552 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11553 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11554 gen_rtx_PLUS (Pmode,
11555 scratch,
11556 vcall_offset_rtx)));
11557
11558 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11559 emit_insn (gen_add2_insn (this_rtx, scratch));
11560 }
11561
11562 /* Generate a tail call to the target function. */
11563 if (! TREE_USED (function))
11564 {
11565 assemble_external (function);
11566 TREE_USED (function) = 1;
11567 }
11568 funexp = XEXP (DECL_RTL (function), 0);
11569
11570 if (flag_delayed_branch)
11571 {
11572 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11573 insn = emit_call_insn (gen_sibcall (funexp));
11574 SIBLING_CALL_P (insn) = 1;
11575 }
11576 else
11577 {
11578 /* The hoops we have to jump through in order to generate a sibcall
11579 without using delay slots... */
11580 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11581
11582 if (flag_pic)
11583 {
11584 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11585 start_sequence ();
11586 load_got_register (); /* clobbers %o7 */
11587 scratch = sparc_legitimize_pic_address (funexp, scratch);
11588 seq = get_insns ();
11589 end_sequence ();
11590 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11591 }
11592 else if (TARGET_ARCH32)
11593 {
11594 emit_insn (gen_rtx_SET (scratch,
11595 gen_rtx_HIGH (SImode, funexp)));
11596 emit_insn (gen_rtx_SET (scratch,
11597 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11598 }
11599 else /* TARGET_ARCH64 */
11600 {
11601 switch (sparc_cmodel)
11602 {
11603 case CM_MEDLOW:
11604 case CM_MEDMID:
11605 /* The destination can serve as a temporary. */
11606 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11607 break;
11608
11609 case CM_MEDANY:
11610 case CM_EMBMEDANY:
11611 /* The destination cannot serve as a temporary. */
11612 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11613 start_sequence ();
11614 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11615 seq = get_insns ();
11616 end_sequence ();
11617 emit_and_preserve (seq, spill_reg, 0);
11618 break;
11619
11620 default:
11621 gcc_unreachable ();
11622 }
11623 }
11624
11625 emit_jump_insn (gen_indirect_jump (scratch));
11626 }
11627
11628 emit_barrier ();
11629
11630 /* Run just enough of rest_of_compilation to get the insns emitted.
11631 There's not really enough bulk here to make other passes such as
11632 instruction scheduling worthwhile. Note that use_thunk calls
11633 assemble_start_function and assemble_end_function. */
11634 insn = get_insns ();
11635 shorten_branches (insn);
11636 final_start_function (insn, file, 1);
11637 final (insn, file, 1);
11638 final_end_function ();
11639
11640 reload_completed = 0;
11641 epilogue_completed = 0;
11642 }
11643
11644 /* Return true if sparc_output_mi_thunk would be able to output the
11645 assembler code for the thunk function specified by the arguments
11646 it is passed, and false otherwise. */
11647 static bool
11648 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11649 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11650 HOST_WIDE_INT vcall_offset,
11651 const_tree function ATTRIBUTE_UNUSED)
11652 {
11653 /* Bound the loop used in the default method above. */
11654 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11655 }
11656
11657 /* How to allocate a 'struct machine_function'. */
11658
11659 static struct machine_function *
11660 sparc_init_machine_status (void)
11661 {
11662 return ggc_cleared_alloc<machine_function> ();
11663 }
11664
11665 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11666 We need to emit DTP-relative relocations. */
11667
11668 static void
11669 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11670 {
11671 switch (size)
11672 {
11673 case 4:
11674 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11675 break;
11676 case 8:
11677 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11678 break;
11679 default:
11680 gcc_unreachable ();
11681 }
11682 output_addr_const (file, x);
11683 fputs (")", file);
11684 }
11685
11686 /* Do whatever processing is required at the end of a file. */
11687
11688 static void
11689 sparc_file_end (void)
11690 {
11691 /* If we need to emit the special GOT helper function, do so now. */
11692 if (got_helper_rtx)
11693 {
11694 const char *name = XSTR (got_helper_rtx, 0);
11695 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11696 #ifdef DWARF2_UNWIND_INFO
11697 bool do_cfi;
11698 #endif
11699
11700 if (USE_HIDDEN_LINKONCE)
11701 {
11702 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11703 get_identifier (name),
11704 build_function_type_list (void_type_node,
11705 NULL_TREE));
11706 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11707 NULL_TREE, void_type_node);
11708 TREE_PUBLIC (decl) = 1;
11709 TREE_STATIC (decl) = 1;
11710 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11711 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11712 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11713 resolve_unique_section (decl, 0, flag_function_sections);
11714 allocate_struct_function (decl, true);
11715 cfun->is_thunk = 1;
11716 current_function_decl = decl;
11717 init_varasm_status ();
11718 assemble_start_function (decl, name);
11719 }
11720 else
11721 {
11722 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11723 switch_to_section (text_section);
11724 if (align > 0)
11725 ASM_OUTPUT_ALIGN (asm_out_file, align);
11726 ASM_OUTPUT_LABEL (asm_out_file, name);
11727 }
11728
11729 #ifdef DWARF2_UNWIND_INFO
11730 do_cfi = dwarf2out_do_cfi_asm ();
11731 if (do_cfi)
11732 fprintf (asm_out_file, "\t.cfi_startproc\n");
11733 #endif
11734 if (flag_delayed_branch)
11735 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11736 reg_name, reg_name);
11737 else
11738 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11739 reg_name, reg_name);
11740 #ifdef DWARF2_UNWIND_INFO
11741 if (do_cfi)
11742 fprintf (asm_out_file, "\t.cfi_endproc\n");
11743 #endif
11744 }
11745
11746 if (NEED_INDICATE_EXEC_STACK)
11747 file_end_indicate_exec_stack ();
11748
11749 #ifdef TARGET_SOLARIS
11750 solaris_file_end ();
11751 #endif
11752 }
11753
11754 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11755 /* Implement TARGET_MANGLE_TYPE. */
11756
11757 static const char *
11758 sparc_mangle_type (const_tree type)
11759 {
11760 if (TARGET_ARCH32
11761 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11762 && TARGET_LONG_DOUBLE_128)
11763 return "g";
11764
11765 /* For all other types, use normal C++ mangling. */
11766 return NULL;
11767 }
11768 #endif
11769
11770 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11771 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11772 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
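/* For instance, sparc_expand_compare_and_swap below brackets the CAS with
   sparc_emit_membar_for_model (model, 3, 1) and (model, 3, 2): the access
   acts as both a load and a store (LOAD_STORE == 3) and a barrier is
   requested before (1) and after (2) the access, respectively. */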
11773
11774 void
11775 sparc_emit_membar_for_model (enum memmodel model,
11776 int load_store, int before_after)
11777 {
11778 /* Bits for the MEMBAR mmask field. */
11779 const int LoadLoad = 1;
11780 const int StoreLoad = 2;
11781 const int LoadStore = 4;
11782 const int StoreStore = 8;
11783
11784 int mm = 0, implied = 0;
11785
11786 switch (sparc_memory_model)
11787 {
11788 case SMM_SC:
11789 /* Sequential Consistency. All memory transactions are immediately
11790 visible in sequential execution order. No barriers needed. */
11791 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11792 break;
11793
11794 case SMM_TSO:
11795 /* Total Store Ordering: all memory transactions with store semantics
11796 are followed by an implied StoreStore. */
11797 implied |= StoreStore;
11798
11799 /* If we're not looking for a raw barrier (before+after), then atomic
11800 operations get the benefit of being both load and store. */
11801 if (load_store == 3 && before_after == 1)
11802 implied |= StoreLoad;
11803 /* FALLTHRU */
11804
11805 case SMM_PSO:
11806 /* Partial Store Ordering: all memory transactions with load semantics
11807 are followed by an implied LoadLoad | LoadStore. */
11808 implied |= LoadLoad | LoadStore;
11809
11810 /* If we're not looking for a raw barrier (before+after), then atomic
11811 operations get the benefit of being both load and store. */
11812 if (load_store == 3 && before_after == 2)
11813 implied |= StoreLoad | StoreStore;
11814 /* FALLTHRU */
11815
11816 case SMM_RMO:
11817 /* Relaxed Memory Ordering: no implicit bits. */
11818 break;
11819
11820 default:
11821 gcc_unreachable ();
11822 }
11823
11824 if (before_after & 1)
11825 {
11826 if (is_mm_release (model) || is_mm_acq_rel (model)
11827 || is_mm_seq_cst (model))
11828 {
11829 if (load_store & 1)
11830 mm |= LoadLoad | StoreLoad;
11831 if (load_store & 2)
11832 mm |= LoadStore | StoreStore;
11833 }
11834 }
11835 if (before_after & 2)
11836 {
11837 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11838 || is_mm_seq_cst (model))
11839 {
11840 if (load_store & 1)
11841 mm |= LoadLoad | LoadStore;
11842 if (load_store & 2)
11843 mm |= StoreLoad | StoreStore;
11844 }
11845 }
11846
11847 /* Remove the bits implied by the system memory model. */
11848 mm &= ~implied;
11849
11850 /* For raw barriers (before+after), always emit a barrier.
11851 This will become a compile-time barrier if needed. */
11852 if (mm || before_after == 3)
11853 emit_insn (gen_membar (GEN_INT (mm)));
11854 }
11855
11856 /* Expand code to perform an 8 or 16-bit compare and swap by doing a 32-bit
11857 compare and swap on the word containing the byte or half-word. */
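/* A rough sketch of the expansion (illustration only):

   word = addr & -4; shift = bit offset of the sub-word within *word;
   mask = (0xff or 0xffff) << shift;
   val = *word & ~mask; -- the surrounding bytes we assume unchanged
   for (;;)
     {
       expected = ((oldval << shift) & mask) | val;
       res = CAS (word, expected, ((newval << shift) & mask) | val);
       if (res == expected)
         break; -- success
       if ((res & ~mask) == val)
         break; -- genuine mismatch on the sub-word
       val = res & ~mask; -- only the surroundings changed, retry
     }
   result = (res & mask) >> shift;

   BOOL_RESULT is left at 1 only when the final CAS succeeds. */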
11858
11859 static void
11860 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11861 rtx oldval, rtx newval)
11862 {
11863 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11864 rtx addr = gen_reg_rtx (Pmode);
11865 rtx off = gen_reg_rtx (SImode);
11866 rtx oldv = gen_reg_rtx (SImode);
11867 rtx newv = gen_reg_rtx (SImode);
11868 rtx oldvalue = gen_reg_rtx (SImode);
11869 rtx newvalue = gen_reg_rtx (SImode);
11870 rtx res = gen_reg_rtx (SImode);
11871 rtx resv = gen_reg_rtx (SImode);
11872 rtx memsi, val, mask, cc;
11873
11874 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11875
11876 if (Pmode != SImode)
11877 addr1 = gen_lowpart (SImode, addr1);
11878 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11879
11880 memsi = gen_rtx_MEM (SImode, addr);
11881 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11882 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11883
11884 val = copy_to_reg (memsi);
11885
11886 emit_insn (gen_rtx_SET (off,
11887 gen_rtx_XOR (SImode, off,
11888 GEN_INT (GET_MODE (mem) == QImode
11889 ? 3 : 2))));
11890
11891 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11892
11893 if (GET_MODE (mem) == QImode)
11894 mask = force_reg (SImode, GEN_INT (0xff));
11895 else
11896 mask = force_reg (SImode, GEN_INT (0xffff));
11897
11898 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11899
11900 emit_insn (gen_rtx_SET (val,
11901 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11902 val)));
11903
11904 oldval = gen_lowpart (SImode, oldval);
11905 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11906
11907 newval = gen_lowpart_common (SImode, newval);
11908 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11909
11910 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11911
11912 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11913
11914 rtx_code_label *end_label = gen_label_rtx ();
11915 rtx_code_label *loop_label = gen_label_rtx ();
11916 emit_label (loop_label);
11917
11918 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11919
11920 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11921
11922 emit_move_insn (bool_result, const1_rtx);
11923
11924 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11925
11926 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11927
11928 emit_insn (gen_rtx_SET (resv,
11929 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11930 res)));
11931
11932 emit_move_insn (bool_result, const0_rtx);
11933
11934 cc = gen_compare_reg_1 (NE, resv, val);
11935 emit_insn (gen_rtx_SET (val, resv));
11936
11937 /* Use cbranchcc4 to separate the compare and branch! */
11938 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11939 cc, const0_rtx, loop_label));
11940
11941 emit_label (end_label);
11942
11943 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11944
11945 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11946
11947 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11948 }
11949
11950 /* Expand code to perform a compare-and-swap. */
11951
11952 void
11953 sparc_expand_compare_and_swap (rtx operands[])
11954 {
11955 rtx bval, retval, mem, oldval, newval;
11956 machine_mode mode;
11957 enum memmodel model;
11958
11959 bval = operands[0];
11960 retval = operands[1];
11961 mem = operands[2];
11962 oldval = operands[3];
11963 newval = operands[4];
11964 model = (enum memmodel) INTVAL (operands[6]);
11965 mode = GET_MODE (mem);
11966
11967 sparc_emit_membar_for_model (model, 3, 1);
11968
11969 if (reg_overlap_mentioned_p (retval, oldval))
11970 oldval = copy_to_reg (oldval);
11971
11972 if (mode == QImode || mode == HImode)
11973 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11974 else
11975 {
11976 rtx (*gen) (rtx, rtx, rtx, rtx);
11977 rtx x;
11978
11979 if (mode == SImode)
11980 gen = gen_atomic_compare_and_swapsi_1;
11981 else
11982 gen = gen_atomic_compare_and_swapdi_1;
11983 emit_insn (gen (retval, mem, oldval, newval));
11984
11985 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11986 if (x != bval)
11987 convert_move (bval, x, 1);
11988 }
11989
11990 sparc_emit_membar_for_model (model, 3, 2);
11991 }
11992
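/* Convert the vector permutation selector SEL (one element index per field
   of VMODE) into the byte-granular form used by the VIS2 BSHUFFLE
   instruction and emit a bmask insn to install it in the %gsr register. */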
11993 void
11994 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11995 {
11996 rtx t_1, t_2, t_3;
11997
11998 sel = gen_lowpart (DImode, sel);
11999 switch (vmode)
12000 {
12001 case V2SImode:
12002 /* inp = xxxxxxxAxxxxxxxB */
12003 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12004 NULL_RTX, 1, OPTAB_DIRECT);
12005 /* t_1 = ....xxxxxxxAxxx. */
12006 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12007 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12008 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12009 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12010 /* sel = .......B */
12011 /* t_1 = ...A.... */
12012 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12013 /* sel = ...A...B */
12014 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12015 /* sel = AAAABBBB * 4 */
12016 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12017 /* sel = { A*4, A*4+1, A*4+2, ... } */
12018 break;
12019
12020 case V4HImode:
12021 /* inp = xxxAxxxBxxxCxxxD */
12022 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12023 NULL_RTX, 1, OPTAB_DIRECT);
12024 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12025 NULL_RTX, 1, OPTAB_DIRECT);
12026 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12027 NULL_RTX, 1, OPTAB_DIRECT);
12028 /* t_1 = ..xxxAxxxBxxxCxx */
12029 /* t_2 = ....xxxAxxxBxxxC */
12030 /* t_3 = ......xxxAxxxBxx */
12031 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12032 GEN_INT (0x07),
12033 NULL_RTX, 1, OPTAB_DIRECT);
12034 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12035 GEN_INT (0x0700),
12036 NULL_RTX, 1, OPTAB_DIRECT);
12037 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12038 GEN_INT (0x070000),
12039 NULL_RTX, 1, OPTAB_DIRECT);
12040 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12041 GEN_INT (0x07000000),
12042 NULL_RTX, 1, OPTAB_DIRECT);
12043 /* sel = .......D */
12044 /* t_1 = .....C.. */
12045 /* t_2 = ...B.... */
12046 /* t_3 = .A...... */
12047 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12048 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12049 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12050 /* sel = .A.B.C.D */
12051 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12052 /* sel = AABBCCDD * 2 */
12053 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12054 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12055 break;
12056
12057 case V8QImode:
12058 /* input = xAxBxCxDxExFxGxH */
12059 sel = expand_simple_binop (DImode, AND, sel,
12060 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12061 | 0x0f0f0f0f),
12062 NULL_RTX, 1, OPTAB_DIRECT);
12063 /* sel = .A.B.C.D.E.F.G.H */
12064 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12065 NULL_RTX, 1, OPTAB_DIRECT);
12066 /* t_1 = ..A.B.C.D.E.F.G. */
12067 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12068 NULL_RTX, 1, OPTAB_DIRECT);
12069 /* sel = .AABBCCDDEEFFGGH */
12070 sel = expand_simple_binop (DImode, AND, sel,
12071 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12072 | 0xff00ff),
12073 NULL_RTX, 1, OPTAB_DIRECT);
12074 /* sel = ..AB..CD..EF..GH */
12075 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12076 NULL_RTX, 1, OPTAB_DIRECT);
12077 /* t_1 = ....AB..CD..EF.. */
12078 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12079 NULL_RTX, 1, OPTAB_DIRECT);
12080 /* sel = ..ABABCDCDEFEFGH */
12081 sel = expand_simple_binop (DImode, AND, sel,
12082 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12083 NULL_RTX, 1, OPTAB_DIRECT);
12084 /* sel = ....ABCD....EFGH */
12085 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12086 NULL_RTX, 1, OPTAB_DIRECT);
12087 /* t_1 = ........ABCD.... */
12088 sel = gen_lowpart (SImode, sel);
12089 t_1 = gen_lowpart (SImode, t_1);
12090 break;
12091
12092 default:
12093 gcc_unreachable ();
12094 }
12095
12096 /* Always perform the final addition/merge within the bmask insn. */
12097 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12098 }
12099
12100 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12101
12102 static bool
12103 sparc_frame_pointer_required (void)
12104 {
12105 /* If the stack pointer is dynamically modified in the function, it cannot
12106 serve as the frame pointer. */
12107 if (cfun->calls_alloca)
12108 return true;
12109
12110 /* If the function receives nonlocal gotos, it needs to save the frame
12111 pointer in the nonlocal_goto_save_area object. */
12112 if (cfun->has_nonlocal_label)
12113 return true;
12114
12115 /* In flat mode, that's it. */
12116 if (TARGET_FLAT)
12117 return false;
12118
12119 /* Otherwise, the frame pointer is required if the function isn't leaf. */
12120 return !(crtl->is_leaf && only_leaf_regs_used ());
12121 }
12122
12123 /* The way this is structured, we can't eliminate SFP in favor of SP
12124 if the frame pointer is required: we want to use the SFP->HFP elimination
12125 in that case. But the test in update_eliminables doesn't know we are
12126 assuming below that we only do the former elimination. */
12127
12128 static bool
12129 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12130 {
12131 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12132 }
12133
12134 /* Return the hard frame pointer directly to bypass the stack bias. */
12135
12136 static rtx
12137 sparc_builtin_setjmp_frame_value (void)
12138 {
12139 return hard_frame_pointer_rtx;
12140 }
12141
12142 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12143 they won't be allocated. */
12144
12145 static void
12146 sparc_conditional_register_usage (void)
12147 {
12148 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12149 {
12150 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12151 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12152 }
12153 /* If the user has passed -f{fixed,call-{used,saved}}-g5, then
12154 honor it. */
12155 if (TARGET_ARCH32 && fixed_regs[5])
12156 fixed_regs[5] = 1;
12157 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12158 fixed_regs[5] = 0;
12159 if (! TARGET_V9)
12160 {
12161 int regno;
12162 for (regno = SPARC_FIRST_V9_FP_REG;
12163 regno <= SPARC_LAST_V9_FP_REG;
12164 regno++)
12165 fixed_regs[regno] = 1;
12166 /* %fcc0 is used by v8 and v9. */
12167 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12168 regno <= SPARC_LAST_V9_FCC_REG;
12169 regno++)
12170 fixed_regs[regno] = 1;
12171 }
12172 if (! TARGET_FPU)
12173 {
12174 int regno;
12175 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12176 fixed_regs[regno] = 1;
12177 }
12178 /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
12179 Likewise with g3 and g4. */
12180 if (fixed_regs[2] == 2)
12181 fixed_regs[2] = ! TARGET_APP_REGS;
12182 if (fixed_regs[3] == 2)
12183 fixed_regs[3] = ! TARGET_APP_REGS;
12184 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12185 fixed_regs[4] = ! TARGET_APP_REGS;
12186 else if (TARGET_CM_EMBMEDANY)
12187 fixed_regs[4] = 1;
12188 else if (fixed_regs[4] == 2)
12189 fixed_regs[4] = 0;
12190 if (TARGET_FLAT)
12191 {
12192 int regno;
12193 /* Disable leaf functions. */
12194 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12195 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12196 leaf_reg_remap [regno] = regno;
12197 }
12198 if (TARGET_VIS)
12199 global_regs[SPARC_GSR_REG] = 1;
12200 }
12201
12202 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12203
12204 - We can't load constants into FP registers.
12205 - We can't load FP constants into integer registers when soft-float,
12206 because there is no soft-float pattern with a r/F constraint.
12207 - We can't load FP constants into integer registers for TFmode unless
12208 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12209 - Try to reload integer constants (symbolic or otherwise) back into
12210 registers directly, rather than having them dumped to memory. */
12211
12212 static reg_class_t
12213 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12214 {
12215 machine_mode mode = GET_MODE (x);
12216 if (CONSTANT_P (x))
12217 {
12218 if (FP_REG_CLASS_P (rclass)
12219 || rclass == GENERAL_OR_FP_REGS
12220 || rclass == GENERAL_OR_EXTRA_FP_REGS
12221 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12222 || (mode == TFmode && ! const_zero_operand (x, mode)))
12223 return NO_REGS;
12224
12225 if (GET_MODE_CLASS (mode) == MODE_INT)
12226 return GENERAL_REGS;
12227
12228 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12229 {
12230 if (! FP_REG_CLASS_P (rclass)
12231 || !(const_zero_operand (x, mode)
12232 || const_all_ones_operand (x, mode)))
12233 return NO_REGS;
12234 }
12235 }
12236
12237 if (TARGET_VIS3
12238 && ! TARGET_ARCH64
12239 && (rclass == EXTRA_FP_REGS
12240 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12241 {
12242 int regno = true_regnum (x);
12243
12244 if (SPARC_INT_REG_P (regno))
12245 return (rclass == EXTRA_FP_REGS
12246 ? FP_REGS : GENERAL_OR_FP_REGS);
12247 }
12248
12249 return rclass;
12250 }
12251
12252 /* Return true if we use LRA instead of the reload pass. */
12253
12254 static bool
12255 sparc_lra_p (void)
12256 {
12257 return TARGET_LRA;
12258 }
12259
12260 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12261 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12262
12263 const char *
12264 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12265 {
12266 char mulstr[32];
12267
12268 gcc_assert (! TARGET_ARCH64);
12269
12270 if (sparc_check_64 (operands[1], insn) <= 0)
12271 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12272 if (which_alternative == 1)
12273 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12274 if (GET_CODE (operands[2]) == CONST_INT)
12275 {
12276 if (which_alternative == 1)
12277 {
12278 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12279 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12280 output_asm_insn (mulstr, operands);
12281 return "srlx\t%L0, 32, %H0";
12282 }
12283 else
12284 {
12285 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12286 output_asm_insn ("or\t%L1, %3, %3", operands);
12287 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12288 output_asm_insn (mulstr, operands);
12289 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12290 return "mov\t%3, %L0";
12291 }
12292 }
12293 else if (rtx_equal_p (operands[1], operands[2]))
12294 {
12295 if (which_alternative == 1)
12296 {
12297 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12298 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12299 output_asm_insn (mulstr, operands);
12300 return "srlx\t%L0, 32, %H0";
12301 }
12302 else
12303 {
12304 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12305 output_asm_insn ("or\t%L1, %3, %3", operands);
12306 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12307 output_asm_insn (mulstr, operands);
12308 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12309 return "mov\t%3, %L0";
12310 }
12311 }
12312 if (sparc_check_64 (operands[2], insn) <= 0)
12313 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12314 if (which_alternative == 1)
12315 {
12316 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12317 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12318 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12319 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12320 output_asm_insn (mulstr, operands);
12321 return "srlx\t%L0, 32, %H0";
12322 }
12323 else
12324 {
12325 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12326 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12327 output_asm_insn ("or\t%L1, %3, %3", operands);
12328 output_asm_insn ("or\t%L2, %4, %4", operands);
12329 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12330 output_asm_insn (mulstr, operands);
12331 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12332 return "mov\t%3, %L0";
12333 }
12334 }
12335
12336 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12337 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12338 and INNER_MODE are the modes describing TARGET. */
12339
12340 static void
12341 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12342 machine_mode inner_mode)
12343 {
12344 rtx t1, final_insn, sel;
12345 int bmask;
12346
12347 t1 = gen_reg_rtx (mode);
12348
12349 elt = convert_modes (SImode, inner_mode, elt, true);
12350 emit_move_insn (gen_lowpart(SImode, t1), elt);
12351
12352 switch (mode)
12353 {
12354 case V2SImode:
12355 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12356 bmask = 0x45674567;
12357 break;
12358 case V4HImode:
12359 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12360 bmask = 0x67676767;
12361 break;
12362 case V8QImode:
12363 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12364 bmask = 0x77777777;
12365 break;
12366 default:
12367 gcc_unreachable ();
12368 }
12369
12370 sel = force_reg (SImode, GEN_INT (bmask));
12371 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12372 emit_insn (final_insn);
12373 }
12374
12375 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12376 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12377
12378 static void
12379 vector_init_fpmerge (rtx target, rtx elt)
12380 {
12381 rtx t1, t2, t2_low, t3, t3_low;
12382
12383 t1 = gen_reg_rtx (V4QImode);
12384 elt = convert_modes (SImode, QImode, elt, true);
12385 emit_move_insn (gen_lowpart (SImode, t1), elt);
12386
12387 t2 = gen_reg_rtx (V8QImode);
12388 t2_low = gen_lowpart (V4QImode, t2);
12389 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12390
12391 t3 = gen_reg_rtx (V8QImode);
12392 t3_low = gen_lowpart (V4QImode, t3);
12393 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12394
12395 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12396 }
12397
12398 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12399 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12400
12401 static void
12402 vector_init_faligndata (rtx target, rtx elt)
12403 {
12404 rtx t1 = gen_reg_rtx (V4HImode);
12405 int i;
12406
12407 elt = convert_modes (SImode, HImode, elt, true);
12408 emit_move_insn (gen_lowpart (SImode, t1), elt);
12409
12410 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12411 force_reg (SImode, GEN_INT (6)),
12412 const0_rtx));
12413
12414 for (i = 0; i < 4; i++)
12415 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12416 }
12417
12418 /* Emit code to initialize TARGET to the values for its individual fields given in VALS. */
12419
12420 void
12421 sparc_expand_vector_init (rtx target, rtx vals)
12422 {
12423 const machine_mode mode = GET_MODE (target);
12424 const machine_mode inner_mode = GET_MODE_INNER (mode);
12425 const int n_elts = GET_MODE_NUNITS (mode);
12426 int i, n_var = 0;
12427 bool all_same = true;
12428 rtx mem;
12429
12430 for (i = 0; i < n_elts; i++)
12431 {
12432 rtx x = XVECEXP (vals, 0, i);
12433 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12434 n_var++;
12435
12436 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12437 all_same = false;
12438 }
12439
12440 if (n_var == 0)
12441 {
12442 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12443 return;
12444 }
12445
12446 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12447 {
12448 if (GET_MODE_SIZE (inner_mode) == 4)
12449 {
12450 emit_move_insn (gen_lowpart (SImode, target),
12451 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12452 return;
12453 }
12454 else if (GET_MODE_SIZE (inner_mode) == 8)
12455 {
12456 emit_move_insn (gen_lowpart (DImode, target),
12457 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12458 return;
12459 }
12460 }
12461 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12462 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12463 {
12464 emit_move_insn (gen_highpart (word_mode, target),
12465 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12466 emit_move_insn (gen_lowpart (word_mode, target),
12467 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12468 return;
12469 }
12470
12471 if (all_same && GET_MODE_SIZE (mode) == 8)
12472 {
12473 if (TARGET_VIS2)
12474 {
12475 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12476 return;
12477 }
12478 if (mode == V8QImode)
12479 {
12480 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12481 return;
12482 }
12483 if (mode == V4HImode)
12484 {
12485 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12486 return;
12487 }
12488 }
12489
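/* Otherwise build the vector in a stack temporary, one element at a time,
   and load the whole temporary back into TARGET. */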
12490 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12491 for (i = 0; i < n_elts; i++)
12492 emit_move_insn (adjust_address_nv (mem, inner_mode,
12493 i * GET_MODE_SIZE (inner_mode)),
12494 XVECEXP (vals, 0, i));
12495 emit_move_insn (target, mem);
12496 }
12497
12498 /* Implement TARGET_SECONDARY_RELOAD. */
12499
12500 static reg_class_t
12501 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12502 machine_mode mode, secondary_reload_info *sri)
12503 {
12504 enum reg_class rclass = (enum reg_class) rclass_i;
12505
12506 sri->icode = CODE_FOR_nothing;
12507 sri->extra_cost = 0;
12508
12509 /* We need a temporary when loading/storing a HImode/QImode value
12510 between memory and the FPU registers. This can happen when combine puts
12511 a paradoxical subreg in a float/fix conversion insn. */
12512 if (FP_REG_CLASS_P (rclass)
12513 && (mode == HImode || mode == QImode)
12514 && (GET_CODE (x) == MEM
12515 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12516 && true_regnum (x) == -1)))
12517 return GENERAL_REGS;
12518
12519 /* On 32-bit we need a temporary when loading/storing a DFmode value
12520 between unaligned memory and the upper FPU registers. */
12521 if (TARGET_ARCH32
12522 && rclass == EXTRA_FP_REGS
12523 && mode == DFmode
12524 && GET_CODE (x) == MEM
12525 && ! mem_min_alignment (x, 8))
12526 return FP_REGS;
12527
12528 if (((TARGET_CM_MEDANY
12529 && symbolic_operand (x, mode))
12530 || (TARGET_CM_EMBMEDANY
12531 && text_segment_operand (x, mode)))
12532 && ! flag_pic)
12533 {
12534 if (in_p)
12535 sri->icode = direct_optab_handler (reload_in_optab, mode);
12536 else
12537 sri->icode = direct_optab_handler (reload_out_optab, mode);
12538 return NO_REGS;
12539 }
12540
12541 if (TARGET_VIS3 && TARGET_ARCH32)
12542 {
12543 int regno = true_regnum (x);
12544
12545 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12546 to move 8-byte values in 4-byte pieces. This only works via
12547 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12548 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12549 an FP_REGS intermediate move. */
12550 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12551 || ((general_or_i64_p (rclass)
12552 || rclass == GENERAL_OR_FP_REGS)
12553 && SPARC_FP_REG_P (regno)))
12554 {
12555 sri->extra_cost = 2;
12556 return FP_REGS;
12557 }
12558 }
12559
12560 return NO_REGS;
12561 }
12562
12563 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12564 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12565
12566 bool
12567 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12568 {
12569 enum rtx_code rc = GET_CODE (operands[1]);
12570 machine_mode cmp_mode;
12571 rtx cc_reg, dst, cmp;
12572
12573 cmp = operands[1];
12574 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12575 return false;
12576
12577 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12578 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12579
12580 cmp_mode = GET_MODE (XEXP (cmp, 0));
12581 rc = GET_CODE (cmp);
12582
12583 dst = operands[0];
12584 if (! rtx_equal_p (operands[2], dst)
12585 && ! rtx_equal_p (operands[3], dst))
12586 {
12587 if (reg_overlap_mentioned_p (dst, cmp))
12588 dst = gen_reg_rtx (mode);
12589
12590 emit_move_insn (dst, operands[3]);
12591 }
12592 else if (operands[2] == dst)
12593 {
12594 operands[2] = operands[3];
12595
12596 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12597 rc = reverse_condition_maybe_unordered (rc);
12598 else
12599 rc = reverse_condition (rc);
12600 }
12601
12602 if (XEXP (cmp, 1) == const0_rtx
12603 && GET_CODE (XEXP (cmp, 0)) == REG
12604 && cmp_mode == DImode
12605 && v9_regcmp_p (rc))
12606 cc_reg = XEXP (cmp, 0);
12607 else
12608 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12609
12610 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12611
12612 emit_insn (gen_rtx_SET (dst,
12613 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12614
12615 if (dst != operands[0])
12616 emit_move_insn (operands[0], dst);
12617
12618 return true;
12619 }
12620
12621 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12622 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12623 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12624 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12625 code to be used for the condition mask. */
12626
12627 void
12628 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12629 {
12630 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12631 enum rtx_code code = GET_CODE (operands[3]);
12632
12633 mask = gen_reg_rtx (Pmode);
12634 cop0 = operands[4];
12635 cop1 = operands[5];
12636 if (code == LT || code == GE)
12637 {
12638 rtx t;
12639
12640 code = swap_condition (code);
12641 t = cop0; cop0 = cop1; cop1 = t;
12642 }
12643
12644 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12645
12646 fcmp = gen_rtx_UNSPEC (Pmode,
12647 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12648 fcode);
12649
12650 cmask = gen_rtx_UNSPEC (DImode,
12651 gen_rtvec (2, mask, gsr),
12652 ccode);
12653
12654 bshuf = gen_rtx_UNSPEC (mode,
12655 gen_rtvec (3, operands[1], operands[2], gsr),
12656 UNSPEC_BSHUFFLE);
12657
12658 emit_insn (gen_rtx_SET (mask, fcmp));
12659 emit_insn (gen_rtx_SET (gsr, cmask));
12660
12661 emit_insn (gen_rtx_SET (operands[0], bshuf));
12662 }
12663
12664 /* On sparc, any mode which naturally allocates into the float
12665 registers should return 4 here. */
12666
12667 unsigned int
12668 sparc_regmode_natural_size (machine_mode mode)
12669 {
12670 int size = UNITS_PER_WORD;
12671
12672 if (TARGET_ARCH64)
12673 {
12674 enum mode_class mclass = GET_MODE_CLASS (mode);
12675
12676 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12677 size = 4;
12678 }
12679
12680 return size;
12681 }
12682
12683 /* Return TRUE if it is a good idea to tie two pseudo registers
12684 when one has mode MODE1 and one has mode MODE2.
12685 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12686 for any hard reg, then this must be FALSE for correct output.
12687
12688 For V9 we have to deal with the fact that only the lower 32 floating
12689 point registers are 32-bit addressable. */
12690
12691 bool
12692 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12693 {
12694 enum mode_class mclass1, mclass2;
12695 unsigned short size1, size2;
12696
12697 if (mode1 == mode2)
12698 return true;
12699
12700 mclass1 = GET_MODE_CLASS (mode1);
12701 mclass2 = GET_MODE_CLASS (mode2);
12702 if (mclass1 != mclass2)
12703 return false;
12704
12705 if (! TARGET_V9)
12706 return true;
12707
12708 /* Classes are the same and we are V9 so we have to deal with upper
12709 vs. lower floating point registers. If one of the modes is a
12710 4-byte mode, and the other is not, we have to mark them as not
12711 tieable because only the lower 32 floating point registers are
12712 addressable 32 bits at a time.
12713
12714 We can't just test explicitly for SFmode, otherwise we won't
12715 cover the vector mode cases properly. */
12716
12717 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12718 return true;
12719
12720 size1 = GET_MODE_SIZE (mode1);
12721 size2 = GET_MODE_SIZE (mode2);
12722 if ((size1 > 4 && size2 == 4)
12723 || (size2 > 4 && size1 == 4))
12724 return false;
12725
12726 return true;
12727 }
12728
12729 /* Implement TARGET_CSTORE_MODE. */
12730
12731 static machine_mode
12732 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12733 {
12734 return (TARGET_ARCH64 ? DImode : SImode);
12735 }
12736
12737 /* Return the compound expression made of T1 and T2. */
12738
12739 static inline tree
12740 compound_expr (tree t1, tree t2)
12741 {
12742 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12743 }
12744
12745 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12746
12747 static void
12748 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12749 {
12750 if (!TARGET_FPU)
12751 return;
12752
12753 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12754 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12755
12756 /* We generate the equivalent of feholdexcept (&fenv_var):
12757
12758 unsigned int fenv_var;
12759 __builtin_store_fsr (&fenv_var);
12760
12761 unsigned int tmp1_var;
12762 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12763
12764 __builtin_load_fsr (&tmp1_var); */
12765
12766 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12767 TREE_ADDRESSABLE (fenv_var) = 1;
12768 tree fenv_addr = build_fold_addr_expr (fenv_var);
12769 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12770 tree hold_stfsr
12771 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12772 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12773
12774 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12775 TREE_ADDRESSABLE (tmp1_var) = 1;
12776 tree masked_fenv_var
12777 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12778 build_int_cst (unsigned_type_node,
12779 ~(accrued_exception_mask | trap_enable_mask)));
12780 tree hold_mask
12781 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12782 NULL_TREE, NULL_TREE);
12783
12784 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12785 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12786 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12787
12788 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12789
12790 /* We reload the value of tmp1_var to clear the exceptions:
12791
12792 __builtin_load_fsr (&tmp1_var); */
12793
12794 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12795
12796 /* We generate the equivalent of feupdateenv (&fenv_var):
12797
12798 unsigned int tmp2_var;
12799 __builtin_store_fsr (&tmp2_var);
12800
12801 __builtin_load_fsr (&fenv_var);
12802
12803 if (SPARC_LOW_FE_EXCEPT_VALUES)
12804 tmp2_var >>= 5;
12805 __atomic_feraiseexcept ((int) tmp2_var); */
12806
12807 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12808 TREE_ADDRESSABLE (tmp2_var) = 1;
12809 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12810 tree update_stfsr
12811 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12812 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12813
12814 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12815
12816 tree atomic_feraiseexcept
12817 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12818 tree update_call
12819 = build_call_expr (atomic_feraiseexcept, 1,
12820 fold_convert (integer_type_node, tmp2_var));
12821
12822 if (SPARC_LOW_FE_EXCEPT_VALUES)
12823 {
12824 tree shifted_tmp2_var
12825 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12826 build_int_cst (unsigned_type_node, 5));
12827 tree update_shift
12828 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12829 update_call = compound_expr (update_shift, update_call);
12830 }
12831
12832 *update
12833 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12834 }
12835
12836 #include "gt-sparc.h"