1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2016 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "debug.h"
51 #include "common/common-target.h"
52 #include "gimplify.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "context.h"
58 #include "builtins.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Processor costs */
64
65 struct processor_costs {
66 /* Integer load */
67 const int int_load;
68
69 /* Integer signed load */
70 const int int_sload;
71
72 /* Integer zeroed load */
73 const int int_zload;
74
75 /* Float load */
76 const int float_load;
77
78 /* fmov, fneg, fabs */
79 const int float_move;
80
81 /* fadd, fsub */
82 const int float_plusminus;
83
84 /* fcmp */
85 const int float_cmp;
86
87 /* fmov, fmovr */
88 const int float_cmove;
89
90 /* fmul */
91 const int float_mul;
92
93 /* fdivs */
94 const int float_div_sf;
95
96 /* fdivd */
97 const int float_div_df;
98
99 /* fsqrts */
100 const int float_sqrt_sf;
101
102 /* fsqrtd */
103 const int float_sqrt_df;
104
105 /* umul/smul */
106 const int int_mul;
107
108 /* mulX */
109 const int int_mulX;
110
111 /* integer multiply cost for each bit set past the most
112 significant 3, so the formula for multiply cost becomes:
113
114 if (rs1 < 0)
115 highest_bit = highest_clear_bit(rs1);
116 else
117 highest_bit = highest_set_bit(rs1);
118 if (highest_bit < 3)
119 highest_bit = 3;
120 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
121
122      A value of zero indicates that the multiply cost is fixed,
123 and not variable. */
124 const int int_mul_bit_factor;
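  /* Illustrative arithmetic only (derived from the formula above, not from
     any vendor documentation): with int_mul_bit_factor == 2, as in
     ultrasparc_costs below, a multiply by a constant whose highest set bit
     is bit 9 is costed at int_mul + (9 - 3) / 2 = int_mul + 3 extra units.  */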
125
126 /* udiv/sdiv */
127 const int int_div;
128
129 /* divX */
130 const int int_divX;
131
132 /* movcc, movr */
133 const int int_cmove;
134
135 /* penalty for shifts, due to scheduling rules etc. */
136 const int shift_penalty;
137 };
138
139 static const
140 struct processor_costs cypress_costs = {
141 COSTS_N_INSNS (2), /* int load */
142 COSTS_N_INSNS (2), /* int signed load */
143 COSTS_N_INSNS (2), /* int zeroed load */
144 COSTS_N_INSNS (2), /* float load */
145 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
146 COSTS_N_INSNS (5), /* fadd, fsub */
147 COSTS_N_INSNS (1), /* fcmp */
148 COSTS_N_INSNS (1), /* fmov, fmovr */
149 COSTS_N_INSNS (7), /* fmul */
150 COSTS_N_INSNS (37), /* fdivs */
151 COSTS_N_INSNS (37), /* fdivd */
152 COSTS_N_INSNS (63), /* fsqrts */
153 COSTS_N_INSNS (63), /* fsqrtd */
154 COSTS_N_INSNS (1), /* imul */
155 COSTS_N_INSNS (1), /* imulX */
156 0, /* imul bit factor */
157 COSTS_N_INSNS (1), /* idiv */
158 COSTS_N_INSNS (1), /* idivX */
159 COSTS_N_INSNS (1), /* movcc/movr */
160 0, /* shift penalty */
161 };
162
163 static const
164 struct processor_costs supersparc_costs = {
165 COSTS_N_INSNS (1), /* int load */
166 COSTS_N_INSNS (1), /* int signed load */
167 COSTS_N_INSNS (1), /* int zeroed load */
168 COSTS_N_INSNS (0), /* float load */
169 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
170 COSTS_N_INSNS (3), /* fadd, fsub */
171 COSTS_N_INSNS (3), /* fcmp */
172 COSTS_N_INSNS (1), /* fmov, fmovr */
173 COSTS_N_INSNS (3), /* fmul */
174 COSTS_N_INSNS (6), /* fdivs */
175 COSTS_N_INSNS (9), /* fdivd */
176 COSTS_N_INSNS (12), /* fsqrts */
177 COSTS_N_INSNS (12), /* fsqrtd */
178 COSTS_N_INSNS (4), /* imul */
179 COSTS_N_INSNS (4), /* imulX */
180 0, /* imul bit factor */
181 COSTS_N_INSNS (4), /* idiv */
182 COSTS_N_INSNS (4), /* idivX */
183 COSTS_N_INSNS (1), /* movcc/movr */
184 1, /* shift penalty */
185 };
186
187 static const
188 struct processor_costs hypersparc_costs = {
189 COSTS_N_INSNS (1), /* int load */
190 COSTS_N_INSNS (1), /* int signed load */
191 COSTS_N_INSNS (1), /* int zeroed load */
192 COSTS_N_INSNS (1), /* float load */
193 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
194 COSTS_N_INSNS (1), /* fadd, fsub */
195 COSTS_N_INSNS (1), /* fcmp */
196 COSTS_N_INSNS (1), /* fmov, fmovr */
197 COSTS_N_INSNS (1), /* fmul */
198 COSTS_N_INSNS (8), /* fdivs */
199 COSTS_N_INSNS (12), /* fdivd */
200 COSTS_N_INSNS (17), /* fsqrts */
201 COSTS_N_INSNS (17), /* fsqrtd */
202 COSTS_N_INSNS (17), /* imul */
203 COSTS_N_INSNS (17), /* imulX */
204 0, /* imul bit factor */
205 COSTS_N_INSNS (17), /* idiv */
206 COSTS_N_INSNS (17), /* idivX */
207 COSTS_N_INSNS (1), /* movcc/movr */
208 0, /* shift penalty */
209 };
210
211 static const
212 struct processor_costs leon_costs = {
213 COSTS_N_INSNS (1), /* int load */
214 COSTS_N_INSNS (1), /* int signed load */
215 COSTS_N_INSNS (1), /* int zeroed load */
216 COSTS_N_INSNS (1), /* float load */
217 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
218 COSTS_N_INSNS (1), /* fadd, fsub */
219 COSTS_N_INSNS (1), /* fcmp */
220 COSTS_N_INSNS (1), /* fmov, fmovr */
221 COSTS_N_INSNS (1), /* fmul */
222 COSTS_N_INSNS (15), /* fdivs */
223 COSTS_N_INSNS (15), /* fdivd */
224 COSTS_N_INSNS (23), /* fsqrts */
225 COSTS_N_INSNS (23), /* fsqrtd */
226 COSTS_N_INSNS (5), /* imul */
227 COSTS_N_INSNS (5), /* imulX */
228 0, /* imul bit factor */
229 COSTS_N_INSNS (5), /* idiv */
230 COSTS_N_INSNS (5), /* idivX */
231 COSTS_N_INSNS (1), /* movcc/movr */
232 0, /* shift penalty */
233 };
234
235 static const
236 struct processor_costs leon3_costs = {
237 COSTS_N_INSNS (1), /* int load */
238 COSTS_N_INSNS (1), /* int signed load */
239 COSTS_N_INSNS (1), /* int zeroed load */
240 COSTS_N_INSNS (1), /* float load */
241 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
242 COSTS_N_INSNS (1), /* fadd, fsub */
243 COSTS_N_INSNS (1), /* fcmp */
244 COSTS_N_INSNS (1), /* fmov, fmovr */
245 COSTS_N_INSNS (1), /* fmul */
246 COSTS_N_INSNS (14), /* fdivs */
247 COSTS_N_INSNS (15), /* fdivd */
248 COSTS_N_INSNS (22), /* fsqrts */
249 COSTS_N_INSNS (23), /* fsqrtd */
250 COSTS_N_INSNS (5), /* imul */
251 COSTS_N_INSNS (5), /* imulX */
252 0, /* imul bit factor */
253 COSTS_N_INSNS (35), /* idiv */
254 COSTS_N_INSNS (35), /* idivX */
255 COSTS_N_INSNS (1), /* movcc/movr */
256 0, /* shift penalty */
257 };
258
259 static const
260 struct processor_costs sparclet_costs = {
261 COSTS_N_INSNS (3), /* int load */
262 COSTS_N_INSNS (3), /* int signed load */
263 COSTS_N_INSNS (1), /* int zeroed load */
264 COSTS_N_INSNS (1), /* float load */
265 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
266 COSTS_N_INSNS (1), /* fadd, fsub */
267 COSTS_N_INSNS (1), /* fcmp */
268 COSTS_N_INSNS (1), /* fmov, fmovr */
269 COSTS_N_INSNS (1), /* fmul */
270 COSTS_N_INSNS (1), /* fdivs */
271 COSTS_N_INSNS (1), /* fdivd */
272 COSTS_N_INSNS (1), /* fsqrts */
273 COSTS_N_INSNS (1), /* fsqrtd */
274 COSTS_N_INSNS (5), /* imul */
275 COSTS_N_INSNS (5), /* imulX */
276 0, /* imul bit factor */
277 COSTS_N_INSNS (5), /* idiv */
278 COSTS_N_INSNS (5), /* idivX */
279 COSTS_N_INSNS (1), /* movcc/movr */
280 0, /* shift penalty */
281 };
282
283 static const
284 struct processor_costs ultrasparc_costs = {
285 COSTS_N_INSNS (2), /* int load */
286 COSTS_N_INSNS (3), /* int signed load */
287 COSTS_N_INSNS (2), /* int zeroed load */
288 COSTS_N_INSNS (2), /* float load */
289 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
290 COSTS_N_INSNS (4), /* fadd, fsub */
291 COSTS_N_INSNS (1), /* fcmp */
292 COSTS_N_INSNS (2), /* fmov, fmovr */
293 COSTS_N_INSNS (4), /* fmul */
294 COSTS_N_INSNS (13), /* fdivs */
295 COSTS_N_INSNS (23), /* fdivd */
296 COSTS_N_INSNS (13), /* fsqrts */
297 COSTS_N_INSNS (23), /* fsqrtd */
298 COSTS_N_INSNS (4), /* imul */
299 COSTS_N_INSNS (4), /* imulX */
300 2, /* imul bit factor */
301 COSTS_N_INSNS (37), /* idiv */
302 COSTS_N_INSNS (68), /* idivX */
303 COSTS_N_INSNS (2), /* movcc/movr */
304 2, /* shift penalty */
305 };
306
307 static const
308 struct processor_costs ultrasparc3_costs = {
309 COSTS_N_INSNS (2), /* int load */
310 COSTS_N_INSNS (3), /* int signed load */
311 COSTS_N_INSNS (3), /* int zeroed load */
312 COSTS_N_INSNS (2), /* float load */
313 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
314 COSTS_N_INSNS (4), /* fadd, fsub */
315 COSTS_N_INSNS (5), /* fcmp */
316 COSTS_N_INSNS (3), /* fmov, fmovr */
317 COSTS_N_INSNS (4), /* fmul */
318 COSTS_N_INSNS (17), /* fdivs */
319 COSTS_N_INSNS (20), /* fdivd */
320 COSTS_N_INSNS (20), /* fsqrts */
321 COSTS_N_INSNS (29), /* fsqrtd */
322 COSTS_N_INSNS (6), /* imul */
323 COSTS_N_INSNS (6), /* imulX */
324 0, /* imul bit factor */
325 COSTS_N_INSNS (40), /* idiv */
326 COSTS_N_INSNS (71), /* idivX */
327 COSTS_N_INSNS (2), /* movcc/movr */
328 0, /* shift penalty */
329 };
330
331 static const
332 struct processor_costs niagara_costs = {
333 COSTS_N_INSNS (3), /* int load */
334 COSTS_N_INSNS (3), /* int signed load */
335 COSTS_N_INSNS (3), /* int zeroed load */
336 COSTS_N_INSNS (9), /* float load */
337 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
338 COSTS_N_INSNS (8), /* fadd, fsub */
339 COSTS_N_INSNS (26), /* fcmp */
340 COSTS_N_INSNS (8), /* fmov, fmovr */
341 COSTS_N_INSNS (29), /* fmul */
342 COSTS_N_INSNS (54), /* fdivs */
343 COSTS_N_INSNS (83), /* fdivd */
344 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
345 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
346 COSTS_N_INSNS (11), /* imul */
347 COSTS_N_INSNS (11), /* imulX */
348 0, /* imul bit factor */
349 COSTS_N_INSNS (72), /* idiv */
350 COSTS_N_INSNS (72), /* idivX */
351 COSTS_N_INSNS (1), /* movcc/movr */
352 0, /* shift penalty */
353 };
354
355 static const
356 struct processor_costs niagara2_costs = {
357 COSTS_N_INSNS (3), /* int load */
358 COSTS_N_INSNS (3), /* int signed load */
359 COSTS_N_INSNS (3), /* int zeroed load */
360 COSTS_N_INSNS (3), /* float load */
361 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
362 COSTS_N_INSNS (6), /* fadd, fsub */
363 COSTS_N_INSNS (6), /* fcmp */
364 COSTS_N_INSNS (6), /* fmov, fmovr */
365 COSTS_N_INSNS (6), /* fmul */
366 COSTS_N_INSNS (19), /* fdivs */
367 COSTS_N_INSNS (33), /* fdivd */
368 COSTS_N_INSNS (19), /* fsqrts */
369 COSTS_N_INSNS (33), /* fsqrtd */
370 COSTS_N_INSNS (5), /* imul */
371 COSTS_N_INSNS (5), /* imulX */
372 0, /* imul bit factor */
373 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
374 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (1), /* movcc/movr */
376 0, /* shift penalty */
377 };
378
379 static const
380 struct processor_costs niagara3_costs = {
381 COSTS_N_INSNS (3), /* int load */
382 COSTS_N_INSNS (3), /* int signed load */
383 COSTS_N_INSNS (3), /* int zeroed load */
384 COSTS_N_INSNS (3), /* float load */
385 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
386 COSTS_N_INSNS (9), /* fadd, fsub */
387 COSTS_N_INSNS (9), /* fcmp */
388 COSTS_N_INSNS (9), /* fmov, fmovr */
389 COSTS_N_INSNS (9), /* fmul */
390 COSTS_N_INSNS (23), /* fdivs */
391 COSTS_N_INSNS (37), /* fdivd */
392 COSTS_N_INSNS (23), /* fsqrts */
393 COSTS_N_INSNS (37), /* fsqrtd */
394 COSTS_N_INSNS (9), /* imul */
395 COSTS_N_INSNS (9), /* imulX */
396 0, /* imul bit factor */
397 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
398 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
399 COSTS_N_INSNS (1), /* movcc/movr */
400 0, /* shift penalty */
401 };
402
403 static const
404 struct processor_costs niagara4_costs = {
405 COSTS_N_INSNS (5), /* int load */
406 COSTS_N_INSNS (5), /* int signed load */
407 COSTS_N_INSNS (5), /* int zeroed load */
408 COSTS_N_INSNS (5), /* float load */
409 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
410 COSTS_N_INSNS (11), /* fadd, fsub */
411 COSTS_N_INSNS (11), /* fcmp */
412 COSTS_N_INSNS (11), /* fmov, fmovr */
413 COSTS_N_INSNS (11), /* fmul */
414 COSTS_N_INSNS (24), /* fdivs */
415 COSTS_N_INSNS (37), /* fdivd */
416 COSTS_N_INSNS (24), /* fsqrts */
417 COSTS_N_INSNS (37), /* fsqrtd */
418 COSTS_N_INSNS (12), /* imul */
419 COSTS_N_INSNS (12), /* imulX */
420 0, /* imul bit factor */
421 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
422 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
423 COSTS_N_INSNS (1), /* movcc/movr */
424 0, /* shift penalty */
425 };
426
427 static const
428 struct processor_costs niagara7_costs = {
429 COSTS_N_INSNS (5), /* int load */
430 COSTS_N_INSNS (5), /* int signed load */
431 COSTS_N_INSNS (5), /* int zeroed load */
432 COSTS_N_INSNS (5), /* float load */
433 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
434 COSTS_N_INSNS (11), /* fadd, fsub */
435 COSTS_N_INSNS (11), /* fcmp */
436 COSTS_N_INSNS (11), /* fmov, fmovr */
437 COSTS_N_INSNS (11), /* fmul */
438 COSTS_N_INSNS (24), /* fdivs */
439 COSTS_N_INSNS (37), /* fdivd */
440 COSTS_N_INSNS (24), /* fsqrts */
441 COSTS_N_INSNS (37), /* fsqrtd */
442 COSTS_N_INSNS (12), /* imul */
443 COSTS_N_INSNS (12), /* imulX */
444 0, /* imul bit factor */
445 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
446 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
447 COSTS_N_INSNS (1), /* movcc/movr */
448 0, /* shift penalty */
449 };
450
451 static const struct processor_costs *sparc_costs = &cypress_costs;
452
453 #ifdef HAVE_AS_RELAX_OPTION
454 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
455 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
456    With sethi/jmp, neither 'as' nor 'ld' has an easy way to verify that
457    nothing branches to a point between the sethi and the jmp.  */
458 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
459 #else
460 #define LEAF_SIBCALL_SLOT_RESERVED_P \
461 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
462 #endif
463
464 /* Vector to say how input registers are mapped to output registers.
465 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
466 eliminate it. You must use -fomit-frame-pointer to get that. */
467 char leaf_reg_remap[] =
468 { 0, 1, 2, 3, 4, 5, 6, 7,
469 -1, -1, -1, -1, -1, -1, 14, -1,
470 -1, -1, -1, -1, -1, -1, -1, -1,
471 8, 9, 10, 11, 12, 13, -1, 15,
472
473 32, 33, 34, 35, 36, 37, 38, 39,
474 40, 41, 42, 43, 44, 45, 46, 47,
475 48, 49, 50, 51, 52, 53, 54, 55,
476 56, 57, 58, 59, 60, 61, 62, 63,
477 64, 65, 66, 67, 68, 69, 70, 71,
478 72, 73, 74, 75, 76, 77, 78, 79,
479 80, 81, 82, 83, 84, 85, 86, 87,
480 88, 89, 90, 91, 92, 93, 94, 95,
481 96, 97, 98, 99, 100, 101, 102};
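/* Reading the table above (illustrative): %i0, hard register 24, is remapped
   to %o0, hard register 8, in a leaf function, while %fp (hard register 30)
   maps to -1 because the frame pointer cannot be renamed this way.  */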
482
483 /* Vector, indexed by hard register number, which contains 1
484 for a register that is allowable in a candidate for leaf
485 function treatment. */
486 char sparc_leaf_regs[] =
487 { 1, 1, 1, 1, 1, 1, 1, 1,
488 0, 0, 0, 0, 0, 0, 1, 0,
489 0, 0, 0, 0, 0, 0, 0, 0,
490 1, 1, 1, 1, 1, 1, 0, 1,
491 1, 1, 1, 1, 1, 1, 1, 1,
492 1, 1, 1, 1, 1, 1, 1, 1,
493 1, 1, 1, 1, 1, 1, 1, 1,
494 1, 1, 1, 1, 1, 1, 1, 1,
495 1, 1, 1, 1, 1, 1, 1, 1,
496 1, 1, 1, 1, 1, 1, 1, 1,
497 1, 1, 1, 1, 1, 1, 1, 1,
498 1, 1, 1, 1, 1, 1, 1, 1,
499 1, 1, 1, 1, 1, 1, 1};
500
501 struct GTY(()) machine_function
502 {
503 /* Size of the frame of the function. */
504 HOST_WIDE_INT frame_size;
505
506 /* Size of the frame of the function minus the register window save area
507 and the outgoing argument area. */
508 HOST_WIDE_INT apparent_frame_size;
509
510 /* Register we pretend the frame pointer is allocated to. Normally, this
511 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
512 record "offset" separately as it may be too big for (reg + disp). */
513 rtx frame_base_reg;
514 HOST_WIDE_INT frame_base_offset;
515
516 /* Number of global or FP registers to be saved (as 4-byte quantities). */
517 int n_global_fp_regs;
518
519 /* True if the current function is leaf and uses only leaf regs,
520 so that the SPARC leaf function optimization can be applied.
521 Private version of crtl->uses_only_leaf_regs, see
522 sparc_expand_prologue for the rationale. */
523 int leaf_function_p;
524
525 /* True if the prologue saves local or in registers. */
526 bool save_local_in_regs_p;
527
528 /* True if the data calculated by sparc_expand_prologue are valid. */
529 bool prologue_data_valid_p;
530 };
531
532 #define sparc_frame_size cfun->machine->frame_size
533 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
534 #define sparc_frame_base_reg cfun->machine->frame_base_reg
535 #define sparc_frame_base_offset cfun->machine->frame_base_offset
536 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
537 #define sparc_leaf_function_p cfun->machine->leaf_function_p
538 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
539 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
540
541 /* 1 if the next opcode is to be specially indented. */
542 int sparc_indent_opcode = 0;
543
544 static void sparc_option_override (void);
545 static void sparc_init_modes (void);
546 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
547 const_tree, bool, bool, int *, int *);
548
549 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
550 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
551
552 static void sparc_emit_set_const32 (rtx, rtx);
553 static void sparc_emit_set_const64 (rtx, rtx);
554 static void sparc_output_addr_vec (rtx);
555 static void sparc_output_addr_diff_vec (rtx);
556 static void sparc_output_deferred_case_vectors (void);
557 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
558 static bool sparc_legitimate_constant_p (machine_mode, rtx);
559 static rtx sparc_builtin_saveregs (void);
560 static int epilogue_renumber (rtx *, int);
561 static bool sparc_assemble_integer (rtx, unsigned int, int);
562 static int set_extends (rtx_insn *);
563 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
564 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
565 #ifdef TARGET_SOLARIS
566 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
567 tree) ATTRIBUTE_UNUSED;
568 #endif
569 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
570 static int sparc_issue_rate (void);
571 static void sparc_sched_init (FILE *, int, int);
572 static int sparc_use_sched_lookahead (void);
573
574 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
575 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
576 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
577 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
578 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
579
580 static bool sparc_function_ok_for_sibcall (tree, tree);
581 static void sparc_init_libfuncs (void);
582 static void sparc_init_builtins (void);
583 static void sparc_fpu_init_builtins (void);
584 static void sparc_vis_init_builtins (void);
585 static tree sparc_builtin_decl (unsigned, bool);
586 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
587 static tree sparc_fold_builtin (tree, int, tree *, bool);
588 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
589 HOST_WIDE_INT, tree);
590 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
591 HOST_WIDE_INT, const_tree);
592 static struct machine_function * sparc_init_machine_status (void);
593 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
594 static rtx sparc_tls_get_addr (void);
595 static rtx sparc_tls_got (void);
596 static int sparc_register_move_cost (machine_mode,
597 reg_class_t, reg_class_t);
598 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
599 static rtx sparc_function_value (const_tree, const_tree, bool);
600 static rtx sparc_libcall_value (machine_mode, const_rtx);
601 static bool sparc_function_value_regno_p (const unsigned int);
602 static rtx sparc_struct_value_rtx (tree, int);
603 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
604 int *, const_tree, int);
605 static bool sparc_return_in_memory (const_tree, const_tree);
606 static bool sparc_strict_argument_naming (cumulative_args_t);
607 static void sparc_va_start (tree, rtx);
608 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
609 static bool sparc_vector_mode_supported_p (machine_mode);
610 static bool sparc_tls_referenced_p (rtx);
611 static rtx sparc_legitimize_tls_address (rtx);
612 static rtx sparc_legitimize_pic_address (rtx, rtx);
613 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
614 static rtx sparc_delegitimize_address (rtx);
615 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
616 static bool sparc_pass_by_reference (cumulative_args_t,
617 machine_mode, const_tree, bool);
618 static void sparc_function_arg_advance (cumulative_args_t,
619 machine_mode, const_tree, bool);
620 static rtx sparc_function_arg_1 (cumulative_args_t,
621 machine_mode, const_tree, bool, bool);
622 static rtx sparc_function_arg (cumulative_args_t,
623 machine_mode, const_tree, bool);
624 static rtx sparc_function_incoming_arg (cumulative_args_t,
625 machine_mode, const_tree, bool);
626 static unsigned int sparc_function_arg_boundary (machine_mode,
627 const_tree);
628 static int sparc_arg_partial_bytes (cumulative_args_t,
629 machine_mode, tree, bool);
630 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
631 static void sparc_file_end (void);
632 static bool sparc_frame_pointer_required (void);
633 static bool sparc_can_eliminate (const int, const int);
634 static rtx sparc_builtin_setjmp_frame_value (void);
635 static void sparc_conditional_register_usage (void);
636 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
637 static const char *sparc_mangle_type (const_tree);
638 #endif
639 static void sparc_trampoline_init (rtx, tree, rtx);
640 static machine_mode sparc_preferred_simd_mode (machine_mode);
641 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
642 static bool sparc_lra_p (void);
643 static bool sparc_print_operand_punct_valid_p (unsigned char);
644 static void sparc_print_operand (FILE *, rtx, int);
645 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
646 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
647 machine_mode,
648 secondary_reload_info *);
649 static machine_mode sparc_cstore_mode (enum insn_code icode);
650 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
651 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
652 static unsigned int sparc_min_arithmetic_precision (void);
653 \f
654 #ifdef SUBTARGET_ATTRIBUTE_TABLE
655 /* Table of valid machine attributes. */
656 static const struct attribute_spec sparc_attribute_table[] =
657 {
658 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
659 do_diagnostic } */
660 SUBTARGET_ATTRIBUTE_TABLE,
661 { NULL, 0, 0, false, false, false, NULL, false }
662 };
663 #endif
664 \f
665 /* Option handling. */
666
667 /* Parsed value. */
668 enum cmodel sparc_cmodel;
669
670 char sparc_hard_reg_printed[8];
671
672 /* Initialize the GCC target structure. */
673
674 /* The default is to use .half rather than .short for aligned HI objects. */
675 #undef TARGET_ASM_ALIGNED_HI_OP
676 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
677
678 #undef TARGET_ASM_UNALIGNED_HI_OP
679 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
680 #undef TARGET_ASM_UNALIGNED_SI_OP
681 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
682 #undef TARGET_ASM_UNALIGNED_DI_OP
683 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
684
685 /* The target hook has to handle DI-mode values. */
686 #undef TARGET_ASM_INTEGER
687 #define TARGET_ASM_INTEGER sparc_assemble_integer
688
689 #undef TARGET_ASM_FUNCTION_PROLOGUE
690 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
691 #undef TARGET_ASM_FUNCTION_EPILOGUE
692 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
693
694 #undef TARGET_SCHED_ADJUST_COST
695 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
696 #undef TARGET_SCHED_ISSUE_RATE
697 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
698 #undef TARGET_SCHED_INIT
699 #define TARGET_SCHED_INIT sparc_sched_init
700 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
701 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
702
703 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
704 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
705
706 #undef TARGET_INIT_LIBFUNCS
707 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
708
709 #undef TARGET_LEGITIMIZE_ADDRESS
710 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
711 #undef TARGET_DELEGITIMIZE_ADDRESS
712 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
713 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
714 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
715
716 #undef TARGET_INIT_BUILTINS
717 #define TARGET_INIT_BUILTINS sparc_init_builtins
718 #undef TARGET_BUILTIN_DECL
719 #define TARGET_BUILTIN_DECL sparc_builtin_decl
720 #undef TARGET_EXPAND_BUILTIN
721 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
722 #undef TARGET_FOLD_BUILTIN
723 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
724
725 #if TARGET_TLS
726 #undef TARGET_HAVE_TLS
727 #define TARGET_HAVE_TLS true
728 #endif
729
730 #undef TARGET_CANNOT_FORCE_CONST_MEM
731 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
732
733 #undef TARGET_ASM_OUTPUT_MI_THUNK
734 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
735 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
736 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
737
738 #undef TARGET_RTX_COSTS
739 #define TARGET_RTX_COSTS sparc_rtx_costs
740 #undef TARGET_ADDRESS_COST
741 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
742 #undef TARGET_REGISTER_MOVE_COST
743 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
744
745 #undef TARGET_PROMOTE_FUNCTION_MODE
746 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
747
748 #undef TARGET_FUNCTION_VALUE
749 #define TARGET_FUNCTION_VALUE sparc_function_value
750 #undef TARGET_LIBCALL_VALUE
751 #define TARGET_LIBCALL_VALUE sparc_libcall_value
752 #undef TARGET_FUNCTION_VALUE_REGNO_P
753 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
754
755 #undef TARGET_STRUCT_VALUE_RTX
756 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
757 #undef TARGET_RETURN_IN_MEMORY
758 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
759 #undef TARGET_MUST_PASS_IN_STACK
760 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
761 #undef TARGET_PASS_BY_REFERENCE
762 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
763 #undef TARGET_ARG_PARTIAL_BYTES
764 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
765 #undef TARGET_FUNCTION_ARG_ADVANCE
766 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
767 #undef TARGET_FUNCTION_ARG
768 #define TARGET_FUNCTION_ARG sparc_function_arg
769 #undef TARGET_FUNCTION_INCOMING_ARG
770 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
771 #undef TARGET_FUNCTION_ARG_BOUNDARY
772 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
773
774 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
775 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
776 #undef TARGET_STRICT_ARGUMENT_NAMING
777 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
778
779 #undef TARGET_EXPAND_BUILTIN_VA_START
780 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
781 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
782 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
783
784 #undef TARGET_VECTOR_MODE_SUPPORTED_P
785 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
786
787 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
788 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
789
790 #ifdef SUBTARGET_INSERT_ATTRIBUTES
791 #undef TARGET_INSERT_ATTRIBUTES
792 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
793 #endif
794
795 #ifdef SUBTARGET_ATTRIBUTE_TABLE
796 #undef TARGET_ATTRIBUTE_TABLE
797 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
798 #endif
799
800 #undef TARGET_OPTION_OVERRIDE
801 #define TARGET_OPTION_OVERRIDE sparc_option_override
802
803 #ifdef TARGET_THREAD_SSP_OFFSET
804 #undef TARGET_STACK_PROTECT_GUARD
805 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
806 #endif
807
808 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
809 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
810 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
811 #endif
812
813 #undef TARGET_ASM_FILE_END
814 #define TARGET_ASM_FILE_END sparc_file_end
815
816 #undef TARGET_FRAME_POINTER_REQUIRED
817 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
818
819 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
820 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
821
822 #undef TARGET_CAN_ELIMINATE
823 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
824
825 #undef TARGET_PREFERRED_RELOAD_CLASS
826 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
827
828 #undef TARGET_SECONDARY_RELOAD
829 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
830
831 #undef TARGET_CONDITIONAL_REGISTER_USAGE
832 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
833
834 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
835 #undef TARGET_MANGLE_TYPE
836 #define TARGET_MANGLE_TYPE sparc_mangle_type
837 #endif
838
839 #undef TARGET_LRA_P
840 #define TARGET_LRA_P sparc_lra_p
841
842 #undef TARGET_LEGITIMATE_ADDRESS_P
843 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
844
845 #undef TARGET_LEGITIMATE_CONSTANT_P
846 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
847
848 #undef TARGET_TRAMPOLINE_INIT
849 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
850
851 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
852 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
853 #undef TARGET_PRINT_OPERAND
854 #define TARGET_PRINT_OPERAND sparc_print_operand
855 #undef TARGET_PRINT_OPERAND_ADDRESS
856 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
857
858 /* The value stored by LDSTUB. */
859 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
860 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
861
862 #undef TARGET_CSTORE_MODE
863 #define TARGET_CSTORE_MODE sparc_cstore_mode
864
865 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
866 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
867
868 #undef TARGET_FIXED_CONDITION_CODE_REGS
869 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
870
871 #undef TARGET_MIN_ARITHMETIC_PRECISION
872 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
873
874 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
875 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
876
877 struct gcc_target targetm = TARGET_INITIALIZER;
878
879 /* Return the memory reference contained in X if any, zero otherwise. */
880
881 static rtx
882 mem_ref (rtx x)
883 {
884 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
885 x = XEXP (x, 0);
886
887 if (MEM_P (x))
888 return x;
889
890 return NULL_RTX;
891 }
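/* Example (illustrative): for (zero_extend:SI (mem:HI (reg:SI 8))) this
   peels off the extension and returns the inner MEM; for a plain REG it
   returns NULL_RTX.  */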
892
893 /* We use a machine specific pass to enable workarounds for errata.
894
895 We need to have the (essentially) final form of the insn stream in order
896 to properly detect the various hazards. Therefore, this machine specific
897 pass runs as late as possible. */
898
899 static unsigned int
900 sparc_do_work_around_errata (void)
901 {
902 rtx_insn *insn, *next;
903
904 /* Force all instructions to be split into their final form. */
905 split_all_insns_noflow ();
906
907 /* Now look for specific patterns in the insn stream. */
908 for (insn = get_insns (); insn; insn = next)
909 {
910 bool insert_nop = false;
911 rtx set;
912
913 /* Look into the instruction in a delay slot. */
914 if (NONJUMP_INSN_P (insn))
915 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
916 insn = seq->insn (1);
917
918 /* Look for a single-word load into an odd-numbered FP register. */
919 if (sparc_fix_at697f
920 && NONJUMP_INSN_P (insn)
921 && (set = single_set (insn)) != NULL_RTX
922 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
923 && MEM_P (SET_SRC (set))
924 && REG_P (SET_DEST (set))
925 && REGNO (SET_DEST (set)) > 31
926 && REGNO (SET_DEST (set)) % 2 != 0)
927 {
928 /* The wrong dependency is on the enclosing double register. */
929 const unsigned int x = REGNO (SET_DEST (set)) - 1;
930 unsigned int src1, src2, dest;
931 int code;
932
933 next = next_active_insn (insn);
934 if (!next)
935 break;
936 /* If the insn is a branch, then it cannot be problematic. */
937 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
938 continue;
939
940 extract_insn (next);
941 code = INSN_CODE (next);
942
943 switch (code)
944 {
945 case CODE_FOR_adddf3:
946 case CODE_FOR_subdf3:
947 case CODE_FOR_muldf3:
948 case CODE_FOR_divdf3:
949 dest = REGNO (recog_data.operand[0]);
950 src1 = REGNO (recog_data.operand[1]);
951 src2 = REGNO (recog_data.operand[2]);
952 if (src1 != src2)
953 {
954 /* Case [1-4]:
955 ld [address], %fx+1
956 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
957 if ((src1 == x || src2 == x)
958 && (dest == src1 || dest == src2))
959 insert_nop = true;
960 }
961 else
962 {
963 /* Case 5:
964 ld [address], %fx+1
965 FPOPd %fx, %fx, %fx */
966 if (src1 == x
967 && dest == src1
968 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
969 insert_nop = true;
970 }
971 break;
972
973 case CODE_FOR_sqrtdf2:
974 dest = REGNO (recog_data.operand[0]);
975 src1 = REGNO (recog_data.operand[1]);
976 /* Case 6:
977 ld [address], %fx+1
978 fsqrtd %fx, %fx */
979 if (src1 == x && dest == src1)
980 insert_nop = true;
981 break;
982
983 default:
984 break;
985 }
986 }
987
988 /* Look for a single-word load into an integer register. */
989 else if (sparc_fix_ut699
990 && NONJUMP_INSN_P (insn)
991 && (set = single_set (insn)) != NULL_RTX
992 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
993 && mem_ref (SET_SRC (set)) != NULL_RTX
994 && REG_P (SET_DEST (set))
995 && REGNO (SET_DEST (set)) < 32)
996 {
997 /* There is no problem if the second memory access has a data
998 dependency on the first single-cycle load. */
999 rtx x = SET_DEST (set);
1000
1001 next = next_active_insn (insn);
1002 if (!next)
1003 break;
1004 /* If the insn is a branch, then it cannot be problematic. */
1005 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1006 continue;
1007
1008 /* Look for a second memory access to/from an integer register. */
1009 if ((set = single_set (next)) != NULL_RTX)
1010 {
1011 rtx src = SET_SRC (set);
1012 rtx dest = SET_DEST (set);
1013 rtx mem;
1014
1015 /* LDD is affected. */
1016 if ((mem = mem_ref (src)) != NULL_RTX
1017 && REG_P (dest)
1018 && REGNO (dest) < 32
1019 && !reg_mentioned_p (x, XEXP (mem, 0)))
1020 insert_nop = true;
1021
1022 /* STD is *not* affected. */
1023 else if (MEM_P (dest)
1024 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1025 && (src == CONST0_RTX (GET_MODE (dest))
1026 || (REG_P (src)
1027 && REGNO (src) < 32
1028 && REGNO (src) != REGNO (x)))
1029 && !reg_mentioned_p (x, XEXP (dest, 0)))
1030 insert_nop = true;
1031 }
1032 }
1033
1034 /* Look for a single-word load/operation into an FP register. */
1035 else if (sparc_fix_ut699
1036 && NONJUMP_INSN_P (insn)
1037 && (set = single_set (insn)) != NULL_RTX
1038 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1039 && REG_P (SET_DEST (set))
1040 && REGNO (SET_DEST (set)) > 31)
1041 {
1042 /* Number of instructions in the problematic window. */
1043 const int n_insns = 4;
1044 /* The problematic combination is with the sibling FP register. */
1045 const unsigned int x = REGNO (SET_DEST (set));
1046 const unsigned int y = x ^ 1;
1047 rtx_insn *after;
1048 int i;
1049
1050 next = next_active_insn (insn);
1051 if (!next)
1052 break;
1053 /* If the insn is a branch, then it cannot be problematic. */
1054 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1055 continue;
1056
1057 /* Look for a second load/operation into the sibling FP register. */
1058 if (!((set = single_set (next)) != NULL_RTX
1059 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1060 && REG_P (SET_DEST (set))
1061 && REGNO (SET_DEST (set)) == y))
1062 continue;
1063
1064 /* Look for a (possible) store from the FP register in the next N
1065 instructions, but bail out if it is again modified or if there
1066 is a store from the sibling FP register before this store. */
1067 for (after = next, i = 0; i < n_insns; i++)
1068 {
1069 bool branch_p;
1070
1071 after = next_active_insn (after);
1072 if (!after)
1073 break;
1074
1075 /* This is a branch with an empty delay slot. */
1076 if (!NONJUMP_INSN_P (after))
1077 {
1078 if (++i == n_insns)
1079 break;
1080 branch_p = true;
1081 after = NULL;
1082 }
1083 /* This is a branch with a filled delay slot. */
1084 else if (rtx_sequence *seq =
1085 dyn_cast <rtx_sequence *> (PATTERN (after)))
1086 {
1087 if (++i == n_insns)
1088 break;
1089 branch_p = true;
1090 after = seq->insn (1);
1091 }
1092 /* This is a regular instruction. */
1093 else
1094 branch_p = false;
1095
1096 if (after && (set = single_set (after)) != NULL_RTX)
1097 {
1098 const rtx src = SET_SRC (set);
1099 const rtx dest = SET_DEST (set);
1100 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1101
1102 /* If the FP register is again modified before the store,
1103 then the store isn't affected. */
1104 if (REG_P (dest)
1105 && (REGNO (dest) == x
1106 || (REGNO (dest) == y && size == 8)))
1107 break;
1108
1109 if (MEM_P (dest) && REG_P (src))
1110 {
1111 /* If there is a store from the sibling FP register
1112 before the store, then the store is not affected. */
1113 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1114 break;
1115
1116 /* Otherwise, the store is affected. */
1117 if (REGNO (src) == x && size == 4)
1118 {
1119 insert_nop = true;
1120 break;
1121 }
1122 }
1123 }
1124
1125 /* If we have a branch in the first M instructions, then we
1126 cannot see the (M+2)th instruction so we play safe. */
1127 if (branch_p && i <= (n_insns - 2))
1128 {
1129 insert_nop = true;
1130 break;
1131 }
1132 }
1133 }
1134
1135 else
1136 next = NEXT_INSN (insn);
1137
1138 if (insert_nop)
1139 emit_insn_before (gen_nop (), next);
1140 }
1141
1142 return 0;
1143 }
1144
1145 namespace {
1146
1147 const pass_data pass_data_work_around_errata =
1148 {
1149 RTL_PASS, /* type */
1150 "errata", /* name */
1151 OPTGROUP_NONE, /* optinfo_flags */
1152 TV_MACH_DEP, /* tv_id */
1153 0, /* properties_required */
1154 0, /* properties_provided */
1155 0, /* properties_destroyed */
1156 0, /* todo_flags_start */
1157 0, /* todo_flags_finish */
1158 };
1159
1160 class pass_work_around_errata : public rtl_opt_pass
1161 {
1162 public:
1163 pass_work_around_errata(gcc::context *ctxt)
1164 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1165 {}
1166
1167 /* opt_pass methods: */
1168 virtual bool gate (function *)
1169 {
1170 /* The only errata we handle are those of the AT697F and UT699. */
1171 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1172 }
1173
1174 virtual unsigned int execute (function *)
1175 {
1176 return sparc_do_work_around_errata ();
1177 }
1178
1179 }; // class pass_work_around_errata
1180
1181 } // anon namespace
1182
1183 rtl_opt_pass *
1184 make_pass_work_around_errata (gcc::context *ctxt)
1185 {
1186 return new pass_work_around_errata (ctxt);
1187 }
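/* A minimal, hypothetical sketch of how such a pass is typically wired into
   the pass manager (e.g. from the option-override hook); the "mach"
   reference pass name and the helper name are assumptions for illustration,
   not necessarily what this port actually does.  */
#if 0
static void
sparc_register_errata_pass_sketch (void)
{
  /* Build the pass object against the global context G.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);

  /* Ask the pass manager to run it right after machine reorg.  */
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "mach",			/* reference_pass_name (assumed) */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* pos_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
#endif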
1188
1189 /* Helpers for TARGET_DEBUG_OPTIONS. */
1190 static void
1191 dump_target_flag_bits (const int flags)
1192 {
1193 if (flags & MASK_64BIT)
1194 fprintf (stderr, "64BIT ");
1195 if (flags & MASK_APP_REGS)
1196 fprintf (stderr, "APP_REGS ");
1197 if (flags & MASK_FASTER_STRUCTS)
1198 fprintf (stderr, "FASTER_STRUCTS ");
1199 if (flags & MASK_FLAT)
1200 fprintf (stderr, "FLAT ");
1201 if (flags & MASK_FMAF)
1202 fprintf (stderr, "FMAF ");
1203 if (flags & MASK_FPU)
1204 fprintf (stderr, "FPU ");
1205 if (flags & MASK_HARD_QUAD)
1206 fprintf (stderr, "HARD_QUAD ");
1207 if (flags & MASK_POPC)
1208 fprintf (stderr, "POPC ");
1209 if (flags & MASK_PTR64)
1210 fprintf (stderr, "PTR64 ");
1211 if (flags & MASK_STACK_BIAS)
1212 fprintf (stderr, "STACK_BIAS ");
1213 if (flags & MASK_UNALIGNED_DOUBLES)
1214 fprintf (stderr, "UNALIGNED_DOUBLES ");
1215 if (flags & MASK_V8PLUS)
1216 fprintf (stderr, "V8PLUS ");
1217 if (flags & MASK_VIS)
1218 fprintf (stderr, "VIS ");
1219 if (flags & MASK_VIS2)
1220 fprintf (stderr, "VIS2 ");
1221 if (flags & MASK_VIS3)
1222 fprintf (stderr, "VIS3 ");
1223 if (flags & MASK_VIS4)
1224 fprintf (stderr, "VIS4 ");
1225 if (flags & MASK_CBCOND)
1226 fprintf (stderr, "CBCOND ");
1227 if (flags & MASK_DEPRECATED_V8_INSNS)
1228 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1229 if (flags & MASK_SPARCLET)
1230 fprintf (stderr, "SPARCLET ");
1231 if (flags & MASK_SPARCLITE)
1232 fprintf (stderr, "SPARCLITE ");
1233 if (flags & MASK_V8)
1234 fprintf (stderr, "V8 ");
1235 if (flags & MASK_V9)
1236 fprintf (stderr, "V9 ");
1237 }
1238
1239 static void
1240 dump_target_flags (const char *prefix, const int flags)
1241 {
1242 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1243 dump_target_flag_bits (flags);
1244   fprintf (stderr, "]\n");
1245 }
1246
1247 /* Validate and override various options, and do some machine dependent
1248 initialization. */
1249
1250 static void
1251 sparc_option_override (void)
1252 {
1253 static struct code_model {
1254 const char *const name;
1255 const enum cmodel value;
1256 } const cmodels[] = {
1257 { "32", CM_32 },
1258 { "medlow", CM_MEDLOW },
1259 { "medmid", CM_MEDMID },
1260 { "medany", CM_MEDANY },
1261 { "embmedany", CM_EMBMEDANY },
1262 { NULL, (enum cmodel) 0 }
1263 };
1264 const struct code_model *cmodel;
1265 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1266 static struct cpu_default {
1267 const int cpu;
1268 const enum processor_type processor;
1269 } const cpu_default[] = {
1270 /* There must be one entry here for each TARGET_CPU value. */
1271 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1272 { TARGET_CPU_v8, PROCESSOR_V8 },
1273 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1274 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1275 { TARGET_CPU_leon, PROCESSOR_LEON },
1276 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1277 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1278 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1279 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1280 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1281 { TARGET_CPU_v9, PROCESSOR_V9 },
1282 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1283 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1284 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1285 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1286 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1287 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1288 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1289 { -1, PROCESSOR_V7 }
1290 };
1291 const struct cpu_default *def;
1292 /* Table of values for -m{cpu,tune}=. This must match the order of
1293 the enum processor_type in sparc-opts.h. */
1294 static struct cpu_table {
1295 const char *const name;
1296 const int disable;
1297 const int enable;
1298 } const cpu_table[] = {
1299 { "v7", MASK_ISA, 0 },
1300 { "cypress", MASK_ISA, 0 },
1301 { "v8", MASK_ISA, MASK_V8 },
1302 /* TI TMS390Z55 supersparc */
1303 { "supersparc", MASK_ISA, MASK_V8 },
1304 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1305 { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1306 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1307 { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
1308 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1309 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1310 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1311 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1312 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1313 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1314 { "sparclet", MASK_ISA, MASK_SPARCLET },
1315 /* TEMIC sparclet */
1316 { "tsc701", MASK_ISA, MASK_SPARCLET },
1317 { "v9", MASK_ISA, MASK_V9 },
1318 /* UltraSPARC I, II, IIi */
1319 { "ultrasparc", MASK_ISA,
1320 /* Although insns using %y are deprecated, it is a clear win. */
1321 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1322 /* UltraSPARC III */
1323 /* ??? Check if %y issue still holds true. */
1324 { "ultrasparc3", MASK_ISA,
1325 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1326 /* UltraSPARC T1 */
1327 { "niagara", MASK_ISA,
1328 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1329 /* UltraSPARC T2 */
1330 { "niagara2", MASK_ISA,
1331 MASK_V9|MASK_POPC|MASK_VIS2 },
1332 /* UltraSPARC T3 */
1333 { "niagara3", MASK_ISA,
1334 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1335 /* UltraSPARC T4 */
1336 { "niagara4", MASK_ISA,
1337 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1338 /* UltraSPARC M7 */
1339 { "niagara7", MASK_ISA,
1340 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1341 };
1342 const struct cpu_table *cpu;
1343 unsigned int i;
1344 int fpu;
1345
1346 if (sparc_debug_string != NULL)
1347 {
1348 const char *q;
1349 char *p;
1350
1351 p = ASTRDUP (sparc_debug_string);
1352 while ((q = strtok (p, ",")) != NULL)
1353 {
1354 bool invert;
1355 int mask;
1356
1357 p = NULL;
1358 if (*q == '!')
1359 {
1360 invert = true;
1361 q++;
1362 }
1363 else
1364 invert = false;
1365
1366 if (! strcmp (q, "all"))
1367 mask = MASK_DEBUG_ALL;
1368 else if (! strcmp (q, "options"))
1369 mask = MASK_DEBUG_OPTIONS;
1370 else
1371 error ("unknown -mdebug-%s switch", q);
1372
1373 if (invert)
1374 sparc_debug &= ~mask;
1375 else
1376 sparc_debug |= mask;
1377 }
1378 }
1379
1380 if (TARGET_DEBUG_OPTIONS)
1381 {
1382       dump_target_flags ("Initial target_flags", target_flags);
1383       dump_target_flags ("target_flags_explicit", target_flags_explicit);
1384 }
1385
1386 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1387 SUBTARGET_OVERRIDE_OPTIONS;
1388 #endif
1389
1390 #ifndef SPARC_BI_ARCH
1391 /* Check for unsupported architecture size. */
1392 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1393 error ("%s is not supported by this configuration",
1394 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1395 #endif
1396
1397   /* We force all 64-bit archs to use a 128-bit long double.  */
1398 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1399 {
1400 error ("-mlong-double-64 not allowed with -m64");
1401 target_flags |= MASK_LONG_DOUBLE_128;
1402 }
1403
1404 /* Code model selection. */
1405 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1406
1407 #ifdef SPARC_BI_ARCH
1408 if (TARGET_ARCH32)
1409 sparc_cmodel = CM_32;
1410 #endif
1411
1412 if (sparc_cmodel_string != NULL)
1413 {
1414 if (TARGET_ARCH64)
1415 {
1416 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1417 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1418 break;
1419 if (cmodel->name == NULL)
1420 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1421 else
1422 sparc_cmodel = cmodel->value;
1423 }
1424 else
1425 error ("-mcmodel= is not supported on 32 bit systems");
1426 }
1427
1428 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1429 for (i = 8; i < 16; i++)
1430 if (!call_used_regs [i])
1431 {
1432 error ("-fcall-saved-REG is not supported for out registers");
1433 call_used_regs [i] = 1;
1434 }
1435
1436 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1437
1438 /* Set the default CPU. */
1439 if (!global_options_set.x_sparc_cpu_and_features)
1440 {
1441 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1442 if (def->cpu == TARGET_CPU_DEFAULT)
1443 break;
1444 gcc_assert (def->cpu != -1);
1445 sparc_cpu_and_features = def->processor;
1446 }
1447
1448 if (!global_options_set.x_sparc_cpu)
1449 sparc_cpu = sparc_cpu_and_features;
1450
1451 cpu = &cpu_table[(int) sparc_cpu_and_features];
1452
1453 if (TARGET_DEBUG_OPTIONS)
1454 {
1455 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1456 fprintf (stderr, "sparc_cpu: %s\n",
1457 cpu_table[(int) sparc_cpu].name);
1458 dump_target_flags ("cpu->disable", cpu->disable);
1459 dump_target_flags ("cpu->enable", cpu->enable);
1460 }
1461
1462 target_flags &= ~cpu->disable;
1463 target_flags |= (cpu->enable
1464 #ifndef HAVE_AS_FMAF_HPC_VIS3
1465 & ~(MASK_FMAF | MASK_VIS3)
1466 #endif
1467 #ifndef HAVE_AS_SPARC4
1468 & ~MASK_CBCOND
1469 #endif
1470 #ifndef HAVE_AS_SPARC5_VIS4
1471 & ~(MASK_VIS4 | MASK_SUBXC)
1472 #endif
1473 #ifndef HAVE_AS_LEON
1474 & ~(MASK_LEON | MASK_LEON3)
1475 #endif
1476 );
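  /* Worked example, reading cpu_table above: -mcpu=niagara4 first clears the
     MASK_ISA bits and then sets MASK_V9, MASK_POPC, MASK_VIS3, MASK_FMAF and
     MASK_CBCOND, minus whatever the HAVE_AS_* checks just above strip out
     for an older assembler.  */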
1477
1478 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1479 the processor default. */
1480 if (target_flags_explicit & MASK_FPU)
1481 target_flags = (target_flags & ~MASK_FPU) | fpu;
1482
1483 /* -mvis2 implies -mvis */
1484 if (TARGET_VIS2)
1485 target_flags |= MASK_VIS;
1486
1487 /* -mvis3 implies -mvis2 and -mvis */
1488 if (TARGET_VIS3)
1489 target_flags |= MASK_VIS2 | MASK_VIS;
1490
1491 /* -mvis4 implies -mvis3, -mvis2 and -mvis */
1492 if (TARGET_VIS4)
1493 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1494
1495 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
1496 disabled. */
1497 if (! TARGET_FPU)
1498 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1499 | MASK_FMAF);
1500
1501 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1502 are available.
1503 -m64 also implies v9. */
1504 if (TARGET_VIS || TARGET_ARCH64)
1505 {
1506 target_flags |= MASK_V9;
1507 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1508 }
1509
1510 /* -mvis also implies -mv8plus on 32-bit */
1511 if (TARGET_VIS && ! TARGET_ARCH64)
1512 target_flags |= MASK_V8PLUS;
1513
1514 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1515 if (TARGET_V9 && TARGET_ARCH32)
1516 target_flags |= MASK_DEPRECATED_V8_INSNS;
1517
1518 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1519 if (! TARGET_V9 || TARGET_ARCH64)
1520 target_flags &= ~MASK_V8PLUS;
1521
1522 /* Don't use stack biasing in 32 bit mode. */
1523 if (TARGET_ARCH32)
1524 target_flags &= ~MASK_STACK_BIAS;
1525
1526 /* Supply a default value for align_functions. */
1527 if (align_functions == 0
1528 && (sparc_cpu == PROCESSOR_ULTRASPARC
1529 || sparc_cpu == PROCESSOR_ULTRASPARC3
1530 || sparc_cpu == PROCESSOR_NIAGARA
1531 || sparc_cpu == PROCESSOR_NIAGARA2
1532 || sparc_cpu == PROCESSOR_NIAGARA3
1533 || sparc_cpu == PROCESSOR_NIAGARA4
1534 || sparc_cpu == PROCESSOR_NIAGARA7))
1535 align_functions = 32;
1536
1537 /* Validate PCC_STRUCT_RETURN. */
1538 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1539 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1540
1541 /* Only use .uaxword when compiling for a 64-bit target. */
1542 if (!TARGET_ARCH64)
1543 targetm.asm_out.unaligned_op.di = NULL;
1544
1545 /* Do various machine dependent initializations. */
1546 sparc_init_modes ();
1547
1548 /* Set up function hooks. */
1549 init_machine_status = sparc_init_machine_status;
1550
1551 switch (sparc_cpu)
1552 {
1553 case PROCESSOR_V7:
1554 case PROCESSOR_CYPRESS:
1555 sparc_costs = &cypress_costs;
1556 break;
1557 case PROCESSOR_V8:
1558 case PROCESSOR_SPARCLITE:
1559 case PROCESSOR_SUPERSPARC:
1560 sparc_costs = &supersparc_costs;
1561 break;
1562 case PROCESSOR_F930:
1563 case PROCESSOR_F934:
1564 case PROCESSOR_HYPERSPARC:
1565 case PROCESSOR_SPARCLITE86X:
1566 sparc_costs = &hypersparc_costs;
1567 break;
1568 case PROCESSOR_LEON:
1569 sparc_costs = &leon_costs;
1570 break;
1571 case PROCESSOR_LEON3:
1572 case PROCESSOR_LEON3V7:
1573 sparc_costs = &leon3_costs;
1574 break;
1575 case PROCESSOR_SPARCLET:
1576 case PROCESSOR_TSC701:
1577 sparc_costs = &sparclet_costs;
1578 break;
1579 case PROCESSOR_V9:
1580 case PROCESSOR_ULTRASPARC:
1581 sparc_costs = &ultrasparc_costs;
1582 break;
1583 case PROCESSOR_ULTRASPARC3:
1584 sparc_costs = &ultrasparc3_costs;
1585 break;
1586 case PROCESSOR_NIAGARA:
1587 sparc_costs = &niagara_costs;
1588 break;
1589 case PROCESSOR_NIAGARA2:
1590 sparc_costs = &niagara2_costs;
1591 break;
1592 case PROCESSOR_NIAGARA3:
1593 sparc_costs = &niagara3_costs;
1594 break;
1595 case PROCESSOR_NIAGARA4:
1596 sparc_costs = &niagara4_costs;
1597 break;
1598 case PROCESSOR_NIAGARA7:
1599 sparc_costs = &niagara7_costs;
1600 break;
1601 case PROCESSOR_NATIVE:
1602 gcc_unreachable ();
1603     }
1604
1605 if (sparc_memory_model == SMM_DEFAULT)
1606 {
1607 /* Choose the memory model for the operating system. */
1608 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1609 if (os_default != SMM_DEFAULT)
1610 sparc_memory_model = os_default;
1611 /* Choose the most relaxed model for the processor. */
1612 else if (TARGET_V9)
1613 sparc_memory_model = SMM_RMO;
1614 else if (TARGET_LEON3)
1615 sparc_memory_model = SMM_TSO;
1616 else if (TARGET_LEON)
1617 sparc_memory_model = SMM_SC;
1618 else if (TARGET_V8)
1619 sparc_memory_model = SMM_PSO;
1620 else
1621 sparc_memory_model = SMM_SC;
1622 }
1623
1624 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1625 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1626 target_flags |= MASK_LONG_DOUBLE_128;
1627 #endif
1628
1629 if (TARGET_DEBUG_OPTIONS)
1630 dump_target_flags ("Final target_flags", target_flags);
1631
1632 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1633 can run at the same time. More important, it is the threshold
1634 defining when additional prefetches will be dropped by the
1635 hardware.
1636
1637 The UltraSPARC-III features a documented prefetch queue with a
1638 size of 8. Additional prefetches issued in the cpu are
1639 dropped.
1640
1641 Niagara processors are different. In these processors prefetches
1642 are handled much like regular loads. The L1 miss buffer is 32
1643 entries, but prefetches start getting affected when 30 entries
1644 become occupied. That occupation could be a mix of regular loads
1645 and prefetches though. And that buffer is shared by all threads.
1646 Once the threshold is reached, if the core is running a single
1647 thread the prefetch will retry. If more than one thread is
1648 running, the prefetch will be dropped.
1649
1650      All this makes it very difficult to determine how many prefetches
1651      can be issued simultaneously, even in a
1652 single-threaded program. Experimental results show that setting
1653 this parameter to 32 works well when the number of threads is not
1654 high. */
1655 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1656 ((sparc_cpu == PROCESSOR_ULTRASPARC
1657 || sparc_cpu == PROCESSOR_NIAGARA
1658 || sparc_cpu == PROCESSOR_NIAGARA2
1659 || sparc_cpu == PROCESSOR_NIAGARA3
1660 || sparc_cpu == PROCESSOR_NIAGARA4)
1661 ? 2
1662 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1663 ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
1664 ? 32 : 3))),
1665 global_options.x_param_values,
1666 global_options_set.x_param_values);
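  /* That is, with the settings above: 2 simultaneous prefetches for
     UltraSPARC-I/II and Niagara T1-T4, 8 for UltraSPARC-III, 32 for the M7
     (niagara7) and 3 for everything else.  */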
1667
1668 /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
1669 params.def), so no maybe_set_param_value is needed.
1670
1671 The Oracle SPARC Architecture (previously the UltraSPARC
1672 Architecture) specification states that when a PREFETCH[A]
1673 instruction is executed an implementation-specific amount of data
1674 is prefetched, and that it is at least 64 bytes long (aligned to
1675 at least 64 bytes).
1676
1677 However, this is not correct. The M7 (and implementations prior
1678 to that) does not guarantee a 64B prefetch into a cache if the
1679 line size is smaller. A single cache line is all that is ever
1680 prefetched. So for the M7, where the L1D$ has 32B lines and the
1681 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1682 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1683 is a read_n prefetch, which is the only type which allocates to
1684 the L1.) */
1685
1686 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1687 Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
1688 Niagara processors feature an L1D$ of 16KB.  */
1689 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1690 ((sparc_cpu == PROCESSOR_ULTRASPARC
1691 || sparc_cpu == PROCESSOR_ULTRASPARC3
1692 || sparc_cpu == PROCESSOR_NIAGARA
1693 || sparc_cpu == PROCESSOR_NIAGARA2
1694 || sparc_cpu == PROCESSOR_NIAGARA3
1695 || sparc_cpu == PROCESSOR_NIAGARA4
1696 || sparc_cpu == PROCESSOR_NIAGARA7)
1697 ? 16 : 64),
1698 global_options.x_param_values,
1699 global_options_set.x_param_values);
1700
1701
1702 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes.  Note
1703 that 512 is the default in params.def. */
1704 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1705 (sparc_cpu == PROCESSOR_NIAGARA4
1706 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1707 ? 256 : 512)),
1708 global_options.x_param_values,
1709 global_options_set.x_param_values);
1710
1711
1712 /* Disable save slot sharing for call-clobbered registers by default.
1713 The IRA sharing algorithm works on single registers only and this
1714 pessimizes for double floating-point registers. */
1715 if (!global_options_set.x_flag_ira_share_save_slots)
1716 flag_ira_share_save_slots = 0;
1717
1718 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1719 redundant 32-to-64-bit extensions. */
1720 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1721 flag_ree = 0;
1722 }
1723 \f
1724 /* Miscellaneous utilities. */
1725
1726 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1727 or branch on register contents instructions. */
1728
1729 int
1730 v9_regcmp_p (enum rtx_code code)
1731 {
1732 return (code == EQ || code == NE || code == GE || code == LT
1733 || code == LE || code == GT);
1734 }
1735
1736 /* Nonzero if OP is a floating point constant which can
1737 be loaded into an integer register using a single
1738 sethi instruction. */
1739
1740 int
1741 fp_sethi_p (rtx op)
1742 {
1743 if (GET_CODE (op) == CONST_DOUBLE)
1744 {
1745 long i;
1746
1747 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1748 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1749 }
1750
1751 return 0;
1752 }
1753
1754 /* Nonzero if OP is a floating point constant which can
1755 be loaded into an integer register using a single
1756 mov instruction. */
1757
1758 int
1759 fp_mov_p (rtx op)
1760 {
1761 if (GET_CODE (op) == CONST_DOUBLE)
1762 {
1763 long i;
1764
1765 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1766 return SPARC_SIMM13_P (i);
1767 }
1768
1769 return 0;
1770 }
1771
1772 /* Nonzero if OP is a floating point constant which can
1773 be loaded into an integer register using a high/losum
1774 instruction sequence. */
1775
1776 int
1777 fp_high_losum_p (rtx op)
1778 {
1779 /* The constraints calling this should only be in
1780 SFmode move insns, so any constant which cannot
1781 be moved using a single insn will do. */
1782 if (GET_CODE (op) == CONST_DOUBLE)
1783 {
1784 long i;
1785
1786 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1787 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1788 }
1789
1790 return 0;
1791 }
1792
1793 /* Return true if the address of LABEL can be loaded by means of the
1794 mov{si,di}_pic_label_ref patterns in PIC mode. */
1795
1796 static bool
1797 can_use_mov_pic_label_ref (rtx label)
1798 {
1799 /* VxWorks does not impose a fixed gap between segments; the run-time
1800 gap can be different from the object-file gap. We therefore can't
1801 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1802 are absolutely sure that X is in the same segment as the GOT.
1803 Unfortunately, the flexibility of linker scripts means that we
1804 can't be sure of that in general, so assume that GOT-relative
1805 accesses are never valid on VxWorks. */
1806 if (TARGET_VXWORKS_RTP)
1807 return false;
1808
1809 /* Similarly, if the label is non-local, it might end up being placed
1810 in a different section than the current one, whereas mov_pic_label_ref
1811 requires the label and the code to be in the same section. */
1812 if (LABEL_REF_NONLOCAL_P (label))
1813 return false;
1814
1815 /* Finally, if we are reordering basic blocks and partitioning into hot
1816 and cold sections, this might happen for any label. */
1817 if (flag_reorder_blocks_and_partition)
1818 return false;
1819
1820 return true;
1821 }
1822
1823 /* Expand a move instruction. Return true if all work is done. */
1824
1825 bool
1826 sparc_expand_move (machine_mode mode, rtx *operands)
1827 {
1828 /* Handle sets of MEM first. */
1829 if (GET_CODE (operands[0]) == MEM)
1830 {
1831 /* 0 is a register (or a pair of registers) on SPARC. */
1832 if (register_or_zero_operand (operands[1], mode))
1833 return false;
1834
1835 if (!reload_in_progress)
1836 {
1837 operands[0] = validize_mem (operands[0]);
1838 operands[1] = force_reg (mode, operands[1]);
1839 }
1840 }
1841
1842 /* Fixup TLS cases. */
1843 if (TARGET_HAVE_TLS
1844 && CONSTANT_P (operands[1])
1845 && sparc_tls_referenced_p (operands [1]))
1846 {
1847 operands[1] = sparc_legitimize_tls_address (operands[1]);
1848 return false;
1849 }
1850
1851 /* Fixup PIC cases. */
1852 if (flag_pic && CONSTANT_P (operands[1]))
1853 {
1854 if (pic_address_needs_scratch (operands[1]))
1855 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1856
1857 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1858 if (GET_CODE (operands[1]) == LABEL_REF
1859 && can_use_mov_pic_label_ref (operands[1]))
1860 {
1861 if (mode == SImode)
1862 {
1863 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1864 return true;
1865 }
1866
1867 if (mode == DImode)
1868 {
1869 gcc_assert (TARGET_ARCH64);
1870 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1871 return true;
1872 }
1873 }
1874
1875 if (symbolic_operand (operands[1], mode))
1876 {
1877 operands[1]
1878 = sparc_legitimize_pic_address (operands[1],
1879 reload_in_progress
1880 ? operands[0] : NULL_RTX);
1881 return false;
1882 }
1883 }
1884
1885 /* If we are trying to toss an integer constant into FP registers,
1886 or loading a FP or vector constant, force it into memory. */
1887 if (CONSTANT_P (operands[1])
1888 && REG_P (operands[0])
1889 && (SPARC_FP_REG_P (REGNO (operands[0]))
1890 || SCALAR_FLOAT_MODE_P (mode)
1891 || VECTOR_MODE_P (mode)))
1892 {
1893 /* emit_group_store will send such bogosity to us when it is
1894 not storing directly into memory. So fix this up to avoid
1895 crashes in output_constant_pool. */
1896 if (operands [1] == const0_rtx)
1897 operands[1] = CONST0_RTX (mode);
1898
1899 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
1900 and we can always do so for the other (integer) registers.  */
1901 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1902 && (const_zero_operand (operands[1], mode)
1903 || const_all_ones_operand (operands[1], mode)))
1904 return false;
1905
1906 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1907 /* We are able to build any SF constant in integer registers
1908 with at most 2 instructions. */
1909 && (mode == SFmode
1910 /* And any DF constant in integer registers. */
1911 || (mode == DFmode
1912 && ! can_create_pseudo_p ())))
1913 return false;
1914
1915 operands[1] = force_const_mem (mode, operands[1]);
1916 if (!reload_in_progress)
1917 operands[1] = validize_mem (operands[1]);
1918 return false;
1919 }
1920
1921 /* Accept non-constants and valid constants unmodified. */
1922 if (!CONSTANT_P (operands[1])
1923 || GET_CODE (operands[1]) == HIGH
1924 || input_operand (operands[1], mode))
1925 return false;
1926
1927 switch (mode)
1928 {
1929 case QImode:
1930 /* All QImode constants require only one insn, so proceed. */
1931 break;
1932
1933 case HImode:
1934 case SImode:
1935 sparc_emit_set_const32 (operands[0], operands[1]);
1936 return true;
1937
1938 case DImode:
1939 /* input_operand should have filtered out 32-bit mode. */
1940 sparc_emit_set_const64 (operands[0], operands[1]);
1941 return true;
1942
1943 case TImode:
1944 {
1945 rtx high, low;
1946 /* TImode isn't available in 32-bit mode. */
1947 split_double (operands[1], &high, &low);
1948 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1949 high));
1950 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1951 low));
1952 }
1953 return true;
1954
1955 default:
1956 gcc_unreachable ();
1957 }
1958
1959 return false;
1960 }
1961
1962 /* Load OP1, a 32-bit constant, into OP0, a register.
1963 We know it can't be done in one insn when we get
1964 here; the move expander guarantees this.  */
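/* Illustrative example (not part of the original sources): for a CONST_INT
   OP1 of 0x12345678 the two SETs emitted below amount to

       sethi  %hi(0x12345678), %temp   ! %temp = 0x12345400
       or     %temp, 0x278, %reg       ! %reg  = 0x12345678

   since 0x12345678 & ~0x3ff == 0x12345400 and 0x12345678 & 0x3ff == 0x278.  */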
1965
1966 static void
1967 sparc_emit_set_const32 (rtx op0, rtx op1)
1968 {
1969 machine_mode mode = GET_MODE (op0);
1970 rtx temp = op0;
1971
1972 if (can_create_pseudo_p ())
1973 temp = gen_reg_rtx (mode);
1974
1975 if (GET_CODE (op1) == CONST_INT)
1976 {
1977 gcc_assert (!small_int_operand (op1, mode)
1978 && !const_high_operand (op1, mode));
1979
1980 /* Emit them as real moves instead of a HIGH/LO_SUM,
1981 this way CSE can see everything and reuse intermediate
1982 values if it wants. */
1983 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1984 & ~(HOST_WIDE_INT) 0x3ff)));
1985
1986 emit_insn (gen_rtx_SET (op0,
1987 gen_rtx_IOR (mode, temp,
1988 GEN_INT (INTVAL (op1) & 0x3ff))));
1989 }
1990 else
1991 {
1992 /* A symbol, emit in the traditional way. */
1993 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1994 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1995 }
1996 }
1997
1998 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1999 If TEMP is nonzero, we are forbidden to use any other scratch
2000 registers. Otherwise, we are allowed to generate them as needed.
2001
2002 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2003 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2004
2005 void
2006 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2007 {
2008 rtx temp1, temp2, temp3, temp4, temp5;
2009 rtx ti_temp = 0;
2010
2011 if (temp && GET_MODE (temp) == TImode)
2012 {
2013 ti_temp = temp;
2014 temp = gen_rtx_REG (DImode, REGNO (temp));
2015 }
2016
2017 /* SPARC-V9 code-model support. */
2018 switch (sparc_cmodel)
2019 {
2020 case CM_MEDLOW:
2021 /* The range spanned by all instructions in the object is less
2022 than 2^31 bytes (2GB) and the distance from any instruction
2023 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2024 than 2^31 bytes (2GB).
2025
2026 The executable must be in the low 4TB of the virtual address
2027 space.
2028
2029 sethi %hi(symbol), %temp1
2030 or %temp1, %lo(symbol), %reg */
2031 if (temp)
2032 temp1 = temp; /* op0 is allowed. */
2033 else
2034 temp1 = gen_reg_rtx (DImode);
2035
2036 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2037 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2038 break;
2039
2040 case CM_MEDMID:
2041 /* The range spanned by all instructions in the object is less
2042 than 2^31 bytes (2GB) and the distance from any instruction
2043 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2044 than 2^31 bytes (2GB).
2045
2046 The executable must be in the low 16TB of the virtual address
2047 space.
2048
2049 sethi %h44(symbol), %temp1
2050 or %temp1, %m44(symbol), %temp2
2051 sllx %temp2, 12, %temp3
2052 or %temp3, %l44(symbol), %reg */
2053 if (temp)
2054 {
2055 temp1 = op0;
2056 temp2 = op0;
2057 temp3 = temp; /* op0 is allowed. */
2058 }
2059 else
2060 {
2061 temp1 = gen_reg_rtx (DImode);
2062 temp2 = gen_reg_rtx (DImode);
2063 temp3 = gen_reg_rtx (DImode);
2064 }
2065
2066 emit_insn (gen_seth44 (temp1, op1));
2067 emit_insn (gen_setm44 (temp2, temp1, op1));
2068 emit_insn (gen_rtx_SET (temp3,
2069 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2070 emit_insn (gen_setl44 (op0, temp3, op1));
2071 break;
2072
2073 case CM_MEDANY:
2074 /* The range spanned by all instructions in the object is less
2075 than 2^31 bytes (2GB) and the distance from any instruction
2076 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2077 than 2^31 bytes (2GB).
2078
2079 The executable can be placed anywhere in the virtual address
2080 space.
2081
2082 sethi %hh(symbol), %temp1
2083 sethi %lm(symbol), %temp2
2084 or %temp1, %hm(symbol), %temp3
2085 sllx %temp3, 32, %temp4
2086 or %temp4, %temp2, %temp5
2087 or %temp5, %lo(symbol), %reg */
2088 if (temp)
2089 {
2090 /* It is possible that one of the registers we got for operands[2]
2091 might coincide with that of operands[0] (which is why we made
2092 it TImode). Pick the other one to use as our scratch. */
2093 if (rtx_equal_p (temp, op0))
2094 {
2095 gcc_assert (ti_temp);
2096 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2097 }
2098 temp1 = op0;
2099 temp2 = temp; /* op0 is _not_ allowed, see above. */
2100 temp3 = op0;
2101 temp4 = op0;
2102 temp5 = op0;
2103 }
2104 else
2105 {
2106 temp1 = gen_reg_rtx (DImode);
2107 temp2 = gen_reg_rtx (DImode);
2108 temp3 = gen_reg_rtx (DImode);
2109 temp4 = gen_reg_rtx (DImode);
2110 temp5 = gen_reg_rtx (DImode);
2111 }
2112
2113 emit_insn (gen_sethh (temp1, op1));
2114 emit_insn (gen_setlm (temp2, op1));
2115 emit_insn (gen_sethm (temp3, temp1, op1));
2116 emit_insn (gen_rtx_SET (temp4,
2117 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2118 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2119 emit_insn (gen_setlo (op0, temp5, op1));
2120 break;
2121
2122 case CM_EMBMEDANY:
2123 /* Old old old backwards compatibility cruft here.
2124 Essentially it is MEDLOW with a fixed 64-bit
2125 virtual base added to all data segment addresses.
2126 Text-segment stuff is computed like MEDANY, we can't
2127 reuse the code above because the relocation knobs
2128 look different.
2129
2130 Data segment: sethi %hi(symbol), %temp1
2131 add %temp1, EMBMEDANY_BASE_REG, %temp2
2132 or %temp2, %lo(symbol), %reg */
2133 if (data_segment_operand (op1, GET_MODE (op1)))
2134 {
2135 if (temp)
2136 {
2137 temp1 = temp; /* op0 is allowed. */
2138 temp2 = op0;
2139 }
2140 else
2141 {
2142 temp1 = gen_reg_rtx (DImode);
2143 temp2 = gen_reg_rtx (DImode);
2144 }
2145
2146 emit_insn (gen_embmedany_sethi (temp1, op1));
2147 emit_insn (gen_embmedany_brsum (temp2, temp1));
2148 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2149 }
2150
2151 /* Text segment: sethi %uhi(symbol), %temp1
2152 sethi %hi(symbol), %temp2
2153 or %temp1, %ulo(symbol), %temp3
2154 sllx %temp3, 32, %temp4
2155 or %temp4, %temp2, %temp5
2156 or %temp5, %lo(symbol), %reg */
2157 else
2158 {
2159 if (temp)
2160 {
2161 /* It is possible that one of the registers we got for operands[2]
2162 might coincide with that of operands[0] (which is why we made
2163 it TImode). Pick the other one to use as our scratch. */
2164 if (rtx_equal_p (temp, op0))
2165 {
2166 gcc_assert (ti_temp);
2167 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2168 }
2169 temp1 = op0;
2170 temp2 = temp; /* op0 is _not_ allowed, see above. */
2171 temp3 = op0;
2172 temp4 = op0;
2173 temp5 = op0;
2174 }
2175 else
2176 {
2177 temp1 = gen_reg_rtx (DImode);
2178 temp2 = gen_reg_rtx (DImode);
2179 temp3 = gen_reg_rtx (DImode);
2180 temp4 = gen_reg_rtx (DImode);
2181 temp5 = gen_reg_rtx (DImode);
2182 }
2183
2184 emit_insn (gen_embmedany_textuhi (temp1, op1));
2185 emit_insn (gen_embmedany_texthi (temp2, op1));
2186 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2187 emit_insn (gen_rtx_SET (temp4,
2188 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2189 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2190 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2191 }
2192 break;
2193
2194 default:
2195 gcc_unreachable ();
2196 }
2197 }
2198
2199 /* These avoid problems when cross compiling. If we do not
2200 go through all this hair then the optimizer will see
2201 invalid REG_EQUAL notes or in some cases none at all. */
2202 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2203 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2204 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2205 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2206
2207 /* The optimizer is not to assume anything about exactly
2208 which bits are set for a HIGH; they are unspecified.
2209 Unfortunately this leads to many missed optimizations
2210 during CSE.  We mask out the non-HIGH bits so that the
2211 result matches a plain movdi, to alleviate this problem.  */
2212 static rtx
2213 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2214 {
2215 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2216 }
2217
2218 static rtx
2219 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2220 {
2221 return gen_rtx_SET (dest, GEN_INT (val));
2222 }
2223
2224 static rtx
2225 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2226 {
2227 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2228 }
2229
2230 static rtx
2231 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2232 {
2233 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2234 }
2235
2236 /* Worker routines for 64-bit constant formation on arch64.
2237 One of the key things to do in these emissions is
2238 to create as many temp REGs as possible.  This makes it
2239 possible for half-built constants to be reused when
2240 similar values are required later on.
2241 Without doing this, the optimizer cannot see such
2242 opportunities.  */
2243
2244 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2245 unsigned HOST_WIDE_INT, int);
2246
2247 static void
2248 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2249 unsigned HOST_WIDE_INT low_bits, int is_neg)
2250 {
2251 unsigned HOST_WIDE_INT high_bits;
2252
2253 if (is_neg)
2254 high_bits = (~low_bits) & 0xffffffff;
2255 else
2256 high_bits = low_bits;
2257
2258 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2259 if (!is_neg)
2260 {
2261 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2262 }
2263 else
2264 {
2265 /* If we are XOR'ing with -1, then we should emit a one's complement
2266 instead. This way the combiner will notice logical operations
2267 such as ANDN later on and substitute. */
2268 if ((low_bits & 0x3ff) == 0x3ff)
2269 {
2270 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2271 }
2272 else
2273 {
2274 emit_insn (gen_rtx_SET (op0,
2275 gen_safe_XOR64 (temp,
2276 (-(HOST_WIDE_INT)0x400
2277 | (low_bits & 0x3ff)))));
2278 }
2279 }
2280 }
2281
2282 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2283 unsigned HOST_WIDE_INT, int);
2284
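/* Sketch of the strategy below (inferred from the code, added only as a
   reading aid): build HIGH_BITS in TEMP, either with a sethi/or pair or with
   a single simple set, shift the result left by SHIFT_COUNT into OP0, and
   finally OR in LOW_IMMEDIATE if it is nonzero.  */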
2285 static void
2286 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2287 unsigned HOST_WIDE_INT high_bits,
2288 unsigned HOST_WIDE_INT low_immediate,
2289 int shift_count)
2290 {
2291 rtx temp2 = op0;
2292
2293 if ((high_bits & 0xfffffc00) != 0)
2294 {
2295 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2296 if ((high_bits & ~0xfffffc00) != 0)
2297 emit_insn (gen_rtx_SET (op0,
2298 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2299 else
2300 temp2 = temp;
2301 }
2302 else
2303 {
2304 emit_insn (gen_safe_SET64 (temp, high_bits));
2305 temp2 = temp;
2306 }
2307
2308 /* Now shift it up into place. */
2309 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2310 GEN_INT (shift_count))));
2311
2312 /* If there is a low immediate piece, finish up by
2313 putting that in as well. */
2314 if (low_immediate != 0)
2315 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2316 }
2317
2318 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2319 unsigned HOST_WIDE_INT);
2320
2321 /* Full 64-bit constant decomposition. Even though this is the
2322 'worst' case, we still optimize a few things away. */
2323 static void
2324 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2325 unsigned HOST_WIDE_INT high_bits,
2326 unsigned HOST_WIDE_INT low_bits)
2327 {
2328 rtx sub_temp = op0;
2329
2330 if (can_create_pseudo_p ())
2331 sub_temp = gen_reg_rtx (DImode);
2332
2333 if ((high_bits & 0xfffffc00) != 0)
2334 {
2335 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2336 if ((high_bits & ~0xfffffc00) != 0)
2337 emit_insn (gen_rtx_SET (sub_temp,
2338 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2339 else
2340 sub_temp = temp;
2341 }
2342 else
2343 {
2344 emit_insn (gen_safe_SET64 (temp, high_bits));
2345 sub_temp = temp;
2346 }
2347
2348 if (can_create_pseudo_p ())
2349 {
2350 rtx temp2 = gen_reg_rtx (DImode);
2351 rtx temp3 = gen_reg_rtx (DImode);
2352 rtx temp4 = gen_reg_rtx (DImode);
2353
2354 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2355 GEN_INT (32))));
2356
2357 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2358 if ((low_bits & ~0xfffffc00) != 0)
2359 {
2360 emit_insn (gen_rtx_SET (temp3,
2361 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2362 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2363 }
2364 else
2365 {
2366 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2367 }
2368 }
2369 else
2370 {
2371 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2372 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2373 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2374 int to_shift = 12;
2375
2376 /* We are in the middle of reload, so this is really
2377 painful. However we do still make an attempt to
2378 avoid emitting truly stupid code. */
2379 if (low1 != const0_rtx)
2380 {
2381 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2382 GEN_INT (to_shift))));
2383 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2384 sub_temp = op0;
2385 to_shift = 12;
2386 }
2387 else
2388 {
2389 to_shift += 12;
2390 }
2391 if (low2 != const0_rtx)
2392 {
2393 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2394 GEN_INT (to_shift))));
2395 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2396 sub_temp = op0;
2397 to_shift = 8;
2398 }
2399 else
2400 {
2401 to_shift += 8;
2402 }
2403 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2404 GEN_INT (to_shift))));
2405 if (low3 != const0_rtx)
2406 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2407 /* phew... */
2408 }
2409 }
2410
2411 /* Analyze a 64-bit constant for certain properties. */
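/* The properties returned (as can be read off the code below) are the
   positions of the lowest and highest set bits and whether every bit in
   between is set.  Illustrative example, not from the original sources: for
   the constant 0x00000ff000000000 (high_bits 0x00000ff0, low_bits 0) the
   results are lowest_bit_set == 36, highest_bit_set == 43 and
   all_bits_between_are_set == 1.  */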
2412 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2413 unsigned HOST_WIDE_INT,
2414 int *, int *, int *);
2415
2416 static void
2417 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2418 unsigned HOST_WIDE_INT low_bits,
2419 int *hbsp, int *lbsp, int *abbasp)
2420 {
2421 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2422 int i;
2423
2424 lowest_bit_set = highest_bit_set = -1;
2425 i = 0;
2426 do
2427 {
2428 if ((lowest_bit_set == -1)
2429 && ((low_bits >> i) & 1))
2430 lowest_bit_set = i;
2431 if ((highest_bit_set == -1)
2432 && ((high_bits >> (32 - i - 1)) & 1))
2433 highest_bit_set = (64 - i - 1);
2434 }
2435 while (++i < 32
2436 && ((highest_bit_set == -1)
2437 || (lowest_bit_set == -1)));
2438 if (i == 32)
2439 {
2440 i = 0;
2441 do
2442 {
2443 if ((lowest_bit_set == -1)
2444 && ((high_bits >> i) & 1))
2445 lowest_bit_set = i + 32;
2446 if ((highest_bit_set == -1)
2447 && ((low_bits >> (32 - i - 1)) & 1))
2448 highest_bit_set = 32 - i - 1;
2449 }
2450 while (++i < 32
2451 && ((highest_bit_set == -1)
2452 || (lowest_bit_set == -1)));
2453 }
2454 /* If there are no bits set this should have gone out
2455 as one instruction! */
2456 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2457 all_bits_between_are_set = 1;
2458 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2459 {
2460 if (i < 32)
2461 {
2462 if ((low_bits & (1 << i)) != 0)
2463 continue;
2464 }
2465 else
2466 {
2467 if ((high_bits & (1 << (i - 32))) != 0)
2468 continue;
2469 }
2470 all_bits_between_are_set = 0;
2471 break;
2472 }
2473 *hbsp = highest_bit_set;
2474 *lbsp = lowest_bit_set;
2475 *abbasp = all_bits_between_are_set;
2476 }
2477
2478 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2479
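/* Reading aid (inferred from the code below): return nonzero if the 64-bit
   constant HIGH_BITS:LOW_BITS is expected to be loadable in two instructions,
   i.e. when the high word is all-zeros or all-ones, when the set bits run
   contiguously up to bit 63 or down to bit 0, or when all the set bits fit
   in a window narrow enough for a sethi plus a shift.  */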
2480 static int
2481 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2482 unsigned HOST_WIDE_INT low_bits)
2483 {
2484 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2485
2486 if (high_bits == 0
2487 || high_bits == 0xffffffff)
2488 return 1;
2489
2490 analyze_64bit_constant (high_bits, low_bits,
2491 &highest_bit_set, &lowest_bit_set,
2492 &all_bits_between_are_set);
2493
2494 if ((highest_bit_set == 63
2495 || lowest_bit_set == 0)
2496 && all_bits_between_are_set != 0)
2497 return 1;
2498
2499 if ((highest_bit_set - lowest_bit_set) < 21)
2500 return 1;
2501
2502 return 0;
2503 }
2504
2505 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2506 unsigned HOST_WIDE_INT,
2507 int, int);
2508
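/* Reading aid (inferred from the code below): take the contiguous group of
   interesting bits in HIGH_BITS:LOW_BITS, starting at LOWEST_BIT_SET, and
   return it repositioned so that its lowest bit lands at position SHIFT.
   The caller uses SHIFT == 10 to build a sethi-able value and SHIFT == 0 to
   build a small immediate.  */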
2509 static unsigned HOST_WIDE_INT
2510 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2511 unsigned HOST_WIDE_INT low_bits,
2512 int lowest_bit_set, int shift)
2513 {
2514 HOST_WIDE_INT hi, lo;
2515
2516 if (lowest_bit_set < 32)
2517 {
2518 lo = (low_bits >> lowest_bit_set) << shift;
2519 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2520 }
2521 else
2522 {
2523 lo = 0;
2524 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2525 }
2526 gcc_assert (! (hi & lo));
2527 return (hi | lo);
2528 }
2529
2530 /* Here we are sure to be arch64 and this is an integer constant
2531 being loaded into a register. Emit the most efficient
2532 insn sequence possible. Detection of all the 1-insn cases
2533 has been done already. */
2534 static void
2535 sparc_emit_set_const64 (rtx op0, rtx op1)
2536 {
2537 unsigned HOST_WIDE_INT high_bits, low_bits;
2538 int lowest_bit_set, highest_bit_set;
2539 int all_bits_between_are_set;
2540 rtx temp = 0;
2541
2542 /* Sanity check that we know what we are working with. */
2543 gcc_assert (TARGET_ARCH64
2544 && (GET_CODE (op0) == SUBREG
2545 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2546
2547 if (! can_create_pseudo_p ())
2548 temp = op0;
2549
2550 if (GET_CODE (op1) != CONST_INT)
2551 {
2552 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2553 return;
2554 }
2555
2556 if (! temp)
2557 temp = gen_reg_rtx (DImode);
2558
2559 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2560 low_bits = (INTVAL (op1) & 0xffffffff);
2561
2562 /* low_bits bits 0 --> 31
2563 high_bits bits 32 --> 63 */
2564
2565 analyze_64bit_constant (high_bits, low_bits,
2566 &highest_bit_set, &lowest_bit_set,
2567 &all_bits_between_are_set);
2568
2569 /* First try for a 2-insn sequence. */
2570
2571 /* These situations are preferred because the optimizer can
2572 * do more things with them:
2573 * 1) mov -1, %reg
2574 * sllx %reg, shift, %reg
2575 * 2) mov -1, %reg
2576 * srlx %reg, shift, %reg
2577 * 3) mov some_small_const, %reg
2578 * sllx %reg, shift, %reg
2579 */
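  /* Illustrative example (not from the original sources): the constant
     0xfffffffffffff000 has bits 12..63 set, so case 1) applies and the code
     below emits the equivalent of

	 mov   -1, %reg
	 sllx  %reg, 12, %reg

     which indeed yields 0xfffffffffffff000.  */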
2580 if (((highest_bit_set == 63
2581 || lowest_bit_set == 0)
2582 && all_bits_between_are_set != 0)
2583 || ((highest_bit_set - lowest_bit_set) < 12))
2584 {
2585 HOST_WIDE_INT the_const = -1;
2586 int shift = lowest_bit_set;
2587
2588 if ((highest_bit_set != 63
2589 && lowest_bit_set != 0)
2590 || all_bits_between_are_set == 0)
2591 {
2592 the_const =
2593 create_simple_focus_bits (high_bits, low_bits,
2594 lowest_bit_set, 0);
2595 }
2596 else if (lowest_bit_set == 0)
2597 shift = -(63 - highest_bit_set);
2598
2599 gcc_assert (SPARC_SIMM13_P (the_const));
2600 gcc_assert (shift != 0);
2601
2602 emit_insn (gen_safe_SET64 (temp, the_const));
2603 if (shift > 0)
2604 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2605 GEN_INT (shift))));
2606 else if (shift < 0)
2607 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2608 GEN_INT (-shift))));
2609 return;
2610 }
2611
2612 /* Now a range of 22 or fewer bits set somewhere.
2613 * 1) sethi %hi(focus_bits), %reg
2614 * sllx %reg, shift, %reg
2615 * 2) sethi %hi(focus_bits), %reg
2616 * srlx %reg, shift, %reg
2617 */
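  /* Illustrative example (not from the original sources): the constant
     0x00003ffffc000000 has bits 26..45 set.  create_simple_focus_bits
     returns 0x3ffffc00, so the code below emits the equivalent of

	 sethi  %hi(0x3ffffc00), %reg   ! %reg = 0x3ffffc00
	 sllx   %reg, 16, %reg          ! %reg = 0x3ffffc000000

     since lowest_bit_set - 10 == 16.  */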
2618 if ((highest_bit_set - lowest_bit_set) < 21)
2619 {
2620 unsigned HOST_WIDE_INT focus_bits =
2621 create_simple_focus_bits (high_bits, low_bits,
2622 lowest_bit_set, 10);
2623
2624 gcc_assert (SPARC_SETHI_P (focus_bits));
2625 gcc_assert (lowest_bit_set != 10);
2626
2627 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2628
2629 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2630 if (lowest_bit_set < 10)
2631 emit_insn (gen_rtx_SET (op0,
2632 gen_rtx_LSHIFTRT (DImode, temp,
2633 GEN_INT (10 - lowest_bit_set))));
2634 else if (lowest_bit_set > 10)
2635 emit_insn (gen_rtx_SET (op0,
2636 gen_rtx_ASHIFT (DImode, temp,
2637 GEN_INT (lowest_bit_set - 10))));
2638 return;
2639 }
2640
2641 /* 1) sethi %hi(low_bits), %reg
2642 * or %reg, %lo(low_bits), %reg
2643 * 2) sethi %hi(~low_bits), %reg
2644 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2645 */
2646 if (high_bits == 0
2647 || high_bits == 0xffffffff)
2648 {
2649 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2650 (high_bits == 0xffffffff));
2651 return;
2652 }
2653
2654 /* Now, try 3-insn sequences. */
2655
2656 /* 1) sethi %hi(high_bits), %reg
2657 * or %reg, %lo(high_bits), %reg
2658 * sllx %reg, 32, %reg
2659 */
2660 if (low_bits == 0)
2661 {
2662 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2663 return;
2664 }
2665
2666 /* We may be able to do something quick
2667 when the constant is negated, so try that. */
2668 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2669 (~low_bits) & 0xfffffc00))
2670 {
2671 /* NOTE: The trailing bits get XOR'd so we need the
2672 non-negated bits, not the negated ones. */
2673 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2674
2675 if ((((~high_bits) & 0xffffffff) == 0
2676 && ((~low_bits) & 0x80000000) == 0)
2677 || (((~high_bits) & 0xffffffff) == 0xffffffff
2678 && ((~low_bits) & 0x80000000) != 0))
2679 {
2680 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2681
2682 if ((SPARC_SETHI_P (fast_int)
2683 && (~high_bits & 0xffffffff) == 0)
2684 || SPARC_SIMM13_P (fast_int))
2685 emit_insn (gen_safe_SET64 (temp, fast_int));
2686 else
2687 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2688 }
2689 else
2690 {
2691 rtx negated_const;
2692 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2693 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2694 sparc_emit_set_const64 (temp, negated_const);
2695 }
2696
2697 /* If we are XOR'ing with -1, then we should emit a one's complement
2698 instead. This way the combiner will notice logical operations
2699 such as ANDN later on and substitute. */
2700 if (trailing_bits == 0x3ff)
2701 {
2702 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2703 }
2704 else
2705 {
2706 emit_insn (gen_rtx_SET (op0,
2707 gen_safe_XOR64 (temp,
2708 (-0x400 | trailing_bits))));
2709 }
2710 return;
2711 }
2712
2713 /* 1) sethi %hi(xxx), %reg
2714 * or %reg, %lo(xxx), %reg
2715 * sllx %reg, yyy, %reg
2716 *
2717 * ??? This is just a generalized version of the low_bits==0
2718 * thing above, FIXME...
2719 */
2720 if ((highest_bit_set - lowest_bit_set) < 32)
2721 {
2722 unsigned HOST_WIDE_INT focus_bits =
2723 create_simple_focus_bits (high_bits, low_bits,
2724 lowest_bit_set, 0);
2725
2726 /* We can't get here in this state. */
2727 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2728
2729 /* So what we know is that the set bits straddle the
2730 middle of the 64-bit word. */
2731 sparc_emit_set_const64_quick2 (op0, temp,
2732 focus_bits, 0,
2733 lowest_bit_set);
2734 return;
2735 }
2736
2737 /* 1) sethi %hi(high_bits), %reg
2738 * or %reg, %lo(high_bits), %reg
2739 * sllx %reg, 32, %reg
2740 * or %reg, low_bits, %reg
2741 */
2742 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2743 {
2744 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2745 return;
2746 }
2747
2748 /* The easiest way, when all else fails, is full decomposition.  */
2749 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2750 }
2751
2752 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2753
2754 static bool
2755 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2756 {
2757 *p1 = SPARC_ICC_REG;
2758 *p2 = SPARC_FCC_REG;
2759 return true;
2760 }
2761
2762 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2763
2764 static unsigned int
2765 sparc_min_arithmetic_precision (void)
2766 {
2767 return 32;
2768 }
2769
2770 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2771 return the mode to be used for the comparison. For floating-point,
2772 CCFP[E]mode is used. CCNZmode should be used when the first operand
2773 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2774 processing is needed. */
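/* Reading aid (follows directly from the code below): a floating-point EQ or
   NE comparison selects CCFPmode, a floating-point LT selects CCFPEmode, an
   SImode comparison of (plus a b) against zero selects CCNZmode, and an
   ordinary SImode comparison selects plain CCmode.  */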
2775
2776 machine_mode
2777 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2778 {
2779 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2780 {
2781 switch (op)
2782 {
2783 case EQ:
2784 case NE:
2785 case UNORDERED:
2786 case ORDERED:
2787 case UNLT:
2788 case UNLE:
2789 case UNGT:
2790 case UNGE:
2791 case UNEQ:
2792 case LTGT:
2793 return CCFPmode;
2794
2795 case LT:
2796 case LE:
2797 case GT:
2798 case GE:
2799 return CCFPEmode;
2800
2801 default:
2802 gcc_unreachable ();
2803 }
2804 }
2805 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2806 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2807 && y == const0_rtx)
2808 {
2809 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2810 return CCXNZmode;
2811 else
2812 return CCNZmode;
2813 }
2814 else
2815 {
2816 /* This is for the cmp<mode>_sne pattern. */
2817 if (GET_CODE (x) == NOT && y == constm1_rtx)
2818 {
2819 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2820 return CCXCmode;
2821 else
2822 return CCCmode;
2823 }
2824
2825 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
2826 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
2827 {
2828 if (GET_CODE (y) == UNSPEC
2829 && (XINT (y, 1) == UNSPEC_ADDV
2830 || XINT (y, 1) == UNSPEC_SUBV
2831 || XINT (y, 1) == UNSPEC_NEGV))
2832 return CCVmode;
2833 else
2834 return CCCmode;
2835 }
2836
2837 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2838 return CCXmode;
2839 else
2840 return CCmode;
2841 }
2842 }
2843
2844 /* Emit the compare insn and return the CC reg for a CODE comparison
2845 with operands X and Y. */
2846
2847 static rtx
2848 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2849 {
2850 machine_mode mode;
2851 rtx cc_reg;
2852
2853 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2854 return x;
2855
2856 mode = SELECT_CC_MODE (code, x, y);
2857
2858 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2859 fcc regs (cse can't tell they're really call clobbered regs and will
2860 remove a duplicate comparison even if there is an intervening function
2861 call; it will then try to reload the cc reg via an int reg, which is why
2862 we need the movcc patterns). It is possible to provide the movcc
2863 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2864 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2865 to tell cse that CCFPE mode registers (even pseudos) are call
2866 clobbered. */
2867
2868 /* ??? This is an experiment. Rather than making changes to cse which may
2869 or may not be easy/clean, we do our own cse. This is possible because
2870 we will generate hard registers. Cse knows they're call clobbered (it
2871 doesn't know the same thing about pseudos). If we guess wrong, no big
2872 deal, but if we win, great! */
2873
2874 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2875 #if 1 /* experiment */
2876 {
2877 int reg;
2878 /* We cycle through the registers to ensure they're all exercised. */
2879 static int next_fcc_reg = 0;
2880 /* Previous x,y for each fcc reg. */
2881 static rtx prev_args[4][2];
2882
2883 /* Scan prev_args for x,y. */
2884 for (reg = 0; reg < 4; reg++)
2885 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2886 break;
2887 if (reg == 4)
2888 {
2889 reg = next_fcc_reg;
2890 prev_args[reg][0] = x;
2891 prev_args[reg][1] = y;
2892 next_fcc_reg = (next_fcc_reg + 1) & 3;
2893 }
2894 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2895 }
2896 #else
2897 cc_reg = gen_reg_rtx (mode);
2898 #endif /* ! experiment */
2899 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2900 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2901 else
2902 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2903
2904 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
2905 will only result in an unrecognizable insn, so no point in asserting.  */
2906 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2907
2908 return cc_reg;
2909 }
2910
2911
2912 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2913
2914 rtx
2915 gen_compare_reg (rtx cmp)
2916 {
2917 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2918 }
2919
2920 /* This function is used for v9 only.
2921 DEST is the target of the Scc insn.
2922 CODE is the code for an Scc's comparison.
2923 X and Y are the values we compare.
2924
2925 This function is needed to turn
2926
2927 (set (reg:SI 110)
2928 (gt (reg:CCX 100 %icc)
2929 (const_int 0)))
2930 into
2931 (set (reg:SI 110)
2932 (gt:DI (reg:CCX 100 %icc)
2933 (const_int 0)))
2934
2935 I.e., the instruction recognizer needs to see the mode of the comparison to
2936 find the right instruction. We could use "gt:DI" right in the
2937 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2938
2939 static int
2940 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2941 {
2942 if (! TARGET_ARCH64
2943 && (GET_MODE (x) == DImode
2944 || GET_MODE (dest) == DImode))
2945 return 0;
2946
2947 /* Try to use the movrCC insns. */
2948 if (TARGET_ARCH64
2949 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2950 && y == const0_rtx
2951 && v9_regcmp_p (compare_code))
2952 {
2953 rtx op0 = x;
2954 rtx temp;
2955
2956 /* Special case for op0 != 0. This can be done with one instruction if
2957 dest == x. */
2958
2959 if (compare_code == NE
2960 && GET_MODE (dest) == DImode
2961 && rtx_equal_p (op0, dest))
2962 {
2963 emit_insn (gen_rtx_SET (dest,
2964 gen_rtx_IF_THEN_ELSE (DImode,
2965 gen_rtx_fmt_ee (compare_code, DImode,
2966 op0, const0_rtx),
2967 const1_rtx,
2968 dest)));
2969 return 1;
2970 }
2971
2972 if (reg_overlap_mentioned_p (dest, op0))
2973 {
2974 /* Handle the case where dest == x.
2975 We "early clobber" the result. */
2976 op0 = gen_reg_rtx (GET_MODE (x));
2977 emit_move_insn (op0, x);
2978 }
2979
2980 emit_insn (gen_rtx_SET (dest, const0_rtx));
2981 if (GET_MODE (op0) != DImode)
2982 {
2983 temp = gen_reg_rtx (DImode);
2984 convert_move (temp, op0, 0);
2985 }
2986 else
2987 temp = op0;
2988 emit_insn (gen_rtx_SET (dest,
2989 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2990 gen_rtx_fmt_ee (compare_code, DImode,
2991 temp, const0_rtx),
2992 const1_rtx,
2993 dest)));
2994 return 1;
2995 }
2996 else
2997 {
2998 x = gen_compare_reg_1 (compare_code, x, y);
2999 y = const0_rtx;
3000
3001 emit_insn (gen_rtx_SET (dest, const0_rtx));
3002 emit_insn (gen_rtx_SET (dest,
3003 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3004 gen_rtx_fmt_ee (compare_code,
3005 GET_MODE (x), x, y),
3006 const1_rtx, dest)));
3007 return 1;
3008 }
3009 }
3010
3011
3012 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3013 without jumps using the addx/subx instructions. */
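/* As a reading aid (these are the usual SPARC idioms, not taken from this
   file): an "sne" of X can be synthesized as

       subcc  %g0, X, %g0        ! set carry iff X != 0
       addx   %g0, 0, DEST       ! DEST = carry

   and an "seq" as the same subcc followed by "subx %g0, -1, DEST".  */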
3014
3015 bool
3016 emit_scc_insn (rtx operands[])
3017 {
3018 rtx tem, x, y;
3019 enum rtx_code code;
3020 machine_mode mode;
3021
3022 /* The quad-word fp compare library routines all return nonzero to indicate
3023 true, which is different from the equivalent libgcc routines, so we must
3024 handle them specially here. */
3025 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3026 {
3027 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3028 GET_CODE (operands[1]));
3029 operands[2] = XEXP (operands[1], 0);
3030 operands[3] = XEXP (operands[1], 1);
3031 }
3032
3033 code = GET_CODE (operands[1]);
3034 x = operands[2];
3035 y = operands[3];
3036 mode = GET_MODE (x);
3037
3038 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3039 more applications). The exception to this is "reg != 0" which can
3040 be done in one instruction on v9 (so we do it). */
3041 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3042 {
3043 if (y != const0_rtx)
3044 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3045
3046 rtx pat = gen_rtx_SET (operands[0],
3047 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3048 x, const0_rtx));
3049
3050 /* If we can use addx/subx or addxc, add a clobber for CC. */
3051 if (mode == SImode || (code == NE && TARGET_VIS3))
3052 {
3053 rtx clobber
3054 = gen_rtx_CLOBBER (VOIDmode,
3055 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3056 SPARC_ICC_REG));
3057 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3058 }
3059
3060 emit_insn (pat);
3061 return true;
3062 }
3063
3064 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3065 if (TARGET_ARCH64
3066 && mode == DImode
3067 && !((code == LTU || code == GTU) && TARGET_VIS3)
3068 && gen_v9_scc (operands[0], code, x, y))
3069 return true;
3070
3071 /* We can do LTU and GEU using the addx/subx instructions too. And
3072 for GTU/LEU, if both operands are registers, swap them and fall
3073 back to the easy case. */
3074 if (code == GTU || code == LEU)
3075 {
3076 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3077 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3078 {
3079 tem = x;
3080 x = y;
3081 y = tem;
3082 code = swap_condition (code);
3083 }
3084 }
3085
3086 if (code == LTU || code == GEU)
3087 {
3088 emit_insn (gen_rtx_SET (operands[0],
3089 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3090 gen_compare_reg_1 (code, x, y),
3091 const0_rtx)));
3092 return true;
3093 }
3094
3095 /* All the possibilities to use addx/subx-based sequences have been
3096 exhausted, so try for a 3-instruction sequence using v9 conditional
3097 moves.  */
3098 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3099 return true;
3100
3101 /* Nope, do branches. */
3102 return false;
3103 }
3104
3105 /* Emit a conditional jump insn for the v9 architecture using comparison code
3106 CODE and jump target LABEL.
3107 This function exists to take advantage of the v9 brxx insns. */
3108
3109 static void
3110 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3111 {
3112 emit_jump_insn (gen_rtx_SET (pc_rtx,
3113 gen_rtx_IF_THEN_ELSE (VOIDmode,
3114 gen_rtx_fmt_ee (code, GET_MODE (op0),
3115 op0, const0_rtx),
3116 gen_rtx_LABEL_REF (VOIDmode, label),
3117 pc_rtx)));
3118 }
3119
3120 /* Emit a conditional jump insn for the UA2011 architecture using
3121 comparison code CODE and jump target LABEL. This function exists
3122 to take advantage of the UA2011 Compare and Branch insns. */
3123
3124 static void
3125 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3126 {
3127 rtx if_then_else;
3128
3129 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3130 gen_rtx_fmt_ee(code, GET_MODE(op0),
3131 op0, op1),
3132 gen_rtx_LABEL_REF (VOIDmode, label),
3133 pc_rtx);
3134
3135 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3136 }
3137
3138 void
3139 emit_conditional_branch_insn (rtx operands[])
3140 {
3141 /* The quad-word fp compare library routines all return nonzero to indicate
3142 true, which is different from the equivalent libgcc routines, so we must
3143 handle them specially here. */
3144 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3145 {
3146 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3147 GET_CODE (operands[0]));
3148 operands[1] = XEXP (operands[0], 0);
3149 operands[2] = XEXP (operands[0], 1);
3150 }
3151
3152 /* If we can tell early on that the comparison is against a constant
3153 that won't fit in the 5-bit signed immediate field of a cbcond,
3154 use one of the other v9 conditional branch sequences. */
3155 if (TARGET_CBCOND
3156 && GET_CODE (operands[1]) == REG
3157 && (GET_MODE (operands[1]) == SImode
3158 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3159 && (GET_CODE (operands[2]) != CONST_INT
3160 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3161 {
3162 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3163 return;
3164 }
3165
3166 if (TARGET_ARCH64 && operands[2] == const0_rtx
3167 && GET_CODE (operands[1]) == REG
3168 && GET_MODE (operands[1]) == DImode)
3169 {
3170 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3171 return;
3172 }
3173
3174 operands[1] = gen_compare_reg (operands[0]);
3175 operands[2] = const0_rtx;
3176 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3177 operands[1], operands[2]);
3178 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3179 operands[3]));
3180 }
3181
3182
3183 /* Generate a DFmode part of a hard TFmode register.
3184 REG is the TFmode hard register, LOW is 1 for the
3185 low 64 bits of the register and 0 otherwise.
3186 */
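/* Illustrative example (assumes the usual big-endian SPARC word order): for a
   TFmode value living in %f0-%f3, gen_df_reg (reg, 0) returns the %f0/%f1
   pair and gen_df_reg (reg, 1) returns the %f2/%f3 pair, i.e. the low 64 bits
   live in the higher-numbered register pair.  */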
3187 rtx
3188 gen_df_reg (rtx reg, int low)
3189 {
3190 int regno = REGNO (reg);
3191
3192 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3193 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3194 return gen_rtx_REG (DFmode, regno);
3195 }
3196 \f
3197 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3198 Unlike normal calls, TFmode operands are passed by reference. It is
3199 assumed that no more than 3 operands are required. */
3200
3201 static void
3202 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3203 {
3204 rtx ret_slot = NULL, arg[3], func_sym;
3205 int i;
3206
3207 /* We only expect to be called for conversions, unary, and binary ops. */
3208 gcc_assert (nargs == 2 || nargs == 3);
3209
3210 for (i = 0; i < nargs; ++i)
3211 {
3212 rtx this_arg = operands[i];
3213 rtx this_slot;
3214
3215 /* TFmode arguments and return values are passed by reference. */
3216 if (GET_MODE (this_arg) == TFmode)
3217 {
3218 int force_stack_temp;
3219
3220 force_stack_temp = 0;
3221 if (TARGET_BUGGY_QP_LIB && i == 0)
3222 force_stack_temp = 1;
3223
3224 if (GET_CODE (this_arg) == MEM
3225 && ! force_stack_temp)
3226 {
3227 tree expr = MEM_EXPR (this_arg);
3228 if (expr)
3229 mark_addressable (expr);
3230 this_arg = XEXP (this_arg, 0);
3231 }
3232 else if (CONSTANT_P (this_arg)
3233 && ! force_stack_temp)
3234 {
3235 this_slot = force_const_mem (TFmode, this_arg);
3236 this_arg = XEXP (this_slot, 0);
3237 }
3238 else
3239 {
3240 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3241
3242 /* Operand 0 is the return value. We'll copy it out later. */
3243 if (i > 0)
3244 emit_move_insn (this_slot, this_arg);
3245 else
3246 ret_slot = this_slot;
3247
3248 this_arg = XEXP (this_slot, 0);
3249 }
3250 }
3251
3252 arg[i] = this_arg;
3253 }
3254
3255 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3256
3257 if (GET_MODE (operands[0]) == TFmode)
3258 {
3259 if (nargs == 2)
3260 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3261 arg[0], GET_MODE (arg[0]),
3262 arg[1], GET_MODE (arg[1]));
3263 else
3264 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3265 arg[0], GET_MODE (arg[0]),
3266 arg[1], GET_MODE (arg[1]),
3267 arg[2], GET_MODE (arg[2]));
3268
3269 if (ret_slot)
3270 emit_move_insn (operands[0], ret_slot);
3271 }
3272 else
3273 {
3274 rtx ret;
3275
3276 gcc_assert (nargs == 2);
3277
3278 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3279 GET_MODE (operands[0]), 1,
3280 arg[1], GET_MODE (arg[1]));
3281
3282 if (ret != operands[0])
3283 emit_move_insn (operands[0], ret);
3284 }
3285 }
3286
3287 /* Expand soft-float TFmode calls to sparc abi routines. */
3288
3289 static void
3290 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3291 {
3292 const char *func;
3293
3294 switch (code)
3295 {
3296 case PLUS:
3297 func = "_Qp_add";
3298 break;
3299 case MINUS:
3300 func = "_Qp_sub";
3301 break;
3302 case MULT:
3303 func = "_Qp_mul";
3304 break;
3305 case DIV:
3306 func = "_Qp_div";
3307 break;
3308 default:
3309 gcc_unreachable ();
3310 }
3311
3312 emit_soft_tfmode_libcall (func, 3, operands);
3313 }
3314
3315 static void
3316 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3317 {
3318 const char *func;
3319
3320 gcc_assert (code == SQRT);
3321 func = "_Qp_sqrt";
3322
3323 emit_soft_tfmode_libcall (func, 2, operands);
3324 }
3325
3326 static void
3327 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3328 {
3329 const char *func;
3330
3331 switch (code)
3332 {
3333 case FLOAT_EXTEND:
3334 switch (GET_MODE (operands[1]))
3335 {
3336 case SFmode:
3337 func = "_Qp_stoq";
3338 break;
3339 case DFmode:
3340 func = "_Qp_dtoq";
3341 break;
3342 default:
3343 gcc_unreachable ();
3344 }
3345 break;
3346
3347 case FLOAT_TRUNCATE:
3348 switch (GET_MODE (operands[0]))
3349 {
3350 case SFmode:
3351 func = "_Qp_qtos";
3352 break;
3353 case DFmode:
3354 func = "_Qp_qtod";
3355 break;
3356 default:
3357 gcc_unreachable ();
3358 }
3359 break;
3360
3361 case FLOAT:
3362 switch (GET_MODE (operands[1]))
3363 {
3364 case SImode:
3365 func = "_Qp_itoq";
3366 if (TARGET_ARCH64)
3367 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3368 break;
3369 case DImode:
3370 func = "_Qp_xtoq";
3371 break;
3372 default:
3373 gcc_unreachable ();
3374 }
3375 break;
3376
3377 case UNSIGNED_FLOAT:
3378 switch (GET_MODE (operands[1]))
3379 {
3380 case SImode:
3381 func = "_Qp_uitoq";
3382 if (TARGET_ARCH64)
3383 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3384 break;
3385 case DImode:
3386 func = "_Qp_uxtoq";
3387 break;
3388 default:
3389 gcc_unreachable ();
3390 }
3391 break;
3392
3393 case FIX:
3394 switch (GET_MODE (operands[0]))
3395 {
3396 case SImode:
3397 func = "_Qp_qtoi";
3398 break;
3399 case DImode:
3400 func = "_Qp_qtox";
3401 break;
3402 default:
3403 gcc_unreachable ();
3404 }
3405 break;
3406
3407 case UNSIGNED_FIX:
3408 switch (GET_MODE (operands[0]))
3409 {
3410 case SImode:
3411 func = "_Qp_qtoui";
3412 break;
3413 case DImode:
3414 func = "_Qp_qtoux";
3415 break;
3416 default:
3417 gcc_unreachable ();
3418 }
3419 break;
3420
3421 default:
3422 gcc_unreachable ();
3423 }
3424
3425 emit_soft_tfmode_libcall (func, 2, operands);
3426 }
3427
3428 /* Expand a hard-float tfmode operation. All arguments must be in
3429 registers. */
3430
3431 static void
3432 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3433 {
3434 rtx op, dest;
3435
3436 if (GET_RTX_CLASS (code) == RTX_UNARY)
3437 {
3438 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3439 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3440 }
3441 else
3442 {
3443 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3444 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3445 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3446 operands[1], operands[2]);
3447 }
3448
3449 if (register_operand (operands[0], VOIDmode))
3450 dest = operands[0];
3451 else
3452 dest = gen_reg_rtx (GET_MODE (operands[0]));
3453
3454 emit_insn (gen_rtx_SET (dest, op));
3455
3456 if (dest != operands[0])
3457 emit_move_insn (operands[0], dest);
3458 }
3459
3460 void
3461 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3462 {
3463 if (TARGET_HARD_QUAD)
3464 emit_hard_tfmode_operation (code, operands);
3465 else
3466 emit_soft_tfmode_binop (code, operands);
3467 }
3468
3469 void
3470 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3471 {
3472 if (TARGET_HARD_QUAD)
3473 emit_hard_tfmode_operation (code, operands);
3474 else
3475 emit_soft_tfmode_unop (code, operands);
3476 }
3477
3478 void
3479 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3480 {
3481 if (TARGET_HARD_QUAD)
3482 emit_hard_tfmode_operation (code, operands);
3483 else
3484 emit_soft_tfmode_cvt (code, operands);
3485 }
3486 \f
3487 /* Return nonzero if a branch/jump/call instruction will be emitting a
3488 nop into its delay slot.  */
3489
3490 int
3491 empty_delay_slot (rtx_insn *insn)
3492 {
3493 rtx seq;
3494
3495 /* If no previous instruction (should not happen), return true. */
3496 if (PREV_INSN (insn) == NULL)
3497 return 1;
3498
3499 seq = NEXT_INSN (PREV_INSN (insn));
3500 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3501 return 0;
3502
3503 return 1;
3504 }
3505
3506 /* Return nonzero if we should emit a nop after a cbcond instruction.
3507 The cbcond instruction does not have a delay slot; however, there is
3508 a severe performance penalty if a control transfer appears right
3509 after a cbcond. Therefore we emit a nop when we detect this
3510 situation. */
3511
3512 int
3513 emit_cbcond_nop (rtx_insn *insn)
3514 {
3515 rtx next = next_active_insn (insn);
3516
3517 if (!next)
3518 return 1;
3519
3520 if (NONJUMP_INSN_P (next)
3521 && GET_CODE (PATTERN (next)) == SEQUENCE)
3522 next = XVECEXP (PATTERN (next), 0, 0);
3523 else if (CALL_P (next)
3524 && GET_CODE (PATTERN (next)) == PARALLEL)
3525 {
3526 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3527
3528 if (GET_CODE (delay) == RETURN)
3529 {
3530 /* It's a sibling call. Do not emit the nop if we're going
3531 to emit something other than the jump itself as the first
3532 instruction of the sibcall sequence. */
3533 if (sparc_leaf_function_p || TARGET_FLAT)
3534 return 0;
3535 }
3536 }
3537
3538 if (NONJUMP_INSN_P (next))
3539 return 0;
3540
3541 return 1;
3542 }
3543
3544 /* Return nonzero if TRIAL can go into the call delay slot. */
3545
3546 int
3547 eligible_for_call_delay (rtx_insn *trial)
3548 {
3549 rtx pat;
3550
3551 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3552 return 0;
3553
3554 /* Binutils allows
3555 call __tls_get_addr, %tgd_call (foo)
3556 add %l7, %o0, %o0, %tgd_add (foo)
3557 while Sun as/ld does not. */
3558 if (TARGET_GNU_TLS || !TARGET_TLS)
3559 return 1;
3560
3561 pat = PATTERN (trial);
3562
3563 /* We must reject tgd_add{32|64}, i.e.
3564 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3565 and tldm_add{32|64}, i.e.
3566 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3567 for Sun as/ld. */
3568 if (GET_CODE (pat) == SET
3569 && GET_CODE (SET_SRC (pat)) == PLUS)
3570 {
3571 rtx unspec = XEXP (SET_SRC (pat), 1);
3572
3573 if (GET_CODE (unspec) == UNSPEC
3574 && (XINT (unspec, 1) == UNSPEC_TLSGD
3575 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3576 return 0;
3577 }
3578
3579 return 1;
3580 }
3581
3582 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3583 instruction. RETURN_P is true if the v9 variant 'return' is to be
3584 considered in the test too.
3585
3586 TRIAL must be a SET whose destination is a REG appropriate for the
3587 'restore' instruction or, if RETURN_P is true, for the 'return'
3588 instruction. */
3589
3590 static int
3591 eligible_for_restore_insn (rtx trial, bool return_p)
3592 {
3593 rtx pat = PATTERN (trial);
3594 rtx src = SET_SRC (pat);
3595 bool src_is_freg = false;
3596 rtx src_reg;
3597
3598 /* Since we now can do moves between float and integer registers when
3599 VIS3 is enabled, we have to catch this case. We can allow such
3600 moves when doing a 'return', however.  */
3601 src_reg = src;
3602 if (GET_CODE (src_reg) == SUBREG)
3603 src_reg = SUBREG_REG (src_reg);
3604 if (GET_CODE (src_reg) == REG
3605 && SPARC_FP_REG_P (REGNO (src_reg)))
3606 src_is_freg = true;
3607
3608 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3609 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3610 && arith_operand (src, GET_MODE (src))
3611 && ! src_is_freg)
3612 {
3613 if (TARGET_ARCH64)
3614 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3615 else
3616 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3617 }
3618
3619 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3620 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3621 && arith_double_operand (src, GET_MODE (src))
3622 && ! src_is_freg)
3623 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3624
3625 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3626 else if (! TARGET_FPU && register_operand (src, SFmode))
3627 return 1;
3628
3629 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3630 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3631 return 1;
3632
3633 /* If we have the 'return' instruction, anything that does not use
3634 local or output registers and can go into a delay slot wins. */
3635 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3636 return 1;
3637
3638 /* The 'restore src1,src2,dest' pattern for SImode. */
3639 else if (GET_CODE (src) == PLUS
3640 && register_operand (XEXP (src, 0), SImode)
3641 && arith_operand (XEXP (src, 1), SImode))
3642 return 1;
3643
3644 /* The 'restore src1,src2,dest' pattern for DImode. */
3645 else if (GET_CODE (src) == PLUS
3646 && register_operand (XEXP (src, 0), DImode)
3647 && arith_double_operand (XEXP (src, 1), DImode))
3648 return 1;
3649
3650 /* The 'restore src1,%lo(src2),dest' pattern. */
3651 else if (GET_CODE (src) == LO_SUM
3652 && ! TARGET_CM_MEDMID
3653 && ((register_operand (XEXP (src, 0), SImode)
3654 && immediate_operand (XEXP (src, 1), SImode))
3655 || (TARGET_ARCH64
3656 && register_operand (XEXP (src, 0), DImode)
3657 && immediate_operand (XEXP (src, 1), DImode))))
3658 return 1;
3659
3660 /* The 'restore src,src,dest' pattern (a left shift by 1 is src + src). */
3661 else if (GET_CODE (src) == ASHIFT
3662 && (register_operand (XEXP (src, 0), SImode)
3663 || register_operand (XEXP (src, 0), DImode))
3664 && XEXP (src, 1) == const1_rtx)
3665 return 1;
3666
3667 return 0;
3668 }
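/* As a rough illustration of the cases above: a delay-slot candidate like

     (set (reg:SI %i0) (plus:SI (reg:SI %l1) (const_int 5)))

   matches the SImode PLUS case and can be combined into

     restore %l1, 5, %o0

   because %i0 in the current register window becomes %o0 in the caller's
   window once the 'restore' has executed.  */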
3669
3670 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3671
3672 int
3673 eligible_for_return_delay (rtx_insn *trial)
3674 {
3675 int regno;
3676 rtx pat;
3677
3678 /* If the function uses __builtin_eh_return, the eh_return machinery
3679 occupies the delay slot. */
3680 if (crtl->calls_eh_return)
3681 return 0;
3682
3683 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3684 return 0;
3685
3686 /* In the case of a leaf or flat function, anything can go into the slot. */
3687 if (sparc_leaf_function_p || TARGET_FLAT)
3688 return 1;
3689
3690 if (!NONJUMP_INSN_P (trial))
3691 return 0;
3692
3693 pat = PATTERN (trial);
3694 if (GET_CODE (pat) == PARALLEL)
3695 {
3696 int i;
3697
3698 if (! TARGET_V9)
3699 return 0;
3700 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3701 {
3702 rtx expr = XVECEXP (pat, 0, i);
3703 if (GET_CODE (expr) != SET)
3704 return 0;
3705 if (GET_CODE (SET_DEST (expr)) != REG)
3706 return 0;
3707 regno = REGNO (SET_DEST (expr));
3708 if (regno >= 8 && regno < 24)
3709 return 0;
3710 }
3711 return !epilogue_renumber (&pat, 1);
3712 }
3713
3714 if (GET_CODE (pat) != SET)
3715 return 0;
3716
3717 if (GET_CODE (SET_DEST (pat)) != REG)
3718 return 0;
3719
3720 regno = REGNO (SET_DEST (pat));
3721
3722 /* Otherwise, only operations which can be done in tandem with
3723 a `restore' or `return' insn can go into the delay slot. */
3724 if (regno >= 8 && regno < 24)
3725 return 0;
3726
3727 /* If this instruction sets up a floating-point register and we have a return
3728 instruction, it can probably go in.  But a 'restore' will not work
3729 with FP_REGS. */
3730 if (! SPARC_INT_REG_P (regno))
3731 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3732
3733 return eligible_for_restore_insn (trial, true);
3734 }
3735
3736 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3737
3738 int
3739 eligible_for_sibcall_delay (rtx_insn *trial)
3740 {
3741 rtx pat;
3742
3743 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3744 return 0;
3745
3746 if (!NONJUMP_INSN_P (trial))
3747 return 0;
3748
3749 pat = PATTERN (trial);
3750
3751 if (sparc_leaf_function_p || TARGET_FLAT)
3752 {
3753 /* If the tail call is done using the call instruction,
3754 we have to restore %o7 in the delay slot. */
3755 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3756 return 0;
3757
3758 /* %g1 is used to build the function address.  */
3759 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3760 return 0;
3761
3762 return 1;
3763 }
3764
3765 if (GET_CODE (pat) != SET)
3766 return 0;
3767
3768 /* Otherwise, only operations which can be done in tandem with
3769 a `restore' insn can go into the delay slot. */
3770 if (GET_CODE (SET_DEST (pat)) != REG
3771 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3772 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3773 return 0;
3774
3775 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3776 in most cases. */
3777 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3778 return 0;
3779
3780 return eligible_for_restore_insn (trial, false);
3781 }
3782 \f
3783 /* Determine if it's legal to put X into the constant pool. This
3784 is not possible if X contains the address of a symbol that is
3785 not constant (TLS) or not known at final link time (PIC). */
3786
3787 static bool
3788 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3789 {
3790 switch (GET_CODE (x))
3791 {
3792 case CONST_INT:
3793 case CONST_WIDE_INT:
3794 case CONST_DOUBLE:
3795 case CONST_VECTOR:
3796 /* Accept all non-symbolic constants. */
3797 return false;
3798
3799 case LABEL_REF:
3800 /* Labels are OK iff we are non-PIC. */
3801 return flag_pic != 0;
3802
3803 case SYMBOL_REF:
3804 /* 'Naked' TLS symbol references are never OK,
3805 non-TLS symbols are OK iff we are non-PIC. */
3806 if (SYMBOL_REF_TLS_MODEL (x))
3807 return true;
3808 else
3809 return flag_pic != 0;
3810
3811 case CONST:
3812 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3813 case PLUS:
3814 case MINUS:
3815 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3816 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3817 case UNSPEC:
3818 return true;
3819 default:
3820 gcc_unreachable ();
3821 }
3822 }
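/* Concretely: under -fPIC, (symbol_ref "foo") or
   (const (plus (symbol_ref "foo") (const_int 8))) cannot be forced into the
   constant pool, while purely numeric constants such as (const_int 42) or a
   CONST_DOUBLE always can; a TLS SYMBOL_REF is rejected even when not
   generating PIC code.  */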
3823 \f
3824 /* Global Offset Table support. */
3825 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3826 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3827
3828 /* Return the SYMBOL_REF for the Global Offset Table. */
3829
3830 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3831
3832 static rtx
3833 sparc_got (void)
3834 {
3835 if (!sparc_got_symbol)
3836 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3837
3838 return sparc_got_symbol;
3839 }
3840
3841 /* Ensure that we are not using patterns that are not OK with PIC. */
3842
3843 int
3844 check_pic (int i)
3845 {
3846 rtx op;
3847
3848 switch (flag_pic)
3849 {
3850 case 1:
3851 op = recog_data.operand[i];
3852 gcc_assert (GET_CODE (op) != SYMBOL_REF
3853 && (GET_CODE (op) != CONST
3854 || (GET_CODE (XEXP (op, 0)) == MINUS
3855 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3856 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3857 /* fallthrough */
3858 case 2:
3859 default:
3860 return 1;
3861 }
3862 }
3863
3864 /* Return true if X is an address which needs a temporary register when
3865 reloaded while generating PIC code. */
3866
3867 int
3868 pic_address_needs_scratch (rtx x)
3869 {
3870 /* An address which is a symbol plus a non-SMALL_INT constant needs a temporary register. */
3871 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3873 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3874 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3875 return 1;
3876
3877 return 0;
3878 }
3879
3880 /* Determine if a given RTX is a valid constant. We already know this
3881 satisfies CONSTANT_P. */
3882
3883 static bool
3884 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3885 {
3886 switch (GET_CODE (x))
3887 {
3888 case CONST:
3889 case SYMBOL_REF:
3890 if (sparc_tls_referenced_p (x))
3891 return false;
3892 break;
3893
3894 case CONST_DOUBLE:
3895 /* Floating point constants are generally not ok.
3896 The only exception is 0.0 and all-ones in VIS. */
3897 if (TARGET_VIS
3898 && SCALAR_FLOAT_MODE_P (mode)
3899 && (const_zero_operand (x, mode)
3900 || const_all_ones_operand (x, mode)))
3901 return true;
3902
3903 return false;
3904
3905 case CONST_VECTOR:
3906 /* Vector constants are generally not ok.
3907 The only exception is 0 or -1 in VIS. */
3908 if (TARGET_VIS
3909 && (const_zero_operand (x, mode)
3910 || const_all_ones_operand (x, mode)))
3911 return true;
3912
3913 return false;
3914
3915 default:
3916 break;
3917 }
3918
3919 return true;
3920 }
3921
3922 /* Determine if a given RTX is a valid constant address. */
3923
3924 bool
3925 constant_address_p (rtx x)
3926 {
3927 switch (GET_CODE (x))
3928 {
3929 case LABEL_REF:
3930 case CONST_INT:
3931 case HIGH:
3932 return true;
3933
3934 case CONST:
3935 if (flag_pic && pic_address_needs_scratch (x))
3936 return false;
3937 return sparc_legitimate_constant_p (Pmode, x);
3938
3939 case SYMBOL_REF:
3940 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3941
3942 default:
3943 return false;
3944 }
3945 }
3946
3947 /* Nonzero if the constant value X is a legitimate general operand
3948 when generating PIC code. It is given that flag_pic is on and
3949 that X satisfies CONSTANT_P. */
3950
3951 bool
3952 legitimate_pic_operand_p (rtx x)
3953 {
3954 if (pic_address_needs_scratch (x))
3955 return false;
3956 if (sparc_tls_referenced_p (x))
3957 return false;
3958 return true;
3959 }
3960
3961 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3962 (CONST_INT_P (X) \
3963 && INTVAL (X) >= -0x1000 \
3964 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3965
3966 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3967 (CONST_INT_P (X) \
3968 && INTVAL (X) >= -0x1000 \
3969 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
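/* To make the two ranges above concrete: for DImode (size 8), the plain
   offset form accepts constants in [-0x1000, 0xff8], i.e. the 13-bit signed
   immediate range minus room for the last word of the access, while the
   OLO10 form stops at 0xc00 - 8 = 0xbf8 because the %lo() part of the
   address can contribute up to 0x3ff on top of the offset.  */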
3970
3971 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3972
3973 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3974 ordinarily. This changes a bit when generating PIC. */
3975
3976 static bool
3977 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3978 {
3979 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3980
3981 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3982 rs1 = addr;
3983 else if (GET_CODE (addr) == PLUS)
3984 {
3985 rs1 = XEXP (addr, 0);
3986 rs2 = XEXP (addr, 1);
3987
3988 /* Canonicalize. REG comes first, if there are no regs,
3989 LO_SUM comes first. */
3990 if (!REG_P (rs1)
3991 && GET_CODE (rs1) != SUBREG
3992 && (REG_P (rs2)
3993 || GET_CODE (rs2) == SUBREG
3994 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3995 {
3996 rs1 = XEXP (addr, 1);
3997 rs2 = XEXP (addr, 0);
3998 }
3999
4000 if ((flag_pic == 1
4001 && rs1 == pic_offset_table_rtx
4002 && !REG_P (rs2)
4003 && GET_CODE (rs2) != SUBREG
4004 && GET_CODE (rs2) != LO_SUM
4005 && GET_CODE (rs2) != MEM
4006 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4007 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4008 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4009 || ((REG_P (rs1)
4010 || GET_CODE (rs1) == SUBREG)
4011 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4012 {
4013 imm1 = rs2;
4014 rs2 = NULL;
4015 }
4016 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4017 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4018 {
4019 /* We prohibit REG + REG for TFmode when there are no quad move insns
4020 and we consequently need to split. We do this because REG+REG
4021 is not an offsettable address. If we get the situation in reload
4022 where source and destination of a movtf pattern are both MEMs with
4023 REG+REG address, then only one of them gets converted to an
4024 offsettable address. */
4025 if (mode == TFmode
4026 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4027 return 0;
4028
4029 /* Likewise for TImode, but in all cases. */
4030 if (mode == TImode)
4031 return 0;
4032
4033 /* We prohibit REG + REG on ARCH32 if not optimizing for
4034 DFmode/DImode because then mem_min_alignment is likely to be zero
4035 after reload and the forced split would lack a matching splitter
4036 pattern. */
4037 if (TARGET_ARCH32 && !optimize
4038 && (mode == DFmode || mode == DImode))
4039 return 0;
4040 }
4041 else if (USE_AS_OFFSETABLE_LO10
4042 && GET_CODE (rs1) == LO_SUM
4043 && TARGET_ARCH64
4044 && ! TARGET_CM_MEDMID
4045 && RTX_OK_FOR_OLO10_P (rs2, mode))
4046 {
4047 rs2 = NULL;
4048 imm1 = XEXP (rs1, 1);
4049 rs1 = XEXP (rs1, 0);
4050 if (!CONSTANT_P (imm1)
4051 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4052 return 0;
4053 }
4054 }
4055 else if (GET_CODE (addr) == LO_SUM)
4056 {
4057 rs1 = XEXP (addr, 0);
4058 imm1 = XEXP (addr, 1);
4059
4060 if (!CONSTANT_P (imm1)
4061 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4062 return 0;
4063
4064 /* We can't allow TFmode in 32-bit mode, because an offset greater
4065 than the alignment (8) may cause the LO_SUM to overflow. */
4066 if (mode == TFmode && TARGET_ARCH32)
4067 return 0;
4068
4069 /* During reload, accept the HIGH+LO_SUM construct generated by
4070 sparc_legitimize_reload_address. */
4071 if (reload_in_progress
4072 && GET_CODE (rs1) == HIGH
4073 && XEXP (rs1, 0) == imm1)
4074 return 1;
4075 }
4076 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4077 return 1;
4078 else
4079 return 0;
4080
4081 if (GET_CODE (rs1) == SUBREG)
4082 rs1 = SUBREG_REG (rs1);
4083 if (!REG_P (rs1))
4084 return 0;
4085
4086 if (rs2)
4087 {
4088 if (GET_CODE (rs2) == SUBREG)
4089 rs2 = SUBREG_REG (rs2);
4090 if (!REG_P (rs2))
4091 return 0;
4092 }
4093
4094 if (strict)
4095 {
4096 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4097 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4098 return 0;
4099 }
4100 else
4101 {
4102 if ((! SPARC_INT_REG_P (REGNO (rs1))
4103 && REGNO (rs1) != FRAME_POINTER_REGNUM
4104 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4105 || (rs2
4106 && (! SPARC_INT_REG_P (REGNO (rs2))
4107 && REGNO (rs2) != FRAME_POINTER_REGNUM
4108 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4109 return 0;
4110 }
4111 return 1;
4112 }
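/* A few concrete cases for the predicate above, as a sketch:

     (plus (reg %i1) (const_int 64))     REG + SMALLINT, accepted
     (plus (reg %i1) (reg %i2))          REG + REG, accepted except e.g. for
                                         TFmode without hard quad insns, or
                                         for TImode
     (plus (reg %i1) (const_int 8192))   rejected, the offset does not fit in
                                         the 13-bit signed immediate
     (lo_sum (reg %i1) (symbol_ref X))   accepted unless X is a TLS symbol or
                                         the mode is TFmode in 32-bit mode  */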
4113
4114 /* Return the SYMBOL_REF for the tls_get_addr function. */
4115
4116 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4117
4118 static rtx
4119 sparc_tls_get_addr (void)
4120 {
4121 if (!sparc_tls_symbol)
4122 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4123
4124 return sparc_tls_symbol;
4125 }
4126
4127 /* Return the Global Offset Table to be used in TLS mode. */
4128
4129 static rtx
4130 sparc_tls_got (void)
4131 {
4132 /* In PIC mode, this is just the PIC offset table. */
4133 if (flag_pic)
4134 {
4135 crtl->uses_pic_offset_table = 1;
4136 return pic_offset_table_rtx;
4137 }
4138
4139 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4140 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4141 if (TARGET_SUN_TLS && TARGET_ARCH32)
4142 {
4143 load_got_register ();
4144 return global_offset_table_rtx;
4145 }
4146
4147 /* In all other cases, we load a new pseudo with the GOT symbol. */
4148 return copy_to_reg (sparc_got ());
4149 }
4150
4151 /* Return true if X contains a thread-local symbol. */
4152
4153 static bool
4154 sparc_tls_referenced_p (rtx x)
4155 {
4156 if (!TARGET_HAVE_TLS)
4157 return false;
4158
4159 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4160 x = XEXP (XEXP (x, 0), 0);
4161
4162 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4163 return true;
4164
4165 /* That's all we handle in sparc_legitimize_tls_address for now. */
4166 return false;
4167 }
4168
4169 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4170 this (thread-local) address. */
4171
4172 static rtx
4173 sparc_legitimize_tls_address (rtx addr)
4174 {
4175 rtx temp1, temp2, temp3, ret, o0, got;
4176 rtx_insn *insn;
4177
4178 gcc_assert (can_create_pseudo_p ());
4179
4180 if (GET_CODE (addr) == SYMBOL_REF)
4181 switch (SYMBOL_REF_TLS_MODEL (addr))
4182 {
4183 case TLS_MODEL_GLOBAL_DYNAMIC:
4184 start_sequence ();
4185 temp1 = gen_reg_rtx (SImode);
4186 temp2 = gen_reg_rtx (SImode);
4187 ret = gen_reg_rtx (Pmode);
4188 o0 = gen_rtx_REG (Pmode, 8);
4189 got = sparc_tls_got ();
4190 emit_insn (gen_tgd_hi22 (temp1, addr));
4191 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4192 if (TARGET_ARCH32)
4193 {
4194 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4195 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4196 addr, const1_rtx));
4197 }
4198 else
4199 {
4200 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4201 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4202 addr, const1_rtx));
4203 }
4204 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4205 insn = get_insns ();
4206 end_sequence ();
4207 emit_libcall_block (insn, ret, o0, addr);
4208 break;
4209
4210 case TLS_MODEL_LOCAL_DYNAMIC:
4211 start_sequence ();
4212 temp1 = gen_reg_rtx (SImode);
4213 temp2 = gen_reg_rtx (SImode);
4214 temp3 = gen_reg_rtx (Pmode);
4215 ret = gen_reg_rtx (Pmode);
4216 o0 = gen_rtx_REG (Pmode, 8);
4217 got = sparc_tls_got ();
4218 emit_insn (gen_tldm_hi22 (temp1));
4219 emit_insn (gen_tldm_lo10 (temp2, temp1));
4220 if (TARGET_ARCH32)
4221 {
4222 emit_insn (gen_tldm_add32 (o0, got, temp2));
4223 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4224 const1_rtx));
4225 }
4226 else
4227 {
4228 emit_insn (gen_tldm_add64 (o0, got, temp2));
4229 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4230 const1_rtx));
4231 }
4232 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4233 insn = get_insns ();
4234 end_sequence ();
4235 emit_libcall_block (insn, temp3, o0,
4236 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4237 UNSPEC_TLSLD_BASE));
4238 temp1 = gen_reg_rtx (SImode);
4239 temp2 = gen_reg_rtx (SImode);
4240 emit_insn (gen_tldo_hix22 (temp1, addr));
4241 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4242 if (TARGET_ARCH32)
4243 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4244 else
4245 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4246 break;
4247
4248 case TLS_MODEL_INITIAL_EXEC:
4249 temp1 = gen_reg_rtx (SImode);
4250 temp2 = gen_reg_rtx (SImode);
4251 temp3 = gen_reg_rtx (Pmode);
4252 got = sparc_tls_got ();
4253 emit_insn (gen_tie_hi22 (temp1, addr));
4254 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4255 if (TARGET_ARCH32)
4256 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4257 else
4258 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4259 if (TARGET_SUN_TLS)
4260 {
4261 ret = gen_reg_rtx (Pmode);
4262 if (TARGET_ARCH32)
4263 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4264 temp3, addr));
4265 else
4266 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4267 temp3, addr));
4268 }
4269 else
4270 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4271 break;
4272
4273 case TLS_MODEL_LOCAL_EXEC:
4274 temp1 = gen_reg_rtx (Pmode);
4275 temp2 = gen_reg_rtx (Pmode);
4276 if (TARGET_ARCH32)
4277 {
4278 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4279 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4280 }
4281 else
4282 {
4283 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4284 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4285 }
4286 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4287 break;
4288
4289 default:
4290 gcc_unreachable ();
4291 }
4292
4293 else if (GET_CODE (addr) == CONST)
4294 {
4295 rtx base, offset;
4296
4297 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4298
4299 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4300 offset = XEXP (XEXP (addr, 0), 1);
4301
4302 base = force_operand (base, NULL_RTX);
4303 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4304 offset = force_reg (Pmode, offset);
4305 ret = gen_rtx_PLUS (Pmode, base, offset);
4306 }
4307
4308 else
4309 gcc_unreachable (); /* for now ... */
4310
4311 return ret;
4312 }
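/* For reference, the 32-bit global-dynamic case above typically expands to a
   sequence along these lines, with %l7 holding the GOT pointer and %t1/%t2
   standing in for the pseudos TEMP1/TEMP2 allocated above:

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop
   */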
4313
4314 /* Legitimize PIC addresses. If the address is already position-independent,
4315 we return ORIG. Newly generated position-independent addresses go into a
4316 reg. This is REG if nonzero, otherwise we allocate register(s) as
4317 necessary. */
4318
4319 static rtx
4320 sparc_legitimize_pic_address (rtx orig, rtx reg)
4321 {
4322 bool gotdata_op = false;
4323
4324 if (GET_CODE (orig) == SYMBOL_REF
4325 /* See the comment in sparc_expand_move. */
4326 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4327 {
4328 rtx pic_ref, address;
4329 rtx_insn *insn;
4330
4331 if (reg == 0)
4332 {
4333 gcc_assert (can_create_pseudo_p ());
4334 reg = gen_reg_rtx (Pmode);
4335 }
4336
4337 if (flag_pic == 2)
4338 {
4339 /* If not during reload, allocate another temp reg here for loading
4340 in the address, so that these instructions can be optimized
4341 properly. */
4342 rtx temp_reg = (! can_create_pseudo_p ()
4343 ? reg : gen_reg_rtx (Pmode));
4344
4345 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4346 won't get confused into thinking that these two instructions
4347 are loading in the true address of the symbol. If in the
4348 future a PIC rtx exists, that should be used instead. */
4349 if (TARGET_ARCH64)
4350 {
4351 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4352 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4353 }
4354 else
4355 {
4356 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4357 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4358 }
4359 address = temp_reg;
4360 gotdata_op = true;
4361 }
4362 else
4363 address = orig;
4364
4365 crtl->uses_pic_offset_table = 1;
4366 if (gotdata_op)
4367 {
4368 if (TARGET_ARCH64)
4369 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4370 pic_offset_table_rtx,
4371 address, orig));
4372 else
4373 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4374 pic_offset_table_rtx,
4375 address, orig));
4376 }
4377 else
4378 {
4379 pic_ref
4380 = gen_const_mem (Pmode,
4381 gen_rtx_PLUS (Pmode,
4382 pic_offset_table_rtx, address));
4383 insn = emit_move_insn (reg, pic_ref);
4384 }
4385
4386 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4387 by loop. */
4388 set_unique_reg_note (insn, REG_EQUAL, orig);
4389 return reg;
4390 }
4391 else if (GET_CODE (orig) == CONST)
4392 {
4393 rtx base, offset;
4394
4395 if (GET_CODE (XEXP (orig, 0)) == PLUS
4396 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4397 return orig;
4398
4399 if (reg == 0)
4400 {
4401 gcc_assert (can_create_pseudo_p ());
4402 reg = gen_reg_rtx (Pmode);
4403 }
4404
4405 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4406 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4407 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4408 base == reg ? NULL_RTX : reg);
4409
4410 if (GET_CODE (offset) == CONST_INT)
4411 {
4412 if (SMALL_INT (offset))
4413 return plus_constant (Pmode, base, INTVAL (offset));
4414 else if (can_create_pseudo_p ())
4415 offset = force_reg (Pmode, offset);
4416 else
4417 /* If we reach here, then something is seriously wrong. */
4418 gcc_unreachable ();
4419 }
4420 return gen_rtx_PLUS (Pmode, base, offset);
4421 }
4422 else if (GET_CODE (orig) == LABEL_REF)
4423 /* ??? We ought to be checking that the register is live instead, in case
4424 it is eliminated. */
4425 crtl->uses_pic_offset_table = 1;
4426
4427 return orig;
4428 }
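/* As an illustration, for -fPIC (flag_pic == 2) and an assembler that
   supports the GOTDATA operators, the code above typically ends up as

	sethi	%gdop_hix22(sym), %tmp
	xor	%tmp, %gdop_lox10(sym), %tmp
	ld	[%l7 + %tmp], %reg, %gdop(sym)

   where %tmp and %reg stand for whatever registers are allocated; the linker
   may relax this into a direct address computation.  Without GOTDATA support,
   the classical %hi/%lo pair plus a load from the GOT is emitted instead.  */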
4429
4430 /* Try machine-dependent ways of modifying an illegitimate address X
4431 to be legitimate. If we find one, return the new, valid address.
4432
4433 OLDX is the address as it was before break_out_memory_refs was called.
4434 In some cases it is useful to look at this to decide what needs to be done.
4435
4436 MODE is the mode of the operand pointed to by X.
4437
4438 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4439
4440 static rtx
4441 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4442 machine_mode mode)
4443 {
4444 rtx orig_x = x;
4445
4446 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4447 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4448 force_operand (XEXP (x, 0), NULL_RTX));
4449 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4450 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4451 force_operand (XEXP (x, 1), NULL_RTX));
4452 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4453 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4454 XEXP (x, 1));
4455 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4456 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4457 force_operand (XEXP (x, 1), NULL_RTX));
4458
4459 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4460 return x;
4461
4462 if (sparc_tls_referenced_p (x))
4463 x = sparc_legitimize_tls_address (x);
4464 else if (flag_pic)
4465 x = sparc_legitimize_pic_address (x, NULL_RTX);
4466 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4467 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4468 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4469 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4470 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4471 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4472 else if (GET_CODE (x) == SYMBOL_REF
4473 || GET_CODE (x) == CONST
4474 || GET_CODE (x) == LABEL_REF)
4475 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4476
4477 return x;
4478 }
4479
4480 /* Delegitimize an address that was legitimized by the above function. */
4481
4482 static rtx
4483 sparc_delegitimize_address (rtx x)
4484 {
4485 x = delegitimize_mem_from_attrs (x);
4486
4487 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4488 switch (XINT (XEXP (x, 1), 1))
4489 {
4490 case UNSPEC_MOVE_PIC:
4491 case UNSPEC_TLSLE:
4492 x = XVECEXP (XEXP (x, 1), 0, 0);
4493 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4494 break;
4495 default:
4496 break;
4497 }
4498
4499 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4500 if (GET_CODE (x) == MINUS
4501 && REG_P (XEXP (x, 0))
4502 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4503 && GET_CODE (XEXP (x, 1)) == LO_SUM
4504 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4505 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4506 {
4507 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4508 gcc_assert (GET_CODE (x) == LABEL_REF);
4509 }
4510
4511 return x;
4512 }
4513
4514 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4515 replace the input X, or the original X if no replacement is called for.
4516 The output parameter *WIN is 1 if the calling macro should goto WIN,
4517 0 if it should not.
4518
4519 For SPARC, we wish to handle addresses by splitting them into
4520 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4521 This cuts the number of extra insns by one.
4522
4523 Do nothing when generating PIC code and the address is a symbolic
4524 operand or requires a scratch register. */
4525
4526 rtx
4527 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4528 int opnum, int type,
4529 int ind_levels ATTRIBUTE_UNUSED, int *win)
4530 {
4531 /* Decompose SImode constants into HIGH+LO_SUM. */
4532 if (CONSTANT_P (x)
4533 && (mode != TFmode || TARGET_ARCH64)
4534 && GET_MODE (x) == SImode
4535 && GET_CODE (x) != LO_SUM
4536 && GET_CODE (x) != HIGH
4537 && sparc_cmodel <= CM_MEDLOW
4538 && !(flag_pic
4539 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4540 {
4541 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4542 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4543 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4544 opnum, (enum reload_type)type);
4545 *win = 1;
4546 return x;
4547 }
4548
4549 /* We have to recognize what we have already generated above. */
4550 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4551 {
4552 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4553 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4554 opnum, (enum reload_type)type);
4555 *win = 1;
4556 return x;
4557 }
4558
4559 *win = 0;
4560 return x;
4561 }
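/* In other words, a symbolic SImode address is split into a HIGH/LO_SUM pair,
   with the HIGH part reloaded into a base register (shown as %g1 here only
   for illustration), roughly:

	sethi	%hi(x), %g1
	ld	[%g1 + %lo(x)], %reg

   The LO_SUM stays inside the memory reference, which saves one insn compared
   with computing the full address first.  */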
4562
4563 /* Return true if ADDR (a legitimate address expression)
4564 has an effect that depends on the machine mode it is used for.
4565
4566 In PIC mode,
4567
4568 (mem:HI [%l7+a])
4569
4570 is not equivalent to
4571
4572 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4573
4574 because [%l7+a+1] is interpreted as the address of (a+1). */
4575
4576
4577 static bool
4578 sparc_mode_dependent_address_p (const_rtx addr,
4579 addr_space_t as ATTRIBUTE_UNUSED)
4580 {
4581 if (flag_pic && GET_CODE (addr) == PLUS)
4582 {
4583 rtx op0 = XEXP (addr, 0);
4584 rtx op1 = XEXP (addr, 1);
4585 if (op0 == pic_offset_table_rtx
4586 && symbolic_operand (op1, VOIDmode))
4587 return true;
4588 }
4589
4590 return false;
4591 }
4592
4593 #ifdef HAVE_GAS_HIDDEN
4594 # define USE_HIDDEN_LINKONCE 1
4595 #else
4596 # define USE_HIDDEN_LINKONCE 0
4597 #endif
4598
4599 static void
4600 get_pc_thunk_name (char name[32], unsigned int regno)
4601 {
4602 const char *reg_name = reg_names[regno];
4603
4604 /* Skip the leading '%' as that cannot be used in a
4605 symbol name. */
4606 reg_name += 1;
4607
4608 if (USE_HIDDEN_LINKONCE)
4609 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4610 else
4611 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4612 }
4613
4614 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4615
4616 static rtx
4617 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4618 {
4619 int orig_flag_pic = flag_pic;
4620 rtx insn;
4621
4622 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4623 flag_pic = 0;
4624 if (TARGET_ARCH64)
4625 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4626 else
4627 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4628 flag_pic = orig_flag_pic;
4629
4630 return insn;
4631 }
4632
4633 /* Emit code to load the GOT register. */
4634
4635 void
4636 load_got_register (void)
4637 {
4638 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4639 if (!global_offset_table_rtx)
4640 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4641
4642 if (TARGET_VXWORKS_RTP)
4643 emit_insn (gen_vxworks_load_got ());
4644 else
4645 {
4646 /* The GOT symbol is subject to a PC-relative relocation so we need a
4647 helper function to add the PC value and thus get the final value. */
4648 if (!got_helper_rtx)
4649 {
4650 char name[32];
4651 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4652 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4653 }
4654
4655 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4656 got_helper_rtx,
4657 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4658 }
4659
4660 /* Need to emit this whether or not we obey regdecls,
4661 since setjmp/longjmp can cause life info to screw up.
4662 ??? In the case where we don't obey regdecls, this is not sufficient
4663 since we may not fall out the bottom. */
4664 emit_use (global_offset_table_rtx);
4665 }
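/* For the common case (no VxWorks RTP), the sequence emitted above looks
   something like

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the value of %o7 (the address of the call) into %l7,
   so that %l7 ends up pointing at the GOT; the -4/+4 adjustments account for
   the position of the call insn within the sequence.  */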
4666
4667 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4668 address of the call target. */
4669
4670 void
4671 sparc_emit_call_insn (rtx pat, rtx addr)
4672 {
4673 rtx_insn *insn;
4674
4675 insn = emit_call_insn (pat);
4676
4677 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4678 if (TARGET_VXWORKS_RTP
4679 && flag_pic
4680 && GET_CODE (addr) == SYMBOL_REF
4681 && (SYMBOL_REF_DECL (addr)
4682 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4683 : !SYMBOL_REF_LOCAL_P (addr)))
4684 {
4685 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4686 crtl->uses_pic_offset_table = 1;
4687 }
4688 }
4689 \f
4690 /* Return 1 if RTX is a MEM which is known to be aligned to at
4691 least a DESIRED byte boundary. */
4692
4693 int
4694 mem_min_alignment (rtx mem, int desired)
4695 {
4696 rtx addr, base, offset;
4697
4698 /* If it's not a MEM we can't accept it. */
4699 if (GET_CODE (mem) != MEM)
4700 return 0;
4701
4702 /* Obviously... */
4703 if (!TARGET_UNALIGNED_DOUBLES
4704 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4705 return 1;
4706
4707 /* ??? The rest of the function predates MEM_ALIGN so
4708 there is probably a bit of redundancy. */
4709 addr = XEXP (mem, 0);
4710 base = offset = NULL_RTX;
4711 if (GET_CODE (addr) == PLUS)
4712 {
4713 if (GET_CODE (XEXP (addr, 0)) == REG)
4714 {
4715 base = XEXP (addr, 0);
4716
4717 /* What we are saying here is that if the base
4718 REG is properly aligned, the compiler will make
4719 sure any REG-based index off of it will be
4720 aligned as well. */
4721 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4722 offset = XEXP (addr, 1);
4723 else
4724 offset = const0_rtx;
4725 }
4726 }
4727 else if (GET_CODE (addr) == REG)
4728 {
4729 base = addr;
4730 offset = const0_rtx;
4731 }
4732
4733 if (base != NULL_RTX)
4734 {
4735 int regno = REGNO (base);
4736
4737 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4738 {
4739 /* Check if the compiler has recorded some information
4740 about the alignment of the base REG. If reload has
4741 completed, we already matched with proper alignments.
4742 If not running global_alloc, reload might give us
4743 unaligned pointer to local stack though. */
4744 if (((cfun != 0
4745 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4746 || (optimize && reload_completed))
4747 && (INTVAL (offset) & (desired - 1)) == 0)
4748 return 1;
4749 }
4750 else
4751 {
4752 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4753 return 1;
4754 }
4755 }
4756 else if (! TARGET_UNALIGNED_DOUBLES
4757 || CONSTANT_P (addr)
4758 || GET_CODE (addr) == LO_SUM)
4759 {
4760 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4761 is true, in which case we can only assume that an access is aligned if
4762 it is to a constant address, or the address involves a LO_SUM. */
4763 return 1;
4764 }
4765
4766 /* An obviously unaligned address. */
4767 return 0;
4768 }
4769
4770 \f
4771 /* Vectors to keep interesting information about registers where it can easily
4772 be got. We used to use the actual mode value as the bit number, but there
4773 are more than 32 modes now. Instead we use two tables: one indexed by
4774 hard register number, and one indexed by mode. */
4775
4776 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4777 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4778 mapped into one sparc_mode_class mode. */
4779
4780 enum sparc_mode_class {
4781 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4782 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4783 CC_MODE, CCFP_MODE
4784 };
4785
4786 /* Modes for single-word and smaller quantities. */
4787 #define S_MODES \
4788 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4789
4790 /* Modes for double-word and smaller quantities. */
4791 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4792
4793 /* Modes for quad-word and smaller quantities. */
4794 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4795
4796 /* Modes for 8-word and smaller quantities. */
4797 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4798
4799 /* Modes for single-float quantities. */
4800 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4801
4802 /* Modes for double-float and smaller quantities. */
4803 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4804
4805 /* Modes for quad-float and smaller quantities. */
4806 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4807
4808 /* Modes for quad-float pairs and smaller quantities. */
4809 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4810
4811 /* Modes for double-float only quantities. */
4812 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4813
4814 /* Modes for quad-float and double-float only quantities. */
4815 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4816
4817 /* Modes for quad-float pairs and double-float only quantities. */
4818 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4819
4820 /* Modes for condition codes. */
4821 #define CC_MODES (1 << (int) CC_MODE)
4822 #define CCFP_MODES (1 << (int) CCFP_MODE)
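/* For instance, with the enum values above S_MODES is 0x23 and D_MODES is
   0x67, so a register whose hard_regno_mode_classes[] entry is D_MODES can
   hold H_MODE, S_MODE, SF_MODE, D_MODE and DF_MODE quantities; testing
   whether REGNO can hold MODE then essentially reduces to checking

     (hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0  */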
4823
4824 /* Value is 1 if register/mode pair is acceptable on sparc.
4825
4826 The funny mixture of D and T modes is because integer operations
4827 do not specially operate on tetra quantities, so non-quad-aligned
4828 registers can hold quadword quantities (except %o4 and %i4 because
4829 they cross fixed registers).
4830
4831 ??? Note that, despite the settings, non-double-aligned parameter
4832 registers can hold double-word quantities in 32-bit mode. */
4833
4834 /* This points to either the 32 bit or the 64 bit version. */
4835 const int *hard_regno_mode_classes;
4836
4837 static const int hard_32bit_mode_classes[] = {
4838 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4839 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4840 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4841 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4842
4843 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4844 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4845 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4846 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4847
4848 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4849 and none can hold SFmode/SImode values. */
4850 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4851 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4852 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4853 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4854
4855 /* %fcc[0123] */
4856 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4857
4858 /* %icc, %sfp, %gsr */
4859 CC_MODES, 0, D_MODES
4860 };
4861
4862 static const int hard_64bit_mode_classes[] = {
4863 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4864 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4865 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4866 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4867
4868 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4869 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4870 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4871 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4872
4873 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4874 and none can hold SFmode/SImode values. */
4875 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4876 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4877 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4878 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4879
4880 /* %fcc[0123] */
4881 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4882
4883 /* %icc, %sfp, %gsr */
4884 CC_MODES, 0, D_MODES
4885 };
4886
4887 int sparc_mode_class [NUM_MACHINE_MODES];
4888
4889 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4890
4891 static void
4892 sparc_init_modes (void)
4893 {
4894 int i;
4895
4896 for (i = 0; i < NUM_MACHINE_MODES; i++)
4897 {
4898 machine_mode m = (machine_mode) i;
4899 unsigned int size = GET_MODE_SIZE (m);
4900
4901 switch (GET_MODE_CLASS (m))
4902 {
4903 case MODE_INT:
4904 case MODE_PARTIAL_INT:
4905 case MODE_COMPLEX_INT:
4906 if (size < 4)
4907 sparc_mode_class[i] = 1 << (int) H_MODE;
4908 else if (size == 4)
4909 sparc_mode_class[i] = 1 << (int) S_MODE;
4910 else if (size == 8)
4911 sparc_mode_class[i] = 1 << (int) D_MODE;
4912 else if (size == 16)
4913 sparc_mode_class[i] = 1 << (int) T_MODE;
4914 else if (size == 32)
4915 sparc_mode_class[i] = 1 << (int) O_MODE;
4916 else
4917 sparc_mode_class[i] = 0;
4918 break;
4919 case MODE_VECTOR_INT:
4920 if (size == 4)
4921 sparc_mode_class[i] = 1 << (int) SF_MODE;
4922 else if (size == 8)
4923 sparc_mode_class[i] = 1 << (int) DF_MODE;
4924 else
4925 sparc_mode_class[i] = 0;
4926 break;
4927 case MODE_FLOAT:
4928 case MODE_COMPLEX_FLOAT:
4929 if (size == 4)
4930 sparc_mode_class[i] = 1 << (int) SF_MODE;
4931 else if (size == 8)
4932 sparc_mode_class[i] = 1 << (int) DF_MODE;
4933 else if (size == 16)
4934 sparc_mode_class[i] = 1 << (int) TF_MODE;
4935 else if (size == 32)
4936 sparc_mode_class[i] = 1 << (int) OF_MODE;
4937 else
4938 sparc_mode_class[i] = 0;
4939 break;
4940 case MODE_CC:
4941 if (m == CCFPmode || m == CCFPEmode)
4942 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4943 else
4944 sparc_mode_class[i] = 1 << (int) CC_MODE;
4945 break;
4946 default:
4947 sparc_mode_class[i] = 0;
4948 break;
4949 }
4950 }
4951
4952 if (TARGET_ARCH64)
4953 hard_regno_mode_classes = hard_64bit_mode_classes;
4954 else
4955 hard_regno_mode_classes = hard_32bit_mode_classes;
4956
4957 /* Initialize the array used by REGNO_REG_CLASS. */
4958 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4959 {
4960 if (i < 16 && TARGET_V8PLUS)
4961 sparc_regno_reg_class[i] = I64_REGS;
4962 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4963 sparc_regno_reg_class[i] = GENERAL_REGS;
4964 else if (i < 64)
4965 sparc_regno_reg_class[i] = FP_REGS;
4966 else if (i < 96)
4967 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4968 else if (i < 100)
4969 sparc_regno_reg_class[i] = FPCC_REGS;
4970 else
4971 sparc_regno_reg_class[i] = NO_REGS;
4972 }
4973 }
4974 \f
4975 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4976
4977 static inline bool
4978 save_global_or_fp_reg_p (unsigned int regno,
4979 int leaf_function ATTRIBUTE_UNUSED)
4980 {
4981 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4982 }
4983
4984 /* Return whether the return address register (%i7) is needed. */
4985
4986 static inline bool
4987 return_addr_reg_needed_p (int leaf_function)
4988 {
4989 /* If it is live, for example because of __builtin_return_address (0). */
4990 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4991 return true;
4992
4993 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4994 if (!leaf_function
4995 /* Loading the GOT register clobbers %o7. */
4996 || crtl->uses_pic_offset_table
4997 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4998 return true;
4999
5000 return false;
5001 }
5002
5003 /* Return whether REGNO, a local or in register, must be saved/restored. */
5004
5005 static bool
5006 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5007 {
5008 /* General case: call-saved registers live at some point. */
5009 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5010 return true;
5011
5012 /* Frame pointer register (%fp) if needed. */
5013 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5014 return true;
5015
5016 /* Return address register (%i7) if needed. */
5017 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5018 return true;
5019
5020 /* GOT register (%l7) if needed. */
5021 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5022 return true;
5023
5024 /* If the function accesses prior frames, the frame pointer and the return
5025 address of the previous frame must be saved on the stack. */
5026 if (crtl->accesses_prior_frames
5027 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5028 return true;
5029
5030 return false;
5031 }
5032
5033 /* Compute the frame size required by the function. This function is called
5034 during the reload pass and also by sparc_expand_prologue. */
5035
5036 HOST_WIDE_INT
5037 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5038 {
5039 HOST_WIDE_INT frame_size, apparent_frame_size;
5040 int args_size, n_global_fp_regs = 0;
5041 bool save_local_in_regs_p = false;
5042 unsigned int i;
5043
5044 /* If the function allocates dynamic stack space, the dynamic offset is
5045 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5046 if (leaf_function && !cfun->calls_alloca)
5047 args_size = 0;
5048 else
5049 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5050
5051 /* Calculate space needed for global registers. */
5052 if (TARGET_ARCH64)
5053 {
5054 for (i = 0; i < 8; i++)
5055 if (save_global_or_fp_reg_p (i, 0))
5056 n_global_fp_regs += 2;
5057 }
5058 else
5059 {
5060 for (i = 0; i < 8; i += 2)
5061 if (save_global_or_fp_reg_p (i, 0)
5062 || save_global_or_fp_reg_p (i + 1, 0))
5063 n_global_fp_regs += 2;
5064 }
5065
5066 /* In the flat window model, find out which local and in registers need to
5067 be saved. We don't reserve space in the current frame for them as they
5068 will be spilled into the register window save area of the caller's frame.
5069 However, as soon as we use this register window save area, we must create
5070 that of the current frame to make it the live one. */
5071 if (TARGET_FLAT)
5072 for (i = 16; i < 32; i++)
5073 if (save_local_or_in_reg_p (i, leaf_function))
5074 {
5075 save_local_in_regs_p = true;
5076 break;
5077 }
5078
5079 /* Calculate space needed for FP registers. */
5080 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5081 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5082 n_global_fp_regs += 2;
5083
5084 if (size == 0
5085 && n_global_fp_regs == 0
5086 && args_size == 0
5087 && !save_local_in_regs_p)
5088 frame_size = apparent_frame_size = 0;
5089 else
5090 {
5091 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5092 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5093 apparent_frame_size += n_global_fp_regs * 4;
5094
5095 /* We need to add the size of the outgoing argument area. */
5096 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5097
5098 /* And that of the register window save area. */
5099 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5100
5101 /* Finally, bump to the appropriate alignment. */
5102 frame_size = SPARC_STACK_ALIGN (frame_size);
5103 }
5104
5105 /* Set up values for use in prologue and epilogue. */
5106 sparc_frame_size = frame_size;
5107 sparc_apparent_frame_size = apparent_frame_size;
5108 sparc_n_global_fp_regs = n_global_fp_regs;
5109 sparc_save_local_in_regs_p = save_local_in_regs_p;
5110
5111 return frame_size;
5112 }
5113
5114 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5115
5116 int
5117 sparc_initial_elimination_offset (int to)
5118 {
5119 int offset;
5120
5121 if (to == STACK_POINTER_REGNUM)
5122 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5123 else
5124 offset = 0;
5125
5126 offset += SPARC_STACK_BIAS;
5127 return offset;
5128 }
5129
5130 /* Output any necessary .register pseudo-ops. */
5131
5132 void
5133 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5134 {
5135 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5136 int i;
5137
5138 if (TARGET_ARCH32)
5139 return;
5140
5141 /* Check if %g[2367] were used without
5142 .register being printed for them already. */
5143 for (i = 2; i < 8; i++)
5144 {
5145 if (df_regs_ever_live_p (i)
5146 && ! sparc_hard_reg_printed [i])
5147 {
5148 sparc_hard_reg_printed [i] = 1;
5149 /* %g7 is used as TLS base register, use #ignore
5150 for it instead of #scratch. */
5151 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5152 i == 7 ? "ignore" : "scratch");
5153 }
5154 if (i == 3) i = 5;
5155 }
5156 #endif
5157 }
5158
5159 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5160
5161 #if PROBE_INTERVAL > 4096
5162 #error Cannot use indexed addressing mode for stack probing
5163 #endif
5164
5165 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5166 inclusive. These are offsets from the current stack pointer.
5167
5168 Note that we don't use the REG+REG addressing mode for the probes because
5169 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5170 anyway, so the advantages of having a single code path win here. */
5171
5172 static void
5173 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5174 {
5175 rtx g1 = gen_rtx_REG (Pmode, 1);
5176
5177 /* See if we have a constant small number of probes to generate. If so,
5178 that's the easy case. */
5179 if (size <= PROBE_INTERVAL)
5180 {
5181 emit_move_insn (g1, GEN_INT (first));
5182 emit_insn (gen_rtx_SET (g1,
5183 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5184 emit_stack_probe (plus_constant (Pmode, g1, -size));
5185 }
5186
5187 /* The run-time loop is made up of 9 insns in the generic case while the
5188 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5189 else if (size <= 4 * PROBE_INTERVAL)
5190 {
5191 HOST_WIDE_INT i;
5192
5193 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5194 emit_insn (gen_rtx_SET (g1,
5195 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5196 emit_stack_probe (g1);
5197
5198 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5199 it exceeds SIZE. If only two probes are needed, this will not
5200 generate any code. Then probe at FIRST + SIZE. */
5201 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5202 {
5203 emit_insn (gen_rtx_SET (g1,
5204 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5205 emit_stack_probe (g1);
5206 }
5207
5208 emit_stack_probe (plus_constant (Pmode, g1,
5209 (i - PROBE_INTERVAL) - size));
5210 }
5211
5212 /* Otherwise, do the same as above, but in a loop. Note that we must be
5213 extra careful with variables wrapping around because we might be at
5214 the very top (or the very bottom) of the address space and we have
5215 to be able to handle this case properly; in particular, we use an
5216 equality test for the loop condition. */
5217 else
5218 {
5219 HOST_WIDE_INT rounded_size;
5220 rtx g4 = gen_rtx_REG (Pmode, 4);
5221
5222 emit_move_insn (g1, GEN_INT (first));
5223
5224
5225 /* Step 1: round SIZE to the previous multiple of the interval. */
5226
5227 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5228 emit_move_insn (g4, GEN_INT (rounded_size));
5229
5230
5231 /* Step 2: compute initial and final value of the loop counter. */
5232
5233 /* TEST_ADDR = SP + FIRST. */
5234 emit_insn (gen_rtx_SET (g1,
5235 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5236
5237 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5238 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5239
5240
5241 /* Step 3: the loop
5242
5243 while (TEST_ADDR != LAST_ADDR)
5244 {
5245 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5246 probe at TEST_ADDR
5247 }
5248
5249 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5250 until it is equal to ROUNDED_SIZE. */
5251
5252 if (TARGET_ARCH64)
5253 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5254 else
5255 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5256
5257
5258 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5259 that SIZE is equal to ROUNDED_SIZE. */
5260
5261 if (size != rounded_size)
5262 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5263 }
5264
5265 /* Make sure nothing is scheduled before we are done. */
5266 emit_insn (gen_blockage ());
5267 }
5268
5269 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5270 absolute addresses. */
5271
5272 const char *
5273 output_probe_stack_range (rtx reg1, rtx reg2)
5274 {
5275 static int labelno = 0;
5276 char loop_lab[32];
5277 rtx xops[2];
5278
5279 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5280
5281 /* Loop. */
5282 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5283
5284 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5285 xops[0] = reg1;
5286 xops[1] = GEN_INT (-PROBE_INTERVAL);
5287 output_asm_insn ("add\t%0, %1, %0", xops);
5288
5289 /* Test if TEST_ADDR == LAST_ADDR. */
5290 xops[1] = reg2;
5291 output_asm_insn ("cmp\t%0, %1", xops);
5292
5293 /* Probe at TEST_ADDR and branch. */
5294 if (TARGET_ARCH64)
5295 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5296 else
5297 fputs ("\tbne\t", asm_out_file);
5298 assemble_name_raw (asm_out_file, loop_lab);
5299 fputc ('\n', asm_out_file);
5300 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5301 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5302
5303 return "";
5304 }
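/* Assuming the usual 4096-byte probe interval, and with TEST_ADDR in %g1 and
   LAST_ADDR in %g4 as set up by sparc_emit_probe_stack_range above, the loop
   emitted here looks roughly like

   .LLPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne	.LLPSRL0		! bne,pt %xcc, .LLPSRL0 on 64-bit
	 st	%g0, [%g1+2047]		! bias is 2047 in 64-bit mode, 0 in 32-bit

   i.e. each iteration moves TEST_ADDR down by one interval, probes it by
   storing %g0 in the delay slot, and loops until TEST_ADDR equals LAST_ADDR. */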
5305
5306 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5307 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5308 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5309 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5310 the action to be performed if it returns false. Return the new offset. */
5311
5312 typedef bool (*sorr_pred_t) (unsigned int, int);
5313 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5314
5315 static int
5316 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5317 int offset, int leaf_function, sorr_pred_t save_p,
5318 sorr_act_t action_true, sorr_act_t action_false)
5319 {
5320 unsigned int i;
5321 rtx mem;
5322 rtx_insn *insn;
5323
5324 if (TARGET_ARCH64 && high <= 32)
5325 {
5326 int fp_offset = -1;
5327
5328 for (i = low; i < high; i++)
5329 {
5330 if (save_p (i, leaf_function))
5331 {
5332 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5333 base, offset));
5334 if (action_true == SORR_SAVE)
5335 {
5336 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5337 RTX_FRAME_RELATED_P (insn) = 1;
5338 }
5339 else /* action_true == SORR_RESTORE */
5340 {
5341 /* The frame pointer must be restored last since its old
5342 value may be used as base address for the frame. This
5343 is problematic in 64-bit mode only because of the lack
5344 of double-word load instruction. */
5345 if (i == HARD_FRAME_POINTER_REGNUM)
5346 fp_offset = offset;
5347 else
5348 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5349 }
5350 offset += 8;
5351 }
5352 else if (action_false == SORR_ADVANCE)
5353 offset += 8;
5354 }
5355
5356 if (fp_offset >= 0)
5357 {
5358 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5359 emit_move_insn (hard_frame_pointer_rtx, mem);
5360 }
5361 }
5362 else
5363 {
5364 for (i = low; i < high; i += 2)
5365 {
5366 bool reg0 = save_p (i, leaf_function);
5367 bool reg1 = save_p (i + 1, leaf_function);
5368 machine_mode mode;
5369 int regno;
5370
5371 if (reg0 && reg1)
5372 {
5373 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5374 regno = i;
5375 }
5376 else if (reg0)
5377 {
5378 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5379 regno = i;
5380 }
5381 else if (reg1)
5382 {
5383 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5384 regno = i + 1;
5385 offset += 4;
5386 }
5387 else
5388 {
5389 if (action_false == SORR_ADVANCE)
5390 offset += 8;
5391 continue;
5392 }
5393
5394 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5395 if (action_true == SORR_SAVE)
5396 {
5397 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5398 RTX_FRAME_RELATED_P (insn) = 1;
5399 if (mode == DImode)
5400 {
5401 rtx set1, set2;
5402 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5403 offset));
5404 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5405 RTX_FRAME_RELATED_P (set1) = 1;
5406 mem
5407 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5408 offset + 4));
5409 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5410 RTX_FRAME_RELATED_P (set2) = 1;
5411 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5412 gen_rtx_PARALLEL (VOIDmode,
5413 gen_rtvec (2, set1, set2)));
5414 }
5415 }
5416 else /* action_true == SORR_RESTORE */
5417 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5418
5419 /* Bump and round down to double word
5420 in case we already bumped by 4. */
5421 offset = ROUND_DOWN (offset + 8, 8);
5422 }
5423 }
5424
5425 return offset;
5426 }
5427
5428 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5429
5430 static rtx
5431 emit_adjust_base_to_offset (rtx base, int offset)
5432 {
5433 /* ??? This might be optimized a little as %g1 might already have a
5434 value close enough that a single add insn will do. */
5435 /* ??? Although, all of this is probably only a temporary fix because
5436 if %g1 can hold a function result, then sparc_expand_epilogue will
5437 lose (the result will be clobbered). */
5438 rtx new_base = gen_rtx_REG (Pmode, 1);
5439 emit_move_insn (new_base, GEN_INT (offset));
5440 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5441 return new_base;
5442 }
5443
5444 /* Emit code to save/restore call-saved global and FP registers. */
5445
5446 static void
5447 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5448 {
5449 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5450 {
5451 base = emit_adjust_base_to_offset (base, offset);
5452 offset = 0;
5453 }
5454
5455 offset
5456 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5457 save_global_or_fp_reg_p, action, SORR_NONE);
5458 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5459 save_global_or_fp_reg_p, action, SORR_NONE);
5460 }
5461
5462 /* Emit code to save/restore call-saved local and in registers. */
5463
5464 static void
5465 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5466 {
5467 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5468 {
5469 base = emit_adjust_base_to_offset (base, offset);
5470 offset = 0;
5471 }
5472
5473 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5474 save_local_or_in_reg_p, action, SORR_ADVANCE);
5475 }
5476
5477 /* Emit a window_save insn. */
5478
5479 static rtx_insn *
5480 emit_window_save (rtx increment)
5481 {
5482 rtx_insn *insn = emit_insn (gen_window_save (increment));
5483 RTX_FRAME_RELATED_P (insn) = 1;
5484
5485 /* The incoming return address (%o7) is saved in %i7. */
5486 add_reg_note (insn, REG_CFA_REGISTER,
5487 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5488 gen_rtx_REG (Pmode,
5489 INCOMING_RETURN_ADDR_REGNUM)));
5490
5491 /* The window save event. */
5492 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5493
5494 /* The CFA is %fp, the hard frame pointer. */
5495 add_reg_note (insn, REG_CFA_DEF_CFA,
5496 plus_constant (Pmode, hard_frame_pointer_rtx,
5497 INCOMING_FRAME_SP_OFFSET));
5498
5499 return insn;
5500 }
5501
5502 /* Generate an increment for the stack pointer. */
5503
5504 static rtx
5505 gen_stack_pointer_inc (rtx increment)
5506 {
5507 return gen_rtx_SET (stack_pointer_rtx,
5508 gen_rtx_PLUS (Pmode,
5509 stack_pointer_rtx,
5510 increment));
5511 }
5512
5513 /* Expand the function prologue. The prologue is responsible for reserving
5514 storage for the frame, saving the call-saved registers and loading the
5515 GOT register if needed. */
5516
5517 void
5518 sparc_expand_prologue (void)
5519 {
5520 HOST_WIDE_INT size;
5521 rtx_insn *insn;
5522
5523 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5524 on the final value of the flag means deferring the prologue/epilogue
5525 expansion until just before the second scheduling pass, which is too
5526 late to emit multiple epilogues or return insns.
5527
5528 Of course we are making the assumption that the value of the flag
5529 will not change between now and its final value. Of the three parts
5530 of the formula, only the last one can reasonably vary. Let's take a
5531 closer look, after assuming that the first two are set to true
5532 (otherwise the last value is effectively silenced).
5533
5534 If only_leaf_regs_used returns false, the global predicate will also
5535 be false so the actual frame size calculated below will be positive.
5536 As a consequence, the save_register_window insn will be emitted in
5537 the instruction stream; now this insn explicitly references %fp
5538 which is not a leaf register so only_leaf_regs_used will always
5539 return false subsequently.
5540
5541 If only_leaf_regs_used returns true, we hope that the subsequent
5542 optimization passes won't cause non-leaf registers to pop up. For
5543 example, the regrename pass has special provisions to not rename to
5544 non-leaf registers in a leaf function. */
5545 sparc_leaf_function_p
5546 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5547
5548 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5549
5550 if (flag_stack_usage_info)
5551 current_function_static_stack_size = size;
5552
5553 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5554 {
5555 if (crtl->is_leaf && !cfun->calls_alloca)
5556 {
5557 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5558 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5559 size - STACK_CHECK_PROTECT);
5560 }
5561 else if (size > 0)
5562 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5563 }
5564
5565 if (size == 0)
5566 ; /* do nothing. */
5567 else if (sparc_leaf_function_p)
5568 {
5569 rtx size_int_rtx = GEN_INT (-size);
5570
5571 if (size <= 4096)
5572 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5573 else if (size <= 8192)
5574 {
5575 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5576 RTX_FRAME_RELATED_P (insn) = 1;
5577
5578 /* %sp is still the CFA register. */
5579 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5580 }
5581 else
5582 {
5583 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5584 emit_move_insn (size_rtx, size_int_rtx);
5585 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5586 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5587 gen_stack_pointer_inc (size_int_rtx));
5588 }
5589
5590 RTX_FRAME_RELATED_P (insn) = 1;
5591 }
5592 else
5593 {
5594 rtx size_int_rtx = GEN_INT (-size);
5595
5596 if (size <= 4096)
5597 emit_window_save (size_int_rtx);
5598 else if (size <= 8192)
5599 {
5600 emit_window_save (GEN_INT (-4096));
5601
5602 /* %sp is not the CFA register anymore. */
5603 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5604
5605 /* Make sure no %fp-based store is issued until after the frame is
5606 established. The offset between the frame pointer and the stack
5607 pointer is calculated relative to the value of the stack pointer
5608 at the end of the function prologue, and moving instructions that
5609 access the stack via the frame pointer between the instructions
5610 that decrement the stack pointer could result in accessing the
5611 register window save area, which is volatile. */
5612 emit_insn (gen_frame_blockage ());
5613 }
5614 else
5615 {
5616 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5617 emit_move_insn (size_rtx, size_int_rtx);
5618 emit_window_save (size_rtx);
5619 }
5620 }
5621
5622 if (sparc_leaf_function_p)
5623 {
5624 sparc_frame_base_reg = stack_pointer_rtx;
5625 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5626 }
5627 else
5628 {
5629 sparc_frame_base_reg = hard_frame_pointer_rtx;
5630 sparc_frame_base_offset = SPARC_STACK_BIAS;
5631 }
5632
5633 if (sparc_n_global_fp_regs > 0)
5634 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5635 sparc_frame_base_offset
5636 - sparc_apparent_frame_size,
5637 SORR_SAVE);
5638
5639 /* Load the GOT register if needed. */
5640 if (crtl->uses_pic_offset_table)
5641 load_got_register ();
5642
5643 /* Advertise that the data calculated just above are now valid. */
5644 sparc_prologue_data_valid_p = true;
5645 }
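
/* Editor's sketch (not in the original source): the three-way split on the
frame size used above, restated as a standalone helper.  The enum and
function names are hypothetical; the 4096/8192 thresholds follow from the
13-bit signed immediate field of SPARC add instructions.  */
#if 0
enum sp_adjust_kind { SP_SINGLE_ADD, SP_DOUBLE_ADD, SP_VIA_G1 };

static enum sp_adjust_kind
classify_sp_adjust (long size)
{
  if (size <= 4096)
    return SP_SINGLE_ADD;	/* -size fits in one simm13 immediate.  */
  if (size <= 8192)
    return SP_DOUBLE_ADD;	/* split into -4096 and 4096 - size.  */
  return SP_VIA_G1;		/* load -size into %g1, then add.  */
}
#endif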
5646
5647 /* Expand the function prologue for the flat (-mflat) model. Like the
5648 standard prologue above, it reserves storage for the frame, saves the
5649 call-saved registers and loads the GOT register if needed. */
5650
5651 void
5652 sparc_flat_expand_prologue (void)
5653 {
5654 HOST_WIDE_INT size;
5655 rtx_insn *insn;
5656
5657 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5658
5659 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5660
5661 if (flag_stack_usage_info)
5662 current_function_static_stack_size = size;
5663
5664 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5665 {
5666 if (crtl->is_leaf && !cfun->calls_alloca)
5667 {
5668 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5669 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5670 size - STACK_CHECK_PROTECT);
5671 }
5672 else if (size > 0)
5673 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5674 }
5675
5676 if (sparc_save_local_in_regs_p)
5677 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5678 SORR_SAVE);
5679
5680 if (size == 0)
5681 ; /* do nothing. */
5682 else
5683 {
5684 rtx size_int_rtx, size_rtx;
5685
5686 size_rtx = size_int_rtx = GEN_INT (-size);
5687
5688 /* We establish the frame (i.e. decrement the stack pointer) first, even
5689 if we use a frame pointer, because we cannot clobber any call-saved
5690 registers, including the frame pointer, if we haven't created a new
5691 register save area, for the sake of compatibility with the ABI. */
5692 if (size <= 4096)
5693 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5694 else if (size <= 8192 && !frame_pointer_needed)
5695 {
5696 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5697 RTX_FRAME_RELATED_P (insn) = 1;
5698 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5699 }
5700 else
5701 {
5702 size_rtx = gen_rtx_REG (Pmode, 1);
5703 emit_move_insn (size_rtx, size_int_rtx);
5704 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5705 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5706 gen_stack_pointer_inc (size_int_rtx));
5707 }
5708 RTX_FRAME_RELATED_P (insn) = 1;
5709
5710 /* Ensure nothing is scheduled until after the frame is established. */
5711 emit_insn (gen_blockage ());
5712
5713 if (frame_pointer_needed)
5714 {
5715 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5716 gen_rtx_MINUS (Pmode,
5717 stack_pointer_rtx,
5718 size_rtx)));
5719 RTX_FRAME_RELATED_P (insn) = 1;
5720
5721 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5722 gen_rtx_SET (hard_frame_pointer_rtx,
5723 plus_constant (Pmode, stack_pointer_rtx,
5724 size)));
5725 }
5726
5727 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5728 {
5729 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5730 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5731
5732 insn = emit_move_insn (i7, o7);
5733 RTX_FRAME_RELATED_P (insn) = 1;
5734
5735 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5736
5737 /* Prevent this instruction from ever being considered dead,
5738 even if this function has no epilogue. */
5739 emit_use (i7);
5740 }
5741 }
5742
5743 if (frame_pointer_needed)
5744 {
5745 sparc_frame_base_reg = hard_frame_pointer_rtx;
5746 sparc_frame_base_offset = SPARC_STACK_BIAS;
5747 }
5748 else
5749 {
5750 sparc_frame_base_reg = stack_pointer_rtx;
5751 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5752 }
5753
5754 if (sparc_n_global_fp_regs > 0)
5755 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5756 sparc_frame_base_offset
5757 - sparc_apparent_frame_size,
5758 SORR_SAVE);
5759
5760 /* Load the GOT register if needed. */
5761 if (crtl->uses_pic_offset_table)
5762 load_got_register ();
5763
5764 /* Advertise that the data calculated just above are now valid. */
5765 sparc_prologue_data_valid_p = true;
5766 }
5767
5768 /* This function generates the assembly code for function entry, which boils
5769 down to emitting the necessary .register directives. */
5770
5771 static void
5772 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5773 {
5774 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5775 if (!TARGET_FLAT)
5776 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5777
5778 sparc_output_scratch_registers (file);
5779 }
5780
5781 /* Expand the function epilogue, either normal or part of a sibcall.
5782 We emit all the instructions except the return or the call. */
5783
5784 void
5785 sparc_expand_epilogue (bool for_eh)
5786 {
5787 HOST_WIDE_INT size = sparc_frame_size;
5788
5789 if (sparc_n_global_fp_regs > 0)
5790 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5791 sparc_frame_base_offset
5792 - sparc_apparent_frame_size,
5793 SORR_RESTORE);
5794
5795 if (size == 0 || for_eh)
5796 ; /* do nothing. */
5797 else if (sparc_leaf_function_p)
5798 {
5799 if (size <= 4096)
5800 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5801 else if (size <= 8192)
5802 {
5803 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5804 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5805 }
5806 else
5807 {
5808 rtx reg = gen_rtx_REG (Pmode, 1);
5809 emit_move_insn (reg, GEN_INT (size));
5810 emit_insn (gen_stack_pointer_inc (reg));
5811 }
5812 }
5813 }
5814
5815 /* Expand the function epilogue, either normal or part of a sibcall.
5816 We emit all the instructions except the return or the call. */
5817
5818 void
5819 sparc_flat_expand_epilogue (bool for_eh)
5820 {
5821 HOST_WIDE_INT size = sparc_frame_size;
5822
5823 if (sparc_n_global_fp_regs > 0)
5824 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5825 sparc_frame_base_offset
5826 - sparc_apparent_frame_size,
5827 SORR_RESTORE);
5828
5829 /* If we have a frame pointer, we'll need both to restore it before the
5830 frame is destroyed and use its current value in destroying the frame.
5831 Since we don't have an atomic way to do that in the flat window model,
5832 we save the current value into a temporary register (%g1). */
5833 if (frame_pointer_needed && !for_eh)
5834 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5835
5836 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5837 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5838 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5839
5840 if (sparc_save_local_in_regs_p)
5841 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5842 sparc_frame_base_offset,
5843 SORR_RESTORE);
5844
5845 if (size == 0 || for_eh)
5846 ; /* do nothing. */
5847 else if (frame_pointer_needed)
5848 {
5849 /* Make sure the frame is destroyed after everything else is done. */
5850 emit_insn (gen_blockage ());
5851
5852 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5853 }
5854 else
5855 {
5856 /* Likewise. */
5857 emit_insn (gen_blockage ());
5858
5859 if (size <= 4096)
5860 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5861 else if (size <= 8192)
5862 {
5863 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5864 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5865 }
5866 else
5867 {
5868 rtx reg = gen_rtx_REG (Pmode, 1);
5869 emit_move_insn (reg, GEN_INT (size));
5870 emit_insn (gen_stack_pointer_inc (reg));
5871 }
5872 }
5873 }
5874
5875 /* Return true if it is appropriate to emit `return' instructions in the
5876 body of a function. */
5877
5878 bool
5879 sparc_can_use_return_insn_p (void)
5880 {
5881 return sparc_prologue_data_valid_p
5882 && sparc_n_global_fp_regs == 0
5883 && (TARGET_FLAT
5884 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5885 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5886 }
5887
5888 /* This function generates the assembly code for function exit. */
5889
5890 static void
5891 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5892 {
5893 /* If the last two instructions of a function are "call foo; dslot;"
5894 the return address might point to the first instruction in the next
5895 function and we have to output a dummy nop for the sake of sane
5896 backtraces in such cases. This is pointless for sibling calls since
5897 the return address is explicitly adjusted. */
5898
5899 rtx_insn *insn = get_last_insn ();
5900
5901 rtx last_real_insn = prev_real_insn (insn);
5902 if (last_real_insn
5903 && NONJUMP_INSN_P (last_real_insn)
5904 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5905 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5906
5907 if (last_real_insn
5908 && CALL_P (last_real_insn)
5909 && !SIBLING_CALL_P (last_real_insn))
5910 fputs("\tnop\n", file);
5911
5912 sparc_output_deferred_case_vectors ();
5913 }
5914
5915 /* Output a 'restore' instruction. */
5916
5917 static void
5918 output_restore (rtx pat)
5919 {
5920 rtx operands[3];
5921
5922 if (! pat)
5923 {
5924 fputs ("\t restore\n", asm_out_file);
5925 return;
5926 }
5927
5928 gcc_assert (GET_CODE (pat) == SET);
5929
5930 operands[0] = SET_DEST (pat);
5931 pat = SET_SRC (pat);
5932
5933 switch (GET_CODE (pat))
5934 {
5935 case PLUS:
5936 operands[1] = XEXP (pat, 0);
5937 operands[2] = XEXP (pat, 1);
5938 output_asm_insn (" restore %r1, %2, %Y0", operands);
5939 break;
5940 case LO_SUM:
5941 operands[1] = XEXP (pat, 0);
5942 operands[2] = XEXP (pat, 1);
5943 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5944 break;
5945 case ASHIFT:
5946 operands[1] = XEXP (pat, 0);
5947 gcc_assert (XEXP (pat, 1) == const1_rtx);
5948 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5949 break;
5950 default:
5951 operands[1] = pat;
5952 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5953 break;
5954 }
5955 }
5956
5957 /* Output a return. */
5958
5959 const char *
5960 output_return (rtx_insn *insn)
5961 {
5962 if (crtl->calls_eh_return)
5963 {
5964 /* If the function uses __builtin_eh_return, the eh_return
5965 machinery occupies the delay slot. */
5966 gcc_assert (!final_sequence);
5967
5968 if (flag_delayed_branch)
5969 {
5970 if (!TARGET_FLAT && TARGET_V9)
5971 fputs ("\treturn\t%i7+8\n", asm_out_file);
5972 else
5973 {
5974 if (!TARGET_FLAT)
5975 fputs ("\trestore\n", asm_out_file);
5976
5977 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5978 }
5979
5980 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5981 }
5982 else
5983 {
5984 if (!TARGET_FLAT)
5985 fputs ("\trestore\n", asm_out_file);
5986
5987 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5988 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5989 }
5990 }
5991 else if (sparc_leaf_function_p || TARGET_FLAT)
5992 {
5993 /* This is a leaf or flat function so we don't have to bother restoring
5994 the register window, which frees us from dealing with the convoluted
5995 semantics of restore/return. We simply output the jump to the
5996 return address and the insn in the delay slot (if any). */
5997
5998 return "jmp\t%%o7+%)%#";
5999 }
6000 else
6001 {
6002 /* This is a regular function so we have to restore the register window.
6003 We may have a pending insn for the delay slot, which will be either
6004 combined with the 'restore' instruction or put in the delay slot of
6005 the 'return' instruction. */
6006
6007 if (final_sequence)
6008 {
6009 rtx delay, pat;
6010
6011 delay = NEXT_INSN (insn);
6012 gcc_assert (delay);
6013
6014 pat = PATTERN (delay);
6015
6016 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6017 {
6018 epilogue_renumber (&pat, 0);
6019 return "return\t%%i7+%)%#";
6020 }
6021 else
6022 {
6023 output_asm_insn ("jmp\t%%i7+%)", NULL);
6024 output_restore (pat);
6025 PATTERN (delay) = gen_blockage ();
6026 INSN_CODE (delay) = -1;
6027 }
6028 }
6029 else
6030 {
6031 /* The delay slot is empty. */
6032 if (TARGET_V9)
6033 return "return\t%%i7+%)\n\t nop";
6034 else if (flag_delayed_branch)
6035 return "jmp\t%%i7+%)\n\t restore";
6036 else
6037 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6038 }
6039 }
6040
6041 return "";
6042 }
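
/* Editor's note (not in the original source): for a non-leaf V9 function
with an empty delay slot the templates above emit "return %i7+8" followed
by a nop, while a leaf or flat function only needs "jmp %o7+8" plus its
delay slot.  The '%)' punctuation is assumed here to expand to the return
offset (8, or 12 for 32-bit struct-returning functions) and '%#' to a nop
when the delay slot is empty.  */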
6043
6044 /* Output a sibling call. */
6045
6046 const char *
6047 output_sibcall (rtx_insn *insn, rtx call_operand)
6048 {
6049 rtx operands[1];
6050
6051 gcc_assert (flag_delayed_branch);
6052
6053 operands[0] = call_operand;
6054
6055 if (sparc_leaf_function_p || TARGET_FLAT)
6056 {
6057 /* This is a leaf or flat function so we don't have to bother restoring
6058 the register window. We simply output the jump to the function and
6059 the insn in the delay slot (if any). */
6060
6061 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6062
6063 if (final_sequence)
6064 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6065 operands);
6066 else
6067 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6068 it into a branch if possible. */
6069 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6070 operands);
6071 }
6072 else
6073 {
6074 /* This is a regular function so we have to restore the register window.
6075 We may have a pending insn for the delay slot, which will be combined
6076 with the 'restore' instruction. */
6077
6078 output_asm_insn ("call\t%a0, 0", operands);
6079
6080 if (final_sequence)
6081 {
6082 rtx_insn *delay = NEXT_INSN (insn);
6083 gcc_assert (delay);
6084
6085 output_restore (PATTERN (delay));
6086
6087 PATTERN (delay) = gen_blockage ();
6088 INSN_CODE (delay) = -1;
6089 }
6090 else
6091 output_restore (NULL_RTX);
6092 }
6093
6094 return "";
6095 }
6096 \f
6097 /* Functions for handling argument passing.
6098
6099 For 32-bit, the first 6 args are normally in registers and the rest are
6100 pushed. Any arg that starts within the first 6 words is at least
6101 partially passed in a register unless its data type forbids.
6102
6103 For 64-bit, the argument registers are laid out as an array of 16 elements
6104 and arguments are added sequentially. The first 6 int args and up to the
6105 first 16 fp args (depending on size) are passed in regs.
6106
6107 Slot Stack Integral Float Float in structure Double Long Double
6108 ---- ----- -------- ----- ------------------ ------ -----------
6109 15 [SP+248] %f31 %f30,%f31 %d30
6110 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6111 13 [SP+232] %f27 %f26,%f27 %d26
6112 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6113 11 [SP+216] %f23 %f22,%f23 %d22
6114 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6115 9 [SP+200] %f19 %f18,%f19 %d18
6116 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6117 7 [SP+184] %f15 %f14,%f15 %d14
6118 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6119 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6120 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6121 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6122 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6123 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6124 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6125
6126 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6127
6128 Integral arguments are always passed as 64-bit quantities appropriately
6129 extended.
6130
6131 Passing of floating point values is handled as follows.
6132 If a prototype is in scope:
6133 If the value is in a named argument (i.e. not a stdarg function or a
6134 value not part of the `...') then the value is passed in the appropriate
6135 fp reg.
6136 If the value is part of the `...' and is passed in one of the first 6
6137 slots then the value is passed in the appropriate int reg.
6138 If the value is part of the `...' and is not passed in one of the first 6
6139 slots then the value is passed in memory.
6140 If a prototype is not in scope:
6141 If the value is one of the first 6 arguments the value is passed in the
6142 appropriate integer reg and the appropriate fp reg.
6143 If the value is not one of the first 6 arguments the value is passed in
6144 the appropriate fp reg and in memory.
6145
6146
6147 Summary of the calling conventions implemented by GCC on the SPARC:
6148
6149 32-bit ABI:
6150 size argument return value
6151
6152 small integer <4 int. reg. int. reg.
6153 word 4 int. reg. int. reg.
6154 double word 8 int. reg. int. reg.
6155
6156 _Complex small integer <8 int. reg. int. reg.
6157 _Complex word 8 int. reg. int. reg.
6158 _Complex double word 16 memory int. reg.
6159
6160 vector integer <=8 int. reg. FP reg.
6161 vector integer >8 memory memory
6162
6163 float 4 int. reg. FP reg.
6164 double 8 int. reg. FP reg.
6165 long double 16 memory memory
6166
6167 _Complex float 8 memory FP reg.
6168 _Complex double 16 memory FP reg.
6169 _Complex long double 32 memory FP reg.
6170
6171 vector float any memory memory
6172
6173 aggregate any memory memory
6174
6175
6176
6177 64-bit ABI:
6178 size argument return value
6179
6180 small integer <8 int. reg. int. reg.
6181 word 8 int. reg. int. reg.
6182 double word 16 int. reg. int. reg.
6183
6184 _Complex small integer <16 int. reg. int. reg.
6185 _Complex word 16 int. reg. int. reg.
6186 _Complex double word 32 memory int. reg.
6187
6188 vector integer <=16 FP reg. FP reg.
6189 vector integer 16<s<=32 memory FP reg.
6190 vector integer >32 memory memory
6191
6192 float 4 FP reg. FP reg.
6193 double 8 FP reg. FP reg.
6194 long double 16 FP reg. FP reg.
6195
6196 _Complex float 8 FP reg. FP reg.
6197 _Complex double 16 FP reg. FP reg.
6198 _Complex long double 32 memory FP reg.
6199
6200 vector float <=16 FP reg. FP reg.
6201 vector float 16<s<=32 memory FP reg.
6202 vector float >32 memory memory
6203
6204 aggregate <=16 reg. reg.
6205 aggregate 16<s<=32 memory reg.
6206 aggregate >32 memory memory
6207
6208
6209
6210 Note #1: complex floating-point types follow the extended SPARC ABIs as
6211 implemented by the Sun compiler.
6212
6213 Note #2: integral vector types follow the scalar floating-point types
6214 conventions to match what is implemented by the Sun VIS SDK.
6215
6216 Note #3: floating-point vector types follow the aggregate types
6217 conventions. */
6218
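/* Editor's example (not in the original source): a hypothetical 64-bit ABI
prototype mapped onto the slot table above, assuming a prototype is in
scope and the FPU is enabled:

extern void f (int a, double b, float c, long d);

slot 0: a -> %o0  (integral, extended to 64 bits)
slot 1: b -> %d2  (double in the FP register pair of slot 1)
slot 2: c -> %f5  (float right-justified in the odd FP register)
slot 3: d -> %o3  (integral)  */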
6219
6220 /* Maximum number of int regs for args. */
6221 #define SPARC_INT_ARG_MAX 6
6222 /* Maximum number of fp regs for args. */
6223 #define SPARC_FP_ARG_MAX 16
6224 /* Number of words (partially) occupied for a given size in units. */
6225 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6226
6227 /* Handle the INIT_CUMULATIVE_ARGS macro.
6228 Initialize a variable CUM of type CUMULATIVE_ARGS
6229 for a call to a function whose data type is FNTYPE.
6230 For a library call, FNTYPE is 0. */
6231
6232 void
6233 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6234 {
6235 cum->words = 0;
6236 cum->prototype_p = fntype && prototype_p (fntype);
6237 cum->libcall_p = !fntype;
6238 }
6239
6240 /* Handle promotion of pointer and integer arguments. */
6241
6242 static machine_mode
6243 sparc_promote_function_mode (const_tree type, machine_mode mode,
6244 int *punsignedp, const_tree, int)
6245 {
6246 if (type && POINTER_TYPE_P (type))
6247 {
6248 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6249 return Pmode;
6250 }
6251
6252 /* Integral arguments are passed as full words, as per the ABI. */
6253 if (GET_MODE_CLASS (mode) == MODE_INT
6254 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6255 return word_mode;
6256
6257 return mode;
6258 }
6259
6260 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6261
6262 static bool
6263 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6264 {
6265 return TARGET_ARCH64 ? true : false;
6266 }
6267
6268 /* Traverse the record TYPE recursively and call FUNC on its fields.
6269 NAMED is true if this is for a named parameter. DATA is passed
6270 to FUNC for each field. OFFSET is the starting position and
6271 PACKED is true if we are inside a packed record. */
6272
6273 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6274 static void
6275 traverse_record_type (const_tree type, bool named, T *data,
6276 HOST_WIDE_INT offset = 0, bool packed = false)
6277 {
6278 /* The ABI obviously doesn't specify how packed structures are passed.
6279 These are passed in integer regs if possible, otherwise memory. */
6280 if (!packed)
6281 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6282 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6283 {
6284 packed = true;
6285 break;
6286 }
6287
6288 /* Walk the real fields, but skip those with no size or a zero size.
6289 ??? Fields with variable offset are handled as having zero offset. */
6290 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6291 if (TREE_CODE (field) == FIELD_DECL)
6292 {
6293 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6294 continue;
6295
6296 HOST_WIDE_INT bitpos = offset;
6297 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6298 bitpos += int_bit_position (field);
6299
6300 tree field_type = TREE_TYPE (field);
6301 if (TREE_CODE (field_type) == RECORD_TYPE)
6302 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6303 packed);
6304 else
6305 {
6306 const bool fp_type
6307 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6308 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6309 data);
6310 }
6311 }
6312 }
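
/* Editor's example (not in the original source): on a 64-bit target,
traversing

struct s { int i; double d; };

calls FUNC twice: for I at bit position 0 with FP false (integer field),
and for D at bit position 64 with FP true, provided the parameter is
named, the record is not packed and the FPU is enabled.  */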
6313
6314 /* Handle recursive register classifying for structure layout. */
6315
6316 typedef struct
6317 {
6318 bool fp_regs; /* true if field eligible to FP registers. */
6319 bool fp_regs_in_first_word; /* true if such field in first word. */
6320 } classify_data_t;
6321
6322 /* A subroutine of function_arg_slotno. Classify the field. */
6323
6324 inline void
6325 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6326 classify_data_t *data)
6327 {
6328 if (fp)
6329 {
6330 data->fp_regs = true;
6331 if (bitpos < BITS_PER_WORD)
6332 data->fp_regs_in_first_word = true;
6333 }
6334 }
6335
6336 /* Compute the slot number to pass an argument in.
6337 Return the slot number or -1 if passing on the stack.
6338
6339 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6340 the preceding args and about the function being called.
6341 MODE is the argument's machine mode.
6342 TYPE is the data type of the argument (as a tree).
6343 This is null for libcalls where that information may
6344 not be available.
6345 NAMED is nonzero if this argument is a named parameter
6346 (otherwise it is an extra parameter matching an ellipsis).
6347 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6348 *PREGNO records the register number to use if scalar type.
6349 *PPADDING records the amount of padding needed in words. */
6350
6351 static int
6352 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6353 const_tree type, bool named, bool incoming,
6354 int *pregno, int *ppadding)
6355 {
6356 int regbase = (incoming
6357 ? SPARC_INCOMING_INT_ARG_FIRST
6358 : SPARC_OUTGOING_INT_ARG_FIRST);
6359 int slotno = cum->words;
6360 enum mode_class mclass;
6361 int regno;
6362
6363 *ppadding = 0;
6364
6365 if (type && TREE_ADDRESSABLE (type))
6366 return -1;
6367
6368 if (TARGET_ARCH32
6369 && mode == BLKmode
6370 && type
6371 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6372 return -1;
6373
6374 /* For SPARC64, objects requiring 16-byte alignment get it. */
6375 if (TARGET_ARCH64
6376 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6377 && (slotno & 1) != 0)
6378 slotno++, *ppadding = 1;
6379
6380 mclass = GET_MODE_CLASS (mode);
6381 if (type && TREE_CODE (type) == VECTOR_TYPE)
6382 {
6383 /* Vector types deserve special treatment because they are
6384 polymorphic wrt their mode, depending upon whether VIS
6385 instructions are enabled. */
6386 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6387 {
6388 /* The SPARC port defines no floating-point vector modes. */
6389 gcc_assert (mode == BLKmode);
6390 }
6391 else
6392 {
6393 /* Integral vector types should either have a vector
6394 mode or an integral mode, because we are guaranteed
6395 by pass_by_reference that their size is not greater
6396 than 16 bytes and TImode is 16-byte wide. */
6397 gcc_assert (mode != BLKmode);
6398
6399 /* Vector integers are handled like floats according to
6400 the Sun VIS SDK. */
6401 mclass = MODE_FLOAT;
6402 }
6403 }
6404
6405 switch (mclass)
6406 {
6407 case MODE_FLOAT:
6408 case MODE_COMPLEX_FLOAT:
6409 case MODE_VECTOR_INT:
6410 if (TARGET_ARCH64 && TARGET_FPU && named)
6411 {
6412 /* If all arg slots are filled, then must pass on stack. */
6413 if (slotno >= SPARC_FP_ARG_MAX)
6414 return -1;
6415
6416 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6417 /* Arguments filling only one single FP register are
6418 right-justified in the outer double FP register. */
6419 if (GET_MODE_SIZE (mode) <= 4)
6420 regno++;
6421 break;
6422 }
6423 /* fallthrough */
6424
6425 case MODE_INT:
6426 case MODE_COMPLEX_INT:
6427 /* If all arg slots are filled, then must pass on stack. */
6428 if (slotno >= SPARC_INT_ARG_MAX)
6429 return -1;
6430
6431 regno = regbase + slotno;
6432 break;
6433
6434 case MODE_RANDOM:
6435 if (mode == VOIDmode)
6436 /* MODE is VOIDmode when generating the actual call. */
6437 return -1;
6438
6439 gcc_assert (mode == BLKmode);
6440
6441 if (TARGET_ARCH32
6442 || !type
6443 || (TREE_CODE (type) != RECORD_TYPE
6444 && TREE_CODE (type) != VECTOR_TYPE))
6445 {
6446 /* If all arg slots are filled, then must pass on stack. */
6447 if (slotno >= SPARC_INT_ARG_MAX)
6448 return -1;
6449
6450 regno = regbase + slotno;
6451 }
6452 else /* TARGET_ARCH64 && type */
6453 {
6454 /* If all arg slots are filled, then must pass on stack. */
6455 if (slotno >= SPARC_FP_ARG_MAX)
6456 return -1;
6457
6458 if (TREE_CODE (type) == RECORD_TYPE)
6459 {
6460 classify_data_t data = { false, false };
6461 traverse_record_type<classify_data_t, classify_registers>
6462 (type, named, &data);
6463
6464 if (data.fp_regs)
6465 {
6466 /* If all FP slots are filled except for the last one and
6467 there is no FP field in the first word, then must pass
6468 on stack. */
6469 if (slotno >= SPARC_FP_ARG_MAX - 1
6470 && !data.fp_regs_in_first_word)
6471 return -1;
6472 }
6473 else
6474 {
6475 /* If all int slots are filled, then must pass on stack. */
6476 if (slotno >= SPARC_INT_ARG_MAX)
6477 return -1;
6478 }
6479 }
6480
6481 /* PREGNO isn't set since both int and FP regs can be used. */
6482 return slotno;
6483 }
6484 break;
6485
6486 default :
6487 gcc_unreachable ();
6488 }
6489
6490 *pregno = regno;
6491 return slotno;
6492 }
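
/* Editor's example (not in the original source): on a 32-bit target the
seventh integer argument gets SLOTNO 6, which is >= SPARC_INT_ARG_MAX, so
the function returns -1 and the argument goes on the stack.  On a 64-bit
target, an argument whose type requires 16-byte alignment and would start
in an odd slot is bumped to the next even slot with *PPADDING set to 1.  */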
6493
6494 /* Handle recursive register counting/assigning for structure layout. */
6495
6496 typedef struct
6497 {
6498 int slotno; /* slot number of the argument. */
6499 int regbase; /* regno of the base register. */
6500 int intoffset; /* offset of the first pending integer field. */
6501 int nregs; /* number of words passed in registers. */
6502 bool stack; /* true if part of the argument is on the stack. */
6503 rtx ret; /* return expression being built. */
6504 } assign_data_t;
6505
6506 /* A subroutine of function_arg_record_value. Compute the number of integer
6507 registers to be assigned between PARMS->intoffset and BITPOS. Return
6508 true if at least one integer register is assigned or false otherwise. */
6509
6510 static bool
6511 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6512 {
6513 if (data->intoffset < 0)
6514 return false;
6515
6516 const int intoffset = data->intoffset;
6517 data->intoffset = -1;
6518
6519 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6520 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6521 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6522 int nregs = (endbit - startbit) / BITS_PER_WORD;
6523
6524 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6525 {
6526 nregs = SPARC_INT_ARG_MAX - this_slotno;
6527
6528 /* We need to pass this field (partly) on the stack. */
6529 data->stack = 1;
6530 }
6531
6532 if (nregs <= 0)
6533 return false;
6534
6535 *pnregs = nregs;
6536 return true;
6537 }
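
/* Editor's example (not in the original source): with INTOFFSET = 32 (an
integer field pending at byte 4) and BITPOS = 128 on a 64-bit target, the
span is widened to [0, 128) and (128 - 0) / BITS_PER_WORD = 2 integer
registers are requested.  */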
6538
6539 /* A subroutine of function_arg_record_value. Compute the number and the mode
6540 of the FP registers to be assigned for FIELD. Return true if at least one
6541 FP register is assigned or false otherwise. */
6542
6543 static bool
6544 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6545 assign_data_t *data,
6546 int *pnregs, machine_mode *pmode)
6547 {
6548 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6549 machine_mode mode = DECL_MODE (field);
6550 int nregs, nslots;
6551
6552 /* Slots are counted as words while regs are counted as having the size of
6553 the (inner) mode. */
6554 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6555 {
6556 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6557 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6558 }
6559 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6560 {
6561 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6562 nregs = 2;
6563 }
6564 else
6565 nregs = 1;
6566
6567 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6568
6569 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6570 {
6571 nslots = SPARC_FP_ARG_MAX - this_slotno;
6572 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6573
6574 /* We need to pass this field (partly) on the stack. */
6575 data->stack = 1;
6576
6577 if (nregs <= 0)
6578 return false;
6579 }
6580
6581 *pnregs = nregs;
6582 *pmode = mode;
6583 return true;
6584 }
6585
6586 /* A subroutine of function_arg_record_value. Count the number of registers
6587 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6588
6589 inline void
6590 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6591 assign_data_t *data)
6592 {
6593 if (fp)
6594 {
6595 int nregs;
6596 machine_mode mode;
6597
6598 if (compute_int_layout (bitpos, data, &nregs))
6599 data->nregs += nregs;
6600
6601 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6602 data->nregs += nregs;
6603 }
6604 else
6605 {
6606 if (data->intoffset < 0)
6607 data->intoffset = bitpos;
6608 }
6609 }
6610
6611 /* A subroutine of function_arg_record_value. Assign the bits of the
6612 structure between PARMS->intoffset and BITPOS to integer registers. */
6613
6614 static void
6615 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6616 {
6617 int intoffset = data->intoffset;
6618 machine_mode mode;
6619 int nregs;
6620
6621 if (!compute_int_layout (bitpos, data, &nregs))
6622 return;
6623
6624 /* If this is the trailing part of a word, only load that much into
6625 the register. Otherwise load the whole register. Note that in
6626 the latter case we may pick up unwanted bits. It's not a problem
6627 at the moment but we may wish to revisit. */
6628 if (intoffset % BITS_PER_WORD != 0)
6629 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6630 MODE_INT);
6631 else
6632 mode = word_mode;
6633
6634 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6635 unsigned int regno = data->regbase + this_slotno;
6636 intoffset /= BITS_PER_UNIT;
6637
6638 do
6639 {
6640 rtx reg = gen_rtx_REG (mode, regno);
6641 XVECEXP (data->ret, 0, data->stack + data->nregs)
6642 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6643 data->nregs += 1;
6644 mode = word_mode;
6645 regno += 1;
6646 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6647 }
6648 while (--nregs > 0);
6649 }
6650
6651 /* A subroutine of function_arg_record_value. Assign FIELD at position
6652 BITPOS to FP registers. */
6653
6654 static void
6655 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6656 assign_data_t *data)
6657 {
6658 int nregs;
6659 machine_mode mode;
6660
6661 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6662 return;
6663
6664 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6665 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6666 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6667 regno++;
6668 int pos = bitpos / BITS_PER_UNIT;
6669
6670 do
6671 {
6672 rtx reg = gen_rtx_REG (mode, regno);
6673 XVECEXP (data->ret, 0, data->stack + data->nregs)
6674 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6675 data->nregs += 1;
6676 regno += GET_MODE_SIZE (mode) / 4;
6677 pos += GET_MODE_SIZE (mode);
6678 }
6679 while (--nregs > 0);
6680 }
6681
6682 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6683 the structure between PARMS->intoffset and BITPOS to registers. */
6684
6685 inline void
6686 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6687 assign_data_t *data)
6688 {
6689 if (fp)
6690 {
6691 assign_int_registers (bitpos, data);
6692
6693 assign_fp_registers (field, bitpos, data);
6694 }
6695 else
6696 {
6697 if (data->intoffset < 0)
6698 data->intoffset = bitpos;
6699 }
6700 }
6701
6702 /* Used by function_arg and sparc_function_value_1 to implement the complex
6703 conventions of the 64-bit ABI for passing and returning structures.
6704 Return an expression valid as a return value for the FUNCTION_ARG
6705 and TARGET_FUNCTION_VALUE.
6706
6707 TYPE is the data type of the argument (as a tree).
6708 This is null for libcalls where that information may
6709 not be available.
6710 MODE is the argument's machine mode.
6711 SLOTNO is the index number of the argument's slot in the parameter array.
6712 NAMED is true if this argument is a named parameter
6713 (otherwise it is an extra parameter matching an ellipsis).
6714 REGBASE is the regno of the base register for the parameter array. */
6715
6716 static rtx
6717 function_arg_record_value (const_tree type, machine_mode mode,
6718 int slotno, bool named, int regbase)
6719 {
6720 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6721 assign_data_t data;
6722 int nregs;
6723
6724 data.slotno = slotno;
6725 data.regbase = regbase;
6726
6727 /* Count how many registers we need. */
6728 data.nregs = 0;
6729 data.intoffset = 0;
6730 data.stack = false;
6731 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6732
6733 /* Take into account pending integer fields. */
6734 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6735 data.nregs += nregs;
6736
6737 /* Allocate the vector and handle some annoying special cases. */
6738 nregs = data.nregs;
6739
6740 if (nregs == 0)
6741 {
6742 /* ??? Empty structure has no value? Duh? */
6743 if (typesize <= 0)
6744 {
6745 /* Though there's nothing really to store, return a word register
6746 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6747 leads to breakage due to the fact that there are zero bytes to
6748 load. */
6749 return gen_rtx_REG (mode, regbase);
6750 }
6751
6752 /* ??? C++ has structures with no fields, and yet a size. Give up
6753 for now and pass everything back in integer registers. */
6754 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6755 if (nregs + slotno > SPARC_INT_ARG_MAX)
6756 nregs = SPARC_INT_ARG_MAX - slotno;
6757 }
6758
6759 gcc_assert (nregs > 0);
6760
6761 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6762
6763 /* If at least one field must be passed on the stack, generate
6764 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6765 also be passed on the stack. We can't do much better because the
6766 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6767 of structures for which the fields passed exclusively in registers
6768 are not at the beginning of the structure. */
6769 if (data.stack)
6770 XVECEXP (data.ret, 0, 0)
6771 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6772
6773 /* Assign the registers. */
6774 data.nregs = 0;
6775 data.intoffset = 0;
6776 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6777
6778 /* Assign pending integer fields. */
6779 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6780
6781 gcc_assert (data.nregs == nregs);
6782
6783 return data.ret;
6784 }
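
/* Editor's example (not in the original source): passing

struct s { double d; long l; };

as a named 64-bit argument starting at slot 0 yields a PARALLEL with two
entries, roughly

(parallel [(expr_list (reg:DF %f0) (const_int 0))
           (expr_list (reg:DI %o1) (const_int 8))])

i.e. the double goes in the first FP argument register and the long in the
second outgoing integer argument register, at byte offsets 0 and 8.  */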
6785
6786 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6787 of the 64-bit ABI for passing and returning unions.
6788 Return an expression valid as a return value for the FUNCTION_ARG
6789 and TARGET_FUNCTION_VALUE.
6790
6791 SIZE is the size in bytes of the union.
6792 MODE is the argument's machine mode.
6793 REGNO is the hard register the union will be passed in. */
6794
6795 static rtx
6796 function_arg_union_value (int size, machine_mode mode, int slotno,
6797 int regno)
6798 {
6799 int nwords = CEIL_NWORDS (size), i;
6800 rtx regs;
6801
6802 /* See comment in previous function for empty structures. */
6803 if (nwords == 0)
6804 return gen_rtx_REG (mode, regno);
6805
6806 if (slotno == SPARC_INT_ARG_MAX - 1)
6807 nwords = 1;
6808
6809 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6810
6811 for (i = 0; i < nwords; i++)
6812 {
6813 /* Unions are passed left-justified. */
6814 XVECEXP (regs, 0, i)
6815 = gen_rtx_EXPR_LIST (VOIDmode,
6816 gen_rtx_REG (word_mode, regno),
6817 GEN_INT (UNITS_PER_WORD * i));
6818 regno++;
6819 }
6820
6821 return regs;
6822 }
6823
6824 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6825 for passing and returning BLKmode vectors.
6826 Return an expression valid as a return value for the FUNCTION_ARG
6827 and TARGET_FUNCTION_VALUE.
6828
6829 SIZE is the size in bytes of the vector.
6830 REGNO is the FP hard register the vector will be passed in. */
6831
6832 static rtx
6833 function_arg_vector_value (int size, int regno)
6834 {
6835 const int nregs = MAX (1, size / 8);
6836 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6837
6838 if (size < 8)
6839 XVECEXP (regs, 0, 0)
6840 = gen_rtx_EXPR_LIST (VOIDmode,
6841 gen_rtx_REG (SImode, regno),
6842 const0_rtx);
6843 else
6844 for (int i = 0; i < nregs; i++)
6845 XVECEXP (regs, 0, i)
6846 = gen_rtx_EXPR_LIST (VOIDmode,
6847 gen_rtx_REG (DImode, regno + 2*i),
6848 GEN_INT (i*8));
6849
6850 return regs;
6851 }
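
/* Editor's example (not in the original source): a 16-byte BLKmode vector
starting at FP register REGNO is split into two DImode pieces, REGNO at
byte offset 0 and REGNO + 2 at byte offset 8; FP registers are counted in
4-byte units, so each DImode piece occupies a register pair.  */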
6852
6853 /* Determine where to put an argument to a function.
6854 Value is zero to push the argument on the stack,
6855 or a hard register in which to store the argument.
6856
6857 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6858 the preceding args and about the function being called.
6859 MODE is the argument's machine mode.
6860 TYPE is the data type of the argument (as a tree).
6861 This is null for libcalls where that information may
6862 not be available.
6863 NAMED is true if this argument is a named parameter
6864 (otherwise it is an extra parameter matching an ellipsis).
6865 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6866 TARGET_FUNCTION_INCOMING_ARG. */
6867
6868 static rtx
6869 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6870 const_tree type, bool named, bool incoming)
6871 {
6872 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6873
6874 int regbase = (incoming
6875 ? SPARC_INCOMING_INT_ARG_FIRST
6876 : SPARC_OUTGOING_INT_ARG_FIRST);
6877 int slotno, regno, padding;
6878 enum mode_class mclass = GET_MODE_CLASS (mode);
6879
6880 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6881 &regno, &padding);
6882 if (slotno == -1)
6883 return 0;
6884
6885 /* Vector types deserve special treatment because they are polymorphic wrt
6886 their mode, depending upon whether VIS instructions are enabled. */
6887 if (type && TREE_CODE (type) == VECTOR_TYPE)
6888 {
6889 HOST_WIDE_INT size = int_size_in_bytes (type);
6890 gcc_assert ((TARGET_ARCH32 && size <= 8)
6891 || (TARGET_ARCH64 && size <= 16));
6892
6893 if (mode == BLKmode)
6894 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6895
6896 mclass = MODE_FLOAT;
6897 }
6898
6899 if (TARGET_ARCH32)
6900 return gen_rtx_REG (mode, regno);
6901
6902 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6903 and are promoted to registers if possible. */
6904 if (type && TREE_CODE (type) == RECORD_TYPE)
6905 {
6906 HOST_WIDE_INT size = int_size_in_bytes (type);
6907 gcc_assert (size <= 16);
6908
6909 return function_arg_record_value (type, mode, slotno, named, regbase);
6910 }
6911
6912 /* Unions up to 16 bytes in size are passed in integer registers. */
6913 else if (type && TREE_CODE (type) == UNION_TYPE)
6914 {
6915 HOST_WIDE_INT size = int_size_in_bytes (type);
6916 gcc_assert (size <= 16);
6917
6918 return function_arg_union_value (size, mode, slotno, regno);
6919 }
6920
6921 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6922 but also have the slot allocated for them.
6923 If no prototype is in scope, fp values in register slots get passed
6924 in two places, either fp regs and int regs or fp regs and memory. */
6925 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6926 && SPARC_FP_REG_P (regno))
6927 {
6928 rtx reg = gen_rtx_REG (mode, regno);
6929 if (cum->prototype_p || cum->libcall_p)
6930 return reg;
6931 else
6932 {
6933 rtx v0, v1;
6934
6935 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6936 {
6937 int intreg;
6938
6939 /* On incoming, we don't need to know that the value
6940 is passed in %f0 and %i0, and it confuses other parts
6941 causing needless spillage even on the simplest cases. */
6942 if (incoming)
6943 return reg;
6944
6945 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6946 + (regno - SPARC_FP_ARG_FIRST) / 2);
6947
6948 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6949 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6950 const0_rtx);
6951 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6952 }
6953 else
6954 {
6955 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6956 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6957 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6958 }
6959 }
6960 }
6961
6962 /* All other aggregate types are passed in an integer register in a mode
6963 corresponding to the size of the type. */
6964 else if (type && AGGREGATE_TYPE_P (type))
6965 {
6966 HOST_WIDE_INT size = int_size_in_bytes (type);
6967 gcc_assert (size <= 16);
6968
6969 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6970 }
6971
6972 return gen_rtx_REG (mode, regno);
6973 }
6974
6975 /* Handle the TARGET_FUNCTION_ARG target hook. */
6976
6977 static rtx
6978 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6979 const_tree type, bool named)
6980 {
6981 return sparc_function_arg_1 (cum, mode, type, named, false);
6982 }
6983
6984 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6985
6986 static rtx
6987 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6988 const_tree type, bool named)
6989 {
6990 return sparc_function_arg_1 (cum, mode, type, named, true);
6991 }
6992
6993 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6994
6995 static unsigned int
6996 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6997 {
6998 return ((TARGET_ARCH64
6999 && (GET_MODE_ALIGNMENT (mode) == 128
7000 || (type && TYPE_ALIGN (type) == 128)))
7001 ? 128
7002 : PARM_BOUNDARY);
7003 }
7004
7005 /* For an arg passed partly in registers and partly in memory,
7006 this is the number of bytes of registers used.
7007 For args passed entirely in registers or entirely in memory, zero.
7008
7009 Any arg that starts in the first 6 regs but won't entirely fit in them
7010 needs partial registers on v8. On v9, structures with integer
7011 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7012 values that begin in the last fp reg [where "last fp reg" varies with the
7013 mode] will be split between that reg and memory. */
7014
7015 static int
7016 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7017 tree type, bool named)
7018 {
7019 int slotno, regno, padding;
7020
7021 /* We pass false for incoming here, it doesn't matter. */
7022 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7023 false, &regno, &padding);
7024
7025 if (slotno == -1)
7026 return 0;
7027
7028 if (TARGET_ARCH32)
7029 {
7030 if ((slotno + (mode == BLKmode
7031 ? CEIL_NWORDS (int_size_in_bytes (type))
7032 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7033 > SPARC_INT_ARG_MAX)
7034 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7035 }
7036 else
7037 {
7038 /* We are guaranteed by pass_by_reference that the size of the
7039 argument is not greater than 16 bytes, so we only need to return
7040 one word if the argument is partially passed in registers. */
7041
7042 if (type && AGGREGATE_TYPE_P (type))
7043 {
7044 int size = int_size_in_bytes (type);
7045
7046 if (size > UNITS_PER_WORD
7047 && (slotno == SPARC_INT_ARG_MAX - 1
7048 || slotno == SPARC_FP_ARG_MAX - 1))
7049 return UNITS_PER_WORD;
7050 }
7051 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7052 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7053 && ! (TARGET_FPU && named)))
7054 {
7055 /* The complex types are passed as packed types. */
7056 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7057 && slotno == SPARC_INT_ARG_MAX - 1)
7058 return UNITS_PER_WORD;
7059 }
7060 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7061 {
7062 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7063 > SPARC_FP_ARG_MAX)
7064 return UNITS_PER_WORD;
7065 }
7066 }
7067
7068 return 0;
7069 }
7070
7071 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7072 Specify whether to pass the argument by reference. */
7073
7074 static bool
7075 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7076 machine_mode mode, const_tree type,
7077 bool named ATTRIBUTE_UNUSED)
7078 {
7079 if (TARGET_ARCH32)
7080 /* Original SPARC 32-bit ABI says that structures and unions,
7081 and quad-precision floats are passed by reference. For Pascal,
7082 also pass arrays by reference. All other base types are passed
7083 in registers.
7084
7085 Extended ABI (as implemented by the Sun compiler) says that all
7086 complex floats are passed by reference. Pass complex integers
7087 in registers up to 8 bytes. More generally, enforce the 2-word
7088 cap for passing arguments in registers.
7089
7090 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7091 integers are passed like floats of the same size, that is in
7092 registers up to 8 bytes. Pass all vector floats by reference
7093 like structure and unions. */
7094 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7095 || mode == SCmode
7096 /* Catch CDImode, TFmode, DCmode and TCmode. */
7097 || GET_MODE_SIZE (mode) > 8
7098 || (type
7099 && TREE_CODE (type) == VECTOR_TYPE
7100 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7101 else
7102 /* Original SPARC 64-bit ABI says that structures and unions
7103 smaller than 16 bytes are passed in registers, as well as
7104 all other base types.
7105
7106 Extended ABI (as implemented by the Sun compiler) says that
7107 complex floats are passed in registers up to 16 bytes. Pass
7108 all complex integers in registers up to 16 bytes. More generally,
7109 enforce the 2-word cap for passing arguments in registers.
7110
7111 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7112 integers are passed like floats of the same size, that is in
7113 registers (up to 16 bytes). Pass all vector floats like structure
7114 and unions. */
7115 return ((type
7116 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7117 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7118 /* Catch CTImode and TCmode. */
7119 || GET_MODE_SIZE (mode) > 16);
7120 }
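
/* Editor's example (not in the original source): under these rules a
32-bit target passes every aggregate, 'long double' (TFmode) and
'_Complex float' (SCmode) by reference, while a 64-bit target passes a
16-byte structure in registers but a 24-byte one by reference.  */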
7121
7122 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7123 Update the data in CUM to advance over an argument
7124 of mode MODE and data type TYPE.
7125 TYPE is null for libcalls where that information may not be available. */
7126
7127 static void
7128 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7129 const_tree type, bool named)
7130 {
7131 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7132 int regno, padding;
7133
7134 /* We pass false for incoming here, it doesn't matter. */
7135 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7136
7137 /* If argument requires leading padding, add it. */
7138 cum->words += padding;
7139
7140 if (TARGET_ARCH32)
7141 cum->words += (mode == BLKmode
7142 ? CEIL_NWORDS (int_size_in_bytes (type))
7143 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7144 else
7145 {
7146 if (type && AGGREGATE_TYPE_P (type))
7147 {
7148 int size = int_size_in_bytes (type);
7149
7150 if (size <= 8)
7151 ++cum->words;
7152 else if (size <= 16)
7153 cum->words += 2;
7154 else /* passed by reference */
7155 ++cum->words;
7156 }
7157 else
7158 cum->words += (mode == BLKmode
7159 ? CEIL_NWORDS (int_size_in_bytes (type))
7160 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7161 }
7162 }
7163
7164 /* Handle the FUNCTION_ARG_PADDING macro.
7165 For the 64-bit ABI, structs are always stored left-shifted in their
7166 argument slot. */
7167
7168 enum direction
7169 function_arg_padding (machine_mode mode, const_tree type)
7170 {
7171 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7172 return upward;
7173
7174 /* Fall back to the default. */
7175 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7176 }
7177
7178 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7179 Specify whether to return the return value in memory. */
7180
7181 static bool
7182 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7183 {
7184 if (TARGET_ARCH32)
7185 /* Original SPARC 32-bit ABI says that structures and unions,
7186 and quad-precision floats are returned in memory. All other
7187 base types are returned in registers.
7188
7189 Extended ABI (as implemented by the Sun compiler) says that
7190 all complex floats are returned in registers (8 FP registers
7191 at most for '_Complex long double'). Return all complex integers
7192 in registers (4 at most for '_Complex long long').
7193
7194 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7195 integers are returned like floats of the same size, that is in
7196 registers up to 8 bytes and in memory otherwise. Return all
7197 vector floats in memory like structure and unions; note that
7198 they always have BLKmode like the latter. */
7199 return (TYPE_MODE (type) == BLKmode
7200 || TYPE_MODE (type) == TFmode
7201 || (TREE_CODE (type) == VECTOR_TYPE
7202 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7203 else
7204 /* Original SPARC 64-bit ABI says that structures and unions
7205 smaller than 32 bytes are returned in registers, as well as
7206 all other base types.
7207
7208 Extended ABI (as implemented by the Sun compiler) says that all
7209 complex floats are returned in registers (8 FP registers at most
7210 for '_Complex long double'). Return all complex integers in
7211 registers (4 at most for '_Complex TItype').
7212
7213 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7214 integers are returned like floats of the same size, that is in
7215 registers. Return all vector floats like structure and unions;
7216 note that they always have BLKmode like the latter. */
7217 return (TYPE_MODE (type) == BLKmode
7218 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7219 }
7220
7221 /* Handle the TARGET_STRUCT_VALUE target hook.
7222 Return where to find the structure return value address. */
7223
7224 static rtx
7225 sparc_struct_value_rtx (tree fndecl, int incoming)
7226 {
7227 if (TARGET_ARCH64)
7228 return 0;
7229 else
7230 {
7231 rtx mem;
7232
7233 if (incoming)
7234 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7235 STRUCT_VALUE_OFFSET));
7236 else
7237 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7238 STRUCT_VALUE_OFFSET));
7239
7240 /* Only follow the SPARC ABI for fixed-size structure returns.
7241 Variable size structure returns are handled per the normal
7242 procedures in GCC. This is enabled by -mstd-struct-return. */
7243 if (incoming == 2
7244 && sparc_std_struct_return
7245 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7246 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7247 {
7248 /* We must check and adjust the return address, as it is optional
7249 as to whether the return object is really provided. */
7250 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7251 rtx scratch = gen_reg_rtx (SImode);
7252 rtx_code_label *endlab = gen_label_rtx ();
7253
7254 /* Calculate the return object size. */
7255 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7256 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7257 /* Construct a temporary return value. */
7258 rtx temp_val
7259 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7260
7261 /* Implement SPARC 32-bit psABI callee return struct checking:
7262
7263 Fetch the instruction where we will return to and see if
7264 it's an unimp instruction (the most significant 10 bits
7265 will be zero). */
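	  /* If that word matches the expected size, the caller really did
	     allocate a return object and follow the call with an unimp, so
	     keep the pre-adjusted return address and resume past it.
	     Otherwise undo the adjustment and redirect the store into the
	     local temporary, since no return slot was provided.  */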
7266 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7267 plus_constant (Pmode,
7268 ret_reg, 8)));
7269 /* Assume the size is valid and pre-adjust. */
7270 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7271 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7272 0, endlab);
7273 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7274 /* Write the address of the memory pointed to by temp_val into
7275 the memory pointed to by mem. */
7276 emit_move_insn (mem, XEXP (temp_val, 0));
7277 emit_label (endlab);
7278 }
7279
7280 return mem;
7281 }
7282 }
7283
7284 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7285 For v9, function return values are subject to the same rules as arguments,
7286 except that up to 32 bytes may be returned in registers. */
7287
7288 static rtx
7289 sparc_function_value_1 (const_tree type, machine_mode mode,
7290 bool outgoing)
7291 {
7292 /* Beware that the two values are swapped here wrt function_arg. */
7293 int regbase = (outgoing
7294 ? SPARC_INCOMING_INT_ARG_FIRST
7295 : SPARC_OUTGOING_INT_ARG_FIRST);
7296 enum mode_class mclass = GET_MODE_CLASS (mode);
7297 int regno;
7298
7299 /* Vector types deserve special treatment because they are polymorphic wrt
7300 their mode, depending upon whether VIS instructions are enabled. */
7301 if (type && TREE_CODE (type) == VECTOR_TYPE)
7302 {
7303 HOST_WIDE_INT size = int_size_in_bytes (type);
7304 gcc_assert ((TARGET_ARCH32 && size <= 8)
7305 || (TARGET_ARCH64 && size <= 32));
7306
7307 if (mode == BLKmode)
7308 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7309
7310 mclass = MODE_FLOAT;
7311 }
7312
7313 if (TARGET_ARCH64 && type)
7314 {
7315 /* Structures up to 32 bytes in size are returned in registers. */
7316 if (TREE_CODE (type) == RECORD_TYPE)
7317 {
7318 HOST_WIDE_INT size = int_size_in_bytes (type);
7319 gcc_assert (size <= 32);
7320
7321 return function_arg_record_value (type, mode, 0, 1, regbase);
7322 }
7323
7324 /* Unions up to 32 bytes in size are returned in integer registers. */
7325 else if (TREE_CODE (type) == UNION_TYPE)
7326 {
7327 HOST_WIDE_INT size = int_size_in_bytes (type);
7328 gcc_assert (size <= 32);
7329
7330 return function_arg_union_value (size, mode, 0, regbase);
7331 }
7332
7333 /* Objects that require it are returned in FP registers. */
7334 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7335 ;
7336
7337 /* All other aggregate types are returned in an integer register in a
7338 mode corresponding to the size of the type. */
7339 else if (AGGREGATE_TYPE_P (type))
7340 {
7341 /* All other aggregate types are passed in an integer register
7342 in a mode corresponding to the size of the type. */
7343 HOST_WIDE_INT size = int_size_in_bytes (type);
7344 gcc_assert (size <= 32);
7345
7346 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7347
7348 /* ??? We probably should have made the same ABI change in
7349 3.4.0 as the one we made for unions. The latter was
7350 required by the SCD though, while the former is not
7351 specified, so we favored compatibility and efficiency.
7352
7353 Now we're stuck for aggregates larger than 16 bytes,
7354 because OImode vanished in the meantime. Let's not
7355 try to be unduly clever, and simply follow the ABI
7356 for unions in that case. */
7357 if (mode == BLKmode)
7358 return function_arg_union_value (size, mode, 0, regbase);
7359 else
7360 mclass = MODE_INT;
7361 }
7362
7363 /* We should only have pointer and integer types at this point. This
7364 must match sparc_promote_function_mode. */
7365 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7366 mode = word_mode;
7367 }
7368
7369 /* We should only have pointer and integer types at this point, except with
7370 -freg-struct-return. This must match sparc_promote_function_mode. */
7371 else if (TARGET_ARCH32
7372 && !(type && AGGREGATE_TYPE_P (type))
7373 && mclass == MODE_INT
7374 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7375 mode = word_mode;
7376
7377 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7378 regno = SPARC_FP_ARG_FIRST;
7379 else
7380 regno = regbase;
7381
7382 return gen_rtx_REG (mode, regno);
7383 }
7384
7385 /* Handle TARGET_FUNCTION_VALUE.
7386 On the SPARC, the value is found in the first "output" register, but the
7387 called function leaves it in the first "input" register. */
7388
7389 static rtx
7390 sparc_function_value (const_tree valtype,
7391 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7392 bool outgoing)
7393 {
7394 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7395 }
7396
7397 /* Handle TARGET_LIBCALL_VALUE. */
7398
7399 static rtx
7400 sparc_libcall_value (machine_mode mode,
7401 const_rtx fun ATTRIBUTE_UNUSED)
7402 {
7403 return sparc_function_value_1 (NULL_TREE, mode, false);
7404 }
7405
7406 /* Handle FUNCTION_VALUE_REGNO_P.
7407 On the SPARC, the first "output" reg is used for integer values, and the
7408 first floating point register is used for floating point values. */
7409
7410 static bool
7411 sparc_function_value_regno_p (const unsigned int regno)
7412 {
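  /* Register 8 is %o0 and register 32 is %f0 in GCC's SPARC register
     numbering.  */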
7413 return (regno == 8 || (TARGET_FPU && regno == 32));
7414 }
7415
7416 /* Do what is necessary for `va_start'. We look at the current function
7417 to determine if stdarg or varargs is used and return the address of
7418 the first unnamed parameter. */
7419
7420 static rtx
7421 sparc_builtin_saveregs (void)
7422 {
7423 int first_reg = crtl->args.info.words;
7424 rtx address;
7425 int regno;
7426
7427 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7428 emit_move_insn (gen_rtx_MEM (word_mode,
7429 gen_rtx_PLUS (Pmode,
7430 frame_pointer_rtx,
7431 GEN_INT (FIRST_PARM_OFFSET (0)
7432 + (UNITS_PER_WORD
7433 * regno)))),
7434 gen_rtx_REG (word_mode,
7435 SPARC_INCOMING_INT_ARG_FIRST + regno));
7436
7437 address = gen_rtx_PLUS (Pmode,
7438 frame_pointer_rtx,
7439 GEN_INT (FIRST_PARM_OFFSET (0)
7440 + UNITS_PER_WORD * first_reg));
7441
7442 return address;
7443 }
7444
7445 /* Implement `va_start' for stdarg. */
7446
7447 static void
7448 sparc_va_start (tree valist, rtx nextarg)
7449 {
7450 nextarg = expand_builtin_saveregs ();
7451 std_expand_builtin_va_start (valist, nextarg);
7452 }
7453
7454 /* Implement `va_arg' for stdarg. */
7455
7456 static tree
7457 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7458 gimple_seq *post_p)
7459 {
7460 HOST_WIDE_INT size, rsize, align;
7461 tree addr, incr;
7462 bool indirect;
7463 tree ptrtype = build_pointer_type (type);
7464
7465 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7466 {
7467 indirect = true;
7468 size = rsize = UNITS_PER_WORD;
7469 align = 0;
7470 }
7471 else
7472 {
7473 indirect = false;
7474 size = int_size_in_bytes (type);
7475 rsize = ROUND_UP (size, UNITS_PER_WORD);
7476 align = 0;
7477
7478 if (TARGET_ARCH64)
7479 {
7480 /* For SPARC64, objects requiring 16-byte alignment get it. */
7481 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7482 align = 2 * UNITS_PER_WORD;
7483
7484 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7485 are left-justified in their slots. */
7486 if (AGGREGATE_TYPE_P (type))
7487 {
7488 if (size == 0)
7489 size = rsize = UNITS_PER_WORD;
7490 else
7491 size = rsize;
7492 }
7493 }
7494 }
7495
7496 incr = valist;
7497 if (align)
7498 {
7499 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7500 incr = fold_convert (sizetype, incr);
7501 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7502 size_int (-align));
7503 incr = fold_convert (ptr_type_node, incr);
7504 }
7505
7506 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7507 addr = incr;
7508
7509 if (BYTES_BIG_ENDIAN && size < rsize)
7510 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7511
7512 if (indirect)
7513 {
7514 addr = fold_convert (build_pointer_type (ptrtype), addr);
7515 addr = build_va_arg_indirect_ref (addr);
7516 }
7517
7518 /* If the address isn't aligned properly for the type, we need a temporary.
7519 FIXME: This is inefficient, usually we can do this in registers. */
7520 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7521 {
7522 tree tmp = create_tmp_var (type, "va_arg_tmp");
7523 tree dest_addr = build_fold_addr_expr (tmp);
7524 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7525 3, dest_addr, addr, size_int (rsize));
7526 TREE_ADDRESSABLE (tmp) = 1;
7527 gimplify_and_add (copy, pre_p);
7528 addr = dest_addr;
7529 }
7530
7531 else
7532 addr = fold_convert (ptrtype, addr);
7533
7534 incr = fold_build_pointer_plus_hwi (incr, rsize);
7535 gimplify_assign (valist, incr, post_p);
7536
7537 return build_va_arg_indirect_ref (addr);
7538 }
7539 \f
7540 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7541 Specify whether the vector mode is supported by the hardware. */
7542
7543 static bool
7544 sparc_vector_mode_supported_p (machine_mode mode)
7545 {
7546 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7547 }
7548 \f
7549 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7550
7551 static machine_mode
7552 sparc_preferred_simd_mode (machine_mode mode)
7553 {
7554 if (TARGET_VIS)
7555 switch (mode)
7556 {
7557 case SImode:
7558 return V2SImode;
7559 case HImode:
7560 return V4HImode;
7561 case QImode:
7562 return V8QImode;
7563
7564 default:;
7565 }
7566
7567 return word_mode;
7568 }
7569 \f
7570 /* Return the string to output an unconditional branch; the label is
7571 always operand 0 of the branch pattern.
7572
7573 DEST is the destination insn (i.e. the label), INSN is the source. */
7574
7575 const char *
7576 output_ubranch (rtx dest, rtx_insn *insn)
7577 {
7578 static char string[64];
7579 bool v9_form = false;
7580 int delta;
7581 char *p;
7582
7583 /* Even if we are trying to use cbcond for this, evaluate
7584 whether we can use V9 branches as our backup plan. */
7585
7586 delta = 5000000;
7587 if (INSN_ADDRESSES_SET_P ())
7588 delta = (INSN_ADDRESSES (INSN_UID (dest))
7589 - INSN_ADDRESSES (INSN_UID (insn)));
7590
7591 /* Leave some instructions for "slop". */
7592 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7593 v9_form = true;
7594
7595 if (TARGET_CBCOND)
7596 {
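      /* A "cwbe %g0, %g0, label" compares %g0 with itself, so it is always
	 taken; it acts as an unconditional branch without a delay slot
	 whenever the target is within cbcond range.  */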
7597 bool emit_nop = emit_cbcond_nop (insn);
7598 bool far = false;
7599 const char *rval;
7600
7601 if (delta < -500 || delta > 500)
7602 far = true;
7603
7604 if (far)
7605 {
7606 if (v9_form)
7607 rval = "ba,a,pt\t%%xcc, %l0";
7608 else
7609 rval = "b,a\t%l0";
7610 }
7611 else
7612 {
7613 if (emit_nop)
7614 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7615 else
7616 rval = "cwbe\t%%g0, %%g0, %l0";
7617 }
7618 return rval;
7619 }
7620
7621 if (v9_form)
7622 strcpy (string, "ba%*,pt\t%%xcc, ");
7623 else
7624 strcpy (string, "b%*\t");
7625
7626 p = strchr (string, '\0');
7627 *p++ = '%';
7628 *p++ = 'l';
7629 *p++ = '0';
7630 *p++ = '%';
7631 *p++ = '(';
7632 *p = '\0';
7633
7634 return string;
7635 }
7636
7637 /* Return the string to output a conditional branch to LABEL, which is
7638 the operand number of the label. OP is the conditional expression.
7639 XEXP (OP, 0) is assumed to be a condition code register (integer or
7640 floating point) and its mode specifies what kind of comparison we made.
7641
7642 DEST is the destination insn (i.e. the label), INSN is the source.
7643
7644 REVERSED is nonzero if we should reverse the sense of the comparison.
7645
7646 ANNUL is nonzero if we should generate an annulling branch. */
7647
7648 const char *
7649 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7650 rtx_insn *insn)
7651 {
7652 static char string[64];
7653 enum rtx_code code = GET_CODE (op);
7654 rtx cc_reg = XEXP (op, 0);
7655 machine_mode mode = GET_MODE (cc_reg);
7656 const char *labelno, *branch;
7657 int spaces = 8, far;
7658 char *p;
7659
7660 /* v9 branches are limited to +-1MB. If it is too far away,
7661 change
7662
7663 bne,pt %xcc, .LC30
7664
7665 to
7666
7667 be,pn %xcc, .+12
7668 nop
7669 ba .LC30
7670
7671 and
7672
7673 fbne,a,pn %fcc2, .LC29
7674
7675 to
7676
7677 fbe,pt %fcc2, .+16
7678 nop
7679 ba .LC29 */
7680
7681 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7682 if (reversed ^ far)
7683 {
7684 /* Reversal of FP compares takes care -- an ordered compare
7685 becomes an unordered compare and vice versa. */
7686 if (mode == CCFPmode || mode == CCFPEmode)
7687 code = reverse_condition_maybe_unordered (code);
7688 else
7689 code = reverse_condition (code);
7690 }
7691
7692 /* Start by writing the branch condition. */
7693 if (mode == CCFPmode || mode == CCFPEmode)
7694 {
7695 switch (code)
7696 {
7697 case NE:
7698 branch = "fbne";
7699 break;
7700 case EQ:
7701 branch = "fbe";
7702 break;
7703 case GE:
7704 branch = "fbge";
7705 break;
7706 case GT:
7707 branch = "fbg";
7708 break;
7709 case LE:
7710 branch = "fble";
7711 break;
7712 case LT:
7713 branch = "fbl";
7714 break;
7715 case UNORDERED:
7716 branch = "fbu";
7717 break;
7718 case ORDERED:
7719 branch = "fbo";
7720 break;
7721 case UNGT:
7722 branch = "fbug";
7723 break;
7724 case UNLT:
7725 branch = "fbul";
7726 break;
7727 case UNEQ:
7728 branch = "fbue";
7729 break;
7730 case UNGE:
7731 branch = "fbuge";
7732 break;
7733 case UNLE:
7734 branch = "fbule";
7735 break;
7736 case LTGT:
7737 branch = "fblg";
7738 break;
7739 default:
7740 gcc_unreachable ();
7741 }
7742
7743 /* ??? !v9: FP branches cannot be preceded by another floating point
7744 insn. Because there is currently no concept of pre-delay slots,
7745 we can fix this only by always emitting a nop before a floating
7746 point branch. */
7747
7748 string[0] = '\0';
7749 if (! TARGET_V9)
7750 strcpy (string, "nop\n\t");
7751 strcat (string, branch);
7752 }
7753 else
7754 {
7755 switch (code)
7756 {
7757 case NE:
7758 if (mode == CCVmode || mode == CCXVmode)
7759 branch = "bvs";
7760 else
7761 branch = "bne";
7762 break;
7763 case EQ:
7764 if (mode == CCVmode || mode == CCXVmode)
7765 branch = "bvc";
7766 else
7767 branch = "be";
7768 break;
7769 case GE:
7770 if (mode == CCNZmode || mode == CCXNZmode)
7771 branch = "bpos";
7772 else
7773 branch = "bge";
7774 break;
7775 case GT:
7776 branch = "bg";
7777 break;
7778 case LE:
7779 branch = "ble";
7780 break;
7781 case LT:
7782 if (mode == CCNZmode || mode == CCXNZmode)
7783 branch = "bneg";
7784 else
7785 branch = "bl";
7786 break;
7787 case GEU:
7788 branch = "bgeu";
7789 break;
7790 case GTU:
7791 branch = "bgu";
7792 break;
7793 case LEU:
7794 branch = "bleu";
7795 break;
7796 case LTU:
7797 branch = "blu";
7798 break;
7799 default:
7800 gcc_unreachable ();
7801 }
7802 strcpy (string, branch);
7803 }
7804 spaces -= strlen (branch);
7805 p = strchr (string, '\0');
7806
7807 /* Now add the annulling, the label, and a possible noop. */
7808 if (annul && ! far)
7809 {
7810 strcpy (p, ",a");
7811 p += 2;
7812 spaces -= 2;
7813 }
7814
7815 if (TARGET_V9)
7816 {
7817 rtx note;
7818 int v8 = 0;
7819
7820 if (! far && insn && INSN_ADDRESSES_SET_P ())
7821 {
7822 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7823 - INSN_ADDRESSES (INSN_UID (insn)));
7824 /* Leave some instructions for "slop". */
7825 if (delta < -260000 || delta >= 260000)
7826 v8 = 1;
7827 }
7828
7829 switch (mode)
7830 {
7831 case CCmode:
7832 case CCNZmode:
7833 case CCCmode:
7834 case CCVmode:
7835 labelno = "%%icc, ";
7836 if (v8)
7837 labelno = "";
7838 break;
7839 case CCXmode:
7840 case CCXNZmode:
7841 case CCXCmode:
7842 case CCXVmode:
7843 labelno = "%%xcc, ";
7844 gcc_assert (!v8);
7845 break;
7846 case CCFPmode:
7847 case CCFPEmode:
7848 {
7849 static char v9_fcc_labelno[] = "%%fccX, ";
7850 /* Set the char indicating the number of the fcc reg to use. */
7851 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7852 labelno = v9_fcc_labelno;
7853 if (v8)
7854 {
7855 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7856 labelno = "";
7857 }
7858 }
7859 break;
7860 default:
7861 gcc_unreachable ();
7862 }
7863
7864 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7865 {
7866 strcpy (p,
7867 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7868 ? ",pt" : ",pn");
7869 p += 3;
7870 spaces -= 3;
7871 }
7872 }
7873 else
7874 labelno = "";
7875
7876 if (spaces > 0)
7877 *p++ = '\t';
7878 else
7879 *p++ = ' ';
7880 strcpy (p, labelno);
7881 p = strchr (p, '\0');
7882 if (far)
7883 {
7884 strcpy (p, ".+12\n\t nop\n\tb\t");
7885 /* Skip the next insn if requested or
7886 if we know that it will be a nop. */
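	  /* Patching p[3] turns the ".+12" stored above into ".+16", so the
	     branch skips one extra 4-byte instruction.  */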
7887 if (annul || ! final_sequence)
7888 p[3] = '6';
7889 p += 14;
7890 }
7891 *p++ = '%';
7892 *p++ = 'l';
7893 *p++ = label + '0';
7894 *p++ = '%';
7895 *p++ = '#';
7896 *p = '\0';
7897
7898 return string;
7899 }
7900
7901 /* Emit a library call comparison between floating point X and Y.
7902 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7903 Return the new operator to be used in the comparison sequence.
7904
7905 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7906 values as arguments instead of the TFmode registers themselves,
7907 that's why we cannot call emit_float_lib_cmp. */
7908
7909 rtx
7910 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7911 {
7912 const char *qpfunc;
7913 rtx slot0, slot1, result, tem, tem2, libfunc;
7914 machine_mode mode;
7915 enum rtx_code new_comparison;
7916
7917 switch (comparison)
7918 {
7919 case EQ:
7920 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7921 break;
7922
7923 case NE:
7924 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7925 break;
7926
7927 case GT:
7928 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7929 break;
7930
7931 case GE:
7932 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7933 break;
7934
7935 case LT:
7936 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7937 break;
7938
7939 case LE:
7940 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7941 break;
7942
7943 case ORDERED:
7944 case UNORDERED:
7945 case UNGT:
7946 case UNLT:
7947 case UNEQ:
7948 case UNGE:
7949 case UNLE:
7950 case LTGT:
7951 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7952 break;
7953
7954 default:
7955 gcc_unreachable ();
7956 }
7957
7958 if (TARGET_ARCH64)
7959 {
7960 if (MEM_P (x))
7961 {
7962 tree expr = MEM_EXPR (x);
7963 if (expr)
7964 mark_addressable (expr);
7965 slot0 = x;
7966 }
7967 else
7968 {
7969 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7970 emit_move_insn (slot0, x);
7971 }
7972
7973 if (MEM_P (y))
7974 {
7975 tree expr = MEM_EXPR (y);
7976 if (expr)
7977 mark_addressable (expr);
7978 slot1 = y;
7979 }
7980 else
7981 {
7982 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7983 emit_move_insn (slot1, y);
7984 }
7985
7986 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7987 emit_library_call (libfunc, LCT_NORMAL,
7988 DImode, 2,
7989 XEXP (slot0, 0), Pmode,
7990 XEXP (slot1, 0), Pmode);
7991 mode = DImode;
7992 }
7993 else
7994 {
7995 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7996 emit_library_call (libfunc, LCT_NORMAL,
7997 SImode, 2,
7998 x, TFmode, y, TFmode);
7999 mode = SImode;
8000 }
8001
8002
8003 /* Immediately move the result of the libcall into a pseudo
8004 register so reload doesn't clobber the value if it needs
8005 the return register for a spill reg. */
8006 result = gen_reg_rtx (mode);
8007 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8008
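  /* The mapping below assumes the usual libcall convention: _Q_cmp and
     _Qp_cmp return 0 for equal, 1 for less, 2 for greater and 3 for
     unordered, while the other comparison functions return nonzero when
     the relation holds.  */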
8009 switch (comparison)
8010 {
8011 default:
8012 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8013 case ORDERED:
8014 case UNORDERED:
8015 new_comparison = (comparison == UNORDERED ? EQ : NE);
8016 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8017 case UNGT:
8018 case UNGE:
8019 new_comparison = (comparison == UNGT ? GT : NE);
8020 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8021 case UNLE:
8022 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8023 case UNLT:
8024 tem = gen_reg_rtx (mode);
8025 if (TARGET_ARCH32)
8026 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8027 else
8028 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8029 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8030 case UNEQ:
8031 case LTGT:
8032 tem = gen_reg_rtx (mode);
8033 if (TARGET_ARCH32)
8034 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8035 else
8036 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8037 tem2 = gen_reg_rtx (mode);
8038 if (TARGET_ARCH32)
8039 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8040 else
8041 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8042 new_comparison = (comparison == UNEQ ? EQ : NE);
8043 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8044 }
8045
8046 gcc_unreachable ();
8047 }
8048
8049 /* Generate an unsigned DImode to FP conversion. This is the same code
8050 optabs would emit if we didn't have TFmode patterns. */
8051
8052 void
8053 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8054 {
8055 rtx i0, i1, f0, in, out;
8056
8057 out = operands[0];
8058 in = force_reg (DImode, operands[1]);
8059 rtx_code_label *neglab = gen_label_rtx ();
8060 rtx_code_label *donelab = gen_label_rtx ();
8061 i0 = gen_reg_rtx (DImode);
8062 i1 = gen_reg_rtx (DImode);
8063 f0 = gen_reg_rtx (mode);
8064
8065 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8066
8067 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8068 emit_jump_insn (gen_jump (donelab));
8069 emit_barrier ();
8070
8071 emit_label (neglab);
8072
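  /* The value has its top bit set when viewed as unsigned, so it cannot be
     converted directly.  Halve it, OR in the discarded low bit so the final
     rounding still accounts for it (round to odd), convert, and then double
     the result.  */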
8073 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8074 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8075 emit_insn (gen_iordi3 (i0, i0, i1));
8076 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8077 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8078
8079 emit_label (donelab);
8080 }
8081
8082 /* Generate an FP to unsigned DImode conversion. This is the same code
8083 optabs would emit if we didn't have TFmode patterns. */
8084
8085 void
8086 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8087 {
8088 rtx i0, i1, f0, in, out, limit;
8089
8090 out = operands[0];
8091 in = force_reg (mode, operands[1]);
8092 rtx_code_label *neglab = gen_label_rtx ();
8093 rtx_code_label *donelab = gen_label_rtx ();
8094 i0 = gen_reg_rtx (DImode);
8095 i1 = gen_reg_rtx (DImode);
8096 limit = gen_reg_rtx (mode);
8097 f0 = gen_reg_rtx (mode);
8098
8099 emit_move_insn (limit,
8100 const_double_from_real_value (
8101 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8102 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8103
8104 emit_insn (gen_rtx_SET (out,
8105 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8106 emit_jump_insn (gen_jump (donelab));
8107 emit_barrier ();
8108
8109 emit_label (neglab);
8110
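  /* The value is at least 2^63, too big for a signed conversion.  Subtract
     2^63, convert, then put the top bit back by XORing the result with
     1 << 63.  */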
8111 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8112 emit_insn (gen_rtx_SET (i0,
8113 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8114 emit_insn (gen_movdi (i1, const1_rtx));
8115 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8116 emit_insn (gen_xordi3 (out, i0, i1));
8117
8118 emit_label (donelab);
8119 }
8120
8121 /* Return the string to output a compare and branch instruction to DEST.
8122 DEST is the destination insn (i.e. the label), INSN is the source,
8123 and OP is the conditional expression. */
8124
8125 const char *
8126 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8127 {
8128 machine_mode mode = GET_MODE (XEXP (op, 0));
8129 enum rtx_code code = GET_CODE (op);
8130 const char *cond_str, *tmpl;
8131 int far, emit_nop, len;
8132 static char string[64];
8133 char size_char;
8134
8135 /* Compare and Branch is limited to +-2KB. If it is too far away,
8136 change
8137
8138 cxbne X, Y, .LC30
8139
8140 to
8141
8142 cxbe X, Y, .+16
8143 nop
8144 ba,pt xcc, .LC30
8145 nop */
8146
8147 len = get_attr_length (insn);
8148
8149 far = len == 4;
8150 emit_nop = len == 2;
8151
8152 if (far)
8153 code = reverse_condition (code);
8154
8155 size_char = ((mode == SImode) ? 'w' : 'x');
8156
8157 switch (code)
8158 {
8159 case NE:
8160 cond_str = "ne";
8161 break;
8162
8163 case EQ:
8164 cond_str = "e";
8165 break;
8166
8167 case GE:
8168 cond_str = "ge";
8169 break;
8170
8171 case GT:
8172 cond_str = "g";
8173 break;
8174
8175 case LE:
8176 cond_str = "le";
8177 break;
8178
8179 case LT:
8180 cond_str = "l";
8181 break;
8182
8183 case GEU:
8184 cond_str = "cc";
8185 break;
8186
8187 case GTU:
8188 cond_str = "gu";
8189 break;
8190
8191 case LEU:
8192 cond_str = "leu";
8193 break;
8194
8195 case LTU:
8196 cond_str = "cs";
8197 break;
8198
8199 default:
8200 gcc_unreachable ();
8201 }
8202
8203 if (far)
8204 {
8205 int veryfar = 1, delta;
8206
8207 if (INSN_ADDRESSES_SET_P ())
8208 {
8209 delta = (INSN_ADDRESSES (INSN_UID (dest))
8210 - INSN_ADDRESSES (INSN_UID (insn)));
8211 /* Leave some instructions for "slop". */
8212 if (delta >= -260000 && delta < 260000)
8213 veryfar = 0;
8214 }
8215
8216 if (veryfar)
8217 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8218 else
8219 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8220 }
8221 else
8222 {
8223 if (emit_nop)
8224 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8225 else
8226 tmpl = "c%cb%s\t%%1, %%2, %%3";
8227 }
8228
8229 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8230
8231 return string;
8232 }
8233
8234 /* Return the string to output a conditional branch to LABEL, testing
8235 register REG. LABEL is the operand number of the label; REG is the
8236 operand number of the reg. OP is the conditional expression. The mode
8237 of REG says what kind of comparison we made.
8238
8239 DEST is the destination insn (i.e. the label), INSN is the source.
8240
8241 REVERSED is nonzero if we should reverse the sense of the comparison.
8242
8243 ANNUL is nonzero if we should generate an annulling branch. */
8244
8245 const char *
8246 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8247 int annul, rtx_insn *insn)
8248 {
8249 static char string[64];
8250 enum rtx_code code = GET_CODE (op);
8251 machine_mode mode = GET_MODE (XEXP (op, 0));
8252 rtx note;
8253 int far;
8254 char *p;
8255
8256 /* Branches on a register are limited to +-128KB. If it is too far away,
8257 change
8258
8259 brnz,pt %g1, .LC30
8260
8261 to
8262
8263 brz,pn %g1, .+12
8264 nop
8265 ba,pt %xcc, .LC30
8266
8267 and
8268
8269 brgez,a,pn %o1, .LC29
8270
8271 to
8272
8273 brlz,pt %o1, .+16
8274 nop
8275 ba,pt %xcc, .LC29 */
8276
8277 far = get_attr_length (insn) >= 3;
8278
8279 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8280 if (reversed ^ far)
8281 code = reverse_condition (code);
8282
8283 /* Only 64 bit versions of these instructions exist. */
8284 gcc_assert (mode == DImode);
8285
8286 /* Start by writing the branch condition. */
8287
8288 switch (code)
8289 {
8290 case NE:
8291 strcpy (string, "brnz");
8292 break;
8293
8294 case EQ:
8295 strcpy (string, "brz");
8296 break;
8297
8298 case GE:
8299 strcpy (string, "brgez");
8300 break;
8301
8302 case LT:
8303 strcpy (string, "brlz");
8304 break;
8305
8306 case LE:
8307 strcpy (string, "brlez");
8308 break;
8309
8310 case GT:
8311 strcpy (string, "brgz");
8312 break;
8313
8314 default:
8315 gcc_unreachable ();
8316 }
8317
8318 p = strchr (string, '\0');
8319
8320 /* Now add the annulling, reg, label, and nop. */
8321 if (annul && ! far)
8322 {
8323 strcpy (p, ",a");
8324 p += 2;
8325 }
8326
8327 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8328 {
8329 strcpy (p,
8330 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8331 ? ",pt" : ",pn");
8332 p += 3;
8333 }
8334
8335 *p = p < string + 8 ? '\t' : ' ';
8336 p++;
8337 *p++ = '%';
8338 *p++ = '0' + reg;
8339 *p++ = ',';
8340 *p++ = ' ';
8341 if (far)
8342 {
8343 int veryfar = 1, delta;
8344
8345 if (INSN_ADDRESSES_SET_P ())
8346 {
8347 delta = (INSN_ADDRESSES (INSN_UID (dest))
8348 - INSN_ADDRESSES (INSN_UID (insn)));
8349 /* Leave some instructions for "slop". */
8350 if (delta >= -260000 && delta < 260000)
8351 veryfar = 0;
8352 }
8353
8354 strcpy (p, ".+12\n\t nop\n\t");
8355 /* Skip the next insn if requested or
8356 if we know that it will be a nop. */
8357 if (annul || ! final_sequence)
8358 p[3] = '6';
8359 p += 12;
8360 if (veryfar)
8361 {
8362 strcpy (p, "b\t");
8363 p += 2;
8364 }
8365 else
8366 {
8367 strcpy (p, "ba,pt\t%%xcc, ");
8368 p += 13;
8369 }
8370 }
8371 *p++ = '%';
8372 *p++ = 'l';
8373 *p++ = '0' + label;
8374 *p++ = '%';
8375 *p++ = '#';
8376 *p = '\0';
8377
8378 return string;
8379 }
8380
8381 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8382 Such instructions cannot be used in the delay slot of return insn on v9.
8383 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8384 */
8385
8386 static int
8387 epilogue_renumber (register rtx *where, int test)
8388 {
8389 register const char *fmt;
8390 register int i;
8391 register enum rtx_code code;
8392
8393 if (*where == 0)
8394 return 0;
8395
8396 code = GET_CODE (*where);
8397
8398 switch (code)
8399 {
8400 case REG:
8401 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8402 return 1;
8403 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8404 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8405 /* fallthrough */
8406 case SCRATCH:
8407 case CC0:
8408 case PC:
8409 case CONST_INT:
8410 case CONST_WIDE_INT:
8411 case CONST_DOUBLE:
8412 return 0;
8413
8414 /* Do not replace the frame pointer with the stack pointer because
8415 it can cause the delayed instruction to load below the stack.
8416 This occurs when instructions like:
8417
8418 (set (reg/i:SI 24 %i0)
8419 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8420 (const_int -20 [0xffffffec])) 0))
8421
8422 are in the return delayed slot. */
8423 case PLUS:
8424 if (GET_CODE (XEXP (*where, 0)) == REG
8425 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8426 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8427 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8428 return 1;
8429 break;
8430
8431 case MEM:
8432 if (SPARC_STACK_BIAS
8433 && GET_CODE (XEXP (*where, 0)) == REG
8434 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8435 return 1;
8436 break;
8437
8438 default:
8439 break;
8440 }
8441
8442 fmt = GET_RTX_FORMAT (code);
8443
8444 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8445 {
8446 if (fmt[i] == 'E')
8447 {
8448 register int j;
8449 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8450 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8451 return 1;
8452 }
8453 else if (fmt[i] == 'e'
8454 && epilogue_renumber (&(XEXP (*where, i)), test))
8455 return 1;
8456 }
8457 return 0;
8458 }
8459 \f
8460 /* Leaf functions and non-leaf functions have different needs. */
8461
8462 static const int
8463 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8464
8465 static const int
8466 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8467
8468 static const int *const reg_alloc_orders[] = {
8469 reg_leaf_alloc_order,
8470 reg_nonleaf_alloc_order};
8471
8472 void
8473 order_regs_for_local_alloc (void)
8474 {
8475 static int last_order_nonleaf = 1;
8476
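  /* Hard register 15 is %o7; it becomes live when the function emits calls,
     so this effectively tests whether the function is a leaf.  */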
8477 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8478 {
8479 last_order_nonleaf = !last_order_nonleaf;
8480 memcpy ((char *) reg_alloc_order,
8481 (const char *) reg_alloc_orders[last_order_nonleaf],
8482 FIRST_PSEUDO_REGISTER * sizeof (int));
8483 }
8484 }
8485 \f
8486 /* Return 1 if REG and MEM are legitimate enough to allow the various
8487 MEM<-->REG splits to be run. */
8488
8489 int
8490 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8491 {
8492 /* Punt if we are here by mistake. */
8493 gcc_assert (reload_completed);
8494
8495 /* We must have an offsettable memory reference. */
8496 if (!offsettable_memref_p (mem))
8497 return 0;
8498
8499 /* If we have legitimate args for ldd/std, we do not want
8500 the split to happen. */
8501 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8502 return 0;
8503
8504 /* Success. */
8505 return 1;
8506 }
8507
8508 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8509
8510 void
8511 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8512 {
8513 rtx high_part = gen_highpart (mode, dest);
8514 rtx low_part = gen_lowpart (mode, dest);
8515 rtx word0 = adjust_address (src, mode, 0);
8516 rtx word1 = adjust_address (src, mode, 4);
8517
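  /* If the high part of the destination also appears in the address of the
     second word, loading it first would clobber that address, so load the
     low part first in that case.  */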
8518 if (reg_overlap_mentioned_p (high_part, word1))
8519 {
8520 emit_move_insn_1 (low_part, word1);
8521 emit_move_insn_1 (high_part, word0);
8522 }
8523 else
8524 {
8525 emit_move_insn_1 (high_part, word0);
8526 emit_move_insn_1 (low_part, word1);
8527 }
8528 }
8529
8530 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8531
8532 void
8533 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8534 {
8535 rtx word0 = adjust_address (dest, mode, 0);
8536 rtx word1 = adjust_address (dest, mode, 4);
8537 rtx high_part = gen_highpart (mode, src);
8538 rtx low_part = gen_lowpart (mode, src);
8539
8540 emit_move_insn_1 (word0, high_part);
8541 emit_move_insn_1 (word1, low_part);
8542 }
8543
8544 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8545
8546 int
8547 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8548 {
8549 /* Punt if we are here by mistake. */
8550 gcc_assert (reload_completed);
8551
8552 if (GET_CODE (reg1) == SUBREG)
8553 reg1 = SUBREG_REG (reg1);
8554 if (GET_CODE (reg1) != REG)
8555 return 0;
8556 const int regno1 = REGNO (reg1);
8557
8558 if (GET_CODE (reg2) == SUBREG)
8559 reg2 = SUBREG_REG (reg2);
8560 if (GET_CODE (reg2) != REG)
8561 return 0;
8562 const int regno2 = REGNO (reg2);
8563
8564 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8565 return 1;
8566
8567 if (TARGET_VIS3)
8568 {
8569 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8570 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8571 return 1;
8572 }
8573
8574 return 0;
8575 }
8576
8577 /* Split a REG <--> REG move into a pair of moves in MODE. */
8578
8579 void
8580 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8581 {
8582 rtx dest1 = gen_highpart (mode, dest);
8583 rtx dest2 = gen_lowpart (mode, dest);
8584 rtx src1 = gen_highpart (mode, src);
8585 rtx src2 = gen_lowpart (mode, src);
8586
8587 /* Now emit using the real source and destination we found, swapping
8588 the order if we detect overlap. */
8589 if (reg_overlap_mentioned_p (dest1, src2))
8590 {
8591 emit_move_insn_1 (dest2, src2);
8592 emit_move_insn_1 (dest1, src1);
8593 }
8594 else
8595 {
8596 emit_move_insn_1 (dest1, src1);
8597 emit_move_insn_1 (dest2, src2);
8598 }
8599 }
8600
8601 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8602 This makes them candidates for using ldd and std insns.
8603
8604 Note reg1 and reg2 *must* be hard registers. */
8605
8606 int
8607 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8608 {
8609 /* We might have been passed a SUBREG. */
8610 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8611 return 0;
8612
8613 if (REGNO (reg1) % 2 != 0)
8614 return 0;
8615
8616 /* Integer ldd is deprecated in SPARC V9. */
8617 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8618 return 0;
8619
8620 return (REGNO (reg1) == REGNO (reg2) - 1);
8621 }
8622
8623 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8624 an ldd or std insn.
8625
8626 This can only happen when addr1 and addr2, the addresses in mem1
8627 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8628 addr1 must also be aligned on a 64-bit boundary.
8629
8630 Also iff dependent_reg_rtx is not null it should not be used to
8631 compute the address for mem1, i.e. we cannot optimize a sequence
8632 like:
8633 ld [%o0], %o0
8634 ld [%o0 + 4], %o1
8635 to
8636 ldd [%o0], %o0
8637 nor:
8638 ld [%g3 + 4], %g3
8639 ld [%g3], %g2
8640 to
8641 ldd [%g3], %g2
8642
8643 But, note that the transformation from:
8644 ld [%g2 + 4], %g3
8645 ld [%g2], %g2
8646 to
8647 ldd [%g2], %g2
8648 is perfectly fine. Thus, the peephole2 patterns always pass us
8649 the destination register of the first load, never the second one.
8650
8651 For stores we don't have a similar problem, so dependent_reg_rtx is
8652 NULL_RTX. */
8653
8654 int
8655 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8656 {
8657 rtx addr1, addr2;
8658 unsigned int reg1;
8659 HOST_WIDE_INT offset1;
8660
8661 /* The mems cannot be volatile. */
8662 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8663 return 0;
8664
8665 /* MEM1 should be aligned on a 64-bit boundary. */
8666 if (MEM_ALIGN (mem1) < 64)
8667 return 0;
8668
8669 addr1 = XEXP (mem1, 0);
8670 addr2 = XEXP (mem2, 0);
8671
8672 /* Extract a register number and offset (if used) from the first addr. */
8673 if (GET_CODE (addr1) == PLUS)
8674 {
8675 /* If not a REG, return zero. */
8676 if (GET_CODE (XEXP (addr1, 0)) != REG)
8677 return 0;
8678 else
8679 {
8680 reg1 = REGNO (XEXP (addr1, 0));
8681 /* The offset must be constant! */
8682 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8683 return 0;
8684 offset1 = INTVAL (XEXP (addr1, 1));
8685 }
8686 }
8687 else if (GET_CODE (addr1) != REG)
8688 return 0;
8689 else
8690 {
8691 reg1 = REGNO (addr1);
8692 /* This was a simple (mem (reg)) expression. Offset is 0. */
8693 offset1 = 0;
8694 }
8695
8696 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8697 if (GET_CODE (addr2) != PLUS)
8698 return 0;
8699
8700 if (GET_CODE (XEXP (addr2, 0)) != REG
8701 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8702 return 0;
8703
8704 if (reg1 != REGNO (XEXP (addr2, 0)))
8705 return 0;
8706
8707 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8708 return 0;
8709
8710 /* The first offset must be evenly divisible by 8 to ensure the
8711 address is 64 bit aligned. */
8712 if (offset1 % 8 != 0)
8713 return 0;
8714
8715 /* The offset for the second addr must be 4 more than the first addr. */
8716 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8717 return 0;
8718
8719 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8720 instructions. */
8721 return 1;
8722 }
8723
8724 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8725
8726 rtx
8727 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8728 {
8729 rtx x = widen_memory_access (mem1, mode, 0);
8730 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8731 return x;
8732 }
8733
8734 /* Return 1 if reg is a pseudo, or is the first register in
8735 a hard register pair. This makes it suitable for use in
8736 ldd and std insns. */
8737
8738 int
8739 register_ok_for_ldd (rtx reg)
8740 {
8741 /* We might have been passed a SUBREG. */
8742 if (!REG_P (reg))
8743 return 0;
8744
8745 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8746 return (REGNO (reg) % 2 == 0);
8747
8748 return 1;
8749 }
8750
8751 /* Return 1 if OP, a MEM, has an address which is known to be
8752 aligned to an 8-byte boundary. */
8753
8754 int
8755 memory_ok_for_ldd (rtx op)
8756 {
8757 /* In 64-bit mode, we assume that the address is word-aligned. */
8758 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8759 return 0;
8760
8761 if (! can_create_pseudo_p ()
8762 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8763 return 0;
8764
8765 return 1;
8766 }
8767 \f
8768 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8769
8770 static bool
8771 sparc_print_operand_punct_valid_p (unsigned char code)
8772 {
8773 if (code == '#'
8774 || code == '*'
8775 || code == '('
8776 || code == ')'
8777 || code == '_'
8778 || code == '&')
8779 return true;
8780
8781 return false;
8782 }
8783
8784 /* Implement TARGET_PRINT_OPERAND.
8785 Print operand X (an rtx) in assembler syntax to file FILE.
8786 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8787 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8788
8789 static void
8790 sparc_print_operand (FILE *file, rtx x, int code)
8791 {
8792 const char *s;
8793
8794 switch (code)
8795 {
8796 case '#':
8797 /* Output an insn in a delay slot. */
8798 if (final_sequence)
8799 sparc_indent_opcode = 1;
8800 else
8801 fputs ("\n\t nop", file);
8802 return;
8803 case '*':
8804 /* Output an annul flag if there's nothing for the delay slot and we
8805 are optimizing. This is always used with '(' below.
8806 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8807 this is a dbx bug. So, we only do this when optimizing.
8808 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8809 Always emit a nop in case the next instruction is a branch. */
8810 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8811 fputs (",a", file);
8812 return;
8813 case '(':
8814 /* Output a 'nop' if there's nothing for the delay slot and we are
8815 not optimizing. This is always used with '*' above. */
8816 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8817 fputs ("\n\t nop", file);
8818 else if (final_sequence)
8819 sparc_indent_opcode = 1;
8820 return;
8821 case ')':
8822 /* Output the right displacement from the saved PC on function return.
8823 The caller may have placed an "unimp" insn immediately after the call
8824 so we have to account for it. This insn is used in the 32-bit ABI
8825 when calling a function that returns a non zero-sized structure. The
8826 64-bit ABI doesn't have it. Be careful to have this test be the same
8827 as that for the call. The exception is when sparc_std_struct_return
8828 is enabled, the psABI is followed exactly and the adjustment is made
8829 by the code in sparc_struct_value_rtx. The call emitted is the same
8830 when sparc_std_struct_return is enabled. */
8831 if (!TARGET_ARCH64
8832 && cfun->returns_struct
8833 && !sparc_std_struct_return
8834 && DECL_SIZE (DECL_RESULT (current_function_decl))
8835 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8836 == INTEGER_CST
8837 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8838 fputs ("12", file);
8839 else
8840 fputc ('8', file);
8841 return;
8842 case '_':
8843 /* Output the Embedded Medium/Anywhere code model base register. */
8844 fputs (EMBMEDANY_BASE_REG, file);
8845 return;
8846 case '&':
8847 /* Print some local dynamic TLS name. */
8848 if (const char *name = get_some_local_dynamic_name ())
8849 assemble_name (file, name);
8850 else
8851 output_operand_lossage ("'%%&' used without any "
8852 "local dynamic TLS references");
8853 return;
8854
8855 case 'Y':
8856 /* Adjust the operand to take into account a RESTORE operation. */
8857 if (GET_CODE (x) == CONST_INT)
8858 break;
8859 else if (GET_CODE (x) != REG)
8860 output_operand_lossage ("invalid %%Y operand");
8861 else if (REGNO (x) < 8)
8862 fputs (reg_names[REGNO (x)], file);
8863 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8864 fputs (reg_names[REGNO (x)-16], file);
8865 else
8866 output_operand_lossage ("invalid %%Y operand");
8867 return;
8868 case 'L':
8869 /* Print out the low order register name of a register pair. */
8870 if (WORDS_BIG_ENDIAN)
8871 fputs (reg_names[REGNO (x)+1], file);
8872 else
8873 fputs (reg_names[REGNO (x)], file);
8874 return;
8875 case 'H':
8876 /* Print out the high order register name of a register pair. */
8877 if (WORDS_BIG_ENDIAN)
8878 fputs (reg_names[REGNO (x)], file);
8879 else
8880 fputs (reg_names[REGNO (x)+1], file);
8881 return;
8882 case 'R':
8883 /* Print out the second register name of a register pair or quad.
8884 I.e., R (%o0) => %o1. */
8885 fputs (reg_names[REGNO (x)+1], file);
8886 return;
8887 case 'S':
8888 /* Print out the third register name of a register quad.
8889 I.e., S (%o0) => %o2. */
8890 fputs (reg_names[REGNO (x)+2], file);
8891 return;
8892 case 'T':
8893 /* Print out the fourth register name of a register quad.
8894 I.e., T (%o0) => %o3. */
8895 fputs (reg_names[REGNO (x)+3], file);
8896 return;
8897 case 'x':
8898 /* Print a condition code register. */
8899 if (REGNO (x) == SPARC_ICC_REG)
8900 {
8901 switch (GET_MODE (x))
8902 {
8903 case CCmode:
8904 case CCNZmode:
8905 case CCCmode:
8906 case CCVmode:
8907 s = "%icc";
8908 break;
8909 case CCXmode:
8910 case CCXNZmode:
8911 case CCXCmode:
8912 case CCXVmode:
8913 s = "%xcc";
8914 break;
8915 default:
8916 gcc_unreachable ();
8917 }
8918 fputs (s, file);
8919 }
8920 else
8921 /* %fccN register */
8922 fputs (reg_names[REGNO (x)], file);
8923 return;
8924 case 'm':
8925 /* Print the operand's address only. */
8926 output_address (GET_MODE (x), XEXP (x, 0));
8927 return;
8928 case 'r':
8929 /* In this case we need a register. Use %g0 if the
8930 operand is const0_rtx. */
8931 if (x == const0_rtx
8932 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8933 {
8934 fputs ("%g0", file);
8935 return;
8936 }
8937 else
8938 break;
8939
8940 case 'A':
8941 switch (GET_CODE (x))
8942 {
8943 case IOR:
8944 s = "or";
8945 break;
8946 case AND:
8947 s = "and";
8948 break;
8949 case XOR:
8950 s = "xor";
8951 break;
8952 default:
8953 output_operand_lossage ("invalid %%A operand");
8954 s = "";
8955 break;
8956 }
8957 fputs (s, file);
8958 return;
8959
8960 case 'B':
8961 switch (GET_CODE (x))
8962 {
8963 case IOR:
8964 s = "orn";
8965 break;
8966 case AND:
8967 s = "andn";
8968 break;
8969 case XOR:
8970 s = "xnor";
8971 break;
8972 default:
8973 output_operand_lossage ("invalid %%B operand");
8974 s = "";
8975 break;
8976 }
8977 fputs (s, file);
8978 return;
8979
8980 /* This is used by the conditional move instructions. */
8981 case 'C':
8982 {
8983 machine_mode mode = GET_MODE (XEXP (x, 0));
8984 switch (GET_CODE (x))
8985 {
8986 case NE:
8987 if (mode == CCVmode || mode == CCXVmode)
8988 s = "vs";
8989 else
8990 s = "ne";
8991 break;
8992 case EQ:
8993 if (mode == CCVmode || mode == CCXVmode)
8994 s = "vc";
8995 else
8996 s = "e";
8997 break;
8998 case GE:
8999 if (mode == CCNZmode || mode == CCXNZmode)
9000 s = "pos";
9001 else
9002 s = "ge";
9003 break;
9004 case GT:
9005 s = "g";
9006 break;
9007 case LE:
9008 s = "le";
9009 break;
9010 case LT:
9011 if (mode == CCNZmode || mode == CCXNZmode)
9012 s = "neg";
9013 else
9014 s = "l";
9015 break;
9016 case GEU:
9017 s = "geu";
9018 break;
9019 case GTU:
9020 s = "gu";
9021 break;
9022 case LEU:
9023 s = "leu";
9024 break;
9025 case LTU:
9026 s = "lu";
9027 break;
9028 case LTGT:
9029 s = "lg";
9030 break;
9031 case UNORDERED:
9032 s = "u";
9033 break;
9034 case ORDERED:
9035 s = "o";
9036 break;
9037 case UNLT:
9038 s = "ul";
9039 break;
9040 case UNLE:
9041 s = "ule";
9042 break;
9043 case UNGT:
9044 s = "ug";
9045 break;
9046 case UNGE:
9047 s = "uge"
9048 ; break;
9049 case UNEQ:
9050 s = "ue";
9051 break;
9052 default:
9053 output_operand_lossage ("invalid %%C operand");
9054 s = "";
9055 break;
9056 }
9057 fputs (s, file);
9058 return;
9059 }
9060
9061 /* These are used by the movr instruction pattern. */
9062 case 'D':
9063 {
9064 switch (GET_CODE (x))
9065 {
9066 case NE:
9067 s = "ne";
9068 break;
9069 case EQ:
9070 s = "e";
9071 break;
9072 case GE:
9073 s = "gez";
9074 break;
9075 case LT:
9076 s = "lz";
9077 break;
9078 case LE:
9079 s = "lez";
9080 break;
9081 case GT:
9082 s = "gz";
9083 break;
9084 default:
9085 output_operand_lossage ("invalid %%D operand");
9086 s = "";
9087 break;
9088 }
9089 fputs (s, file);
9090 return;
9091 }
9092
9093 case 'b':
9094 {
9095 /* Print a sign-extended character. */
9096 int i = trunc_int_for_mode (INTVAL (x), QImode);
9097 fprintf (file, "%d", i);
9098 return;
9099 }
9100
9101 case 'f':
9102 /* Operand must be a MEM; write its address. */
9103 if (GET_CODE (x) != MEM)
9104 output_operand_lossage ("invalid %%f operand");
9105 output_address (GET_MODE (x), XEXP (x, 0));
9106 return;
9107
9108 case 's':
9109 {
9110 /* Print a sign-extended 32-bit value. */
9111 HOST_WIDE_INT i;
9112 if (GET_CODE(x) == CONST_INT)
9113 i = INTVAL (x);
9114 else
9115 {
9116 output_operand_lossage ("invalid %%s operand");
9117 return;
9118 }
9119 i = trunc_int_for_mode (i, SImode);
9120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9121 return;
9122 }
9123
9124 case 0:
9125 /* Do nothing special. */
9126 break;
9127
9128 default:
9129 /* Undocumented flag. */
9130 output_operand_lossage ("invalid operand output code");
9131 }
9132
9133 if (GET_CODE (x) == REG)
9134 fputs (reg_names[REGNO (x)], file);
9135 else if (GET_CODE (x) == MEM)
9136 {
9137 fputc ('[', file);
9138 /* Poor Sun assembler doesn't understand absolute addressing. */
9139 if (CONSTANT_P (XEXP (x, 0)))
9140 fputs ("%g0+", file);
9141 output_address (GET_MODE (x), XEXP (x, 0));
9142 fputc (']', file);
9143 }
9144 else if (GET_CODE (x) == HIGH)
9145 {
9146 fputs ("%hi(", file);
9147 output_addr_const (file, XEXP (x, 0));
9148 fputc (')', file);
9149 }
9150 else if (GET_CODE (x) == LO_SUM)
9151 {
9152 sparc_print_operand (file, XEXP (x, 0), 0);
9153 if (TARGET_CM_MEDMID)
9154 fputs ("+%l44(", file);
9155 else
9156 fputs ("+%lo(", file);
9157 output_addr_const (file, XEXP (x, 1));
9158 fputc (')', file);
9159 }
9160 else if (GET_CODE (x) == CONST_DOUBLE)
9161 output_operand_lossage ("floating-point constant not a valid immediate operand");
9162 else
9163 output_addr_const (file, x);
9164 }
9165
9166 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9167
9168 static void
9169 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9170 {
9171 register rtx base, index = 0;
9172 int offset = 0;
9173 register rtx addr = x;
9174
9175 if (REG_P (addr))
9176 fputs (reg_names[REGNO (addr)], file);
9177 else if (GET_CODE (addr) == PLUS)
9178 {
9179 if (CONST_INT_P (XEXP (addr, 0)))
9180 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9181 else if (CONST_INT_P (XEXP (addr, 1)))
9182 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9183 else
9184 base = XEXP (addr, 0), index = XEXP (addr, 1);
9185 if (GET_CODE (base) == LO_SUM)
9186 {
9187 gcc_assert (USE_AS_OFFSETABLE_LO10
9188 && TARGET_ARCH64
9189 && ! TARGET_CM_MEDMID);
9190 output_operand (XEXP (base, 0), 0);
9191 fputs ("+%lo(", file);
9192 output_address (VOIDmode, XEXP (base, 1));
9193 fprintf (file, ")+%d", offset);
9194 }
9195 else
9196 {
9197 fputs (reg_names[REGNO (base)], file);
9198 if (index == 0)
9199 fprintf (file, "%+d", offset);
9200 else if (REG_P (index))
9201 fprintf (file, "+%s", reg_names[REGNO (index)]);
9202 else if (GET_CODE (index) == SYMBOL_REF
9203 || GET_CODE (index) == LABEL_REF
9204 || GET_CODE (index) == CONST)
9205 fputc ('+', file), output_addr_const (file, index);
9206 else gcc_unreachable ();
9207 }
9208 }
9209 else if (GET_CODE (addr) == MINUS
9210 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9211 {
9212 output_addr_const (file, XEXP (addr, 0));
9213 fputs ("-(", file);
9214 output_addr_const (file, XEXP (addr, 1));
9215 fputs ("-.)", file);
9216 }
9217 else if (GET_CODE (addr) == LO_SUM)
9218 {
9219 output_operand (XEXP (addr, 0), 0);
9220 if (TARGET_CM_MEDMID)
9221 fputs ("+%l44(", file);
9222 else
9223 fputs ("+%lo(", file);
9224 output_address (VOIDmode, XEXP (addr, 1));
9225 fputc (')', file);
9226 }
9227 else if (flag_pic
9228 && GET_CODE (addr) == CONST
9229 && GET_CODE (XEXP (addr, 0)) == MINUS
9230 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9231 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9232 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9233 {
9234 addr = XEXP (addr, 0);
9235 output_addr_const (file, XEXP (addr, 0));
9236 /* Group the args of the second CONST in parentheses. */
9237 fputs ("-(", file);
9238 /* Skip past the second CONST--it does nothing for us. */
9239 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9240 /* Close the parenthesis. */
9241 fputc (')', file);
9242 }
9243 else
9244 {
9245 output_addr_const (file, addr);
9246 }
9247 }
9248 \f
9249 /* Target hook for assembling integer objects. The sparc version has
9250 special handling for aligned DI-mode objects. */
9251
9252 static bool
9253 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9254 {
9255 /* ??? We only output .xword's for symbols and only then in environments
9256 where the assembler can handle them. */
9257 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9258 {
9259 if (TARGET_V9)
9260 {
9261 assemble_integer_with_op ("\t.xword\t", x);
9262 return true;
9263 }
9264 else
9265 {
9266 assemble_aligned_integer (4, const0_rtx);
9267 assemble_aligned_integer (4, x);
9268 return true;
9269 }
9270 }
9271 return default_assemble_integer (x, size, aligned_p);
9272 }
9273 \f
9274 /* Return the value of a code used in the .proc pseudo-op that says
9275 what kind of result this function returns. For non-C types, we pick
9276 the closest C type. */
9277
9278 #ifndef SHORT_TYPE_SIZE
9279 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9280 #endif
9281
9282 #ifndef INT_TYPE_SIZE
9283 #define INT_TYPE_SIZE BITS_PER_WORD
9284 #endif
9285
9286 #ifndef LONG_TYPE_SIZE
9287 #define LONG_TYPE_SIZE BITS_PER_WORD
9288 #endif
9289
9290 #ifndef LONG_LONG_TYPE_SIZE
9291 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9292 #endif
9293
9294 #ifndef FLOAT_TYPE_SIZE
9295 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9296 #endif
9297
9298 #ifndef DOUBLE_TYPE_SIZE
9299 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9300 #endif
9301
9302 #ifndef LONG_DOUBLE_TYPE_SIZE
9303 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9304 #endif
9305
9306 unsigned long
9307 sparc_type_code (register tree type)
9308 {
9309 register unsigned long qualifiers = 0;
9310 register unsigned shift;
9311
9312 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9313 setting more, since some assemblers will give an error for this. Also,
9314 we must be careful to avoid shifts of 32 bits or more to avoid getting
9315 unpredictable results. */
9316
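  /* For example, a pointer to plain int yields (1 << 6) | 4: one
     POINTER_TYPE level encoded in the pair of bits at shift 6, plus the
     base code 4 for a signed int in the low bits.  */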
9317 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9318 {
9319 switch (TREE_CODE (type))
9320 {
9321 case ERROR_MARK:
9322 return qualifiers;
9323
9324 case ARRAY_TYPE:
9325 qualifiers |= (3 << shift);
9326 break;
9327
9328 case FUNCTION_TYPE:
9329 case METHOD_TYPE:
9330 qualifiers |= (2 << shift);
9331 break;
9332
9333 case POINTER_TYPE:
9334 case REFERENCE_TYPE:
9335 case OFFSET_TYPE:
9336 qualifiers |= (1 << shift);
9337 break;
9338
9339 case RECORD_TYPE:
9340 return (qualifiers | 8);
9341
9342 case UNION_TYPE:
9343 case QUAL_UNION_TYPE:
9344 return (qualifiers | 9);
9345
9346 case ENUMERAL_TYPE:
9347 return (qualifiers | 10);
9348
9349 case VOID_TYPE:
9350 return (qualifiers | 16);
9351
9352 case INTEGER_TYPE:
9353 /* If this is a range type, consider it to be the underlying
9354 type. */
9355 if (TREE_TYPE (type) != 0)
9356 break;
9357
9358 /* Carefully distinguish all the standard types of C,
9359 without messing up if the language is not C. We do this by
9360 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9361 look at both the names and the above fields, but that's redundant.
9362 Any type whose size is between two C types will be considered
9363 to be the wider of the two types. Also, we do not have a
9364 special code to use for "long long", so anything wider than
9365 long is treated the same. Note that we can't distinguish
9366 between "int" and "long" in this code if they are the same
9367 size, but that's fine, since neither can the assembler. */
9368
9369 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9370 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9371
9372 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9373 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9374
9375 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9376 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9377
9378 else
9379 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9380
9381 case REAL_TYPE:
9382 /* If this is a range type, consider it to be the underlying
9383 type. */
9384 if (TREE_TYPE (type) != 0)
9385 break;
9386
9387 /* Carefully distinguish all the standard types of C,
9388 without messing up if the language is not C. */
9389
9390 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9391 return (qualifiers | 6);
9392
9393 else
9394 return (qualifiers | 7);
9395
9396 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9397 /* ??? We need to distinguish between double and float complex types,
9398 but I don't know how yet because I can't reach this code from
9399 existing front-ends. */
9400 return (qualifiers | 7); /* Who knows? */
9401
9402 case VECTOR_TYPE:
9403 case BOOLEAN_TYPE: /* Boolean truth value type. */
9404 case LANG_TYPE:
9405 case NULLPTR_TYPE:
9406 return qualifiers;
9407
9408 default:
9409 gcc_unreachable (); /* Not a type! */
9410 }
9411 }
9412
9413 return qualifiers;
9414 }
9415 \f
9416 /* Nested function support. */
9417
9418 /* Emit RTL insns to initialize the variable parts of a trampoline.
9419 FNADDR is an RTX for the address of the function's pure code.
9420 CXT is an RTX for the static chain value for the function.
9421
9422 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9423 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9424 (to store insns). This is a bit excessive. Perhaps a different
9425 mechanism would be better here.
9426
9427 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9428
9429 static void
9430 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9431 {
9432 /* SPARC 32-bit trampoline:
9433
9434 sethi %hi(fn), %g1
9435 sethi %hi(static), %g2
9436 jmp %g1+%lo(fn)
9437 or %g2, %lo(static), %g2
9438
9439 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9440 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9441 */
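/* The constants below are the fixed opcode bits of the insns shown above:
   0x03000000 is "sethi 0, %g1" and 0x05000000 is "sethi 0, %g2", each ORed
   with the upper 22 bits of the corresponding address, while 0x81c06000
   ("jmp %g1 + 0") and 0x8410a000 ("or %g2, 0, %g2") are ORed with the low
   10 bits left over after the sethi. */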
9442
9443 emit_move_insn
9444 (adjust_address (m_tramp, SImode, 0),
9445 expand_binop (SImode, ior_optab,
9446 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9447 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9448 NULL_RTX, 1, OPTAB_DIRECT));
9449
9450 emit_move_insn
9451 (adjust_address (m_tramp, SImode, 4),
9452 expand_binop (SImode, ior_optab,
9453 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9454 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9455 NULL_RTX, 1, OPTAB_DIRECT));
9456
9457 emit_move_insn
9458 (adjust_address (m_tramp, SImode, 8),
9459 expand_binop (SImode, ior_optab,
9460 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9461 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9462 NULL_RTX, 1, OPTAB_DIRECT));
9463
9464 emit_move_insn
9465 (adjust_address (m_tramp, SImode, 12),
9466 expand_binop (SImode, ior_optab,
9467 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9468 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9469 NULL_RTX, 1, OPTAB_DIRECT));
9470
9471 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9472 aligned on a 16 byte boundary so one flush clears it all. */
9473 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9474 if (sparc_cpu != PROCESSOR_ULTRASPARC
9475 && sparc_cpu != PROCESSOR_ULTRASPARC3
9476 && sparc_cpu != PROCESSOR_NIAGARA
9477 && sparc_cpu != PROCESSOR_NIAGARA2
9478 && sparc_cpu != PROCESSOR_NIAGARA3
9479 && sparc_cpu != PROCESSOR_NIAGARA4
9480 && sparc_cpu != PROCESSOR_NIAGARA7)
9481 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9482
9483 /* Call __enable_execute_stack after writing onto the stack to make sure
9484 the stack address is accessible. */
9485 #ifdef HAVE_ENABLE_EXECUTE_STACK
9486 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9487 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9488 #endif
9489
9490 }
9491
9492 /* The 64-bit version is simpler because it makes more sense to load the
9493 values as "immediate" data out of the trampoline. It's also easier since
9494 we can read the PC without clobbering a register. */
9495
9496 static void
9497 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9498 {
9499 /* SPARC 64-bit trampoline:
9500
9501 rd %pc, %g1
9502 ldx [%g1+24], %g5
9503 jmp %g5
9504 ldx [%g1+16], %g5
9505 +16 bytes data
9506 */
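/* The four constants below are the encodings of the insns shown above:
   0x83414000 is "rd %pc, %g1", 0xca586018 is "ldx [%g1+24], %g5",
   0x81c14000 is "jmp %g5" (jmpl %g5, %g0) and 0xca586010 is
   "ldx [%g1+16], %g5"; the static chain and function address follow
   as literal data at offsets 16 and 24. */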
9507
9508 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9509 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9510 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9511 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9512 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9513 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9514 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9515 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9516 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9517 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9518 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9519
9520 if (sparc_cpu != PROCESSOR_ULTRASPARC
9521 && sparc_cpu != PROCESSOR_ULTRASPARC3
9522 && sparc_cpu != PROCESSOR_NIAGARA
9523 && sparc_cpu != PROCESSOR_NIAGARA2
9524 && sparc_cpu != PROCESSOR_NIAGARA3
9525 && sparc_cpu != PROCESSOR_NIAGARA4
9526 && sparc_cpu != PROCESSOR_NIAGARA7)
9527 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9528
9529 /* Call __enable_execute_stack after writing onto the stack to make sure
9530 the stack address is accessible. */
9531 #ifdef HAVE_ENABLE_EXECUTE_STACK
9532 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9533 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9534 #endif
9535 }
9536
9537 /* Worker for TARGET_TRAMPOLINE_INIT. */
9538
9539 static void
9540 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9541 {
9542 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9543 cxt = force_reg (Pmode, cxt);
9544 if (TARGET_ARCH64)
9545 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9546 else
9547 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9548 }
9549 \f
9550 /* Adjust the cost of a scheduling dependency. Return the new cost of
9551 the dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
9552
9553 static int
9554 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9555 int cost)
9556 {
9557 enum attr_type insn_type;
9558
9559 if (recog_memoized (insn) < 0)
9560 return cost;
9561
9562 insn_type = get_attr_type (insn);
9563
9564 if (dep_type == 0)
9565 {
9566 /* Data dependency; DEP_INSN writes a register that INSN reads some
9567 cycles later. */
9568
9569 /* if a load, then the dependence must be on the memory address;
9570 add an extra "cycle". Note that the cost could be two cycles
9571 if the reg was written late in an instruction group; we cannot tell
9572 here. */
9573 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9574 return cost + 3;
9575
9576 /* Get the delay only if the address of the store is the dependence. */
9577 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9578 {
9579 rtx pat = PATTERN (insn);
9580 rtx dep_pat = PATTERN (dep_insn);
9581
9582 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9583 return cost; /* This should not happen! */
9584
9585 /* The dependency between the two instructions was on the data that
9586 is being stored. Assume that this implies that the address of the
9587 store is not dependent. */
9588 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9589 return cost;
9590
9591 return cost + 3; /* An approximation. */
9592 }
9593
9594 /* A shift instruction cannot receive its data from an instruction
9595 in the same cycle; add a one cycle penalty. */
9596 if (insn_type == TYPE_SHIFT)
9597 return cost + 3; /* Split before cascade into shift. */
9598 }
9599 else
9600 {
9601 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9602 INSN writes some cycles later. */
9603
9604 /* These are only significant for the fpu unit; writing a fp reg before
9605 the fpu has finished with it stalls the processor. */
9606
9607 /* Reusing an integer register causes no problems. */
9608 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9609 return 0;
9610 }
9611
9612 return cost;
9613 }
9614
9615 static int
9616 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9617 int cost)
9618 {
9619 enum attr_type insn_type, dep_type;
9620 rtx pat = PATTERN (insn);
9621 rtx dep_pat = PATTERN (dep_insn);
9622
9623 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9624 return cost;
9625
9626 insn_type = get_attr_type (insn);
9627 dep_type = get_attr_type (dep_insn);
9628
9629 switch (dtype)
9630 {
9631 case 0:
9632 /* Data dependency; DEP_INSN writes a register that INSN reads some
9633 cycles later. */
9634
9635 switch (insn_type)
9636 {
9637 case TYPE_STORE:
9638 case TYPE_FPSTORE:
9639 /* Get the delay iff the address of the store is the dependence. */
9640 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9641 return cost;
9642
9643 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9644 return cost;
9645 return cost + 3;
9646
9647 case TYPE_LOAD:
9648 case TYPE_SLOAD:
9649 case TYPE_FPLOAD:
9650 /* If a load, then the dependence must be on the memory address. If
9651 the addresses aren't equal, then it might be a false dependency. */
9652 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9653 {
9654 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9655 || GET_CODE (SET_DEST (dep_pat)) != MEM
9656 || GET_CODE (SET_SRC (pat)) != MEM
9657 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9658 XEXP (SET_SRC (pat), 0)))
9659 return cost + 2;
9660
9661 return cost + 8;
9662 }
9663 break;
9664
9665 case TYPE_BRANCH:
9666 /* Compare to branch latency is 0. There is no benefit from
9667 separating compare and branch. */
9668 if (dep_type == TYPE_COMPARE)
9669 return 0;
9670 /* Floating point compare to branch latency is less than
9671 compare to conditional move. */
9672 if (dep_type == TYPE_FPCMP)
9673 return cost - 1;
9674 break;
9675 default:
9676 break;
9677 }
9678 break;
9679
9680 case REG_DEP_ANTI:
9681 /* Anti-dependencies only penalize the fpu unit. */
9682 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9683 return 0;
9684 break;
9685
9686 default:
9687 break;
9688 }
9689
9690 return cost;
9691 }
9692
9693 static int
9694 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9695 unsigned int)
9696 {
9697 switch (sparc_cpu)
9698 {
9699 case PROCESSOR_SUPERSPARC:
9700 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9701 break;
9702 case PROCESSOR_HYPERSPARC:
9703 case PROCESSOR_SPARCLITE86X:
9704 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9705 break;
9706 default:
9707 break;
9708 }
9709 return cost;
9710 }
9711
9712 static void
9713 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9714 int sched_verbose ATTRIBUTE_UNUSED,
9715 int max_ready ATTRIBUTE_UNUSED)
9716 {}
9717
9718 static int
9719 sparc_use_sched_lookahead (void)
9720 {
9721 if (sparc_cpu == PROCESSOR_NIAGARA
9722 || sparc_cpu == PROCESSOR_NIAGARA2
9723 || sparc_cpu == PROCESSOR_NIAGARA3)
9724 return 0;
9725 if (sparc_cpu == PROCESSOR_NIAGARA4
9726 || sparc_cpu == PROCESSOR_NIAGARA7)
9727 return 2;
9728 if (sparc_cpu == PROCESSOR_ULTRASPARC
9729 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9730 return 4;
9731 if ((1 << sparc_cpu) &
9732 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9733 (1 << PROCESSOR_SPARCLITE86X)))
9734 return 3;
9735 return 0;
9736 }
9737
9738 static int
9739 sparc_issue_rate (void)
9740 {
9741 switch (sparc_cpu)
9742 {
9743 case PROCESSOR_NIAGARA:
9744 case PROCESSOR_NIAGARA2:
9745 case PROCESSOR_NIAGARA3:
9746 default:
9747 return 1;
9748 case PROCESSOR_NIAGARA4:
9749 case PROCESSOR_NIAGARA7:
9750 case PROCESSOR_V9:
9751 /* Assume V9 processors are capable of at least dual-issue. */
9752 return 2;
9753 case PROCESSOR_SUPERSPARC:
9754 return 3;
9755 case PROCESSOR_HYPERSPARC:
9756 case PROCESSOR_SPARCLITE86X:
9757 return 2;
9758 case PROCESSOR_ULTRASPARC:
9759 case PROCESSOR_ULTRASPARC3:
9760 return 4;
9761 }
9762 }
9763
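/* Return 1 if the single SET in INSN is known to leave the upper 32 bits of
   its destination zero, -1 if it sign-extends an SImode value, and 0 if
   nothing is known about the upper bits. */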
9764 static int
9765 set_extends (rtx_insn *insn)
9766 {
9767 register rtx pat = PATTERN (insn);
9768
9769 switch (GET_CODE (SET_SRC (pat)))
9770 {
9771 /* Load and some shift instructions zero extend. */
9772 case MEM:
9773 case ZERO_EXTEND:
9774 /* sethi clears the high bits. */
9775 case HIGH:
9776 /* LO_SUM is used with sethi. sethi cleared the high
9777 bits and the values used with lo_sum are positive. */
9778 case LO_SUM:
9779 /* Store flag stores 0 or 1. */
9780 case LT: case LTU:
9781 case GT: case GTU:
9782 case LE: case LEU:
9783 case GE: case GEU:
9784 case EQ:
9785 case NE:
9786 return 1;
9787 case AND:
9788 {
9789 rtx op0 = XEXP (SET_SRC (pat), 0);
9790 rtx op1 = XEXP (SET_SRC (pat), 1);
9791 if (GET_CODE (op1) == CONST_INT)
9792 return INTVAL (op1) >= 0;
9793 if (GET_CODE (op0) != REG)
9794 return 0;
9795 if (sparc_check_64 (op0, insn) == 1)
9796 return 1;
9797 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9798 }
9799 case IOR:
9800 case XOR:
9801 {
9802 rtx op0 = XEXP (SET_SRC (pat), 0);
9803 rtx op1 = XEXP (SET_SRC (pat), 1);
9804 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9805 return 0;
9806 if (GET_CODE (op1) == CONST_INT)
9807 return INTVAL (op1) >= 0;
9808 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9809 }
9810 case LSHIFTRT:
9811 return GET_MODE (SET_SRC (pat)) == SImode;
9812 /* Positive integers leave the high bits zero. */
9813 case CONST_INT:
9814 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9815 case ASHIFTRT:
9816 case SIGN_EXTEND:
9817 return - (GET_MODE (SET_SRC (pat)) == SImode);
9818 case REG:
9819 return sparc_check_64 (SET_SRC (pat), insn);
9820 default:
9821 return 0;
9822 }
9823 }
9824
9825 /* We _ought_ to have only one kind per function, but... */
9826 static GTY(()) rtx sparc_addr_diff_list;
9827 static GTY(()) rtx sparc_addr_list;
9828
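/* Record the case vector VEC, whose label is LAB, on one of the two lists
   above so that sparc_output_deferred_case_vectors can emit it later.
   DIFF is nonzero for an address-difference vector. */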
9829 void
9830 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9831 {
9832 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9833 if (diff)
9834 sparc_addr_diff_list
9835 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9836 else
9837 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9838 }
9839
9840 static void
9841 sparc_output_addr_vec (rtx vec)
9842 {
9843 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9844 int idx, vlen = XVECLEN (body, 0);
9845
9846 #ifdef ASM_OUTPUT_ADDR_VEC_START
9847 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9848 #endif
9849
9850 #ifdef ASM_OUTPUT_CASE_LABEL
9851 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9852 NEXT_INSN (lab));
9853 #else
9854 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9855 #endif
9856
9857 for (idx = 0; idx < vlen; idx++)
9858 {
9859 ASM_OUTPUT_ADDR_VEC_ELT
9860 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9861 }
9862
9863 #ifdef ASM_OUTPUT_ADDR_VEC_END
9864 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9865 #endif
9866 }
9867
9868 static void
9869 sparc_output_addr_diff_vec (rtx vec)
9870 {
9871 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9872 rtx base = XEXP (XEXP (body, 0), 0);
9873 int idx, vlen = XVECLEN (body, 1);
9874
9875 #ifdef ASM_OUTPUT_ADDR_VEC_START
9876 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9877 #endif
9878
9879 #ifdef ASM_OUTPUT_CASE_LABEL
9880 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9881 NEXT_INSN (lab));
9882 #else
9883 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9884 #endif
9885
9886 for (idx = 0; idx < vlen; idx++)
9887 {
9888 ASM_OUTPUT_ADDR_DIFF_ELT
9889 (asm_out_file,
9890 body,
9891 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9892 CODE_LABEL_NUMBER (base));
9893 }
9894
9895 #ifdef ASM_OUTPUT_ADDR_VEC_END
9896 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9897 #endif
9898 }
9899
9900 static void
9901 sparc_output_deferred_case_vectors (void)
9902 {
9903 rtx t;
9904 int align;
9905
9906 if (sparc_addr_list == NULL_RTX
9907 && sparc_addr_diff_list == NULL_RTX)
9908 return;
9909
9910 /* Align to cache line in the function's code section. */
9911 switch_to_section (current_function_section ());
9912
9913 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9914 if (align > 0)
9915 ASM_OUTPUT_ALIGN (asm_out_file, align);
9916
9917 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9918 sparc_output_addr_vec (XEXP (t, 0));
9919 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9920 sparc_output_addr_diff_vec (XEXP (t, 0));
9921
9922 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9923 }
9924
9925 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9926 unknown. Return 1 if the high bits are zero, -1 if the register is
9927 sign extended. */
9928 int
9929 sparc_check_64 (rtx x, rtx_insn *insn)
9930 {
9931 /* If a register is set only once it is safe to ignore insns this
9932 code does not know how to handle. The loop will either recognize
9933 the single set and return the correct value or fail to recognize
9934 it and return 0. */
9935 int set_once = 0;
9936 rtx y = x;
9937
9938 gcc_assert (GET_CODE (x) == REG);
9939
9940 if (GET_MODE (x) == DImode)
9941 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9942
9943 if (flag_expensive_optimizations
9944 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9945 set_once = 1;
9946
9947 if (insn == 0)
9948 {
9949 if (set_once)
9950 insn = get_last_insn_anywhere ();
9951 else
9952 return 0;
9953 }
9954
9955 while ((insn = PREV_INSN (insn)))
9956 {
9957 switch (GET_CODE (insn))
9958 {
9959 case JUMP_INSN:
9960 case NOTE:
9961 break;
9962 case CODE_LABEL:
9963 case CALL_INSN:
9964 default:
9965 if (! set_once)
9966 return 0;
9967 break;
9968 case INSN:
9969 {
9970 rtx pat = PATTERN (insn);
9971 if (GET_CODE (pat) != SET)
9972 return 0;
9973 if (rtx_equal_p (x, SET_DEST (pat)))
9974 return set_extends (insn);
9975 if (y && rtx_equal_p (y, SET_DEST (pat)))
9976 return set_extends (insn);
9977 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9978 return 0;
9979 }
9980 }
9981 }
9982 return 0;
9983 }
9984
9985 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9986 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
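/* The 64-bit input is first assembled from the %H1/%L1 pair (or loaded as a
   constant) into a register able to hold all 64 bits (the destination itself
   or the separate scratch), the shift is performed there, and the result is
   split back into the %H0/%L0 pair with srlx. */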
9987
9988 const char *
9989 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9990 {
9991 static char asm_code[60];
9992
9993 /* The scratch register is only required when the destination
9994 register is not a 64-bit global or out register. */
9995 if (which_alternative != 2)
9996 operands[3] = operands[0];
9997
9998 /* We can only shift by constants <= 63. */
9999 if (GET_CODE (operands[2]) == CONST_INT)
10000 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10001
10002 if (GET_CODE (operands[1]) == CONST_INT)
10003 {
10004 output_asm_insn ("mov\t%1, %3", operands);
10005 }
10006 else
10007 {
10008 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10009 if (sparc_check_64 (operands[1], insn) <= 0)
10010 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10011 output_asm_insn ("or\t%L1, %3, %3", operands);
10012 }
10013
10014 strcpy (asm_code, opcode);
10015
10016 if (which_alternative != 2)
10017 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10018 else
10019 return
10020 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10021 }
10022 \f
10023 /* Output rtl to increment the profiler label LABELNO
10024 for profiling a function entry. */
10025
10026 void
10027 sparc_profile_hook (int labelno)
10028 {
10029 char buf[32];
10030 rtx lab, fun;
10031
10032 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10033 if (NO_PROFILE_COUNTERS)
10034 {
10035 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
10036 }
10037 else
10038 {
10039 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10040 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10041 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
10042 }
10043 }
10044 \f
10045 #ifdef TARGET_SOLARIS
10046 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
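/* For example, a hypothetical writable data section ".mydata" comes out as
       .section ".mydata",#alloc,#write,#progbits  */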
10047
10048 static void
10049 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10050 tree decl ATTRIBUTE_UNUSED)
10051 {
10052 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10053 {
10054 solaris_elf_asm_comdat_section (name, flags, decl);
10055 return;
10056 }
10057
10058 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10059
10060 if (!(flags & SECTION_DEBUG))
10061 fputs (",#alloc", asm_out_file);
10062 if (flags & SECTION_WRITE)
10063 fputs (",#write", asm_out_file);
10064 if (flags & SECTION_TLS)
10065 fputs (",#tls", asm_out_file);
10066 if (flags & SECTION_CODE)
10067 fputs (",#execinstr", asm_out_file);
10068
10069 if (flags & SECTION_NOTYPE)
10070 ;
10071 else if (flags & SECTION_BSS)
10072 fputs (",#nobits", asm_out_file);
10073 else
10074 fputs (",#progbits", asm_out_file);
10075
10076 fputc ('\n', asm_out_file);
10077 }
10078 #endif /* TARGET_SOLARIS */
10079
10080 /* We do not allow indirect calls to be optimized into sibling calls.
10081
10082 We cannot use sibling calls when delayed branches are disabled
10083 because they will likely require the call delay slot to be filled.
10084
10085 Also, on SPARC 32-bit we cannot emit a sibling call when the
10086 current function returns a structure. This is because the "unimp
10087 after call" convention would cause the callee to return to the
10088 wrong place. The generic code already disallows cases where the
10089 function being called returns a structure.
10090
10091 It may seem strange how this last case could occur. Usually there
10092 is code after the call which jumps to epilogue code which dumps the
10093 return value into the struct return area. That ought to invalidate
10094 the sibling call, right? Well, in the C++ case we can end up passing
10095 the pointer to the struct return area to a constructor (which returns
10096 void) and then nothing else happens. Such a sibling call would look
10097 valid without the added check here.
10098
10099 VxWorks PIC PLT entries require the global pointer to be initialized
10100 on entry. We therefore can't emit sibling calls to them. */
10101 static bool
10102 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10103 {
10104 return (decl
10105 && flag_delayed_branch
10106 && (TARGET_ARCH64 || ! cfun->returns_struct)
10107 && !(TARGET_VXWORKS_RTP
10108 && flag_pic
10109 && !targetm.binds_local_p (decl)));
10110 }
10111 \f
10112 /* libfunc renaming. */
10113
10114 static void
10115 sparc_init_libfuncs (void)
10116 {
10117 if (TARGET_ARCH32)
10118 {
10119 /* Use the subroutines that Sun's library provides for integer
10120 multiply and divide. The `*' prevents an underscore from
10121 being prepended by the compiler. .umul is a little faster
10122 than .mul. */
10123 set_optab_libfunc (smul_optab, SImode, "*.umul");
10124 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10125 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10126 set_optab_libfunc (smod_optab, SImode, "*.rem");
10127 set_optab_libfunc (umod_optab, SImode, "*.urem");
10128
10129 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10130 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10131 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10132 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10133 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10134 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10135
10136 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10137 is because with soft-float, the SFmode and DFmode sqrt
10138 instructions will be absent, and the compiler will notice and
10139 try to use the TFmode sqrt instruction for calls to the
10140 builtin function sqrt, but this fails. */
10141 if (TARGET_FPU)
10142 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10143
10144 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10145 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10146 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10147 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10148 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10149 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10150
10151 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10152 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10153 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10154 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10155
10156 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10157 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10158 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10159 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10160
10161 if (DITF_CONVERSION_LIBFUNCS)
10162 {
10163 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10164 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10165 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10166 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10167 }
10168
10169 if (SUN_CONVERSION_LIBFUNCS)
10170 {
10171 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10172 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10173 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10174 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10175 }
10176 }
10177 if (TARGET_ARCH64)
10178 {
10179 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10180 do not exist in the library. Make sure the compiler does not
10181 emit calls to them by accident. (It should always use the
10182 hardware instructions.) */
10183 set_optab_libfunc (smul_optab, SImode, 0);
10184 set_optab_libfunc (sdiv_optab, SImode, 0);
10185 set_optab_libfunc (udiv_optab, SImode, 0);
10186 set_optab_libfunc (smod_optab, SImode, 0);
10187 set_optab_libfunc (umod_optab, SImode, 0);
10188
10189 if (SUN_INTEGER_MULTIPLY_64)
10190 {
10191 set_optab_libfunc (smul_optab, DImode, "__mul64");
10192 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10193 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10194 set_optab_libfunc (smod_optab, DImode, "__rem64");
10195 set_optab_libfunc (umod_optab, DImode, "__urem64");
10196 }
10197
10198 if (SUN_CONVERSION_LIBFUNCS)
10199 {
10200 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10201 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10202 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10203 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10204 }
10205 }
10206 }
10207 \f
10208 /* SPARC builtins. */
10209 enum sparc_builtins
10210 {
10211 /* FPU builtins. */
10212 SPARC_BUILTIN_LDFSR,
10213 SPARC_BUILTIN_STFSR,
10214
10215 /* VIS 1.0 builtins. */
10216 SPARC_BUILTIN_FPACK16,
10217 SPARC_BUILTIN_FPACK32,
10218 SPARC_BUILTIN_FPACKFIX,
10219 SPARC_BUILTIN_FEXPAND,
10220 SPARC_BUILTIN_FPMERGE,
10221 SPARC_BUILTIN_FMUL8X16,
10222 SPARC_BUILTIN_FMUL8X16AU,
10223 SPARC_BUILTIN_FMUL8X16AL,
10224 SPARC_BUILTIN_FMUL8SUX16,
10225 SPARC_BUILTIN_FMUL8ULX16,
10226 SPARC_BUILTIN_FMULD8SUX16,
10227 SPARC_BUILTIN_FMULD8ULX16,
10228 SPARC_BUILTIN_FALIGNDATAV4HI,
10229 SPARC_BUILTIN_FALIGNDATAV8QI,
10230 SPARC_BUILTIN_FALIGNDATAV2SI,
10231 SPARC_BUILTIN_FALIGNDATADI,
10232 SPARC_BUILTIN_WRGSR,
10233 SPARC_BUILTIN_RDGSR,
10234 SPARC_BUILTIN_ALIGNADDR,
10235 SPARC_BUILTIN_ALIGNADDRL,
10236 SPARC_BUILTIN_PDIST,
10237 SPARC_BUILTIN_EDGE8,
10238 SPARC_BUILTIN_EDGE8L,
10239 SPARC_BUILTIN_EDGE16,
10240 SPARC_BUILTIN_EDGE16L,
10241 SPARC_BUILTIN_EDGE32,
10242 SPARC_BUILTIN_EDGE32L,
10243 SPARC_BUILTIN_FCMPLE16,
10244 SPARC_BUILTIN_FCMPLE32,
10245 SPARC_BUILTIN_FCMPNE16,
10246 SPARC_BUILTIN_FCMPNE32,
10247 SPARC_BUILTIN_FCMPGT16,
10248 SPARC_BUILTIN_FCMPGT32,
10249 SPARC_BUILTIN_FCMPEQ16,
10250 SPARC_BUILTIN_FCMPEQ32,
10251 SPARC_BUILTIN_FPADD16,
10252 SPARC_BUILTIN_FPADD16S,
10253 SPARC_BUILTIN_FPADD32,
10254 SPARC_BUILTIN_FPADD32S,
10255 SPARC_BUILTIN_FPSUB16,
10256 SPARC_BUILTIN_FPSUB16S,
10257 SPARC_BUILTIN_FPSUB32,
10258 SPARC_BUILTIN_FPSUB32S,
10259 SPARC_BUILTIN_ARRAY8,
10260 SPARC_BUILTIN_ARRAY16,
10261 SPARC_BUILTIN_ARRAY32,
10262
10263 /* VIS 2.0 builtins. */
10264 SPARC_BUILTIN_EDGE8N,
10265 SPARC_BUILTIN_EDGE8LN,
10266 SPARC_BUILTIN_EDGE16N,
10267 SPARC_BUILTIN_EDGE16LN,
10268 SPARC_BUILTIN_EDGE32N,
10269 SPARC_BUILTIN_EDGE32LN,
10270 SPARC_BUILTIN_BMASK,
10271 SPARC_BUILTIN_BSHUFFLEV4HI,
10272 SPARC_BUILTIN_BSHUFFLEV8QI,
10273 SPARC_BUILTIN_BSHUFFLEV2SI,
10274 SPARC_BUILTIN_BSHUFFLEDI,
10275
10276 /* VIS 3.0 builtins. */
10277 SPARC_BUILTIN_CMASK8,
10278 SPARC_BUILTIN_CMASK16,
10279 SPARC_BUILTIN_CMASK32,
10280 SPARC_BUILTIN_FCHKSM16,
10281 SPARC_BUILTIN_FSLL16,
10282 SPARC_BUILTIN_FSLAS16,
10283 SPARC_BUILTIN_FSRL16,
10284 SPARC_BUILTIN_FSRA16,
10285 SPARC_BUILTIN_FSLL32,
10286 SPARC_BUILTIN_FSLAS32,
10287 SPARC_BUILTIN_FSRL32,
10288 SPARC_BUILTIN_FSRA32,
10289 SPARC_BUILTIN_PDISTN,
10290 SPARC_BUILTIN_FMEAN16,
10291 SPARC_BUILTIN_FPADD64,
10292 SPARC_BUILTIN_FPSUB64,
10293 SPARC_BUILTIN_FPADDS16,
10294 SPARC_BUILTIN_FPADDS16S,
10295 SPARC_BUILTIN_FPSUBS16,
10296 SPARC_BUILTIN_FPSUBS16S,
10297 SPARC_BUILTIN_FPADDS32,
10298 SPARC_BUILTIN_FPADDS32S,
10299 SPARC_BUILTIN_FPSUBS32,
10300 SPARC_BUILTIN_FPSUBS32S,
10301 SPARC_BUILTIN_FUCMPLE8,
10302 SPARC_BUILTIN_FUCMPNE8,
10303 SPARC_BUILTIN_FUCMPGT8,
10304 SPARC_BUILTIN_FUCMPEQ8,
10305 SPARC_BUILTIN_FHADDS,
10306 SPARC_BUILTIN_FHADDD,
10307 SPARC_BUILTIN_FHSUBS,
10308 SPARC_BUILTIN_FHSUBD,
10309 SPARC_BUILTIN_FNHADDS,
10310 SPARC_BUILTIN_FNHADDD,
10311 SPARC_BUILTIN_UMULXHI,
10312 SPARC_BUILTIN_XMULX,
10313 SPARC_BUILTIN_XMULXHI,
10314
10315 /* VIS 4.0 builtins. */
10316 SPARC_BUILTIN_FPADD8,
10317 SPARC_BUILTIN_FPADDS8,
10318 SPARC_BUILTIN_FPADDUS8,
10319 SPARC_BUILTIN_FPADDUS16,
10320 SPARC_BUILTIN_FPCMPLE8,
10321 SPARC_BUILTIN_FPCMPGT8,
10322 SPARC_BUILTIN_FPCMPULE16,
10323 SPARC_BUILTIN_FPCMPUGT16,
10324 SPARC_BUILTIN_FPCMPULE32,
10325 SPARC_BUILTIN_FPCMPUGT32,
10326 SPARC_BUILTIN_FPMAX8,
10327 SPARC_BUILTIN_FPMAX16,
10328 SPARC_BUILTIN_FPMAX32,
10329 SPARC_BUILTIN_FPMAXU8,
10330 SPARC_BUILTIN_FPMAXU16,
10331 SPARC_BUILTIN_FPMAXU32,
10332 SPARC_BUILTIN_FPMIN8,
10333 SPARC_BUILTIN_FPMIN16,
10334 SPARC_BUILTIN_FPMIN32,
10335 SPARC_BUILTIN_FPMINU8,
10336 SPARC_BUILTIN_FPMINU16,
10337 SPARC_BUILTIN_FPMINU32,
10338 SPARC_BUILTIN_FPSUB8,
10339 SPARC_BUILTIN_FPSUBS8,
10340 SPARC_BUILTIN_FPSUBUS8,
10341 SPARC_BUILTIN_FPSUBUS16,
10342
10343 SPARC_BUILTIN_MAX
10344 };
10345
10346 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10347 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10348
10349 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10350 function decl or NULL_TREE if the builtin was not added. */
10351
10352 static tree
10353 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10354 tree type)
10355 {
10356 tree t
10357 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10358
10359 if (t)
10360 {
10361 sparc_builtins[code] = t;
10362 sparc_builtins_icode[code] = icode;
10363 }
10364
10365 return t;
10366 }
10367
10368 /* Likewise, but also marks the function as "const". */
10369
10370 static tree
10371 def_builtin_const (const char *name, enum insn_code icode,
10372 enum sparc_builtins code, tree type)
10373 {
10374 tree t = def_builtin (name, icode, code, type);
10375
10376 if (t)
10377 TREE_READONLY (t) = 1;
10378
10379 return t;
10380 }
10381
10382 /* Implement the TARGET_INIT_BUILTINS target hook.
10383 Create builtin functions for special SPARC instructions. */
10384
10385 static void
10386 sparc_init_builtins (void)
10387 {
10388 if (TARGET_FPU)
10389 sparc_fpu_init_builtins ();
10390
10391 if (TARGET_VIS)
10392 sparc_vis_init_builtins ();
10393 }
10394
10395 /* Create builtin functions for FPU instructions. */
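/* __builtin_load_fsr reloads the %fsr register from the word its argument
   points to and __builtin_store_fsr stores %fsr into that word; they wrap
   the ldfsr and stfsr instructions. */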
10396
10397 static void
10398 sparc_fpu_init_builtins (void)
10399 {
10400 tree ftype
10401 = build_function_type_list (void_type_node,
10402 build_pointer_type (unsigned_type_node), 0);
10403 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10404 SPARC_BUILTIN_LDFSR, ftype);
10405 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10406 SPARC_BUILTIN_STFSR, ftype);
10407 }
10408
10409 /* Create builtin functions for VIS instructions. */
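/* Several of the insns used here exist in both an SImode and a DImode form;
   TARGET_ARCH64 selects the DImode variant together with the matching
   function type below. */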
10410
10411 static void
10412 sparc_vis_init_builtins (void)
10413 {
10414 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10415 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10416 tree v4hi = build_vector_type (intHI_type_node, 4);
10417 tree v2hi = build_vector_type (intHI_type_node, 2);
10418 tree v2si = build_vector_type (intSI_type_node, 2);
10419 tree v1si = build_vector_type (intSI_type_node, 1);
10420
10421 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10422 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10423 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10424 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10425 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10426 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10427 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10428 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10429 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10430 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10431 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10432 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10433 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10434 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10435 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10436 v8qi, v8qi,
10437 intDI_type_node, 0);
10438 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10439 v8qi, v8qi, 0);
10440 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10441 v8qi, v8qi, 0);
10442 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10443 intDI_type_node,
10444 intDI_type_node, 0);
10445 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10446 intSI_type_node,
10447 intSI_type_node, 0);
10448 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10449 ptr_type_node,
10450 intSI_type_node, 0);
10451 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10452 ptr_type_node,
10453 intDI_type_node, 0);
10454 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10455 ptr_type_node,
10456 ptr_type_node, 0);
10457 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10458 ptr_type_node,
10459 ptr_type_node, 0);
10460 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10461 v4hi, v4hi, 0);
10462 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10463 v2si, v2si, 0);
10464 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10465 v4hi, v4hi, 0);
10466 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10467 v2si, v2si, 0);
10468 tree void_ftype_di = build_function_type_list (void_type_node,
10469 intDI_type_node, 0);
10470 tree di_ftype_void = build_function_type_list (intDI_type_node,
10471 void_type_node, 0);
10472 tree void_ftype_si = build_function_type_list (void_type_node,
10473 intSI_type_node, 0);
10474 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10475 float_type_node,
10476 float_type_node, 0);
10477 tree df_ftype_df_df = build_function_type_list (double_type_node,
10478 double_type_node,
10479 double_type_node, 0);
10480
10481 /* Packing and expanding vectors. */
10482 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10483 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10484 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10485 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10486 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10487 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10488 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10489 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10490 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10491 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10492
10493 /* Multiplications. */
10494 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10495 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10496 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10497 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10498 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10499 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10500 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10501 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10502 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10503 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10504 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10505 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10506 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10507 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10508
10509 /* Data aligning. */
10510 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10511 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10512 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10513 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10514 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10515 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10516 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10517 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10518
10519 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10520 SPARC_BUILTIN_WRGSR, void_ftype_di);
10521 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10522 SPARC_BUILTIN_RDGSR, di_ftype_void);
10523
10524 if (TARGET_ARCH64)
10525 {
10526 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10527 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10528 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10529 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10530 }
10531 else
10532 {
10533 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10534 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10535 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10536 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10537 }
10538
10539 /* Pixel distance. */
10540 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10541 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10542
10543 /* Edge handling. */
10544 if (TARGET_ARCH64)
10545 {
10546 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10547 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10548 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10549 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10550 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10551 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10552 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10553 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10554 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10555 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10556 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10557 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10558 }
10559 else
10560 {
10561 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10562 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10563 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10564 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10565 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10566 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10567 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10568 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10569 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10570 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10571 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10572 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10573 }
10574
10575 /* Pixel compare. */
10576 if (TARGET_ARCH64)
10577 {
10578 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10579 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10580 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10581 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10582 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10583 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10584 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10585 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10586 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10587 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10588 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10589 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10590 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10591 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10592 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10593 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10594 }
10595 else
10596 {
10597 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10598 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10599 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10600 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10601 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10602 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10603 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10604 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10605 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10606 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10607 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10608 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10609 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10610 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10611 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10612 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10613 }
10614
10615 /* Addition and subtraction. */
10616 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10617 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10618 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10619 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10620 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10621 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10622 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10623 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10624 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10625 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10626 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10627 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10628 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10629 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10630 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10631 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10632
10633 /* Three-dimensional array addressing. */
10634 if (TARGET_ARCH64)
10635 {
10636 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10637 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10638 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10639 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10640 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10641 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10642 }
10643 else
10644 {
10645 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10646 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10647 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10648 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10649 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10650 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10651 }
10652
10653 if (TARGET_VIS2)
10654 {
10655 /* Edge handling. */
10656 if (TARGET_ARCH64)
10657 {
10658 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10659 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10660 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10661 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10662 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10663 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10664 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10665 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10666 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10667 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10668 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10669 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10670 }
10671 else
10672 {
10673 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10674 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10675 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10676 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10677 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10678 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10679 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10680 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10681 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10682 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10683 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10684 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10685 }
10686
10687 /* Byte mask and shuffle. */
10688 if (TARGET_ARCH64)
10689 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10690 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10691 else
10692 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10693 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10694 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10695 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10696 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10697 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10698 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10699 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10700 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10701 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10702 }
10703
10704 if (TARGET_VIS3)
10705 {
10706 if (TARGET_ARCH64)
10707 {
10708 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10709 SPARC_BUILTIN_CMASK8, void_ftype_di);
10710 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10711 SPARC_BUILTIN_CMASK16, void_ftype_di);
10712 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10713 SPARC_BUILTIN_CMASK32, void_ftype_di);
10714 }
10715 else
10716 {
10717 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10718 SPARC_BUILTIN_CMASK8, void_ftype_si);
10719 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10720 SPARC_BUILTIN_CMASK16, void_ftype_si);
10721 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10722 SPARC_BUILTIN_CMASK32, void_ftype_si);
10723 }
10724
10725 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10726 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10727
10728 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10729 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10730 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10731 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10732 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10733 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10734 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10735 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10736 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10737 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10738 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10739 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10740 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10741 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10742 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10743 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10744
10745 if (TARGET_ARCH64)
10746 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10747 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10748 else
10749 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10750 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10751
10752 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10753 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10754 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10755 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10756 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10757 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10758
10759 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10760 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10761 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10762 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10763 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10764 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10765 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10766 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10767 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10768 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10769 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10770 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10771 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10772 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10773 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10774 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10775
10776 if (TARGET_ARCH64)
10777 {
10778 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10779 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10780 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10781 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10782 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10783 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10784 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10785 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10786 }
10787 else
10788 {
10789 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10790 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10791 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10792 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10793 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10794 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10795 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10796 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10797 }
10798
10799 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10800 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10801 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10802 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10803 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10804 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10805 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10806 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10807 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10808 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10809 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10810 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10811
10812 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10813 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10814 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10815 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10816 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10817 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10818 }
10819
10820 if (TARGET_VIS4)
10821 {
10822 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10823 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10824 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10825 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10826 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10827 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10828 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10829 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10830
10831
10832 if (TARGET_ARCH64)
10833 {
10834 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10835 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10836 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10837 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10838 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10839 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10840 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10841 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10842 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10843 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10844 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10845 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10846 }
10847 else
10848 {
10849 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10850 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10851 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10852 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10853 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10854 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10855 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10856 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10857 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10858 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10859 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10860 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10861 }
10862
10863 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10864 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10865 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10866 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10867 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10868 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10869 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10870 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10871 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10872 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10873 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10874 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10875 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10876 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10877 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10878 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10879 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10880 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10881 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10882 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10883 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10884 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10885 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10886 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10887 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10888 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10889 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10890 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10891 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10892 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10893 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10894 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10895 }
10896 }
10897
10898 /* Implement TARGET_BUILTIN_DECL hook. */
10899
10900 static tree
10901 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10902 {
10903 if (code >= SPARC_BUILTIN_MAX)
10904 return error_mark_node;
10905
10906 return sparc_builtins[code];
10907 }
10908
10909 /* Implement TARGET_EXPAND_BUILTIN hook. */
10910
10911 static rtx
10912 sparc_expand_builtin (tree exp, rtx target,
10913 rtx subtarget ATTRIBUTE_UNUSED,
10914 machine_mode tmode ATTRIBUTE_UNUSED,
10915 int ignore ATTRIBUTE_UNUSED)
10916 {
10917 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10918 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10919 enum insn_code icode = sparc_builtins_icode[code];
10920 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10921 call_expr_arg_iterator iter;
10922 int arg_count = 0;
10923 rtx pat, op[4];
10924 tree arg;
10925
10926 if (nonvoid)
10927 {
10928 machine_mode tmode = insn_data[icode].operand[0].mode;
10929 if (!target
10930 || GET_MODE (target) != tmode
10931 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10932 op[0] = gen_reg_rtx (tmode);
10933 else
10934 op[0] = target;
10935 }
10936
10937 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10938 {
10939 const struct insn_operand_data *insn_op;
10940 int idx;
10941
10942 if (arg == error_mark_node)
10943 return NULL_RTX;
10944
10945 arg_count++;
10946 idx = arg_count - !nonvoid;
10947 insn_op = &insn_data[icode].operand[idx];
10948 op[arg_count] = expand_normal (arg);
10949
10950 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10951 {
10952 if (!address_operand (op[arg_count], SImode))
10953 {
10954 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10955 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10956 }
10957 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10958 }
10959
10960 else if (insn_op->mode == V1DImode
10961 && GET_MODE (op[arg_count]) == DImode)
10962 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10963
10964 else if (insn_op->mode == V1SImode
10965 && GET_MODE (op[arg_count]) == SImode)
10966 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10967
10968 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10969 insn_op->mode))
10970 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10971 }
10972
10973 switch (arg_count)
10974 {
10975 case 0:
10976 pat = GEN_FCN (icode) (op[0]);
10977 break;
10978 case 1:
10979 if (nonvoid)
10980 pat = GEN_FCN (icode) (op[0], op[1]);
10981 else
10982 pat = GEN_FCN (icode) (op[1]);
10983 break;
10984 case 2:
10985 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10986 break;
10987 case 3:
10988 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10989 break;
10990 default:
10991 gcc_unreachable ();
10992 }
10993
10994 if (!pat)
10995 return NULL_RTX;
10996
10997 emit_insn (pat);
10998
10999 return (nonvoid ? op[0] : const0_rtx);
11000 }
11001
11002 /* Return the upper 16 bits of the 8x16 multiplication. */
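/* Worked example (descriptive note): e8 = 77 and e16 = 1000 gives
   (77 * 1000 + 128) / 256 = 301, i.e. 77000 / 256 = 300.8 rounded to the
   nearest integer, which is the fixed-point product kept by fmul8x16.  */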
11003
11004 static int
11005 sparc_vis_mul8x16 (int e8, int e16)
11006 {
11007 return (e8 * e16 + 128) / 256;
11008 }
11009
11010 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11011 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11012
11013 static void
11014 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
11015 tree inner_type, tree cst0, tree cst1)
11016 {
11017 unsigned i, num = VECTOR_CST_NELTS (cst0);
11018 int scale;
11019
11020 switch (fncode)
11021 {
11022 case SPARC_BUILTIN_FMUL8X16:
11023 for (i = 0; i < num; ++i)
11024 {
11025 int val
11026 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11027 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11028 n_elts[i] = build_int_cst (inner_type, val);
11029 }
11030 break;
11031
11032 case SPARC_BUILTIN_FMUL8X16AU:
11033 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11034
11035 for (i = 0; i < num; ++i)
11036 {
11037 int val
11038 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11039 scale);
11040 n_elts[i] = build_int_cst (inner_type, val);
11041 }
11042 break;
11043
11044 case SPARC_BUILTIN_FMUL8X16AL:
11045 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11046
11047 for (i = 0; i < num; ++i)
11048 {
11049 int val
11050 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11051 scale);
11052 n_elts[i] = build_int_cst (inner_type, val);
11053 }
11054 break;
11055
11056 default:
11057 gcc_unreachable ();
11058 }
11059 }
11060
11061 /* Implement TARGET_FOLD_BUILTIN hook.
11062
11063 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11064 result of the function call is ignored. NULL_TREE is returned if the
11065 function could not be folded. */
11066
11067 static tree
11068 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11069 tree *args, bool ignore)
11070 {
11071 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11072 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11073 tree arg0, arg1, arg2;
11074
11075 if (ignore)
11076 switch (code)
11077 {
11078 case SPARC_BUILTIN_LDFSR:
11079 case SPARC_BUILTIN_STFSR:
11080 case SPARC_BUILTIN_ALIGNADDR:
11081 case SPARC_BUILTIN_WRGSR:
11082 case SPARC_BUILTIN_BMASK:
11083 case SPARC_BUILTIN_CMASK8:
11084 case SPARC_BUILTIN_CMASK16:
11085 case SPARC_BUILTIN_CMASK32:
11086 break;
11087
11088 default:
11089 return build_zero_cst (rtype);
11090 }
11091
11092 switch (code)
11093 {
11094 case SPARC_BUILTIN_FEXPAND:
11095 arg0 = args[0];
11096 STRIP_NOPS (arg0);
11097
11098 if (TREE_CODE (arg0) == VECTOR_CST)
11099 {
11100 tree inner_type = TREE_TYPE (rtype);
11101 tree *n_elts;
11102 unsigned i;
11103
11104 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11105 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11106 n_elts[i] = build_int_cst (inner_type,
11107 TREE_INT_CST_LOW
11108 (VECTOR_CST_ELT (arg0, i)) << 4);
11109 return build_vector (rtype, n_elts);
11110 }
11111 break;
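      /* E.g. a constant v4qi vector {1, 2, 3, 4} folds to the v4hi vector
	 {16, 32, 48, 64}: FEXPAND places each 8-bit value in bits 4..11 of a
	 16-bit lane, hence the shift by 4 above (descriptive note).  */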
11112
11113 case SPARC_BUILTIN_FMUL8X16:
11114 case SPARC_BUILTIN_FMUL8X16AU:
11115 case SPARC_BUILTIN_FMUL8X16AL:
11116 arg0 = args[0];
11117 arg1 = args[1];
11118 STRIP_NOPS (arg0);
11119 STRIP_NOPS (arg1);
11120
11121 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11122 {
11123 tree inner_type = TREE_TYPE (rtype);
11124 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11125 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11126 return build_vector (rtype, n_elts);
11127 }
11128 break;
11129
11130 case SPARC_BUILTIN_FPMERGE:
11131 arg0 = args[0];
11132 arg1 = args[1];
11133 STRIP_NOPS (arg0);
11134 STRIP_NOPS (arg1);
11135
11136 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11137 {
11138 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11139 unsigned i;
11140 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11141 {
11142 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11143 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11144 }
11145
11146 return build_vector (rtype, n_elts);
11147 }
11148 break;
11149
11150 case SPARC_BUILTIN_PDIST:
11151 case SPARC_BUILTIN_PDISTN:
11152 arg0 = args[0];
11153 arg1 = args[1];
11154 STRIP_NOPS (arg0);
11155 STRIP_NOPS (arg1);
11156 if (code == SPARC_BUILTIN_PDIST)
11157 {
11158 arg2 = args[2];
11159 STRIP_NOPS (arg2);
11160 }
11161 else
11162 arg2 = integer_zero_node;
11163
11164 if (TREE_CODE (arg0) == VECTOR_CST
11165 && TREE_CODE (arg1) == VECTOR_CST
11166 && TREE_CODE (arg2) == INTEGER_CST)
11167 {
11168 bool overflow = false;
11169 widest_int result = wi::to_widest (arg2);
11170 widest_int tmp;
11171 unsigned i;
11172
11173 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11174 {
11175 tree e0 = VECTOR_CST_ELT (arg0, i);
11176 tree e1 = VECTOR_CST_ELT (arg1, i);
11177
11178 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11179
11180 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11181 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11182 if (wi::neg_p (tmp))
11183 tmp = wi::neg (tmp, &neg2_ovf);
11184 else
11185 neg2_ovf = false;
11186 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11187 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11188 }
11189
11190 gcc_assert (!overflow);
11191
11192 return wide_int_to_tree (rtype, result);
11193 }
11194
11195 default:
11196 break;
11197 }
11198
11199 return NULL_TREE;
11200 }
11201 \f
11202 /* ??? This duplicates information provided to the compiler by the
11203 ??? scheduler description. Some day, teach genautomata to output
11204 ??? the latencies and then CSE will just use that. */
11205
11206 static bool
11207 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11208 int opno ATTRIBUTE_UNUSED,
11209 int *total, bool speed ATTRIBUTE_UNUSED)
11210 {
11211 int code = GET_CODE (x);
11212 bool float_mode_p = FLOAT_MODE_P (mode);
11213
11214 switch (code)
11215 {
11216 case CONST_INT:
11217 if (SMALL_INT (x))
11218 *total = 0;
11219 else
11220 *total = 2;
11221 return true;
11222
11223 case CONST_WIDE_INT:
11224 *total = 0;
11225 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11226 *total += 2;
11227 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11228 *total += 2;
11229 return true;
11230
11231 case HIGH:
11232 *total = 2;
11233 return true;
11234
11235 case CONST:
11236 case LABEL_REF:
11237 case SYMBOL_REF:
11238 *total = 4;
11239 return true;
11240
11241 case CONST_DOUBLE:
11242 *total = 8;
11243 return true;
11244
11245 case MEM:
11246 /* If outer-code was a sign or zero extension, a cost
11247 of COSTS_N_INSNS (1) was already added in. This is
11248 why we are subtracting it back out. */
11249 if (outer_code == ZERO_EXTEND)
11250 {
11251 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11252 }
11253 else if (outer_code == SIGN_EXTEND)
11254 {
11255 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11256 }
11257 else if (float_mode_p)
11258 {
11259 *total = sparc_costs->float_load;
11260 }
11261 else
11262 {
11263 *total = sparc_costs->int_load;
11264 }
11265
11266 return true;
11267
11268 case PLUS:
11269 case MINUS:
11270 if (float_mode_p)
11271 *total = sparc_costs->float_plusminus;
11272 else
11273 *total = COSTS_N_INSNS (1);
11274 return false;
11275
11276 case FMA:
11277 {
11278 rtx sub;
11279
11280 gcc_assert (float_mode_p);
11281 *total = sparc_costs->float_mul;
11282
11283 sub = XEXP (x, 0);
11284 if (GET_CODE (sub) == NEG)
11285 sub = XEXP (sub, 0);
11286 *total += rtx_cost (sub, mode, FMA, 0, speed);
11287
11288 sub = XEXP (x, 2);
11289 if (GET_CODE (sub) == NEG)
11290 sub = XEXP (sub, 0);
11291 *total += rtx_cost (sub, mode, FMA, 2, speed);
11292 return true;
11293 }
11294
11295 case MULT:
11296 if (float_mode_p)
11297 *total = sparc_costs->float_mul;
11298 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11299 *total = COSTS_N_INSNS (25);
11300 else
11301 {
11302 int bit_cost;
11303
11304 bit_cost = 0;
11305 if (sparc_costs->int_mul_bit_factor)
11306 {
11307 int nbits;
11308
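	    /* Count the set bits of a constant multiplier with the
	       value &= value - 1 trick; assume 7 bits for a non-constant
	       operand (descriptive note).  */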
11309 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11310 {
11311 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11312 for (nbits = 0; value != 0; value &= value - 1)
11313 nbits++;
11314 }
11315 else
11316 nbits = 7;
11317
11318 if (nbits < 3)
11319 nbits = 3;
11320 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11321 bit_cost = COSTS_N_INSNS (bit_cost);
11322 }
11323
11324 if (mode == DImode || !TARGET_HARD_MUL)
11325 *total = sparc_costs->int_mulX + bit_cost;
11326 else
11327 *total = sparc_costs->int_mul + bit_cost;
11328 }
11329 return false;
11330
11331 case ASHIFT:
11332 case ASHIFTRT:
11333 case LSHIFTRT:
11334 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11335 return false;
11336
11337 case DIV:
11338 case UDIV:
11339 case MOD:
11340 case UMOD:
11341 if (float_mode_p)
11342 {
11343 if (mode == DFmode)
11344 *total = sparc_costs->float_div_df;
11345 else
11346 *total = sparc_costs->float_div_sf;
11347 }
11348 else
11349 {
11350 if (mode == DImode)
11351 *total = sparc_costs->int_divX;
11352 else
11353 *total = sparc_costs->int_div;
11354 }
11355 return false;
11356
11357 case NEG:
11358 if (! float_mode_p)
11359 {
11360 *total = COSTS_N_INSNS (1);
11361 return false;
11362 }
11363 /* FALLTHRU */
11364
11365 case ABS:
11366 case FLOAT:
11367 case UNSIGNED_FLOAT:
11368 case FIX:
11369 case UNSIGNED_FIX:
11370 case FLOAT_EXTEND:
11371 case FLOAT_TRUNCATE:
11372 *total = sparc_costs->float_move;
11373 return false;
11374
11375 case SQRT:
11376 if (mode == DFmode)
11377 *total = sparc_costs->float_sqrt_df;
11378 else
11379 *total = sparc_costs->float_sqrt_sf;
11380 return false;
11381
11382 case COMPARE:
11383 if (float_mode_p)
11384 *total = sparc_costs->float_cmp;
11385 else
11386 *total = COSTS_N_INSNS (1);
11387 return false;
11388
11389 case IF_THEN_ELSE:
11390 if (float_mode_p)
11391 *total = sparc_costs->float_cmove;
11392 else
11393 *total = sparc_costs->int_cmove;
11394 return false;
11395
11396 case IOR:
11397 /* Handle the NAND vector patterns. */
11398 if (sparc_vector_mode_supported_p (mode)
11399 && GET_CODE (XEXP (x, 0)) == NOT
11400 && GET_CODE (XEXP (x, 1)) == NOT)
11401 {
11402 *total = COSTS_N_INSNS (1);
11403 return true;
11404 }
11405 else
11406 return false;
11407
11408 default:
11409 return false;
11410 }
11411 }
11412
11413 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11414
11415 static inline bool
11416 general_or_i64_p (reg_class_t rclass)
11417 {
11418 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11419 }
11420
11421 /* Implement TARGET_REGISTER_MOVE_COST. */
11422
11423 static int
11424 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11425 reg_class_t from, reg_class_t to)
11426 {
11427 bool need_memory = false;
11428
11429 /* This helps postreload CSE to eliminate redundant comparisons. */
11430 if (from == NO_REGS || to == NO_REGS)
11431 return 100;
11432
11433 if (from == FPCC_REGS || to == FPCC_REGS)
11434 need_memory = true;
11435 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11436 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11437 {
11438 if (TARGET_VIS3)
11439 {
11440 int size = GET_MODE_SIZE (mode);
11441 if (size == 8 || size == 4)
11442 {
11443 if (! TARGET_ARCH32 || size == 4)
11444 return 4;
11445 else
11446 return 6;
11447 }
11448 }
11449 need_memory = true;
11450 }
11451
11452 if (need_memory)
11453 {
11454 if (sparc_cpu == PROCESSOR_ULTRASPARC
11455 || sparc_cpu == PROCESSOR_ULTRASPARC3
11456 || sparc_cpu == PROCESSOR_NIAGARA
11457 || sparc_cpu == PROCESSOR_NIAGARA2
11458 || sparc_cpu == PROCESSOR_NIAGARA3
11459 || sparc_cpu == PROCESSOR_NIAGARA4
11460 || sparc_cpu == PROCESSOR_NIAGARA7)
11461 return 12;
11462
11463 return 6;
11464 }
11465
11466 return 2;
11467 }
11468
11469 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11470 This is achieved by means of a manual dynamic stack space allocation in
11471 the current frame. We make the assumption that SEQ doesn't contain any
11472 function calls, with the possible exception of calls to the GOT helper. */
11473
11474 static void
11475 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11476 {
11477 /* We must preserve the lowest 16 words for the register save area. */
11478 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11479 /* We really need only 2 words of fresh stack space. */
11480 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11481
11482 rtx slot
11483 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11484 SPARC_STACK_BIAS + offset));
11485
11486 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11487 emit_insn (gen_rtx_SET (slot, reg));
11488 if (reg2)
11489 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11490 reg2));
11491 emit_insn (seq);
11492 if (reg2)
11493 emit_insn (gen_rtx_SET (reg2,
11494 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11495 emit_insn (gen_rtx_SET (reg, slot));
11496 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11497 }
11498
11499 /* Output the assembler code for a thunk function. THUNK_DECL is the
11500 declaration for the thunk function itself, FUNCTION is the decl for
11501 the target function. DELTA is an immediate constant offset to be
11502 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11503 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11504
11505 static void
11506 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11507 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11508 tree function)
11509 {
11510 rtx this_rtx, funexp;
11511 rtx_insn *insn;
11512 unsigned int int_arg_first;
11513
11514 reload_completed = 1;
11515 epilogue_completed = 1;
11516
11517 emit_note (NOTE_INSN_PROLOGUE_END);
11518
11519 if (TARGET_FLAT)
11520 {
11521 sparc_leaf_function_p = 1;
11522
11523 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11524 }
11525 else if (flag_delayed_branch)
11526 {
11527 /* We will emit a regular sibcall below, so we need to instruct
11528 output_sibcall that we are in a leaf function. */
11529 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11530
11531 /* This will cause final.c to invoke leaf_renumber_regs so we
11532 must behave as if we were in a not-yet-leafified function. */
11533 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11534 }
11535 else
11536 {
11537 /* We will emit the sibcall manually below, so we will need to
11538 manually spill non-leaf registers. */
11539 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11540
11541 /* We really are in a leaf function. */
11542 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11543 }
11544
11545 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11546 returns a structure, the structure return pointer is there instead. */
11547 if (TARGET_ARCH64
11548 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11549 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11550 else
11551 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11552
11553 /* Add DELTA. When possible use a plain add, otherwise load it into
11554 a register first. */
11555 if (delta)
11556 {
11557 rtx delta_rtx = GEN_INT (delta);
11558
11559 if (! SPARC_SIMM13_P (delta))
11560 {
11561 rtx scratch = gen_rtx_REG (Pmode, 1);
11562 emit_move_insn (scratch, delta_rtx);
11563 delta_rtx = scratch;
11564 }
11565
11566 /* THIS_RTX += DELTA. */
11567 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11568 }
11569
11570 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11571 if (vcall_offset)
11572 {
11573 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11574 rtx scratch = gen_rtx_REG (Pmode, 1);
11575
11576 gcc_assert (vcall_offset < 0);
11577
11578 /* SCRATCH = *THIS_RTX. */
11579 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11580
11581 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11582 may not have any available scratch register at this point. */
11583 if (SPARC_SIMM13_P (vcall_offset))
11584 ;
11585 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11586 else if (! fixed_regs[5]
11587 /* The below sequence is made up of at least 2 insns,
11588 while the default method may need only one. */
11589 && vcall_offset < -8192)
11590 {
11591 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11592 emit_move_insn (scratch2, vcall_offset_rtx);
11593 vcall_offset_rtx = scratch2;
11594 }
11595 else
11596 {
11597 rtx increment = GEN_INT (-4096);
11598
11599 /* VCALL_OFFSET is a negative number whose typical range can be
11600 estimated as -32768..0 in 32-bit mode. In almost all cases
11601 it is therefore cheaper to emit multiple add insns than
11602 spilling and loading the constant into a register (at least
11603 6 insns). */
11604 while (! SPARC_SIMM13_P (vcall_offset))
11605 {
11606 emit_insn (gen_add2_insn (scratch, increment));
11607 vcall_offset += 4096;
11608 }
11609 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11610 }
11611
11612 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11613 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11614 gen_rtx_PLUS (Pmode,
11615 scratch,
11616 vcall_offset_rtx)));
11617
11618 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11619 emit_insn (gen_add2_insn (this_rtx, scratch));
11620 }
11621
11622 /* Generate a tail call to the target function. */
11623 if (! TREE_USED (function))
11624 {
11625 assemble_external (function);
11626 TREE_USED (function) = 1;
11627 }
11628 funexp = XEXP (DECL_RTL (function), 0);
11629
11630 if (flag_delayed_branch)
11631 {
11632 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11633 insn = emit_call_insn (gen_sibcall (funexp));
11634 SIBLING_CALL_P (insn) = 1;
11635 }
11636 else
11637 {
11638 /* The hoops we have to jump through in order to generate a sibcall
11639 without using delay slots... */
11640 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11641
11642 if (flag_pic)
11643 {
11644 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11645 start_sequence ();
11646 load_got_register (); /* clobbers %o7 */
11647 scratch = sparc_legitimize_pic_address (funexp, scratch);
11648 seq = get_insns ();
11649 end_sequence ();
11650 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11651 }
11652 else if (TARGET_ARCH32)
11653 {
11654 emit_insn (gen_rtx_SET (scratch,
11655 gen_rtx_HIGH (SImode, funexp)));
11656 emit_insn (gen_rtx_SET (scratch,
11657 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11658 }
11659 else /* TARGET_ARCH64 */
11660 {
11661 switch (sparc_cmodel)
11662 {
11663 case CM_MEDLOW:
11664 case CM_MEDMID:
11665 /* The destination can serve as a temporary. */
11666 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11667 break;
11668
11669 case CM_MEDANY:
11670 case CM_EMBMEDANY:
11671 /* The destination cannot serve as a temporary. */
11672 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11673 start_sequence ();
11674 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11675 seq = get_insns ();
11676 end_sequence ();
11677 emit_and_preserve (seq, spill_reg, 0);
11678 break;
11679
11680 default:
11681 gcc_unreachable ();
11682 }
11683 }
11684
11685 emit_jump_insn (gen_indirect_jump (scratch));
11686 }
11687
11688 emit_barrier ();
11689
11690 /* Run just enough of rest_of_compilation to get the insns emitted.
11691 There's not really enough bulk here to make other passes such as
11692 instruction scheduling worth while. Note that use_thunk calls
11693 assemble_start_function and assemble_end_function. */
11694 insn = get_insns ();
11695 shorten_branches (insn);
11696 final_start_function (insn, file, 1);
11697 final (insn, file, 1);
11698 final_end_function ();
11699
11700 reload_completed = 0;
11701 epilogue_completed = 0;
11702 }
11703
11704 /* Return true if sparc_output_mi_thunk would be able to output the
11705 assembler code for the thunk function specified by the arguments
11706 it is passed, and false otherwise. */
11707 static bool
11708 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11709 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11710 HOST_WIDE_INT vcall_offset,
11711 const_tree function ATTRIBUTE_UNUSED)
11712 {
11713 /* Bound the loop used in the default method above. */
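  /* The fallback in sparc_output_mi_thunk adds -4096 repeatedly until the
     offset fits in a simm13, so either the offset must be small enough to
     keep that loop short, or %g5 must be free to hold it (descriptive
     note).  */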
11714 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11715 }
11716
11717 /* How to allocate a 'struct machine_function'. */
11718
11719 static struct machine_function *
11720 sparc_init_machine_status (void)
11721 {
11722 return ggc_cleared_alloc<machine_function> ();
11723 }
11724
11725 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11726 We need to emit DTP-relative relocations. */
11727
11728 static void
11729 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11730 {
11731 switch (size)
11732 {
11733 case 4:
11734 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11735 break;
11736 case 8:
11737 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11738 break;
11739 default:
11740 gcc_unreachable ();
11741 }
11742 output_addr_const (file, x);
11743 fputs (")", file);
11744 }
11745
11746 /* Do whatever processing is required at the end of a file. */
11747
11748 static void
11749 sparc_file_end (void)
11750 {
11751 /* If we need to emit the special GOT helper function, do so now. */
11752 if (got_helper_rtx)
11753 {
11754 const char *name = XSTR (got_helper_rtx, 0);
11755 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11756 #ifdef DWARF2_UNWIND_INFO
11757 bool do_cfi;
11758 #endif
11759
11760 if (USE_HIDDEN_LINKONCE)
11761 {
11762 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11763 get_identifier (name),
11764 build_function_type_list (void_type_node,
11765 NULL_TREE));
11766 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11767 NULL_TREE, void_type_node);
11768 TREE_PUBLIC (decl) = 1;
11769 TREE_STATIC (decl) = 1;
11770 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11771 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11772 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11773 resolve_unique_section (decl, 0, flag_function_sections);
11774 allocate_struct_function (decl, true);
11775 cfun->is_thunk = 1;
11776 current_function_decl = decl;
11777 init_varasm_status ();
11778 assemble_start_function (decl, name);
11779 }
11780 else
11781 {
11782 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11783 switch_to_section (text_section);
11784 if (align > 0)
11785 ASM_OUTPUT_ALIGN (asm_out_file, align);
11786 ASM_OUTPUT_LABEL (asm_out_file, name);
11787 }
11788
11789 #ifdef DWARF2_UNWIND_INFO
11790 do_cfi = dwarf2out_do_cfi_asm ();
11791 if (do_cfi)
11792 fprintf (asm_out_file, "\t.cfi_startproc\n");
11793 #endif
11794 if (flag_delayed_branch)
11795 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11796 reg_name, reg_name);
11797 else
11798 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11799 reg_name, reg_name);
11800 #ifdef DWARF2_UNWIND_INFO
11801 if (do_cfi)
11802 fprintf (asm_out_file, "\t.cfi_endproc\n");
11803 #endif
11804 }
11805
11806 if (NEED_INDICATE_EXEC_STACK)
11807 file_end_indicate_exec_stack ();
11808
11809 #ifdef TARGET_SOLARIS
11810 solaris_file_end ();
11811 #endif
11812 }
11813
11814 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11815 /* Implement TARGET_MANGLE_TYPE. */
11816
11817 static const char *
11818 sparc_mangle_type (const_tree type)
11819 {
11820 if (TARGET_ARCH32
11821 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11822 && TARGET_LONG_DOUBLE_128)
11823 return "g";
11824
11825 /* For all other types, use normal C++ mangling. */
11826 return NULL;
11827 }
11828 #endif
11829
11830 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11831 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11832 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11833
11834 void
11835 sparc_emit_membar_for_model (enum memmodel model,
11836 int load_store, int before_after)
11837 {
11838 /* Bits for the MEMBAR mmask field. */
11839 const int LoadLoad = 1;
11840 const int StoreLoad = 2;
11841 const int LoadStore = 4;
11842 const int StoreStore = 8;
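  /* These values match the mmask bit assignments of the SPARC V9 MEMBAR
     instruction (#LoadLoad = 1, #StoreLoad = 2, #LoadStore = 4,
     #StoreStore = 8), so MM below can be emitted directly as the membar
     immediate (descriptive note).  */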
11843
11844 int mm = 0, implied = 0;
11845
11846 switch (sparc_memory_model)
11847 {
11848 case SMM_SC:
11849 /* Sequential Consistency. All memory transactions are immediately
11850 visible in sequential execution order. No barriers needed. */
11851 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11852 break;
11853
11854 case SMM_TSO:
11855 /* Total Store Ordering: all memory transactions with store semantics
11856 are followed by an implied StoreStore. */
11857 implied |= StoreStore;
11858
11859 /* If we're not looking for a raw barrier (before+after), then atomic
11860 operations get the benefit of being both load and store. */
11861 if (load_store == 3 && before_after == 1)
11862 implied |= StoreLoad;
11863 /* FALLTHRU */
11864
11865 case SMM_PSO:
11866 /* Partial Store Ordering: all memory transactions with load semantics
11867 are followed by an implied LoadLoad | LoadStore. */
11868 implied |= LoadLoad | LoadStore;
11869
11870 /* If we're not looking for a raw barrier (before+after), then atomic
11871 operations get the benefit of being both load and store. */
11872 if (load_store == 3 && before_after == 2)
11873 implied |= StoreLoad | StoreStore;
11874 /* FALLTHRU */
11875
11876 case SMM_RMO:
11877 /* Relaxed Memory Ordering: no implicit bits. */
11878 break;
11879
11880 default:
11881 gcc_unreachable ();
11882 }
11883
11884 if (before_after & 1)
11885 {
11886 if (is_mm_release (model) || is_mm_acq_rel (model)
11887 || is_mm_seq_cst (model))
11888 {
11889 if (load_store & 1)
11890 mm |= LoadLoad | StoreLoad;
11891 if (load_store & 2)
11892 mm |= LoadStore | StoreStore;
11893 }
11894 }
11895 if (before_after & 2)
11896 {
11897 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11898 || is_mm_seq_cst (model))
11899 {
11900 if (load_store & 1)
11901 mm |= LoadLoad | LoadStore;
11902 if (load_store & 2)
11903 mm |= StoreLoad | StoreStore;
11904 }
11905 }
11906
11907 /* Remove the bits implied by the system memory model. */
11908 mm &= ~implied;
11909
11910 /* For raw barriers (before+after), always emit a barrier.
11911 This will become a compile-time barrier if needed. */
11912 if (mm || before_after == 3)
11913 emit_insn (gen_membar (GEN_INT (mm)));
11914 }
11915
11916 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
11917 compare and swap on the word containing the byte or half-word. */
11918
11919 static void
11920 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11921 rtx oldval, rtx newval)
11922 {
11923 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11924 rtx addr = gen_reg_rtx (Pmode);
11925 rtx off = gen_reg_rtx (SImode);
11926 rtx oldv = gen_reg_rtx (SImode);
11927 rtx newv = gen_reg_rtx (SImode);
11928 rtx oldvalue = gen_reg_rtx (SImode);
11929 rtx newvalue = gen_reg_rtx (SImode);
11930 rtx res = gen_reg_rtx (SImode);
11931 rtx resv = gen_reg_rtx (SImode);
11932 rtx memsi, val, mask, cc;
11933
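  /* ADDR is the containing word: clear the low two bits of the original
     address; the bits cleared here select the byte or half-word within
     that word (descriptive note).  */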
11934 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11935
11936 if (Pmode != SImode)
11937 addr1 = gen_lowpart (SImode, addr1);
11938 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11939
11940 memsi = gen_rtx_MEM (SImode, addr);
11941 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11942 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11943
11944 val = copy_to_reg (memsi);
11945
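  /* Turn the sub-word byte offset into a left-shift amount.  SPARC is
     big-endian, so byte 0 is the most significant byte: XOR with 3 (QImode)
     or 2 (HImode) reverses the numbering, and the shift by 3 below converts
     bytes to bits (descriptive note).  */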
11946 emit_insn (gen_rtx_SET (off,
11947 gen_rtx_XOR (SImode, off,
11948 GEN_INT (GET_MODE (mem) == QImode
11949 ? 3 : 2))));
11950
11951 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11952
11953 if (GET_MODE (mem) == QImode)
11954 mask = force_reg (SImode, GEN_INT (0xff));
11955 else
11956 mask = force_reg (SImode, GEN_INT (0xffff));
11957
11958 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11959
11960 emit_insn (gen_rtx_SET (val,
11961 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11962 val)));
11963
11964 oldval = gen_lowpart (SImode, oldval);
11965 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11966
11967 newval = gen_lowpart_common (SImode, newval);
11968 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11969
11970 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11971
11972 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11973
11974 rtx_code_label *end_label = gen_label_rtx ();
11975 rtx_code_label *loop_label = gen_label_rtx ();
11976 emit_label (loop_label);
11977
11978 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11979
11980 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11981
11982 emit_move_insn (bool_result, const1_rtx);
11983
11984 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11985
11986 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11987
11988 emit_insn (gen_rtx_SET (resv,
11989 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11990 res)));
11991
11992 emit_move_insn (bool_result, const0_rtx);
11993
11994 cc = gen_compare_reg_1 (NE, resv, val);
11995 emit_insn (gen_rtx_SET (val, resv));
11996
11997 /* Use cbranchcc4 to separate the compare and branch! */
11998 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11999 cc, const0_rtx, loop_label));
12000
12001 emit_label (end_label);
12002
12003 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12004
12005 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12006
12007 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12008 }
12009
12010 /* Expand code to perform a compare-and-swap. */
12011
12012 void
12013 sparc_expand_compare_and_swap (rtx operands[])
12014 {
12015 rtx bval, retval, mem, oldval, newval;
12016 machine_mode mode;
12017 enum memmodel model;
12018
12019 bval = operands[0];
12020 retval = operands[1];
12021 mem = operands[2];
12022 oldval = operands[3];
12023 newval = operands[4];
12024 model = (enum memmodel) INTVAL (operands[6]);
12025 mode = GET_MODE (mem);
12026
12027 sparc_emit_membar_for_model (model, 3, 1);
12028
12029 if (reg_overlap_mentioned_p (retval, oldval))
12030 oldval = copy_to_reg (oldval);
12031
12032 if (mode == QImode || mode == HImode)
12033 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12034 else
12035 {
12036 rtx (*gen) (rtx, rtx, rtx, rtx);
12037 rtx x;
12038
12039 if (mode == SImode)
12040 gen = gen_atomic_compare_and_swapsi_1;
12041 else
12042 gen = gen_atomic_compare_and_swapdi_1;
12043 emit_insn (gen (retval, mem, oldval, newval));
12044
12045 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12046 if (x != bval)
12047 convert_move (bval, x, 1);
12048 }
12049
12050 sparc_emit_membar_for_model (model, 3, 2);
12051 }
12052
12053 void
12054 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12055 {
12056 rtx t_1, t_2, t_3;
12057
12058 sel = gen_lowpart (DImode, sel);
12059 switch (vmode)
12060 {
12061 case V2SImode:
12062 /* inp = xxxxxxxAxxxxxxxB */
12063 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12064 NULL_RTX, 1, OPTAB_DIRECT);
12065 /* t_1 = ....xxxxxxxAxxx. */
12066 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12067 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12068 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12069 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12070 /* sel = .......B */
12071 /* t_1 = ...A.... */
12072 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12073 /* sel = ...A...B */
12074 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12075 /* sel = AAAABBBB * 4 */
12076 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12077 /* sel = { A*4, A*4+1, A*4+2, ... } */
12078 break;
12079
12080 case V4HImode:
12081 /* inp = xxxAxxxBxxxCxxxD */
12082 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12083 NULL_RTX, 1, OPTAB_DIRECT);
12084 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12085 NULL_RTX, 1, OPTAB_DIRECT);
12086 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12087 NULL_RTX, 1, OPTAB_DIRECT);
12088 /* t_1 = ..xxxAxxxBxxxCxx */
12089 /* t_2 = ....xxxAxxxBxxxC */
12090 /* t_3 = ......xxxAxxxBxx */
12091 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12092 GEN_INT (0x07),
12093 NULL_RTX, 1, OPTAB_DIRECT);
12094 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12095 GEN_INT (0x0700),
12096 NULL_RTX, 1, OPTAB_DIRECT);
12097 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12098 GEN_INT (0x070000),
12099 NULL_RTX, 1, OPTAB_DIRECT);
12100 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12101 GEN_INT (0x07000000),
12102 NULL_RTX, 1, OPTAB_DIRECT);
12103 /* sel = .......D */
12104 /* t_1 = .....C.. */
12105 /* t_2 = ...B.... */
12106 /* t_3 = .A...... */
12107 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12108 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12109 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12110 /* sel = .A.B.C.D */
12111 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12112 /* sel = AABBCCDD * 2 */
12113 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12114 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12115 break;
12116
12117 case V8QImode:
12118 /* input = xAxBxCxDxExFxGxH */
12119 sel = expand_simple_binop (DImode, AND, sel,
12120 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12121 | 0x0f0f0f0f),
12122 NULL_RTX, 1, OPTAB_DIRECT);
12123 /* sel = .A.B.C.D.E.F.G.H */
12124 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12125 NULL_RTX, 1, OPTAB_DIRECT);
12126 /* t_1 = ..A.B.C.D.E.F.G. */
12127 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12128 NULL_RTX, 1, OPTAB_DIRECT);
12129 /* sel = .AABBCCDDEEFFGGH */
12130 sel = expand_simple_binop (DImode, AND, sel,
12131 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12132 | 0xff00ff),
12133 NULL_RTX, 1, OPTAB_DIRECT);
12134 /* sel = ..AB..CD..EF..GH */
12135 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12136 NULL_RTX, 1, OPTAB_DIRECT);
12137 /* t_1 = ....AB..CD..EF.. */
12138 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12139 NULL_RTX, 1, OPTAB_DIRECT);
12140 /* sel = ..ABABCDCDEFEFGH */
12141 sel = expand_simple_binop (DImode, AND, sel,
12142 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12143 NULL_RTX, 1, OPTAB_DIRECT);
12144 /* sel = ....ABCD....EFGH */
12145 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12146 NULL_RTX, 1, OPTAB_DIRECT);
12147 /* t_1 = ........ABCD.... */
12148 sel = gen_lowpart (SImode, sel);
12149 t_1 = gen_lowpart (SImode, t_1);
12150 break;
12151
12152 default:
12153 gcc_unreachable ();
12154 }
12155
12156 /* Always perform the final addition/merge within the bmask insn. */
12157 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12158 }
12159
12160 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12161
12162 static bool
12163 sparc_frame_pointer_required (void)
12164 {
12165 /* If the stack pointer is dynamically modified in the function, it cannot
12166 serve as the frame pointer. */
12167 if (cfun->calls_alloca)
12168 return true;
12169
12170 /* If the function receives nonlocal gotos, it needs to save the frame
12171 pointer in the nonlocal_goto_save_area object. */
12172 if (cfun->has_nonlocal_label)
12173 return true;
12174
12175 /* In flat mode, that's it. */
12176 if (TARGET_FLAT)
12177 return false;
12178
12179 /* Otherwise, the frame pointer is required if the function isn't leaf. */
12180 return !(crtl->is_leaf && only_leaf_regs_used ());
12181 }
12182
12183 /* The way this is structured, we can't eliminate SFP in favor of SP
12184 if the frame pointer is required: we want to use the SFP->HFP elimination
12185 in that case. But the test in update_eliminables doesn't know we are
12186 assuming below that we only do the former elimination. */
12187
12188 static bool
12189 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12190 {
12191 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12192 }
12193
12194 /* Return the hard frame pointer directly to bypass the stack bias. */
12195
12196 static rtx
12197 sparc_builtin_setjmp_frame_value (void)
12198 {
12199 return hard_frame_pointer_rtx;
12200 }
12201
12202 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12203 they won't be allocated. */
12204
12205 static void
12206 sparc_conditional_register_usage (void)
12207 {
12208 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12209 {
12210 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12211 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12212 }
12213 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
12214 /* then honor it. */
12215 if (TARGET_ARCH32 && fixed_regs[5])
12216 fixed_regs[5] = 1;
12217 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12218 fixed_regs[5] = 0;
12219 if (! TARGET_V9)
12220 {
12221 int regno;
12222 for (regno = SPARC_FIRST_V9_FP_REG;
12223 regno <= SPARC_LAST_V9_FP_REG;
12224 regno++)
12225 fixed_regs[regno] = 1;
12226 /* %fcc0 is used by v8 and v9. */
12227 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12228 regno <= SPARC_LAST_V9_FCC_REG;
12229 regno++)
12230 fixed_regs[regno] = 1;
12231 }
12232 if (! TARGET_FPU)
12233 {
12234 int regno;
12235 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12236 fixed_regs[regno] = 1;
12237 }
12238 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
12239 /* then honor it. Likewise with g3 and g4. */
12240 if (fixed_regs[2] == 2)
12241 fixed_regs[2] = ! TARGET_APP_REGS;
12242 if (fixed_regs[3] == 2)
12243 fixed_regs[3] = ! TARGET_APP_REGS;
12244 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12245 fixed_regs[4] = ! TARGET_APP_REGS;
12246 else if (TARGET_CM_EMBMEDANY)
12247 fixed_regs[4] = 1;
12248 else if (fixed_regs[4] == 2)
12249 fixed_regs[4] = 0;
12250 if (TARGET_FLAT)
12251 {
12252 int regno;
12253 /* Disable leaf functions. */
12254 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12255 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12256 leaf_reg_remap [regno] = regno;
12257 }
12258 if (TARGET_VIS)
12259 global_regs[SPARC_GSR_REG] = 1;
12260 }
12261
12262 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12263
12264 - We can't load constants into FP registers.
12265 - We can't load FP constants into integer registers when soft-float,
12266 because there is no soft-float pattern with a r/F constraint.
12267 - We can't load FP constants into integer registers for TFmode unless
12268 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12269 - Try and reload integer constants (symbolic or otherwise) back into
12270 registers directly, rather than having them dumped to memory. */
12271
12272 static reg_class_t
12273 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12274 {
12275 machine_mode mode = GET_MODE (x);
12276 if (CONSTANT_P (x))
12277 {
12278 if (FP_REG_CLASS_P (rclass)
12279 || rclass == GENERAL_OR_FP_REGS
12280 || rclass == GENERAL_OR_EXTRA_FP_REGS
12281 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12282 || (mode == TFmode && ! const_zero_operand (x, mode)))
12283 return NO_REGS;
12284
12285 if (GET_MODE_CLASS (mode) == MODE_INT)
12286 return GENERAL_REGS;
12287
12288 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12289 {
12290 if (! FP_REG_CLASS_P (rclass)
12291 || !(const_zero_operand (x, mode)
12292 || const_all_ones_operand (x, mode)))
12293 return NO_REGS;
12294 }
12295 }
12296
12297 if (TARGET_VIS3
12298 && ! TARGET_ARCH64
12299 && (rclass == EXTRA_FP_REGS
12300 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12301 {
12302 int regno = true_regnum (x);
12303
12304 if (SPARC_INT_REG_P (regno))
12305 return (rclass == EXTRA_FP_REGS
12306 ? FP_REGS : GENERAL_OR_FP_REGS);
12307 }
12308
12309 return rclass;
12310 }
12311
12312 /* Return true if we use LRA instead of reload pass. */
12313
12314 static bool
12315 sparc_lra_p (void)
12316 {
12317 return TARGET_LRA;
12318 }
12319
12320 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12321 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12322
12323 const char *
12324 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12325 {
12326 char mulstr[32];
12327
12328 gcc_assert (! TARGET_ARCH64);
12329
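  /* Strategy (descriptive note): in V8+ mode each 64-bit operand is held as
     a pair of 32-bit registers, printed as %H (high word) and %L (low word)
     in the templates below.  Each input is assembled into a full 64-bit
     value with sllx/or, the multiply is performed as a single 64-bit OPCODE,
     and the 64-bit product is split back into %H0/%L0 with srlx.  */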
12330 if (sparc_check_64 (operands[1], insn) <= 0)
12331 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12332 if (which_alternative == 1)
12333 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12334 if (GET_CODE (operands[2]) == CONST_INT)
12335 {
12336 if (which_alternative == 1)
12337 {
12338 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12339 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12340 output_asm_insn (mulstr, operands);
12341 return "srlx\t%L0, 32, %H0";
12342 }
12343 else
12344 {
12345 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12346 output_asm_insn ("or\t%L1, %3, %3", operands);
12347 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12348 output_asm_insn (mulstr, operands);
12349 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12350 return "mov\t%3, %L0";
12351 }
12352 }
12353 else if (rtx_equal_p (operands[1], operands[2]))
12354 {
12355 if (which_alternative == 1)
12356 {
12357 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12358 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12359 output_asm_insn (mulstr, operands);
12360 return "srlx\t%L0, 32, %H0";
12361 }
12362 else
12363 {
12364 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12365 output_asm_insn ("or\t%L1, %3, %3", operands);
12366 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12367 output_asm_insn (mulstr, operands);
12368 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12369 return "mov\t%3, %L0";
12370 }
12371 }
12372 if (sparc_check_64 (operands[2], insn) <= 0)
12373 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12374 if (which_alternative == 1)
12375 {
12376 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12377 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12378 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12379 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12380 output_asm_insn (mulstr, operands);
12381 return "srlx\t%L0, 32, %H0";
12382 }
12383 else
12384 {
12385 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12386 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12387 output_asm_insn ("or\t%L1, %3, %3", operands);
12388 output_asm_insn ("or\t%L2, %4, %4", operands);
12389 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12390 output_asm_insn (mulstr, operands);
12391 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12392 return "mov\t%3, %L0";
12393 }
12394 }
12395
12396 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12397 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12398 and INNER_MODE are the modes describing TARGET. */
12399
12400 static void
12401 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12402 machine_mode inner_mode)
12403 {
12404 rtx t1, final_insn, sel;
12405 int bmask;
12406
12407 t1 = gen_reg_rtx (mode);
12408
12409 elt = convert_modes (SImode, inner_mode, elt, true);
12410 emit_move_insn (gen_lowpart (SImode, t1), elt);
12411
12412 switch (mode)
12413 {
12414 case V2SImode:
12415 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12416 bmask = 0x45674567;
12417 break;
12418 case V4HImode:
12419 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12420 bmask = 0x67676767;
12421 break;
12422 case V8QImode:
12423 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12424 bmask = 0x77777777;
12425 break;
12426 default:
12427 gcc_unreachable ();
12428 }
12429
12430 sel = force_reg (SImode, GEN_INT (bmask));
12431 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12432 emit_insn (final_insn);
12433 }
12434
12435 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12436 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12437
12438 static void
12439 vector_init_fpmerge (rtx target, rtx elt)
12440 {
12441 rtx t1, t2, t2_low, t3, t3_low;
12442
12443 t1 = gen_reg_rtx (V4QImode);
12444 elt = convert_modes (SImode, QImode, elt, true);
12445 emit_move_insn (gen_lowpart (SImode, t1), elt);
12446
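  /* Each fpmerge interleaves the bytes of its two 4-byte operands.  Merging
     a value with itself therefore doubles the number of lanes holding ELT,
     so three rounds go from 1 to 2 to 4 to 8 copies (descriptive note).  */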
12447 t2 = gen_reg_rtx (V8QImode);
12448 t2_low = gen_lowpart (V4QImode, t2);
12449 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12450
12451 t3 = gen_reg_rtx (V8QImode);
12452 t3_low = gen_lowpart (V4QImode, t3);
12453 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12454
12455 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12456 }
12457
12458 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12459 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12460
12461 static void
12462 vector_init_faligndata (rtx target, rtx elt)
12463 {
12464 rtx t1 = gen_reg_rtx (V4HImode);
12465 int i;
12466
12467 elt = convert_modes (SImode, HImode, elt, true);
12468 emit_move_insn (gen_lowpart (SImode, t1), elt);
12469
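  /* Set GSR.align to 6: each faligndata below then extracts the last two
     bytes of T1 (the replicated element) followed by the first six bytes of
     TARGET, so four iterations rotate ELT into all four half-word lanes
     (descriptive note).  */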
12470 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12471 force_reg (SImode, GEN_INT (6)),
12472 const0_rtx));
12473
12474 for (i = 0; i < 4; i++)
12475 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12476 }
12477
12478 /* Emit code to initialize TARGET to values for individual fields VALS. */
12479
12480 void
12481 sparc_expand_vector_init (rtx target, rtx vals)
12482 {
12483 const machine_mode mode = GET_MODE (target);
12484 const machine_mode inner_mode = GET_MODE_INNER (mode);
12485 const int n_elts = GET_MODE_NUNITS (mode);
12486 int i, n_var = 0;
12487 bool all_same = true;
12488 rtx mem;
12489
12490 for (i = 0; i < n_elts; i++)
12491 {
12492 rtx x = XVECEXP (vals, 0, i);
12493 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12494 n_var++;
12495
12496 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12497 all_same = false;
12498 }
12499
12500 if (n_var == 0)
12501 {
12502 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12503 return;
12504 }
12505
12506 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12507 {
12508 if (GET_MODE_SIZE (inner_mode) == 4)
12509 {
12510 emit_move_insn (gen_lowpart (SImode, target),
12511 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12512 return;
12513 }
12514 else if (GET_MODE_SIZE (inner_mode) == 8)
12515 {
12516 emit_move_insn (gen_lowpart (DImode, target),
12517 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12518 return;
12519 }
12520 }
12521 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12522 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12523 {
12524 emit_move_insn (gen_highpart (word_mode, target),
12525 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12526 emit_move_insn (gen_lowpart (word_mode, target),
12527 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12528 return;
12529 }
12530
12531 if (all_same && GET_MODE_SIZE (mode) == 8)
12532 {
12533 if (TARGET_VIS2)
12534 {
12535 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12536 return;
12537 }
12538 if (mode == V8QImode)
12539 {
12540 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12541 return;
12542 }
12543 if (mode == V4HImode)
12544 {
12545 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12546 return;
12547 }
12548 }
12549
12550 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12551 for (i = 0; i < n_elts; i++)
12552 emit_move_insn (adjust_address_nv (mem, inner_mode,
12553 i * GET_MODE_SIZE (inner_mode)),
12554 XVECEXP (vals, 0, i));
12555 emit_move_insn (target, mem);
12556 }
12557
12558 /* Implement TARGET_SECONDARY_RELOAD. */
12559
12560 static reg_class_t
12561 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12562 machine_mode mode, secondary_reload_info *sri)
12563 {
12564 enum reg_class rclass = (enum reg_class) rclass_i;
12565
12566 sri->icode = CODE_FOR_nothing;
12567 sri->extra_cost = 0;
12568
12569 /* We need a temporary when loading/storing a HImode/QImode value
12570 between memory and the FPU registers. This can happen when combine puts
12571 a paradoxical subreg in a float/fix conversion insn. */
12572 if (FP_REG_CLASS_P (rclass)
12573 && (mode == HImode || mode == QImode)
12574 && (GET_CODE (x) == MEM
12575 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12576 && true_regnum (x) == -1)))
12577 return GENERAL_REGS;
12578
12579 /* On 32-bit we need a temporary when loading/storing a DFmode value
12580 between unaligned memory and the upper FPU registers. */
12581 if (TARGET_ARCH32
12582 && rclass == EXTRA_FP_REGS
12583 && mode == DFmode
12584 && GET_CODE (x) == MEM
12585 && ! mem_min_alignment (x, 8))
12586 return FP_REGS;
12587
12588 if (((TARGET_CM_MEDANY
12589 && symbolic_operand (x, mode))
12590 || (TARGET_CM_EMBMEDANY
12591 && text_segment_operand (x, mode)))
12592 && ! flag_pic)
12593 {
12594 if (in_p)
12595 sri->icode = direct_optab_handler (reload_in_optab, mode);
12596 else
12597 sri->icode = direct_optab_handler (reload_out_optab, mode);
12598 return NO_REGS;
12599 }
12600
12601 if (TARGET_VIS3 && TARGET_ARCH32)
12602 {
12603 int regno = true_regnum (x);
12604
12605 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12606 to move 8-byte values in 4-byte pieces. This only works via
12607 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12608 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12609 an FP_REGS intermediate move. */
12610 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12611 || ((general_or_i64_p (rclass)
12612 || rclass == GENERAL_OR_FP_REGS)
12613 && SPARC_FP_REG_P (regno)))
12614 {
12615 sri->extra_cost = 2;
12616 return FP_REGS;
12617 }
12618 }
12619
12620 return NO_REGS;
12621 }
12622
12623 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12624 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12625
12626 bool
12627 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12628 {
12629 enum rtx_code rc = GET_CODE (operands[1]);
12630 machine_mode cmp_mode;
12631 rtx cc_reg, dst, cmp;
12632
12633 cmp = operands[1];
12634 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12635 return false;
12636
12637 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12638 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12639
12640 cmp_mode = GET_MODE (XEXP (cmp, 0));
12641 rc = GET_CODE (cmp);
12642
12643 dst = operands[0];
12644 if (! rtx_equal_p (operands[2], dst)
12645 && ! rtx_equal_p (operands[3], dst))
12646 {
12647 if (reg_overlap_mentioned_p (dst, cmp))
12648 dst = gen_reg_rtx (mode);
12649
12650 emit_move_insn (dst, operands[3]);
12651 }
12652 else if (operands[2] == dst)
12653 {
12654 operands[2] = operands[3];
12655
12656 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12657 rc = reverse_condition_maybe_unordered (rc);
12658 else
12659 rc = reverse_condition (rc);
12660 }
12661
12662 if (XEXP (cmp, 1) == const0_rtx
12663 && GET_CODE (XEXP (cmp, 0)) == REG
12664 && cmp_mode == DImode
12665 && v9_regcmp_p (rc))
12666 cc_reg = XEXP (cmp, 0);
12667 else
12668 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12669
12670 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12671
12672 emit_insn (gen_rtx_SET (dst,
12673 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12674
12675 if (dst != operands[0])
12676 emit_move_insn (operands[0], dst);
12677
12678 return true;
12679 }
12680
12681 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12682 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12683 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12684 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12685 code to be used for the condition mask. */
12686
12687 void
12688 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12689 {
12690 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12691 enum rtx_code code = GET_CODE (operands[3]);
12692
12693 mask = gen_reg_rtx (Pmode);
12694 cop0 = operands[4];
12695 cop1 = operands[5];
12696 if (code == LT || code == GE)
12697 {
12698 rtx t;
12699
12700 code = swap_condition (code);
12701 t = cop0; cop0 = cop1; cop1 = t;
12702 }
12703
12704 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12705
12706 fcmp = gen_rtx_UNSPEC (Pmode,
12707 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12708 fcode);
12709
12710 cmask = gen_rtx_UNSPEC (DImode,
12711 gen_rtvec (2, mask, gsr),
12712 ccode);
12713
12714 bshuf = gen_rtx_UNSPEC (mode,
12715 gen_rtvec (3, operands[1], operands[2], gsr),
12716 UNSPEC_BSHUFFLE);
12717
12718 emit_insn (gen_rtx_SET (mask, fcmp));
12719 emit_insn (gen_rtx_SET (gsr, cmask));
12720
12721 emit_insn (gen_rtx_SET (operands[0], bshuf));
12722 }
12723
12724 /* On sparc, any mode which naturally allocates into the float
12725 registers should return 4 here. */
12726
12727 unsigned int
12728 sparc_regmode_natural_size (machine_mode mode)
12729 {
12730 int size = UNITS_PER_WORD;
12731
12732 if (TARGET_ARCH64)
12733 {
12734 enum mode_class mclass = GET_MODE_CLASS (mode);
12735
12736 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12737 size = 4;
12738 }
12739
12740 return size;
12741 }
12742
12743 /* Return TRUE if it is a good idea to tie two pseudo registers
12744 when one has mode MODE1 and one has mode MODE2.
12745 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12746 for any hard reg, then this must be FALSE for correct output.
12747
12748 For V9 we have to deal with the fact that only the lower 32 floating
12749 point registers are 32-bit addressable. */
12750
12751 bool
12752 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12753 {
12754 enum mode_class mclass1, mclass2;
12755 unsigned short size1, size2;
12756
12757 if (mode1 == mode2)
12758 return true;
12759
12760 mclass1 = GET_MODE_CLASS (mode1);
12761 mclass2 = GET_MODE_CLASS (mode2);
12762 if (mclass1 != mclass2)
12763 return false;
12764
12765 if (! TARGET_V9)
12766 return true;
12767
12768 /* Classes are the same and we are V9 so we have to deal with upper
12769 vs. lower floating point registers. If one of the modes is a
12770 4-byte mode, and the other is not, we have to mark them as not
12771 tieable because only the lower 32 floating point registers are
12772 addressable 32 bits at a time.
12773
12774 We can't just test explicitly for SFmode, otherwise we won't
12775 cover the vector mode cases properly. */
12776
12777 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12778 return true;
12779
12780 size1 = GET_MODE_SIZE (mode1);
12781 size2 = GET_MODE_SIZE (mode2);
12782 if ((size1 > 4 && size2 == 4)
12783 || (size2 > 4 && size1 == 4))
12784 return false;
12785
12786 return true;
12787 }
12788
12789 /* Implement TARGET_CSTORE_MODE. */
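/* That is, the boolean result of a conditional-store (cstore) pattern is
   computed in a full integer word: DImode with -m64, SImode with -m32.  */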
12790
12791 static machine_mode
12792 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12793 {
12794 return (TARGET_ARCH64 ? DImode : SImode);
12795 }
12796
12797 /* Return the compound expression made of T1 and T2. */
12798
12799 static inline tree
12800 compound_expr (tree t1, tree t2)
12801 {
12802 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12803 }
12804
12805 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12806
12807 static void
12808 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12809 {
12810 if (!TARGET_FPU)
12811 return;
12812
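  /* In the FSR, the accrued-exception field (aexc) occupies bits 9:5 and the
     trap-enable mask (TEM) occupies bits 27:23; the two masks below select
     exactly those fields.  */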
12813 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12814 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12815
12816 /* We generate the equivalent of feholdexcept (&fenv_var):
12817
12818 unsigned int fenv_var;
12819 __builtin_store_fsr (&fenv_var);
12820
12821 unsigned int tmp1_var;
12822 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12823
12824 __builtin_load_fsr (&tmp1_var); */
12825
12826 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12827 TREE_ADDRESSABLE (fenv_var) = 1;
12828 tree fenv_addr = build_fold_addr_expr (fenv_var);
12829 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12830 tree hold_stfsr
12831 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12832 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12833
12834 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12835 TREE_ADDRESSABLE (tmp1_var) = 1;
12836 tree masked_fenv_var
12837 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12838 build_int_cst (unsigned_type_node,
12839 ~(accrued_exception_mask | trap_enable_mask)));
12840 tree hold_mask
12841 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12842 NULL_TREE, NULL_TREE);
12843
12844 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12845 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12846 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12847
12848 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12849
12850 /* We reload the value of tmp1_var to clear the exceptions:
12851
12852 __builtin_load_fsr (&tmp1_var); */
12853
12854 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12855
12856 /* We generate the equivalent of feupdateenv (&fenv_var):
12857
12858 unsigned int tmp2_var;
12859 __builtin_store_fsr (&tmp2_var);
12860
12861 __builtin_load_fsr (&fenv_var);
12862
12863 if (SPARC_LOW_FE_EXCEPT_VALUES)
12864 tmp2_var >>= 5;
12865 __atomic_feraiseexcept ((int) tmp2_var); */
12866
12867 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12868 TREE_ADDRESSABLE (tmp2_var) = 1;
12869 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12870 tree update_stfsr
12871 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12872 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12873
12874 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12875
12876 tree atomic_feraiseexcept
12877 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12878 tree update_call
12879 = build_call_expr (atomic_feraiseexcept, 1,
12880 fold_convert (integer_type_node, tmp2_var));
12881
12882 if (SPARC_LOW_FE_EXCEPT_VALUES)
12883 {
12884 tree shifted_tmp2_var
12885 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12886 build_int_cst (unsigned_type_node, 5));
12887 tree update_shift
12888 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12889 update_call = compound_expr (update_shift, update_call);
12890 }
12891
12892 *update
12893 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12894 }
12895
12896 #include "gt-sparc.h"