1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "debug.h"
51 #include "common/common-target.h"
52 #include "gimplify.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "context.h"
58 #include "builtins.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Processor costs */
64
65 struct processor_costs {
66 /* Integer load */
67 const int int_load;
68
69 /* Integer signed load */
70 const int int_sload;
71
72 /* Integer zeroed load */
73 const int int_zload;
74
75 /* Float load */
76 const int float_load;
77
78 /* fmov, fneg, fabs */
79 const int float_move;
80
81 /* fadd, fsub */
82 const int float_plusminus;
83
84 /* fcmp */
85 const int float_cmp;
86
87 /* fmov, fmovr */
88 const int float_cmove;
89
90 /* fmul */
91 const int float_mul;
92
93 /* fdivs */
94 const int float_div_sf;
95
96 /* fdivd */
97 const int float_div_df;
98
99 /* fsqrts */
100 const int float_sqrt_sf;
101
102 /* fsqrtd */
103 const int float_sqrt_df;
104
105 /* umul/smul */
106 const int int_mul;
107
108 /* mulX */
109 const int int_mulX;
110
111 /* integer multiply cost for each bit set past the most
112 significant 3, so the formula for multiply cost becomes:
113
114 if (rs1 < 0)
115 highest_bit = highest_clear_bit(rs1);
116 else
117 highest_bit = highest_set_bit(rs1);
118 if (highest_bit < 3)
119 highest_bit = 3;
120 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
121
122 	     A value of zero indicates that the multiply cost is fixed,
123 	     not variable.  */
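	  /* Illustrative reading of the formula above (an example worked from
	     the table entries below, not vendor data): with int_mul_bit_factor
	     == 2, as in ultrasparc_costs, a multiplier whose highest set bit is
	     bit 11 costs int_mul + (11 - 3) / 2 = int_mul + 4.  */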
124 const int int_mul_bit_factor;
125
126 /* udiv/sdiv */
127 const int int_div;
128
129 /* divX */
130 const int int_divX;
131
132 /* movcc, movr */
133 const int int_cmove;
134
135 /* penalty for shifts, due to scheduling rules etc. */
136 const int shift_penalty;
137 };
138
139 static const
140 struct processor_costs cypress_costs = {
141 COSTS_N_INSNS (2), /* int load */
142 COSTS_N_INSNS (2), /* int signed load */
143 COSTS_N_INSNS (2), /* int zeroed load */
144 COSTS_N_INSNS (2), /* float load */
145 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
146 COSTS_N_INSNS (5), /* fadd, fsub */
147 COSTS_N_INSNS (1), /* fcmp */
148 COSTS_N_INSNS (1), /* fmov, fmovr */
149 COSTS_N_INSNS (7), /* fmul */
150 COSTS_N_INSNS (37), /* fdivs */
151 COSTS_N_INSNS (37), /* fdivd */
152 COSTS_N_INSNS (63), /* fsqrts */
153 COSTS_N_INSNS (63), /* fsqrtd */
154 COSTS_N_INSNS (1), /* imul */
155 COSTS_N_INSNS (1), /* imulX */
156 0, /* imul bit factor */
157 COSTS_N_INSNS (1), /* idiv */
158 COSTS_N_INSNS (1), /* idivX */
159 COSTS_N_INSNS (1), /* movcc/movr */
160 0, /* shift penalty */
161 };
162
163 static const
164 struct processor_costs supersparc_costs = {
165 COSTS_N_INSNS (1), /* int load */
166 COSTS_N_INSNS (1), /* int signed load */
167 COSTS_N_INSNS (1), /* int zeroed load */
168 COSTS_N_INSNS (0), /* float load */
169 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
170 COSTS_N_INSNS (3), /* fadd, fsub */
171 COSTS_N_INSNS (3), /* fcmp */
172 COSTS_N_INSNS (1), /* fmov, fmovr */
173 COSTS_N_INSNS (3), /* fmul */
174 COSTS_N_INSNS (6), /* fdivs */
175 COSTS_N_INSNS (9), /* fdivd */
176 COSTS_N_INSNS (12), /* fsqrts */
177 COSTS_N_INSNS (12), /* fsqrtd */
178 COSTS_N_INSNS (4), /* imul */
179 COSTS_N_INSNS (4), /* imulX */
180 0, /* imul bit factor */
181 COSTS_N_INSNS (4), /* idiv */
182 COSTS_N_INSNS (4), /* idivX */
183 COSTS_N_INSNS (1), /* movcc/movr */
184 1, /* shift penalty */
185 };
186
187 static const
188 struct processor_costs hypersparc_costs = {
189 COSTS_N_INSNS (1), /* int load */
190 COSTS_N_INSNS (1), /* int signed load */
191 COSTS_N_INSNS (1), /* int zeroed load */
192 COSTS_N_INSNS (1), /* float load */
193 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
194 COSTS_N_INSNS (1), /* fadd, fsub */
195 COSTS_N_INSNS (1), /* fcmp */
196 COSTS_N_INSNS (1), /* fmov, fmovr */
197 COSTS_N_INSNS (1), /* fmul */
198 COSTS_N_INSNS (8), /* fdivs */
199 COSTS_N_INSNS (12), /* fdivd */
200 COSTS_N_INSNS (17), /* fsqrts */
201 COSTS_N_INSNS (17), /* fsqrtd */
202 COSTS_N_INSNS (17), /* imul */
203 COSTS_N_INSNS (17), /* imulX */
204 0, /* imul bit factor */
205 COSTS_N_INSNS (17), /* idiv */
206 COSTS_N_INSNS (17), /* idivX */
207 COSTS_N_INSNS (1), /* movcc/movr */
208 0, /* shift penalty */
209 };
210
211 static const
212 struct processor_costs leon_costs = {
213 COSTS_N_INSNS (1), /* int load */
214 COSTS_N_INSNS (1), /* int signed load */
215 COSTS_N_INSNS (1), /* int zeroed load */
216 COSTS_N_INSNS (1), /* float load */
217 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
218 COSTS_N_INSNS (1), /* fadd, fsub */
219 COSTS_N_INSNS (1), /* fcmp */
220 COSTS_N_INSNS (1), /* fmov, fmovr */
221 COSTS_N_INSNS (1), /* fmul */
222 COSTS_N_INSNS (15), /* fdivs */
223 COSTS_N_INSNS (15), /* fdivd */
224 COSTS_N_INSNS (23), /* fsqrts */
225 COSTS_N_INSNS (23), /* fsqrtd */
226 COSTS_N_INSNS (5), /* imul */
227 COSTS_N_INSNS (5), /* imulX */
228 0, /* imul bit factor */
229 COSTS_N_INSNS (5), /* idiv */
230 COSTS_N_INSNS (5), /* idivX */
231 COSTS_N_INSNS (1), /* movcc/movr */
232 0, /* shift penalty */
233 };
234
235 static const
236 struct processor_costs leon3_costs = {
237 COSTS_N_INSNS (1), /* int load */
238 COSTS_N_INSNS (1), /* int signed load */
239 COSTS_N_INSNS (1), /* int zeroed load */
240 COSTS_N_INSNS (1), /* float load */
241 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
242 COSTS_N_INSNS (1), /* fadd, fsub */
243 COSTS_N_INSNS (1), /* fcmp */
244 COSTS_N_INSNS (1), /* fmov, fmovr */
245 COSTS_N_INSNS (1), /* fmul */
246 COSTS_N_INSNS (14), /* fdivs */
247 COSTS_N_INSNS (15), /* fdivd */
248 COSTS_N_INSNS (22), /* fsqrts */
249 COSTS_N_INSNS (23), /* fsqrtd */
250 COSTS_N_INSNS (5), /* imul */
251 COSTS_N_INSNS (5), /* imulX */
252 0, /* imul bit factor */
253 COSTS_N_INSNS (35), /* idiv */
254 COSTS_N_INSNS (35), /* idivX */
255 COSTS_N_INSNS (1), /* movcc/movr */
256 0, /* shift penalty */
257 };
258
259 static const
260 struct processor_costs sparclet_costs = {
261 COSTS_N_INSNS (3), /* int load */
262 COSTS_N_INSNS (3), /* int signed load */
263 COSTS_N_INSNS (1), /* int zeroed load */
264 COSTS_N_INSNS (1), /* float load */
265 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
266 COSTS_N_INSNS (1), /* fadd, fsub */
267 COSTS_N_INSNS (1), /* fcmp */
268 COSTS_N_INSNS (1), /* fmov, fmovr */
269 COSTS_N_INSNS (1), /* fmul */
270 COSTS_N_INSNS (1), /* fdivs */
271 COSTS_N_INSNS (1), /* fdivd */
272 COSTS_N_INSNS (1), /* fsqrts */
273 COSTS_N_INSNS (1), /* fsqrtd */
274 COSTS_N_INSNS (5), /* imul */
275 COSTS_N_INSNS (5), /* imulX */
276 0, /* imul bit factor */
277 COSTS_N_INSNS (5), /* idiv */
278 COSTS_N_INSNS (5), /* idivX */
279 COSTS_N_INSNS (1), /* movcc/movr */
280 0, /* shift penalty */
281 };
282
283 static const
284 struct processor_costs ultrasparc_costs = {
285 COSTS_N_INSNS (2), /* int load */
286 COSTS_N_INSNS (3), /* int signed load */
287 COSTS_N_INSNS (2), /* int zeroed load */
288 COSTS_N_INSNS (2), /* float load */
289 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
290 COSTS_N_INSNS (4), /* fadd, fsub */
291 COSTS_N_INSNS (1), /* fcmp */
292 COSTS_N_INSNS (2), /* fmov, fmovr */
293 COSTS_N_INSNS (4), /* fmul */
294 COSTS_N_INSNS (13), /* fdivs */
295 COSTS_N_INSNS (23), /* fdivd */
296 COSTS_N_INSNS (13), /* fsqrts */
297 COSTS_N_INSNS (23), /* fsqrtd */
298 COSTS_N_INSNS (4), /* imul */
299 COSTS_N_INSNS (4), /* imulX */
300 2, /* imul bit factor */
301 COSTS_N_INSNS (37), /* idiv */
302 COSTS_N_INSNS (68), /* idivX */
303 COSTS_N_INSNS (2), /* movcc/movr */
304 2, /* shift penalty */
305 };
306
307 static const
308 struct processor_costs ultrasparc3_costs = {
309 COSTS_N_INSNS (2), /* int load */
310 COSTS_N_INSNS (3), /* int signed load */
311 COSTS_N_INSNS (3), /* int zeroed load */
312 COSTS_N_INSNS (2), /* float load */
313 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
314 COSTS_N_INSNS (4), /* fadd, fsub */
315 COSTS_N_INSNS (5), /* fcmp */
316 COSTS_N_INSNS (3), /* fmov, fmovr */
317 COSTS_N_INSNS (4), /* fmul */
318 COSTS_N_INSNS (17), /* fdivs */
319 COSTS_N_INSNS (20), /* fdivd */
320 COSTS_N_INSNS (20), /* fsqrts */
321 COSTS_N_INSNS (29), /* fsqrtd */
322 COSTS_N_INSNS (6), /* imul */
323 COSTS_N_INSNS (6), /* imulX */
324 0, /* imul bit factor */
325 COSTS_N_INSNS (40), /* idiv */
326 COSTS_N_INSNS (71), /* idivX */
327 COSTS_N_INSNS (2), /* movcc/movr */
328 0, /* shift penalty */
329 };
330
331 static const
332 struct processor_costs niagara_costs = {
333 COSTS_N_INSNS (3), /* int load */
334 COSTS_N_INSNS (3), /* int signed load */
335 COSTS_N_INSNS (3), /* int zeroed load */
336 COSTS_N_INSNS (9), /* float load */
337 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
338 COSTS_N_INSNS (8), /* fadd, fsub */
339 COSTS_N_INSNS (26), /* fcmp */
340 COSTS_N_INSNS (8), /* fmov, fmovr */
341 COSTS_N_INSNS (29), /* fmul */
342 COSTS_N_INSNS (54), /* fdivs */
343 COSTS_N_INSNS (83), /* fdivd */
344 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
345 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
346 COSTS_N_INSNS (11), /* imul */
347 COSTS_N_INSNS (11), /* imulX */
348 0, /* imul bit factor */
349 COSTS_N_INSNS (72), /* idiv */
350 COSTS_N_INSNS (72), /* idivX */
351 COSTS_N_INSNS (1), /* movcc/movr */
352 0, /* shift penalty */
353 };
354
355 static const
356 struct processor_costs niagara2_costs = {
357 COSTS_N_INSNS (3), /* int load */
358 COSTS_N_INSNS (3), /* int signed load */
359 COSTS_N_INSNS (3), /* int zeroed load */
360 COSTS_N_INSNS (3), /* float load */
361 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
362 COSTS_N_INSNS (6), /* fadd, fsub */
363 COSTS_N_INSNS (6), /* fcmp */
364 COSTS_N_INSNS (6), /* fmov, fmovr */
365 COSTS_N_INSNS (6), /* fmul */
366 COSTS_N_INSNS (19), /* fdivs */
367 COSTS_N_INSNS (33), /* fdivd */
368 COSTS_N_INSNS (19), /* fsqrts */
369 COSTS_N_INSNS (33), /* fsqrtd */
370 COSTS_N_INSNS (5), /* imul */
371 COSTS_N_INSNS (5), /* imulX */
372 0, /* imul bit factor */
373 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
374 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (1), /* movcc/movr */
376 0, /* shift penalty */
377 };
378
379 static const
380 struct processor_costs niagara3_costs = {
381 COSTS_N_INSNS (3), /* int load */
382 COSTS_N_INSNS (3), /* int signed load */
383 COSTS_N_INSNS (3), /* int zeroed load */
384 COSTS_N_INSNS (3), /* float load */
385 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
386 COSTS_N_INSNS (9), /* fadd, fsub */
387 COSTS_N_INSNS (9), /* fcmp */
388 COSTS_N_INSNS (9), /* fmov, fmovr */
389 COSTS_N_INSNS (9), /* fmul */
390 COSTS_N_INSNS (23), /* fdivs */
391 COSTS_N_INSNS (37), /* fdivd */
392 COSTS_N_INSNS (23), /* fsqrts */
393 COSTS_N_INSNS (37), /* fsqrtd */
394 COSTS_N_INSNS (9), /* imul */
395 COSTS_N_INSNS (9), /* imulX */
396 0, /* imul bit factor */
397 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
398 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
399 COSTS_N_INSNS (1), /* movcc/movr */
400 0, /* shift penalty */
401 };
402
403 static const
404 struct processor_costs niagara4_costs = {
405 COSTS_N_INSNS (5), /* int load */
406 COSTS_N_INSNS (5), /* int signed load */
407 COSTS_N_INSNS (5), /* int zeroed load */
408 COSTS_N_INSNS (5), /* float load */
409 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
410 COSTS_N_INSNS (11), /* fadd, fsub */
411 COSTS_N_INSNS (11), /* fcmp */
412 COSTS_N_INSNS (11), /* fmov, fmovr */
413 COSTS_N_INSNS (11), /* fmul */
414 COSTS_N_INSNS (24), /* fdivs */
415 COSTS_N_INSNS (37), /* fdivd */
416 COSTS_N_INSNS (24), /* fsqrts */
417 COSTS_N_INSNS (37), /* fsqrtd */
418 COSTS_N_INSNS (12), /* imul */
419 COSTS_N_INSNS (12), /* imulX */
420 0, /* imul bit factor */
421 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
422 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
423 COSTS_N_INSNS (1), /* movcc/movr */
424 0, /* shift penalty */
425 };
426
427 static const
428 struct processor_costs niagara7_costs = {
429 COSTS_N_INSNS (5), /* int load */
430 COSTS_N_INSNS (5), /* int signed load */
431 COSTS_N_INSNS (5), /* int zeroed load */
432 COSTS_N_INSNS (5), /* float load */
433 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
434 COSTS_N_INSNS (11), /* fadd, fsub */
435 COSTS_N_INSNS (11), /* fcmp */
436 COSTS_N_INSNS (11), /* fmov, fmovr */
437 COSTS_N_INSNS (11), /* fmul */
438 COSTS_N_INSNS (24), /* fdivs */
439 COSTS_N_INSNS (37), /* fdivd */
440 COSTS_N_INSNS (24), /* fsqrts */
441 COSTS_N_INSNS (37), /* fsqrtd */
442 COSTS_N_INSNS (12), /* imul */
443 COSTS_N_INSNS (12), /* imulX */
444 0, /* imul bit factor */
445 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
446 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
447 COSTS_N_INSNS (1), /* movcc/movr */
448 0, /* shift penalty */
449 };
450
451 static const struct processor_costs *sparc_costs = &cypress_costs;
452
453 #ifdef HAVE_AS_RELAX_OPTION
454 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
455 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
456 	   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
457 	   whether somebody branches between the sethi and jmp.  */
458 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
459 #else
460 #define LEAF_SIBCALL_SLOT_RESERVED_P \
461 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
462 #endif
463
464 /* Vector to say how input registers are mapped to output registers.
465 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
466 eliminate it. You must use -fomit-frame-pointer to get that. */
467 char leaf_reg_remap[] =
468 { 0, 1, 2, 3, 4, 5, 6, 7,
469 -1, -1, -1, -1, -1, -1, 14, -1,
470 -1, -1, -1, -1, -1, -1, -1, -1,
471 8, 9, 10, 11, 12, 13, -1, 15,
472
473 32, 33, 34, 35, 36, 37, 38, 39,
474 40, 41, 42, 43, 44, 45, 46, 47,
475 48, 49, 50, 51, 52, 53, 54, 55,
476 56, 57, 58, 59, 60, 61, 62, 63,
477 64, 65, 66, 67, 68, 69, 70, 71,
478 72, 73, 74, 75, 76, 77, 78, 79,
479 80, 81, 82, 83, 84, 85, 86, 87,
480 88, 89, 90, 91, 92, 93, 94, 95,
481 96, 97, 98, 99, 100, 101, 102};
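/* Reading of the table above, assuming the standard SPARC hard register
   numbering (0-7 globals, 8-15 outs, 16-23 locals, 24-31 ins): in a leaf
   function the incoming registers %i0-%i5 (hard regs 24-29) are remapped
   onto the outgoing registers %o0-%o5 (hard regs 8-13) and %i7 (31) onto
   %o7 (15), while %fp (30) and the locals get no remapping (-1).  */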
482
483 /* Vector, indexed by hard register number, which contains 1
484 for a register that is allowable in a candidate for leaf
485 function treatment. */
486 char sparc_leaf_regs[] =
487 { 1, 1, 1, 1, 1, 1, 1, 1,
488 0, 0, 0, 0, 0, 0, 1, 0,
489 0, 0, 0, 0, 0, 0, 0, 0,
490 1, 1, 1, 1, 1, 1, 0, 1,
491 1, 1, 1, 1, 1, 1, 1, 1,
492 1, 1, 1, 1, 1, 1, 1, 1,
493 1, 1, 1, 1, 1, 1, 1, 1,
494 1, 1, 1, 1, 1, 1, 1, 1,
495 1, 1, 1, 1, 1, 1, 1, 1,
496 1, 1, 1, 1, 1, 1, 1, 1,
497 1, 1, 1, 1, 1, 1, 1, 1,
498 1, 1, 1, 1, 1, 1, 1, 1,
499 1, 1, 1, 1, 1, 1, 1};
500
501 struct GTY(()) machine_function
502 {
503 /* Size of the frame of the function. */
504 HOST_WIDE_INT frame_size;
505
506 /* Size of the frame of the function minus the register window save area
507 and the outgoing argument area. */
508 HOST_WIDE_INT apparent_frame_size;
509
510 /* Register we pretend the frame pointer is allocated to. Normally, this
511 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
512 record "offset" separately as it may be too big for (reg + disp). */
513 rtx frame_base_reg;
514 HOST_WIDE_INT frame_base_offset;
515
516 /* Number of global or FP registers to be saved (as 4-byte quantities). */
517 int n_global_fp_regs;
518
519 /* True if the current function is leaf and uses only leaf regs,
520 so that the SPARC leaf function optimization can be applied.
521 Private version of crtl->uses_only_leaf_regs, see
522 sparc_expand_prologue for the rationale. */
523 int leaf_function_p;
524
525 /* True if the prologue saves local or in registers. */
526 bool save_local_in_regs_p;
527
528 /* True if the data calculated by sparc_expand_prologue are valid. */
529 bool prologue_data_valid_p;
530 };
531
532 #define sparc_frame_size cfun->machine->frame_size
533 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
534 #define sparc_frame_base_reg cfun->machine->frame_base_reg
535 #define sparc_frame_base_offset cfun->machine->frame_base_offset
536 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
537 #define sparc_leaf_function_p cfun->machine->leaf_function_p
538 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
539 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
540
541 /* 1 if the next opcode is to be specially indented. */
542 int sparc_indent_opcode = 0;
543
544 static void sparc_option_override (void);
545 static void sparc_init_modes (void);
546 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
547 const_tree, bool, bool, int *, int *);
548
549 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
550 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
551
552 static void sparc_emit_set_const32 (rtx, rtx);
553 static void sparc_emit_set_const64 (rtx, rtx);
554 static void sparc_output_addr_vec (rtx);
555 static void sparc_output_addr_diff_vec (rtx);
556 static void sparc_output_deferred_case_vectors (void);
557 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
558 static bool sparc_legitimate_constant_p (machine_mode, rtx);
559 static rtx sparc_builtin_saveregs (void);
560 static int epilogue_renumber (rtx *, int);
561 static bool sparc_assemble_integer (rtx, unsigned int, int);
562 static int set_extends (rtx_insn *);
563 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
564 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
565 #ifdef TARGET_SOLARIS
566 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
567 tree) ATTRIBUTE_UNUSED;
568 #endif
569 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
570 static int sparc_issue_rate (void);
571 static void sparc_sched_init (FILE *, int, int);
572 static int sparc_use_sched_lookahead (void);
573
574 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
575 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
576 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
577 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
578 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
579
580 static bool sparc_function_ok_for_sibcall (tree, tree);
581 static void sparc_init_libfuncs (void);
582 static void sparc_init_builtins (void);
583 static void sparc_fpu_init_builtins (void);
584 static void sparc_vis_init_builtins (void);
585 static tree sparc_builtin_decl (unsigned, bool);
586 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
587 static tree sparc_fold_builtin (tree, int, tree *, bool);
588 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
589 HOST_WIDE_INT, tree);
590 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
591 HOST_WIDE_INT, const_tree);
592 static struct machine_function * sparc_init_machine_status (void);
593 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
594 static rtx sparc_tls_get_addr (void);
595 static rtx sparc_tls_got (void);
596 static int sparc_register_move_cost (machine_mode,
597 reg_class_t, reg_class_t);
598 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
599 static rtx sparc_function_value (const_tree, const_tree, bool);
600 static rtx sparc_libcall_value (machine_mode, const_rtx);
601 static bool sparc_function_value_regno_p (const unsigned int);
602 static rtx sparc_struct_value_rtx (tree, int);
603 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
604 int *, const_tree, int);
605 static bool sparc_return_in_memory (const_tree, const_tree);
606 static bool sparc_strict_argument_naming (cumulative_args_t);
607 static void sparc_va_start (tree, rtx);
608 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
609 static bool sparc_vector_mode_supported_p (machine_mode);
610 static bool sparc_tls_referenced_p (rtx);
611 static rtx sparc_legitimize_tls_address (rtx);
612 static rtx sparc_legitimize_pic_address (rtx, rtx);
613 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
614 static rtx sparc_delegitimize_address (rtx);
615 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
616 static bool sparc_pass_by_reference (cumulative_args_t,
617 machine_mode, const_tree, bool);
618 static void sparc_function_arg_advance (cumulative_args_t,
619 machine_mode, const_tree, bool);
620 static rtx sparc_function_arg_1 (cumulative_args_t,
621 machine_mode, const_tree, bool, bool);
622 static rtx sparc_function_arg (cumulative_args_t,
623 machine_mode, const_tree, bool);
624 static rtx sparc_function_incoming_arg (cumulative_args_t,
625 machine_mode, const_tree, bool);
626 static unsigned int sparc_function_arg_boundary (machine_mode,
627 const_tree);
628 static int sparc_arg_partial_bytes (cumulative_args_t,
629 machine_mode, tree, bool);
630 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
631 static void sparc_file_end (void);
632 static bool sparc_frame_pointer_required (void);
633 static bool sparc_can_eliminate (const int, const int);
634 static rtx sparc_builtin_setjmp_frame_value (void);
635 static void sparc_conditional_register_usage (void);
636 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
637 static const char *sparc_mangle_type (const_tree);
638 #endif
639 static void sparc_trampoline_init (rtx, tree, rtx);
640 static machine_mode sparc_preferred_simd_mode (machine_mode);
641 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
642 static bool sparc_lra_p (void);
643 static bool sparc_print_operand_punct_valid_p (unsigned char);
644 static void sparc_print_operand (FILE *, rtx, int);
645 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
646 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
647 machine_mode,
648 secondary_reload_info *);
649 static machine_mode sparc_cstore_mode (enum insn_code icode);
650 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
651 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
652 static unsigned int sparc_min_arithmetic_precision (void);
653 \f
654 #ifdef SUBTARGET_ATTRIBUTE_TABLE
655 /* Table of valid machine attributes. */
656 static const struct attribute_spec sparc_attribute_table[] =
657 {
658 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
659 do_diagnostic } */
660 SUBTARGET_ATTRIBUTE_TABLE,
661 { NULL, 0, 0, false, false, false, NULL, false }
662 };
663 #endif
664 \f
665 /* Option handling. */
666
667 /* Parsed value. */
668 enum cmodel sparc_cmodel;
669
670 char sparc_hard_reg_printed[8];
671
672 /* Initialize the GCC target structure. */
673
674 /* The default is to use .half rather than .short for aligned HI objects. */
675 #undef TARGET_ASM_ALIGNED_HI_OP
676 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
677
678 #undef TARGET_ASM_UNALIGNED_HI_OP
679 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
680 #undef TARGET_ASM_UNALIGNED_SI_OP
681 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
682 #undef TARGET_ASM_UNALIGNED_DI_OP
683 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
684
685 /* The target hook has to handle DI-mode values. */
686 #undef TARGET_ASM_INTEGER
687 #define TARGET_ASM_INTEGER sparc_assemble_integer
688
689 #undef TARGET_ASM_FUNCTION_PROLOGUE
690 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
691 #undef TARGET_ASM_FUNCTION_EPILOGUE
692 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
693
694 #undef TARGET_SCHED_ADJUST_COST
695 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
696 #undef TARGET_SCHED_ISSUE_RATE
697 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
698 #undef TARGET_SCHED_INIT
699 #define TARGET_SCHED_INIT sparc_sched_init
700 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
701 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
702
703 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
704 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
705
706 #undef TARGET_INIT_LIBFUNCS
707 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
708
709 #undef TARGET_LEGITIMIZE_ADDRESS
710 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
711 #undef TARGET_DELEGITIMIZE_ADDRESS
712 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
713 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
714 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
715
716 #undef TARGET_INIT_BUILTINS
717 #define TARGET_INIT_BUILTINS sparc_init_builtins
718 #undef TARGET_BUILTIN_DECL
719 #define TARGET_BUILTIN_DECL sparc_builtin_decl
720 #undef TARGET_EXPAND_BUILTIN
721 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
722 #undef TARGET_FOLD_BUILTIN
723 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
724
725 #if TARGET_TLS
726 #undef TARGET_HAVE_TLS
727 #define TARGET_HAVE_TLS true
728 #endif
729
730 #undef TARGET_CANNOT_FORCE_CONST_MEM
731 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
732
733 #undef TARGET_ASM_OUTPUT_MI_THUNK
734 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
735 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
736 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
737
738 #undef TARGET_RTX_COSTS
739 #define TARGET_RTX_COSTS sparc_rtx_costs
740 #undef TARGET_ADDRESS_COST
741 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
742 #undef TARGET_REGISTER_MOVE_COST
743 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
744
745 #undef TARGET_PROMOTE_FUNCTION_MODE
746 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
747
748 #undef TARGET_FUNCTION_VALUE
749 #define TARGET_FUNCTION_VALUE sparc_function_value
750 #undef TARGET_LIBCALL_VALUE
751 #define TARGET_LIBCALL_VALUE sparc_libcall_value
752 #undef TARGET_FUNCTION_VALUE_REGNO_P
753 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
754
755 #undef TARGET_STRUCT_VALUE_RTX
756 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
757 #undef TARGET_RETURN_IN_MEMORY
758 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
759 #undef TARGET_MUST_PASS_IN_STACK
760 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
761 #undef TARGET_PASS_BY_REFERENCE
762 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
763 #undef TARGET_ARG_PARTIAL_BYTES
764 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
765 #undef TARGET_FUNCTION_ARG_ADVANCE
766 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
767 #undef TARGET_FUNCTION_ARG
768 #define TARGET_FUNCTION_ARG sparc_function_arg
769 #undef TARGET_FUNCTION_INCOMING_ARG
770 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
771 #undef TARGET_FUNCTION_ARG_BOUNDARY
772 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
773
774 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
775 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
776 #undef TARGET_STRICT_ARGUMENT_NAMING
777 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
778
779 #undef TARGET_EXPAND_BUILTIN_VA_START
780 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
781 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
782 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
783
784 #undef TARGET_VECTOR_MODE_SUPPORTED_P
785 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
786
787 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
788 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
789
790 #ifdef SUBTARGET_INSERT_ATTRIBUTES
791 #undef TARGET_INSERT_ATTRIBUTES
792 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
793 #endif
794
795 #ifdef SUBTARGET_ATTRIBUTE_TABLE
796 #undef TARGET_ATTRIBUTE_TABLE
797 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
798 #endif
799
800 #undef TARGET_OPTION_OVERRIDE
801 #define TARGET_OPTION_OVERRIDE sparc_option_override
802
803 #ifdef TARGET_THREAD_SSP_OFFSET
804 #undef TARGET_STACK_PROTECT_GUARD
805 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
806 #endif
807
808 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
809 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
810 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
811 #endif
812
813 #undef TARGET_ASM_FILE_END
814 #define TARGET_ASM_FILE_END sparc_file_end
815
816 #undef TARGET_FRAME_POINTER_REQUIRED
817 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
818
819 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
820 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
821
822 #undef TARGET_CAN_ELIMINATE
823 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
824
825 #undef TARGET_PREFERRED_RELOAD_CLASS
826 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
827
828 #undef TARGET_SECONDARY_RELOAD
829 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
830
831 #undef TARGET_CONDITIONAL_REGISTER_USAGE
832 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
833
834 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
835 #undef TARGET_MANGLE_TYPE
836 #define TARGET_MANGLE_TYPE sparc_mangle_type
837 #endif
838
839 #undef TARGET_LRA_P
840 #define TARGET_LRA_P sparc_lra_p
841
842 #undef TARGET_LEGITIMATE_ADDRESS_P
843 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
844
845 #undef TARGET_LEGITIMATE_CONSTANT_P
846 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
847
848 #undef TARGET_TRAMPOLINE_INIT
849 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
850
851 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
852 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
853 #undef TARGET_PRINT_OPERAND
854 #define TARGET_PRINT_OPERAND sparc_print_operand
855 #undef TARGET_PRINT_OPERAND_ADDRESS
856 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
857
858 /* The value stored by LDSTUB. */
859 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
860 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
861
862 #undef TARGET_CSTORE_MODE
863 #define TARGET_CSTORE_MODE sparc_cstore_mode
864
865 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
866 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
867
868 #undef TARGET_FIXED_CONDITION_CODE_REGS
869 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
870
871 #undef TARGET_MIN_ARITHMETIC_PRECISION
872 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
873
874 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
875 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
876
877 struct gcc_target targetm = TARGET_INITIALIZER;
878
879 /* Return the memory reference contained in X if any, zero otherwise. */
880
881 static rtx
882 mem_ref (rtx x)
883 {
884 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
885 x = XEXP (x, 0);
886
887 if (MEM_P (x))
888 return x;
889
890 return NULL_RTX;
891 }
892
893 /* We use a machine specific pass to enable workarounds for errata.
894
895 We need to have the (essentially) final form of the insn stream in order
896 to properly detect the various hazards. Therefore, this machine specific
897 pass runs as late as possible. */
898
899 static unsigned int
900 sparc_do_work_around_errata (void)
901 {
902 rtx_insn *insn, *next;
903
904 /* Force all instructions to be split into their final form. */
905 split_all_insns_noflow ();
906
907 /* Now look for specific patterns in the insn stream. */
908 for (insn = get_insns (); insn; insn = next)
909 {
910 bool insert_nop = false;
911 rtx set;
912
913 /* Look into the instruction in a delay slot. */
914 if (NONJUMP_INSN_P (insn))
915 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
916 insn = seq->insn (1);
917
918 /* Look for a single-word load into an odd-numbered FP register. */
919 if (sparc_fix_at697f
920 && NONJUMP_INSN_P (insn)
921 && (set = single_set (insn)) != NULL_RTX
922 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
923 && MEM_P (SET_SRC (set))
924 && REG_P (SET_DEST (set))
925 && REGNO (SET_DEST (set)) > 31
926 && REGNO (SET_DEST (set)) % 2 != 0)
927 {
928 /* The wrong dependency is on the enclosing double register. */
929 const unsigned int x = REGNO (SET_DEST (set)) - 1;
930 unsigned int src1, src2, dest;
931 int code;
932
933 next = next_active_insn (insn);
934 if (!next)
935 break;
936 /* If the insn is a branch, then it cannot be problematic. */
937 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
938 continue;
939
940 extract_insn (next);
941 code = INSN_CODE (next);
942
943 switch (code)
944 {
945 case CODE_FOR_adddf3:
946 case CODE_FOR_subdf3:
947 case CODE_FOR_muldf3:
948 case CODE_FOR_divdf3:
949 dest = REGNO (recog_data.operand[0]);
950 src1 = REGNO (recog_data.operand[1]);
951 src2 = REGNO (recog_data.operand[2]);
952 if (src1 != src2)
953 {
954 /* Case [1-4]:
955 ld [address], %fx+1
956 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
957 if ((src1 == x || src2 == x)
958 && (dest == src1 || dest == src2))
959 insert_nop = true;
960 }
961 else
962 {
963 /* Case 5:
964 ld [address], %fx+1
965 FPOPd %fx, %fx, %fx */
966 if (src1 == x
967 && dest == src1
968 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
969 insert_nop = true;
970 }
971 break;
972
973 case CODE_FOR_sqrtdf2:
974 dest = REGNO (recog_data.operand[0]);
975 src1 = REGNO (recog_data.operand[1]);
976 /* Case 6:
977 ld [address], %fx+1
978 fsqrtd %fx, %fx */
979 if (src1 == x && dest == src1)
980 insert_nop = true;
981 break;
982
983 default:
984 break;
985 }
986 }
987
988 /* Look for a single-word load into an integer register. */
989 else if (sparc_fix_ut699
990 && NONJUMP_INSN_P (insn)
991 && (set = single_set (insn)) != NULL_RTX
992 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
993 && mem_ref (SET_SRC (set)) != NULL_RTX
994 && REG_P (SET_DEST (set))
995 && REGNO (SET_DEST (set)) < 32)
996 {
997 /* There is no problem if the second memory access has a data
998 dependency on the first single-cycle load. */
999 rtx x = SET_DEST (set);
1000
1001 next = next_active_insn (insn);
1002 if (!next)
1003 break;
1004 /* If the insn is a branch, then it cannot be problematic. */
1005 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1006 continue;
1007
1008 /* Look for a second memory access to/from an integer register. */
1009 if ((set = single_set (next)) != NULL_RTX)
1010 {
1011 rtx src = SET_SRC (set);
1012 rtx dest = SET_DEST (set);
1013 rtx mem;
1014
1015 /* LDD is affected. */
1016 if ((mem = mem_ref (src)) != NULL_RTX
1017 && REG_P (dest)
1018 && REGNO (dest) < 32
1019 && !reg_mentioned_p (x, XEXP (mem, 0)))
1020 insert_nop = true;
1021
1022 /* STD is *not* affected. */
1023 else if (MEM_P (dest)
1024 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1025 && (src == CONST0_RTX (GET_MODE (dest))
1026 || (REG_P (src)
1027 && REGNO (src) < 32
1028 && REGNO (src) != REGNO (x)))
1029 && !reg_mentioned_p (x, XEXP (dest, 0)))
1030 insert_nop = true;
1031 }
1032 }
1033
1034 /* Look for a single-word load/operation into an FP register. */
1035 else if (sparc_fix_ut699
1036 && NONJUMP_INSN_P (insn)
1037 && (set = single_set (insn)) != NULL_RTX
1038 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1039 && REG_P (SET_DEST (set))
1040 && REGNO (SET_DEST (set)) > 31)
1041 {
1042 /* Number of instructions in the problematic window. */
1043 const int n_insns = 4;
1044 /* The problematic combination is with the sibling FP register. */
1045 const unsigned int x = REGNO (SET_DEST (set));
1046 const unsigned int y = x ^ 1;
1047 rtx_insn *after;
1048 int i;
1049
1050 next = next_active_insn (insn);
1051 if (!next)
1052 break;
1053 /* If the insn is a branch, then it cannot be problematic. */
1054 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1055 continue;
1056
1057 /* Look for a second load/operation into the sibling FP register. */
1058 if (!((set = single_set (next)) != NULL_RTX
1059 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1060 && REG_P (SET_DEST (set))
1061 && REGNO (SET_DEST (set)) == y))
1062 continue;
1063
1064 /* Look for a (possible) store from the FP register in the next N
1065 instructions, but bail out if it is again modified or if there
1066 is a store from the sibling FP register before this store. */
1067 for (after = next, i = 0; i < n_insns; i++)
1068 {
1069 bool branch_p;
1070
1071 after = next_active_insn (after);
1072 if (!after)
1073 break;
1074
1075 /* This is a branch with an empty delay slot. */
1076 if (!NONJUMP_INSN_P (after))
1077 {
1078 if (++i == n_insns)
1079 break;
1080 branch_p = true;
1081 after = NULL;
1082 }
1083 /* This is a branch with a filled delay slot. */
1084 else if (rtx_sequence *seq =
1085 dyn_cast <rtx_sequence *> (PATTERN (after)))
1086 {
1087 if (++i == n_insns)
1088 break;
1089 branch_p = true;
1090 after = seq->insn (1);
1091 }
1092 /* This is a regular instruction. */
1093 else
1094 branch_p = false;
1095
1096 if (after && (set = single_set (after)) != NULL_RTX)
1097 {
1098 const rtx src = SET_SRC (set);
1099 const rtx dest = SET_DEST (set);
1100 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1101
1102 /* If the FP register is again modified before the store,
1103 then the store isn't affected. */
1104 if (REG_P (dest)
1105 && (REGNO (dest) == x
1106 || (REGNO (dest) == y && size == 8)))
1107 break;
1108
1109 if (MEM_P (dest) && REG_P (src))
1110 {
1111 /* If there is a store from the sibling FP register
1112 before the store, then the store is not affected. */
1113 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1114 break;
1115
1116 /* Otherwise, the store is affected. */
1117 if (REGNO (src) == x && size == 4)
1118 {
1119 insert_nop = true;
1120 break;
1121 }
1122 }
1123 }
1124
1125 /* If we have a branch in the first M instructions, then we
1126 cannot see the (M+2)th instruction so we play safe. */
1127 if (branch_p && i <= (n_insns - 2))
1128 {
1129 insert_nop = true;
1130 break;
1131 }
1132 }
1133 }
1134
1135 else
1136 next = NEXT_INSN (insn);
1137
1138 if (insert_nop)
1139 emit_insn_before (gen_nop (), next);
1140 }
1141
1142 return 0;
1143 }
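/* A concrete sketch of the AT697F case handled above (register numbers
   chosen only for illustration):

     ld    [%o0], %f3        ! single-word load into odd FP register %f3
     faddd %f2, %f4, %f2     ! double op reading/writing the %f2:%f3 pair

   Here src1 == dest and both name the enclosing double register of %f3,
   so the pass emits a nop between the two instructions.  */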
1144
1145 namespace {
1146
1147 const pass_data pass_data_work_around_errata =
1148 {
1149 RTL_PASS, /* type */
1150 "errata", /* name */
1151 OPTGROUP_NONE, /* optinfo_flags */
1152 TV_MACH_DEP, /* tv_id */
1153 0, /* properties_required */
1154 0, /* properties_provided */
1155 0, /* properties_destroyed */
1156 0, /* todo_flags_start */
1157 0, /* todo_flags_finish */
1158 };
1159
1160 class pass_work_around_errata : public rtl_opt_pass
1161 {
1162 public:
1163 pass_work_around_errata(gcc::context *ctxt)
1164 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1165 {}
1166
1167 /* opt_pass methods: */
1168 virtual bool gate (function *)
1169 {
1170 /* The only errata we handle are those of the AT697F and UT699. */
1171 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1172 }
1173
1174 virtual unsigned int execute (function *)
1175 {
1176 return sparc_do_work_around_errata ();
1177 }
1178
1179 }; // class pass_work_around_errata
1180
1181 } // anon namespace
1182
1183 rtl_opt_pass *
1184 make_pass_work_around_errata (gcc::context *ctxt)
1185 {
1186 return new pass_work_around_errata (ctxt);
1187 }
1188
1189 /* Helpers for TARGET_DEBUG_OPTIONS. */
1190 static void
1191 dump_target_flag_bits (const int flags)
1192 {
1193 if (flags & MASK_64BIT)
1194 fprintf (stderr, "64BIT ");
1195 if (flags & MASK_APP_REGS)
1196 fprintf (stderr, "APP_REGS ");
1197 if (flags & MASK_FASTER_STRUCTS)
1198 fprintf (stderr, "FASTER_STRUCTS ");
1199 if (flags & MASK_FLAT)
1200 fprintf (stderr, "FLAT ");
1201 if (flags & MASK_FMAF)
1202 fprintf (stderr, "FMAF ");
1203 if (flags & MASK_FPU)
1204 fprintf (stderr, "FPU ");
1205 if (flags & MASK_HARD_QUAD)
1206 fprintf (stderr, "HARD_QUAD ");
1207 if (flags & MASK_POPC)
1208 fprintf (stderr, "POPC ");
1209 if (flags & MASK_PTR64)
1210 fprintf (stderr, "PTR64 ");
1211 if (flags & MASK_STACK_BIAS)
1212 fprintf (stderr, "STACK_BIAS ");
1213 if (flags & MASK_UNALIGNED_DOUBLES)
1214 fprintf (stderr, "UNALIGNED_DOUBLES ");
1215 if (flags & MASK_V8PLUS)
1216 fprintf (stderr, "V8PLUS ");
1217 if (flags & MASK_VIS)
1218 fprintf (stderr, "VIS ");
1219 if (flags & MASK_VIS2)
1220 fprintf (stderr, "VIS2 ");
1221 if (flags & MASK_VIS3)
1222 fprintf (stderr, "VIS3 ");
1223 if (flags & MASK_VIS4)
1224 fprintf (stderr, "VIS4 ");
1225 if (flags & MASK_CBCOND)
1226 fprintf (stderr, "CBCOND ");
1227 if (flags & MASK_DEPRECATED_V8_INSNS)
1228 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1229 if (flags & MASK_SPARCLET)
1230 fprintf (stderr, "SPARCLET ");
1231 if (flags & MASK_SPARCLITE)
1232 fprintf (stderr, "SPARCLITE ");
1233 if (flags & MASK_V8)
1234 fprintf (stderr, "V8 ");
1235 if (flags & MASK_V9)
1236 fprintf (stderr, "V9 ");
1237 }
1238
1239 static void
1240 dump_target_flags (const char *prefix, const int flags)
1241 {
1242 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1243 dump_target_flag_bits (flags);
1244 	  fprintf (stderr, "]\n");
1245 }
1246
1247 /* Validate and override various options, and do some machine dependent
1248 initialization. */
1249
1250 static void
1251 sparc_option_override (void)
1252 {
1253 static struct code_model {
1254 const char *const name;
1255 const enum cmodel value;
1256 } const cmodels[] = {
1257 { "32", CM_32 },
1258 { "medlow", CM_MEDLOW },
1259 { "medmid", CM_MEDMID },
1260 { "medany", CM_MEDANY },
1261 { "embmedany", CM_EMBMEDANY },
1262 { NULL, (enum cmodel) 0 }
1263 };
1264 const struct code_model *cmodel;
1265 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1266 static struct cpu_default {
1267 const int cpu;
1268 const enum processor_type processor;
1269 } const cpu_default[] = {
1270 /* There must be one entry here for each TARGET_CPU value. */
1271 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1272 { TARGET_CPU_v8, PROCESSOR_V8 },
1273 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1274 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1275 { TARGET_CPU_leon, PROCESSOR_LEON },
1276 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1277 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1278 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1279 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1280 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1281 { TARGET_CPU_v9, PROCESSOR_V9 },
1282 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1283 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1284 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1285 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1286 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1287 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1288 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1289 { -1, PROCESSOR_V7 }
1290 };
1291 const struct cpu_default *def;
1292 /* Table of values for -m{cpu,tune}=. This must match the order of
1293 the enum processor_type in sparc-opts.h. */
1294 static struct cpu_table {
1295 const char *const name;
1296 const int disable;
1297 const int enable;
1298 } const cpu_table[] = {
1299 { "v7", MASK_ISA, 0 },
1300 { "cypress", MASK_ISA, 0 },
1301 { "v8", MASK_ISA, MASK_V8 },
1302 /* TI TMS390Z55 supersparc */
1303 { "supersparc", MASK_ISA, MASK_V8 },
1304 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1305 { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1306 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1307 { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
1308 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1309 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1310 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1311 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1312 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1313 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1314 { "sparclet", MASK_ISA, MASK_SPARCLET },
1315 /* TEMIC sparclet */
1316 { "tsc701", MASK_ISA, MASK_SPARCLET },
1317 { "v9", MASK_ISA, MASK_V9 },
1318 /* UltraSPARC I, II, IIi */
1319 { "ultrasparc", MASK_ISA,
1320 /* Although insns using %y are deprecated, it is a clear win. */
1321 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1322 /* UltraSPARC III */
1323 /* ??? Check if %y issue still holds true. */
1324 { "ultrasparc3", MASK_ISA,
1325 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1326 /* UltraSPARC T1 */
1327 { "niagara", MASK_ISA,
1328 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1329 /* UltraSPARC T2 */
1330 { "niagara2", MASK_ISA,
1331 MASK_V9|MASK_POPC|MASK_VIS2 },
1332 /* UltraSPARC T3 */
1333 { "niagara3", MASK_ISA,
1334 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1335 /* UltraSPARC T4 */
1336 { "niagara4", MASK_ISA,
1337 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1338 /* UltraSPARC M7 */
1339 { "niagara7", MASK_ISA,
1340 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1341 };
1342 const struct cpu_table *cpu;
1343 unsigned int i;
1344
1345 if (sparc_debug_string != NULL)
1346 {
1347 const char *q;
1348 char *p;
1349
1350 p = ASTRDUP (sparc_debug_string);
1351 while ((q = strtok (p, ",")) != NULL)
1352 {
1353 bool invert;
1354 int mask;
1355
1356 p = NULL;
1357 if (*q == '!')
1358 {
1359 invert = true;
1360 q++;
1361 }
1362 else
1363 invert = false;
1364
1365 if (! strcmp (q, "all"))
1366 mask = MASK_DEBUG_ALL;
1367 else if (! strcmp (q, "options"))
1368 mask = MASK_DEBUG_OPTIONS;
1369 else
1370 error ("unknown -mdebug-%s switch", q);
1371
1372 if (invert)
1373 sparc_debug &= ~mask;
1374 else
1375 sparc_debug |= mask;
1376 }
1377 }
1378
1379 if (TARGET_DEBUG_OPTIONS)
1380 {
1381 	      dump_target_flags ("Initial target_flags", target_flags);
1382 	      dump_target_flags ("target_flags_explicit", target_flags_explicit);
1383 }
1384
1385 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1386 SUBTARGET_OVERRIDE_OPTIONS;
1387 #endif
1388
1389 #ifndef SPARC_BI_ARCH
1390 /* Check for unsupported architecture size. */
1391 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1392 error ("%s is not supported by this configuration",
1393 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1394 #endif
1395
1396 	  /* We force all 64-bit archs to use 128-bit long double.  */
1397 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1398 {
1399 error ("-mlong-double-64 not allowed with -m64");
1400 target_flags |= MASK_LONG_DOUBLE_128;
1401 }
1402
1403 /* Code model selection. */
1404 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1405
1406 #ifdef SPARC_BI_ARCH
1407 if (TARGET_ARCH32)
1408 sparc_cmodel = CM_32;
1409 #endif
1410
1411 if (sparc_cmodel_string != NULL)
1412 {
1413 if (TARGET_ARCH64)
1414 {
1415 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1416 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1417 break;
1418 if (cmodel->name == NULL)
1419 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1420 else
1421 sparc_cmodel = cmodel->value;
1422 }
1423 else
1424 error ("-mcmodel= is not supported on 32 bit systems");
1425 }
1426
1427 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1428 for (i = 8; i < 16; i++)
1429 if (!call_used_regs [i])
1430 {
1431 error ("-fcall-saved-REG is not supported for out registers");
1432 call_used_regs [i] = 1;
1433 }
1434
1435 /* Set the default CPU. */
1436 if (!global_options_set.x_sparc_cpu_and_features)
1437 {
1438 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1439 if (def->cpu == TARGET_CPU_DEFAULT)
1440 break;
1441 gcc_assert (def->cpu != -1);
1442 sparc_cpu_and_features = def->processor;
1443 }
1444
1445 if (!global_options_set.x_sparc_cpu)
1446 sparc_cpu = sparc_cpu_and_features;
1447
1448 cpu = &cpu_table[(int) sparc_cpu_and_features];
1449
1450 if (TARGET_DEBUG_OPTIONS)
1451 {
1452 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1453 fprintf (stderr, "sparc_cpu: %s\n",
1454 cpu_table[(int) sparc_cpu].name);
1455 dump_target_flags ("cpu->disable", cpu->disable);
1456 dump_target_flags ("cpu->enable", cpu->enable);
1457 }
1458
1459 target_flags &= ~cpu->disable;
1460 target_flags |= (cpu->enable
1461 #ifndef HAVE_AS_FMAF_HPC_VIS3
1462 & ~(MASK_FMAF | MASK_VIS3)
1463 #endif
1464 #ifndef HAVE_AS_SPARC4
1465 & ~MASK_CBCOND
1466 #endif
1467 #ifndef HAVE_AS_SPARC5_VIS4
1468 & ~(MASK_VIS4 | MASK_SUBXC)
1469 #endif
1470 #ifndef HAVE_AS_LEON
1471 & ~(MASK_LEON | MASK_LEON3)
1472 #endif
1473 & ~(target_flags_explicit & MASK_FEATURES)
1474 );
1475
1476 /* -mvis2 implies -mvis. */
1477 if (TARGET_VIS2)
1478 target_flags |= MASK_VIS;
1479
1480 /* -mvis3 implies -mvis2 and -mvis. */
1481 if (TARGET_VIS3)
1482 target_flags |= MASK_VIS2 | MASK_VIS;
1483
1484 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1485 if (TARGET_VIS4)
1486 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1487
1488 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
1489 disabled. */
1490 if (! TARGET_FPU)
1491 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1492 | MASK_FMAF);
1493
1494 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1495 are available; -m64 also implies v9. */
1496 if (TARGET_VIS || TARGET_ARCH64)
1497 {
1498 target_flags |= MASK_V9;
1499 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1500 }
1501
1502 /* -mvis also implies -mv8plus on 32-bit. */
1503 if (TARGET_VIS && ! TARGET_ARCH64)
1504 target_flags |= MASK_V8PLUS;
1505
1506 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1507 if (TARGET_V9 && TARGET_ARCH32)
1508 target_flags |= MASK_DEPRECATED_V8_INSNS;
1509
1510 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1511 if (! TARGET_V9 || TARGET_ARCH64)
1512 target_flags &= ~MASK_V8PLUS;
1513
1514 /* Don't use stack biasing in 32 bit mode. */
1515 if (TARGET_ARCH32)
1516 target_flags &= ~MASK_STACK_BIAS;
1517
1518 /* Use LRA instead of reload, unless otherwise instructed. */
1519 if (!(target_flags_explicit & MASK_LRA))
1520 target_flags |= MASK_LRA;
1521
1522 /* Supply a default value for align_functions. */
1523 if (align_functions == 0)
1524 {
1525 if (sparc_cpu == PROCESSOR_ULTRASPARC
1526 || sparc_cpu == PROCESSOR_ULTRASPARC3
1527 || sparc_cpu == PROCESSOR_NIAGARA
1528 || sparc_cpu == PROCESSOR_NIAGARA2
1529 || sparc_cpu == PROCESSOR_NIAGARA3
1530 || sparc_cpu == PROCESSOR_NIAGARA4)
1531 align_functions = 32;
1532 else if (sparc_cpu == PROCESSOR_NIAGARA7)
1533 align_functions = 64;
1534 }
1535
1536 /* Validate PCC_STRUCT_RETURN. */
1537 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1538 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1539
1540 /* Only use .uaxword when compiling for a 64-bit target. */
1541 if (!TARGET_ARCH64)
1542 targetm.asm_out.unaligned_op.di = NULL;
1543
1544 /* Do various machine dependent initializations. */
1545 sparc_init_modes ();
1546
1547 /* Set up function hooks. */
1548 init_machine_status = sparc_init_machine_status;
1549
1550 switch (sparc_cpu)
1551 {
1552 case PROCESSOR_V7:
1553 case PROCESSOR_CYPRESS:
1554 sparc_costs = &cypress_costs;
1555 break;
1556 case PROCESSOR_V8:
1557 case PROCESSOR_SPARCLITE:
1558 case PROCESSOR_SUPERSPARC:
1559 sparc_costs = &supersparc_costs;
1560 break;
1561 case PROCESSOR_F930:
1562 case PROCESSOR_F934:
1563 case PROCESSOR_HYPERSPARC:
1564 case PROCESSOR_SPARCLITE86X:
1565 sparc_costs = &hypersparc_costs;
1566 break;
1567 case PROCESSOR_LEON:
1568 sparc_costs = &leon_costs;
1569 break;
1570 case PROCESSOR_LEON3:
1571 case PROCESSOR_LEON3V7:
1572 sparc_costs = &leon3_costs;
1573 break;
1574 case PROCESSOR_SPARCLET:
1575 case PROCESSOR_TSC701:
1576 sparc_costs = &sparclet_costs;
1577 break;
1578 case PROCESSOR_V9:
1579 case PROCESSOR_ULTRASPARC:
1580 sparc_costs = &ultrasparc_costs;
1581 break;
1582 case PROCESSOR_ULTRASPARC3:
1583 sparc_costs = &ultrasparc3_costs;
1584 break;
1585 case PROCESSOR_NIAGARA:
1586 sparc_costs = &niagara_costs;
1587 break;
1588 case PROCESSOR_NIAGARA2:
1589 sparc_costs = &niagara2_costs;
1590 break;
1591 case PROCESSOR_NIAGARA3:
1592 sparc_costs = &niagara3_costs;
1593 break;
1594 case PROCESSOR_NIAGARA4:
1595 sparc_costs = &niagara4_costs;
1596 break;
1597 case PROCESSOR_NIAGARA7:
1598 sparc_costs = &niagara7_costs;
1599 break;
1600 case PROCESSOR_NATIVE:
1601 gcc_unreachable ();
1602 };
1603
1604 if (sparc_memory_model == SMM_DEFAULT)
1605 {
1606 /* Choose the memory model for the operating system. */
1607 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1608 if (os_default != SMM_DEFAULT)
1609 sparc_memory_model = os_default;
1610 /* Choose the most relaxed model for the processor. */
1611 else if (TARGET_V9)
1612 sparc_memory_model = SMM_RMO;
1613 else if (TARGET_LEON3)
1614 sparc_memory_model = SMM_TSO;
1615 else if (TARGET_LEON)
1616 sparc_memory_model = SMM_SC;
1617 else if (TARGET_V8)
1618 sparc_memory_model = SMM_PSO;
1619 else
1620 sparc_memory_model = SMM_SC;
1621 }
1622
1623 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1624 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1625 target_flags |= MASK_LONG_DOUBLE_128;
1626 #endif
1627
1628 if (TARGET_DEBUG_OPTIONS)
1629 dump_target_flags ("Final target_flags", target_flags);
1630
1631 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1632 can run at the same time. More important, it is the threshold
1633 defining when additional prefetches will be dropped by the
1634 hardware.
1635
1636 The UltraSPARC-III features a documented prefetch queue with a
1637 size of 8. Additional prefetches issued in the cpu are
1638 dropped.
1639
1640 Niagara processors are different. In these processors prefetches
1641 are handled much like regular loads. The L1 miss buffer is 32
1642 entries, but prefetches start getting affected when 30 entries
1643 become occupied. That occupation could be a mix of regular loads
1644 and prefetches though. And that buffer is shared by all threads.
1645 Once the threshold is reached, if the core is running a single
1646 thread the prefetch will retry. If more than one thread is
1647 running, the prefetch will be dropped.
1648
1649 	     All this makes it very difficult to determine how many
1650 	     prefetches can be issued simultaneously, even in a
1651 single-threaded program. Experimental results show that setting
1652 this parameter to 32 works well when the number of threads is not
1653 high. */
1654 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1655 ((sparc_cpu == PROCESSOR_ULTRASPARC
1656 || sparc_cpu == PROCESSOR_NIAGARA
1657 || sparc_cpu == PROCESSOR_NIAGARA2
1658 || sparc_cpu == PROCESSOR_NIAGARA3
1659 || sparc_cpu == PROCESSOR_NIAGARA4)
1660 ? 2
1661 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1662 ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
1663 ? 32 : 3))),
1664 global_options.x_param_values,
1665 global_options_set.x_param_values);
1666
1667 /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
1668 params.def), so no maybe_set_param_value is needed.
1669
1670 The Oracle SPARC Architecture (previously the UltraSPARC
1671 Architecture) specification states that when a PREFETCH[A]
1672 instruction is executed an implementation-specific amount of data
1673 is prefetched, and that it is at least 64 bytes long (aligned to
1674 at least 64 bytes).
1675
1676 However, this is not correct. The M7 (and implementations prior
1677 to that) does not guarantee a 64B prefetch into a cache if the
1678 line size is smaller. A single cache line is all that is ever
1679 prefetched. So for the M7, where the L1D$ has 32B lines and the
1680 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1681 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1682 is a read_n prefetch, which is the only type which allocates to
1683 the L1.) */
1684
1685 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1686 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1687 Niagara processors feature an L1D$ of 16KB. */
1688 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1689 ((sparc_cpu == PROCESSOR_ULTRASPARC
1690 || sparc_cpu == PROCESSOR_ULTRASPARC3
1691 || sparc_cpu == PROCESSOR_NIAGARA
1692 || sparc_cpu == PROCESSOR_NIAGARA2
1693 || sparc_cpu == PROCESSOR_NIAGARA3
1694 || sparc_cpu == PROCESSOR_NIAGARA4
1695 || sparc_cpu == PROCESSOR_NIAGARA7)
1696 ? 16 : 64),
1697 global_options.x_param_values,
1698 global_options_set.x_param_values);
1699
1700
1701 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1702 that 512 is the default in params.def. */
1703 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1704 (sparc_cpu == PROCESSOR_NIAGARA4
1705 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1706 ? 256 : 512)),
1707 global_options.x_param_values,
1708 global_options_set.x_param_values);
1709
1710
1711 /* Disable save slot sharing for call-clobbered registers by default.
1712 The IRA sharing algorithm works on single registers only and this
1713 pessimizes for double floating-point registers. */
1714 if (!global_options_set.x_flag_ira_share_save_slots)
1715 flag_ira_share_save_slots = 0;
1716
1717 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1718 redundant 32-to-64-bit extensions. */
1719 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1720 flag_ree = 0;
1721 }
1722 \f
1723 /* Miscellaneous utilities. */
1724
1725 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1726 or branch on register contents instructions. */
1727
1728 int
1729 v9_regcmp_p (enum rtx_code code)
1730 {
1731 return (code == EQ || code == NE || code == GE || code == LT
1732 || code == LE || code == GT);
1733 }
1734
1735 /* Nonzero if OP is a floating point constant which can
1736 be loaded into an integer register using a single
1737 sethi instruction. */
1738
1739 int
1740 fp_sethi_p (rtx op)
1741 {
1742 if (GET_CODE (op) == CONST_DOUBLE)
1743 {
1744 long i;
1745
1746 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1747 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1748 }
1749
1750 return 0;
1751 }
1752
1753 /* Nonzero if OP is a floating point constant which can
1754 be loaded into an integer register using a single
1755 mov instruction. */
1756
1757 int
1758 fp_mov_p (rtx op)
1759 {
1760 if (GET_CODE (op) == CONST_DOUBLE)
1761 {
1762 long i;
1763
1764 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1765 return SPARC_SIMM13_P (i);
1766 }
1767
1768 return 0;
1769 }
1770
1771 /* Nonzero if OP is a floating point constant which can
1772 be loaded into an integer register using a high/losum
1773 instruction sequence. */
1774
1775 int
1776 fp_high_losum_p (rtx op)
1777 {
1778 /* The constraints calling this should only be in
1779 SFmode move insns, so any constant which cannot
1780 be moved using a single insn will do. */
1781 if (GET_CODE (op) == CONST_DOUBLE)
1782 {
1783 long i;
1784
1785 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1786 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1787 }
1788
1789 return 0;
1790 }
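
/* As an illustration of the three predicates above, consider some
   well-known IEEE-754 single-precision encodings:

     0.0f -> 0x00000000: fits in a signed 13-bit immediate, so
       fp_mov_p holds and a single mov suffices.
     1.0f -> 0x3f800000: the low 10 bits are clear but the value is
       too big for a simm13, so fp_sethi_p holds and a single sethi
       suffices.
     1.1f -> 0x3f8ccccd (approximately): neither form fits, so
       fp_high_losum_p holds and a sethi/or (high/lo_sum) pair is
       needed.  */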
1791
1792 /* Return true if the address of LABEL can be loaded by means of the
1793 mov{si,di}_pic_label_ref patterns in PIC mode. */
1794
1795 static bool
1796 can_use_mov_pic_label_ref (rtx label)
1797 {
1798 /* VxWorks does not impose a fixed gap between segments; the run-time
1799 gap can be different from the object-file gap. We therefore can't
1800 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1801 are absolutely sure that X is in the same segment as the GOT.
1802 Unfortunately, the flexibility of linker scripts means that we
1803 can't be sure of that in general, so assume that GOT-relative
1804 accesses are never valid on VxWorks. */
1805 if (TARGET_VXWORKS_RTP)
1806 return false;
1807
1808 /* Similarly, if the label is non-local, it might end up being placed
1809 in a different section than the current one; now mov_pic_label_ref
1810 requires the label and the code to be in the same section. */
1811 if (LABEL_REF_NONLOCAL_P (label))
1812 return false;
1813
1814 /* Finally, if we are reordering basic blocks and partitioning into hot
1815 and cold sections, this might happen for any label. */
1816 if (flag_reorder_blocks_and_partition)
1817 return false;
1818
1819 return true;
1820 }
1821
1822 /* Expand a move instruction. Return true if all work is done. */
1823
1824 bool
1825 sparc_expand_move (machine_mode mode, rtx *operands)
1826 {
1827 /* Handle sets of MEM first. */
1828 if (GET_CODE (operands[0]) == MEM)
1829 {
1830 /* The constant 0 is a register (%g0, or a pair of registers) on SPARC. */
1831 if (register_or_zero_operand (operands[1], mode))
1832 return false;
1833
1834 if (!reload_in_progress)
1835 {
1836 operands[0] = validize_mem (operands[0]);
1837 operands[1] = force_reg (mode, operands[1]);
1838 }
1839 }
1840
1841 /* Fixup TLS cases. */
1842 if (TARGET_HAVE_TLS
1843 && CONSTANT_P (operands[1])
1844 && sparc_tls_referenced_p (operands [1]))
1845 {
1846 operands[1] = sparc_legitimize_tls_address (operands[1]);
1847 return false;
1848 }
1849
1850 /* Fixup PIC cases. */
1851 if (flag_pic && CONSTANT_P (operands[1]))
1852 {
1853 if (pic_address_needs_scratch (operands[1]))
1854 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1855
1856 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1857 if (GET_CODE (operands[1]) == LABEL_REF
1858 && can_use_mov_pic_label_ref (operands[1]))
1859 {
1860 if (mode == SImode)
1861 {
1862 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1863 return true;
1864 }
1865
1866 if (mode == DImode)
1867 {
1868 gcc_assert (TARGET_ARCH64);
1869 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1870 return true;
1871 }
1872 }
1873
1874 if (symbolic_operand (operands[1], mode))
1875 {
1876 operands[1]
1877 = sparc_legitimize_pic_address (operands[1],
1878 reload_in_progress
1879 ? operands[0] : NULL_RTX);
1880 return false;
1881 }
1882 }
1883
1884 /* If we are trying to toss an integer constant into FP registers,
1885 or loading a FP or vector constant, force it into memory. */
1886 if (CONSTANT_P (operands[1])
1887 && REG_P (operands[0])
1888 && (SPARC_FP_REG_P (REGNO (operands[0]))
1889 || SCALAR_FLOAT_MODE_P (mode)
1890 || VECTOR_MODE_P (mode)))
1891 {
1892 /* emit_group_store will send such bogosity to us when it is
1893 not storing directly into memory. So fix this up to avoid
1894 crashes in output_constant_pool. */
1895 if (operands [1] == const0_rtx)
1896 operands[1] = CONST0_RTX (mode);
1897
1898 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1899 always other regs. */
1900 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1901 && (const_zero_operand (operands[1], mode)
1902 || const_all_ones_operand (operands[1], mode)))
1903 return false;
1904
1905 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1906 /* We are able to build any SF constant in integer registers
1907 with at most 2 instructions. */
1908 && (mode == SFmode
1909 /* And any DF constant in integer registers if needed. */
1910 || (mode == DFmode && !can_create_pseudo_p ())))
1911 return false;
1912
1913 operands[1] = force_const_mem (mode, operands[1]);
1914 if (!reload_in_progress)
1915 operands[1] = validize_mem (operands[1]);
1916 return false;
1917 }
1918
1919 /* Accept non-constants and valid constants unmodified. */
1920 if (!CONSTANT_P (operands[1])
1921 || GET_CODE (operands[1]) == HIGH
1922 || input_operand (operands[1], mode))
1923 return false;
1924
1925 switch (mode)
1926 {
1927 case QImode:
1928 /* All QImode constants require only one insn, so proceed. */
1929 break;
1930
1931 case HImode:
1932 case SImode:
1933 sparc_emit_set_const32 (operands[0], operands[1]);
1934 return true;
1935
1936 case DImode:
1937 /* input_operand should have filtered out 32-bit mode. */
1938 sparc_emit_set_const64 (operands[0], operands[1]);
1939 return true;
1940
1941 case TImode:
1942 {
1943 rtx high, low;
1944 /* TImode isn't available in 32-bit mode. */
1945 split_double (operands[1], &high, &low);
1946 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1947 high));
1948 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1949 low));
1950 }
1951 return true;
1952
1953 default:
1954 gcc_unreachable ();
1955 }
1956
1957 return false;
1958 }
1959
1960 /* Load OP1, a 32-bit constant, into OP0, a register.
1961 We know it can't be done in one insn when we get
1962 here, the move expander guarantees this. */
1963
1964 static void
1965 sparc_emit_set_const32 (rtx op0, rtx op1)
1966 {
1967 machine_mode mode = GET_MODE (op0);
1968 rtx temp = op0;
1969
1970 if (can_create_pseudo_p ())
1971 temp = gen_reg_rtx (mode);
1972
1973 if (GET_CODE (op1) == CONST_INT)
1974 {
1975 gcc_assert (!small_int_operand (op1, mode)
1976 && !const_high_operand (op1, mode));
1977
1978 /* Emit them as real moves instead of a HIGH/LO_SUM,
1979 this way CSE can see everything and reuse intermediate
1980 values if it wants. */
1981 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1982 & ~(HOST_WIDE_INT) 0x3ff)));
1983
1984 emit_insn (gen_rtx_SET (op0,
1985 gen_rtx_IOR (mode, temp,
1986 GEN_INT (INTVAL (op1) & 0x3ff))));
1987 }
1988 else
1989 {
1990 /* A symbol, emit in the traditional way. */
1991 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1992 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1993 }
1994 }
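
/* By way of illustration, loading the constant 0x12345678 through the
   CONST_INT path above amounts to the usual two-instruction idiom

     sethi %hi(0x12345678), %tmp   ! %tmp = 0x12345400
     or    %tmp, 0x278, %reg       ! %reg = 0x12345678

   except that both halves are exposed to the RTL optimizers as plain
   SETs of known constants rather than as a HIGH/LO_SUM pair.  */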
1995
1996 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1997 If TEMP is nonzero, we are forbidden to use any other scratch
1998 registers. Otherwise, we are allowed to generate them as needed.
1999
2000 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2001 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2002
2003 void
2004 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2005 {
2006 rtx temp1, temp2, temp3, temp4, temp5;
2007 rtx ti_temp = 0;
2008
2009 if (temp && GET_MODE (temp) == TImode)
2010 {
2011 ti_temp = temp;
2012 temp = gen_rtx_REG (DImode, REGNO (temp));
2013 }
2014
2015 /* SPARC-V9 code-model support. */
2016 switch (sparc_cmodel)
2017 {
2018 case CM_MEDLOW:
2019 /* The range spanned by all instructions in the object is less
2020 than 2^31 bytes (2GB) and the distance from any instruction
2021 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2022 than 2^31 bytes (2GB).
2023
2024 The executable must be in the low 4TB of the virtual address
2025 space.
2026
2027 sethi %hi(symbol), %temp1
2028 or %temp1, %lo(symbol), %reg */
2029 if (temp)
2030 temp1 = temp; /* op0 is allowed. */
2031 else
2032 temp1 = gen_reg_rtx (DImode);
2033
2034 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2035 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2036 break;
2037
2038 case CM_MEDMID:
2039 /* The range spanned by all instructions in the object is less
2040 than 2^31 bytes (2GB) and the distance from any instruction
2041 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2042 than 2^31 bytes (2GB).
2043
2044 The executable must be in the low 16TB of the virtual address
2045 space.
2046
2047 sethi %h44(symbol), %temp1
2048 or %temp1, %m44(symbol), %temp2
2049 sllx %temp2, 12, %temp3
2050 or %temp3, %l44(symbol), %reg */
2051 if (temp)
2052 {
2053 temp1 = op0;
2054 temp2 = op0;
2055 temp3 = temp; /* op0 is allowed. */
2056 }
2057 else
2058 {
2059 temp1 = gen_reg_rtx (DImode);
2060 temp2 = gen_reg_rtx (DImode);
2061 temp3 = gen_reg_rtx (DImode);
2062 }
2063
2064 emit_insn (gen_seth44 (temp1, op1));
2065 emit_insn (gen_setm44 (temp2, temp1, op1));
2066 emit_insn (gen_rtx_SET (temp3,
2067 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2068 emit_insn (gen_setl44 (op0, temp3, op1));
2069 break;
2070
2071 case CM_MEDANY:
2072 /* The range spanned by all instructions in the object is less
2073 than 2^31 bytes (2GB) and the distance from any instruction
2074 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2075 than 2^31 bytes (2GB).
2076
2077 The executable can be placed anywhere in the virtual address
2078 space.
2079
2080 sethi %hh(symbol), %temp1
2081 sethi %lm(symbol), %temp2
2082 or %temp1, %hm(symbol), %temp3
2083 sllx %temp3, 32, %temp4
2084 or %temp4, %temp2, %temp5
2085 or %temp5, %lo(symbol), %reg */
2086 if (temp)
2087 {
2088 /* It is possible that one of the registers we got for operands[2]
2089 might coincide with that of operands[0] (which is why we made
2090 it TImode). Pick the other one to use as our scratch. */
2091 if (rtx_equal_p (temp, op0))
2092 {
2093 gcc_assert (ti_temp);
2094 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2095 }
2096 temp1 = op0;
2097 temp2 = temp; /* op0 is _not_ allowed, see above. */
2098 temp3 = op0;
2099 temp4 = op0;
2100 temp5 = op0;
2101 }
2102 else
2103 {
2104 temp1 = gen_reg_rtx (DImode);
2105 temp2 = gen_reg_rtx (DImode);
2106 temp3 = gen_reg_rtx (DImode);
2107 temp4 = gen_reg_rtx (DImode);
2108 temp5 = gen_reg_rtx (DImode);
2109 }
2110
2111 emit_insn (gen_sethh (temp1, op1));
2112 emit_insn (gen_setlm (temp2, op1));
2113 emit_insn (gen_sethm (temp3, temp1, op1));
2114 emit_insn (gen_rtx_SET (temp4,
2115 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2116 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2117 emit_insn (gen_setlo (op0, temp5, op1));
2118 break;
2119
2120 case CM_EMBMEDANY:
2121 /* Old old old backwards compatibility kruft here.
2122 Essentially it is MEDLOW with a fixed 64-bit
2123 virtual base added to all data segment addresses.
2124 Text-segment stuff is computed like MEDANY, we can't
2125 reuse the code above because the relocation knobs
2126 look different.
2127
2128 Data segment: sethi %hi(symbol), %temp1
2129 add %temp1, EMBMEDANY_BASE_REG, %temp2
2130 or %temp2, %lo(symbol), %reg */
2131 if (data_segment_operand (op1, GET_MODE (op1)))
2132 {
2133 if (temp)
2134 {
2135 temp1 = temp; /* op0 is allowed. */
2136 temp2 = op0;
2137 }
2138 else
2139 {
2140 temp1 = gen_reg_rtx (DImode);
2141 temp2 = gen_reg_rtx (DImode);
2142 }
2143
2144 emit_insn (gen_embmedany_sethi (temp1, op1));
2145 emit_insn (gen_embmedany_brsum (temp2, temp1));
2146 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2147 }
2148
2149 /* Text segment: sethi %uhi(symbol), %temp1
2150 sethi %hi(symbol), %temp2
2151 or %temp1, %ulo(symbol), %temp3
2152 sllx %temp3, 32, %temp4
2153 or %temp4, %temp2, %temp5
2154 or %temp5, %lo(symbol), %reg */
2155 else
2156 {
2157 if (temp)
2158 {
2159 /* It is possible that one of the registers we got for operands[2]
2160 might coincide with that of operands[0] (which is why we made
2161 it TImode). Pick the other one to use as our scratch. */
2162 if (rtx_equal_p (temp, op0))
2163 {
2164 gcc_assert (ti_temp);
2165 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2166 }
2167 temp1 = op0;
2168 temp2 = temp; /* op0 is _not_ allowed, see above. */
2169 temp3 = op0;
2170 temp4 = op0;
2171 temp5 = op0;
2172 }
2173 else
2174 {
2175 temp1 = gen_reg_rtx (DImode);
2176 temp2 = gen_reg_rtx (DImode);
2177 temp3 = gen_reg_rtx (DImode);
2178 temp4 = gen_reg_rtx (DImode);
2179 temp5 = gen_reg_rtx (DImode);
2180 }
2181
2182 emit_insn (gen_embmedany_textuhi (temp1, op1));
2183 emit_insn (gen_embmedany_texthi (temp2, op1));
2184 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2185 emit_insn (gen_rtx_SET (temp4,
2186 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2187 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2188 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2189 }
2190 break;
2191
2192 default:
2193 gcc_unreachable ();
2194 }
2195 }
2196
2197 /* These avoid problems when cross compiling. If we do not
2198 go through all this hair then the optimizer will see
2199 invalid REG_EQUAL notes or in some cases none at all. */
2200 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2201 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2202 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2203 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2204
2205 /* The optimizer is not allowed to assume anything about exactly
2206 which bits are set for a HIGH; they are unspecified.
2207 Unfortunately this leads to many missed optimizations
2208 during CSE. We mask out the non-HIGH bits and match
2209 a plain movdi to alleviate this problem. */
2210 static rtx
2211 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2212 {
2213 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2214 }
2215
2216 static rtx
2217 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2218 {
2219 return gen_rtx_SET (dest, GEN_INT (val));
2220 }
2221
2222 static rtx
2223 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2224 {
2225 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2226 }
2227
2228 static rtx
2229 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2230 {
2231 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2232 }
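
/* A small worked example of the masking performed by gen_safe_HIGH64:
   for a hypothetical value such as 0xdeadbeef, it generates a set of
   the precise constant 0xdeadbc00, i.e. the value with its low 10
   bits cleared, instead of (high:DI 0xdeadbeef) whose low bits CSE
   would have to treat as unspecified.  */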
2233
2234 /* Worker routines for 64-bit constant formation on arch64.
2235 One of the key things to be doing in these emissions is
2236 to create as many temp REGs as possible. This makes it
2237 possible for half-built constants to be used later when
2238 such values are similar to something required later on.
2239 Without doing this, the optimizer cannot see such
2240 opportunities. */
2241
2242 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2243 unsigned HOST_WIDE_INT, int);
2244
2245 static void
2246 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2247 unsigned HOST_WIDE_INT low_bits, int is_neg)
2248 {
2249 unsigned HOST_WIDE_INT high_bits;
2250
2251 if (is_neg)
2252 high_bits = (~low_bits) & 0xffffffff;
2253 else
2254 high_bits = low_bits;
2255
2256 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2257 if (!is_neg)
2258 {
2259 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2260 }
2261 else
2262 {
2263 /* If we are XOR'ing with -1, then we should emit a one's complement
2264 instead. This way the combiner will notice logical operations
2265 such as ANDN later on and substitute. */
2266 if ((low_bits & 0x3ff) == 0x3ff)
2267 {
2268 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2269 }
2270 else
2271 {
2272 emit_insn (gen_rtx_SET (op0,
2273 gen_safe_XOR64 (temp,
2274 (-(HOST_WIDE_INT)0x400
2275 | (low_bits & 0x3ff)))));
2276 }
2277 }
2278 }
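
/* To see how the negated path above works, take a hypothetical
   constant such as 0xffffffff12345678: its low word is 0x12345678,
   so with IS_NEG set we first materialize ~0x12345678 = 0xedcba987
   via gen_safe_HIGH64 (leaving 0x00000000edcba800 in the temporary)
   and then xor that with the sign-extending simm13 value
   (-0x400 | 0x278) = -392, which flips the upper 32 bits to all-ones
   and restores the low word, yielding the constant in two
   instructions.  */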
2279
2280 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2281 unsigned HOST_WIDE_INT, int);
2282
2283 static void
2284 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2285 unsigned HOST_WIDE_INT high_bits,
2286 unsigned HOST_WIDE_INT low_immediate,
2287 int shift_count)
2288 {
2289 rtx temp2 = op0;
2290
2291 if ((high_bits & 0xfffffc00) != 0)
2292 {
2293 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2294 if ((high_bits & ~0xfffffc00) != 0)
2295 emit_insn (gen_rtx_SET (op0,
2296 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2297 else
2298 temp2 = temp;
2299 }
2300 else
2301 {
2302 emit_insn (gen_safe_SET64 (temp, high_bits));
2303 temp2 = temp;
2304 }
2305
2306 /* Now shift it up into place. */
2307 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2308 GEN_INT (shift_count))));
2309
2310 /* If there is a low immediate part piece, finish up by
2311 putting that in as well. */
2312 if (low_immediate != 0)
2313 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2314 }
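
/* As a hypothetical example, sparc_emit_set_const64 below uses this
   routine to build 0x1234567800000000 as

     sethi %hi(0x12345678), %temp  ! 0x12345400
     or    %temp, 0x278, %reg      ! 0x12345678
     sllx  %reg, 32, %reg          ! 0x1234567800000000

   and, when a nonzero LOW_IMMEDIATE is supplied, a final or folds the
   simm13 trailing bits into the same register.  */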
2315
2316 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2317 unsigned HOST_WIDE_INT);
2318
2319 /* Full 64-bit constant decomposition. Even though this is the
2320 'worst' case, we still optimize a few things away. */
2321 static void
2322 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2323 unsigned HOST_WIDE_INT high_bits,
2324 unsigned HOST_WIDE_INT low_bits)
2325 {
2326 rtx sub_temp = op0;
2327
2328 if (can_create_pseudo_p ())
2329 sub_temp = gen_reg_rtx (DImode);
2330
2331 if ((high_bits & 0xfffffc00) != 0)
2332 {
2333 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2334 if ((high_bits & ~0xfffffc00) != 0)
2335 emit_insn (gen_rtx_SET (sub_temp,
2336 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2337 else
2338 sub_temp = temp;
2339 }
2340 else
2341 {
2342 emit_insn (gen_safe_SET64 (temp, high_bits));
2343 sub_temp = temp;
2344 }
2345
2346 if (can_create_pseudo_p ())
2347 {
2348 rtx temp2 = gen_reg_rtx (DImode);
2349 rtx temp3 = gen_reg_rtx (DImode);
2350 rtx temp4 = gen_reg_rtx (DImode);
2351
2352 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2353 GEN_INT (32))));
2354
2355 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2356 if ((low_bits & ~0xfffffc00) != 0)
2357 {
2358 emit_insn (gen_rtx_SET (temp3,
2359 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2360 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2361 }
2362 else
2363 {
2364 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2365 }
2366 }
2367 else
2368 {
2369 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2370 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2371 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2372 int to_shift = 12;
2373
2374 /* We are in the middle of reload, so this is really
2375 painful. However we do still make an attempt to
2376 avoid emitting truly stupid code. */
2377 if (low1 != const0_rtx)
2378 {
2379 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2380 GEN_INT (to_shift))));
2381 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2382 sub_temp = op0;
2383 to_shift = 12;
2384 }
2385 else
2386 {
2387 to_shift += 12;
2388 }
2389 if (low2 != const0_rtx)
2390 {
2391 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2392 GEN_INT (to_shift))));
2393 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2394 sub_temp = op0;
2395 to_shift = 8;
2396 }
2397 else
2398 {
2399 to_shift += 8;
2400 }
2401 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2402 GEN_INT (to_shift))));
2403 if (low3 != const0_rtx)
2404 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2405 /* phew... */
2406 }
2407 }
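
/* For the record, the worst case above is six instructions when
   pseudos are available (a sethi/or pair for each 32-bit half, an
   sllx by 32 and a final add); in the reload path the low word is
   instead folded in with up to three shift/or pairs, for a worst
   case of eight.  */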
2408
2409 /* Analyze a 64-bit constant for certain properties. */
2410 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2411 unsigned HOST_WIDE_INT,
2412 int *, int *, int *);
2413
2414 static void
2415 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2416 unsigned HOST_WIDE_INT low_bits,
2417 int *hbsp, int *lbsp, int *abbasp)
2418 {
2419 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2420 int i;
2421
2422 lowest_bit_set = highest_bit_set = -1;
2423 i = 0;
2424 do
2425 {
2426 if ((lowest_bit_set == -1)
2427 && ((low_bits >> i) & 1))
2428 lowest_bit_set = i;
2429 if ((highest_bit_set == -1)
2430 && ((high_bits >> (32 - i - 1)) & 1))
2431 highest_bit_set = (64 - i - 1);
2432 }
2433 while (++i < 32
2434 && ((highest_bit_set == -1)
2435 || (lowest_bit_set == -1)));
2436 if (i == 32)
2437 {
2438 i = 0;
2439 do
2440 {
2441 if ((lowest_bit_set == -1)
2442 && ((high_bits >> i) & 1))
2443 lowest_bit_set = i + 32;
2444 if ((highest_bit_set == -1)
2445 && ((low_bits >> (32 - i - 1)) & 1))
2446 highest_bit_set = 32 - i - 1;
2447 }
2448 while (++i < 32
2449 && ((highest_bit_set == -1)
2450 || (lowest_bit_set == -1)));
2451 }
2452 /* If there are no bits set, this should have gone out
2453 as one instruction! */
2454 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2455 all_bits_between_are_set = 1;
2456 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2457 {
2458 if (i < 32)
2459 {
2460 if ((low_bits & (1 << i)) != 0)
2461 continue;
2462 }
2463 else
2464 {
2465 if ((high_bits & (1 << (i - 32))) != 0)
2466 continue;
2467 }
2468 all_bits_between_are_set = 0;
2469 break;
2470 }
2471 *hbsp = highest_bit_set;
2472 *lbsp = lowest_bit_set;
2473 *abbasp = all_bits_between_are_set;
2474 }
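
/* A worked example: for a hypothetical constant such as 0x3ffffc000
   (bits 14 through 33 set), the scan above yields lowest_bit_set = 14,
   highest_bit_set = 33 and, since every bit in between is set,
   all_bits_between_are_set = 1.  With a span of fewer than 21 bits,
   the emitter below can then cover it with a single sethi of the
   "focus" bits followed by one shift.  */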
2475
2476 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2477
2478 static int
2479 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2480 unsigned HOST_WIDE_INT low_bits)
2481 {
2482 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2483
2484 if (high_bits == 0
2485 || high_bits == 0xffffffff)
2486 return 1;
2487
2488 analyze_64bit_constant (high_bits, low_bits,
2489 &highest_bit_set, &lowest_bit_set,
2490 &all_bits_between_are_set);
2491
2492 if ((highest_bit_set == 63
2493 || lowest_bit_set == 0)
2494 && all_bits_between_are_set != 0)
2495 return 1;
2496
2497 if ((highest_bit_set - lowest_bit_set) < 21)
2498 return 1;
2499
2500 return 0;
2501 }
2502
2503 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2504 unsigned HOST_WIDE_INT,
2505 int, int);
2506
2507 static unsigned HOST_WIDE_INT
2508 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2509 unsigned HOST_WIDE_INT low_bits,
2510 int lowest_bit_set, int shift)
2511 {
2512 HOST_WIDE_INT hi, lo;
2513
2514 if (lowest_bit_set < 32)
2515 {
2516 lo = (low_bits >> lowest_bit_set) << shift;
2517 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2518 }
2519 else
2520 {
2521 lo = 0;
2522 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2523 }
2524 gcc_assert (! (hi & lo));
2525 return (hi | lo);
2526 }
2527
2528 /* Here we are sure to be arch64 and this is an integer constant
2529 being loaded into a register. Emit the most efficient
2530 insn sequence possible. Detection of all the 1-insn cases
2531 has been done already. */
2532 static void
2533 sparc_emit_set_const64 (rtx op0, rtx op1)
2534 {
2535 unsigned HOST_WIDE_INT high_bits, low_bits;
2536 int lowest_bit_set, highest_bit_set;
2537 int all_bits_between_are_set;
2538 rtx temp = 0;
2539
2540 /* Sanity check that we know what we are working with. */
2541 gcc_assert (TARGET_ARCH64
2542 && (GET_CODE (op0) == SUBREG
2543 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2544
2545 if (! can_create_pseudo_p ())
2546 temp = op0;
2547
2548 if (GET_CODE (op1) != CONST_INT)
2549 {
2550 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2551 return;
2552 }
2553
2554 if (! temp)
2555 temp = gen_reg_rtx (DImode);
2556
2557 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2558 low_bits = (INTVAL (op1) & 0xffffffff);
2559
2560 /* low_bits bits 0 --> 31
2561 high_bits bits 32 --> 63 */
2562
2563 analyze_64bit_constant (high_bits, low_bits,
2564 &highest_bit_set, &lowest_bit_set,
2565 &all_bits_between_are_set);
2566
2567 /* First try for a 2-insn sequence. */
2568
2569 /* These situations are preferred because the optimizer can
2570 * do more things with them:
2571 * 1) mov -1, %reg
2572 * sllx %reg, shift, %reg
2573 * 2) mov -1, %reg
2574 * srlx %reg, shift, %reg
2575 * 3) mov some_small_const, %reg
2576 * sllx %reg, shift, %reg
2577 */
2578 if (((highest_bit_set == 63
2579 || lowest_bit_set == 0)
2580 && all_bits_between_are_set != 0)
2581 || ((highest_bit_set - lowest_bit_set) < 12))
2582 {
2583 HOST_WIDE_INT the_const = -1;
2584 int shift = lowest_bit_set;
2585
2586 if ((highest_bit_set != 63
2587 && lowest_bit_set != 0)
2588 || all_bits_between_are_set == 0)
2589 {
2590 the_const =
2591 create_simple_focus_bits (high_bits, low_bits,
2592 lowest_bit_set, 0);
2593 }
2594 else if (lowest_bit_set == 0)
2595 shift = -(63 - highest_bit_set);
2596
2597 gcc_assert (SPARC_SIMM13_P (the_const));
2598 gcc_assert (shift != 0);
2599
2600 emit_insn (gen_safe_SET64 (temp, the_const));
2601 if (shift > 0)
2602 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2603 GEN_INT (shift))));
2604 else if (shift < 0)
2605 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2606 GEN_INT (-shift))));
2607 return;
2608 }
2609
2610 /* Now a range of 22 or fewer bits set somewhere.
2611 * 1) sethi %hi(focus_bits), %reg
2612 * sllx %reg, shift, %reg
2613 * 2) sethi %hi(focus_bits), %reg
2614 * srlx %reg, shift, %reg
2615 */
2616 if ((highest_bit_set - lowest_bit_set) < 21)
2617 {
2618 unsigned HOST_WIDE_INT focus_bits =
2619 create_simple_focus_bits (high_bits, low_bits,
2620 lowest_bit_set, 10);
2621
2622 gcc_assert (SPARC_SETHI_P (focus_bits));
2623 gcc_assert (lowest_bit_set != 10);
2624
2625 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2626
2627 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2628 if (lowest_bit_set < 10)
2629 emit_insn (gen_rtx_SET (op0,
2630 gen_rtx_LSHIFTRT (DImode, temp,
2631 GEN_INT (10 - lowest_bit_set))));
2632 else if (lowest_bit_set > 10)
2633 emit_insn (gen_rtx_SET (op0,
2634 gen_rtx_ASHIFT (DImode, temp,
2635 GEN_INT (lowest_bit_set - 10))));
2636 return;
2637 }
2638
2639 /* 1) sethi %hi(low_bits), %reg
2640 * or %reg, %lo(low_bits), %reg
2641 * 2) sethi %hi(~low_bits), %reg
2642 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2643 */
2644 if (high_bits == 0
2645 || high_bits == 0xffffffff)
2646 {
2647 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2648 (high_bits == 0xffffffff));
2649 return;
2650 }
2651
2652 /* Now, try 3-insn sequences. */
2653
2654 /* 1) sethi %hi(high_bits), %reg
2655 * or %reg, %lo(high_bits), %reg
2656 * sllx %reg, 32, %reg
2657 */
2658 if (low_bits == 0)
2659 {
2660 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2661 return;
2662 }
2663
2664 /* We may be able to do something quick
2665 when the constant is negated, so try that. */
2666 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2667 (~low_bits) & 0xfffffc00))
2668 {
2669 /* NOTE: The trailing bits get XOR'd so we need the
2670 non-negated bits, not the negated ones. */
2671 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2672
2673 if ((((~high_bits) & 0xffffffff) == 0
2674 && ((~low_bits) & 0x80000000) == 0)
2675 || (((~high_bits) & 0xffffffff) == 0xffffffff
2676 && ((~low_bits) & 0x80000000) != 0))
2677 {
2678 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2679
2680 if ((SPARC_SETHI_P (fast_int)
2681 && (~high_bits & 0xffffffff) == 0)
2682 || SPARC_SIMM13_P (fast_int))
2683 emit_insn (gen_safe_SET64 (temp, fast_int));
2684 else
2685 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2686 }
2687 else
2688 {
2689 rtx negated_const;
2690 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2691 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2692 sparc_emit_set_const64 (temp, negated_const);
2693 }
2694
2695 /* If we are XOR'ing with -1, then we should emit a one's complement
2696 instead. This way the combiner will notice logical operations
2697 such as ANDN later on and substitute. */
2698 if (trailing_bits == 0x3ff)
2699 {
2700 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2701 }
2702 else
2703 {
2704 emit_insn (gen_rtx_SET (op0,
2705 gen_safe_XOR64 (temp,
2706 (-0x400 | trailing_bits))));
2707 }
2708 return;
2709 }
2710
2711 /* 1) sethi %hi(xxx), %reg
2712 * or %reg, %lo(xxx), %reg
2713 * sllx %reg, yyy, %reg
2714 *
2715 * ??? This is just a generalized version of the low_bits==0
2716 * thing above, FIXME...
2717 */
2718 if ((highest_bit_set - lowest_bit_set) < 32)
2719 {
2720 unsigned HOST_WIDE_INT focus_bits =
2721 create_simple_focus_bits (high_bits, low_bits,
2722 lowest_bit_set, 0);
2723
2724 /* We can't get here in this state. */
2725 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2726
2727 /* So what we know is that the set bits straddle the
2728 middle of the 64-bit word. */
2729 sparc_emit_set_const64_quick2 (op0, temp,
2730 focus_bits, 0,
2731 lowest_bit_set);
2732 return;
2733 }
2734
2735 /* 1) sethi %hi(high_bits), %reg
2736 * or %reg, %lo(high_bits), %reg
2737 * sllx %reg, 32, %reg
2738 * or %reg, low_bits, %reg
2739 */
2740 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2741 {
2742 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2743 return;
2744 }
2745
2746 /* The easiest way when all else fails, is full decomposition. */
2747 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2748 }
2749
2750 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2751
2752 static bool
2753 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2754 {
2755 *p1 = SPARC_ICC_REG;
2756 *p2 = SPARC_FCC_REG;
2757 return true;
2758 }
2759
2760 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2761
2762 static unsigned int
2763 sparc_min_arithmetic_precision (void)
2764 {
2765 return 32;
2766 }
2767
2768 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2769 return the mode to be used for the comparison. For floating-point,
2770 CCFP[E]mode is used. CCNZmode should be used when the first operand
2771 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2772 processing is needed. */
2773
2774 machine_mode
2775 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2776 {
2777 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2778 {
2779 switch (op)
2780 {
2781 case EQ:
2782 case NE:
2783 case UNORDERED:
2784 case ORDERED:
2785 case UNLT:
2786 case UNLE:
2787 case UNGT:
2788 case UNGE:
2789 case UNEQ:
2790 case LTGT:
2791 return CCFPmode;
2792
2793 case LT:
2794 case LE:
2795 case GT:
2796 case GE:
2797 return CCFPEmode;
2798
2799 default:
2800 gcc_unreachable ();
2801 }
2802 }
2803 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2804 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2805 && y == const0_rtx)
2806 {
2807 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2808 return CCXNZmode;
2809 else
2810 return CCNZmode;
2811 }
2812 else
2813 {
2814 /* This is for the cmp<mode>_sne pattern. */
2815 if (GET_CODE (x) == NOT && y == constm1_rtx)
2816 {
2817 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2818 return CCXCmode;
2819 else
2820 return CCCmode;
2821 }
2822
2823 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
2824 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
2825 {
2826 if (GET_CODE (y) == UNSPEC
2827 && (XINT (y, 1) == UNSPEC_ADDV
2828 || XINT (y, 1) == UNSPEC_SUBV
2829 || XINT (y, 1) == UNSPEC_NEGV))
2830 return CCVmode;
2831 else
2832 return CCCmode;
2833 }
2834
2835 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2836 return CCXmode;
2837 else
2838 return CCmode;
2839 }
2840 }
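
/* For instance, an SImode comparison of (plus:SI x y) against zero is
   given CCNZmode by the code above, which is what later allows the
   separate compare to be folded away in favor of a flag-setting addcc
   instruction.  */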
2841
2842 /* Emit the compare insn and return the CC reg for a CODE comparison
2843 with operands X and Y. */
2844
2845 static rtx
2846 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2847 {
2848 machine_mode mode;
2849 rtx cc_reg;
2850
2851 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2852 return x;
2853
2854 mode = SELECT_CC_MODE (code, x, y);
2855
2856 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2857 fcc regs (cse can't tell they're really call clobbered regs and will
2858 remove a duplicate comparison even if there is an intervening function
2859 call - it will then try to reload the cc reg via an int reg which is why
2860 we need the movcc patterns). It is possible to provide the movcc
2861 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2862 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2863 to tell cse that CCFPE mode registers (even pseudos) are call
2864 clobbered. */
2865
2866 /* ??? This is an experiment. Rather than making changes to cse which may
2867 or may not be easy/clean, we do our own cse. This is possible because
2868 we will generate hard registers. Cse knows they're call clobbered (it
2869 doesn't know the same thing about pseudos). If we guess wrong, no big
2870 deal, but if we win, great! */
2871
2872 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2873 #if 1 /* experiment */
2874 {
2875 int reg;
2876 /* We cycle through the registers to ensure they're all exercised. */
2877 static int next_fcc_reg = 0;
2878 /* Previous x,y for each fcc reg. */
2879 static rtx prev_args[4][2];
2880
2881 /* Scan prev_args for x,y. */
2882 for (reg = 0; reg < 4; reg++)
2883 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2884 break;
2885 if (reg == 4)
2886 {
2887 reg = next_fcc_reg;
2888 prev_args[reg][0] = x;
2889 prev_args[reg][1] = y;
2890 next_fcc_reg = (next_fcc_reg + 1) & 3;
2891 }
2892 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2893 }
2894 #else
2895 cc_reg = gen_reg_rtx (mode);
2896 #endif /* ! experiment */
2897 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2898 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2899 else
2900 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2901
2902 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2903 will only result in an unrecognizable insn so no point in asserting. */
2904 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2905
2906 return cc_reg;
2907 }
2908
2909
2910 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2911
2912 rtx
2913 gen_compare_reg (rtx cmp)
2914 {
2915 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2916 }
2917
2918 /* This function is used for v9 only.
2919 DEST is the target of the Scc insn.
2920 CODE is the code for an Scc's comparison.
2921 X and Y are the values we compare.
2922
2923 This function is needed to turn
2924
2925 (set (reg:SI 110)
2926 (gt (reg:CCX 100 %icc)
2927 (const_int 0)))
2928 into
2929 (set (reg:SI 110)
2930 (gt:DI (reg:CCX 100 %icc)
2931 (const_int 0)))
2932
2933 I.e. the instruction recognizer needs to see the mode of the comparison to
2934 find the right instruction. We could use "gt:DI" right in the
2935 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2936
2937 static int
2938 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2939 {
2940 if (! TARGET_ARCH64
2941 && (GET_MODE (x) == DImode
2942 || GET_MODE (dest) == DImode))
2943 return 0;
2944
2945 /* Try to use the movrCC insns. */
2946 if (TARGET_ARCH64
2947 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2948 && y == const0_rtx
2949 && v9_regcmp_p (compare_code))
2950 {
2951 rtx op0 = x;
2952 rtx temp;
2953
2954 /* Special case for op0 != 0. This can be done with one instruction if
2955 dest == x. */
2956
2957 if (compare_code == NE
2958 && GET_MODE (dest) == DImode
2959 && rtx_equal_p (op0, dest))
2960 {
2961 emit_insn (gen_rtx_SET (dest,
2962 gen_rtx_IF_THEN_ELSE (DImode,
2963 gen_rtx_fmt_ee (compare_code, DImode,
2964 op0, const0_rtx),
2965 const1_rtx,
2966 dest)));
2967 return 1;
2968 }
2969
2970 if (reg_overlap_mentioned_p (dest, op0))
2971 {
2972 /* Handle the case where dest == x.
2973 We "early clobber" the result. */
2974 op0 = gen_reg_rtx (GET_MODE (x));
2975 emit_move_insn (op0, x);
2976 }
2977
2978 emit_insn (gen_rtx_SET (dest, const0_rtx));
2979 if (GET_MODE (op0) != DImode)
2980 {
2981 temp = gen_reg_rtx (DImode);
2982 convert_move (temp, op0, 0);
2983 }
2984 else
2985 temp = op0;
2986 emit_insn (gen_rtx_SET (dest,
2987 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2988 gen_rtx_fmt_ee (compare_code, DImode,
2989 temp, const0_rtx),
2990 const1_rtx,
2991 dest)));
2992 return 1;
2993 }
2994 else
2995 {
2996 x = gen_compare_reg_1 (compare_code, x, y);
2997 y = const0_rtx;
2998
2999 emit_insn (gen_rtx_SET (dest, const0_rtx));
3000 emit_insn (gen_rtx_SET (dest,
3001 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3002 gen_rtx_fmt_ee (compare_code,
3003 GET_MODE (x), x, y),
3004 const1_rtx, dest)));
3005 return 1;
3006 }
3007 }
3008
3009
3010 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3011 without jumps using the addx/subx instructions. */
3012
3013 bool
3014 emit_scc_insn (rtx operands[])
3015 {
3016 rtx tem, x, y;
3017 enum rtx_code code;
3018 machine_mode mode;
3019
3020 /* The quad-word fp compare library routines all return nonzero to indicate
3021 true, which is different from the equivalent libgcc routines, so we must
3022 handle them specially here. */
3023 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3024 {
3025 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3026 GET_CODE (operands[1]));
3027 operands[2] = XEXP (operands[1], 0);
3028 operands[3] = XEXP (operands[1], 1);
3029 }
3030
3031 code = GET_CODE (operands[1]);
3032 x = operands[2];
3033 y = operands[3];
3034 mode = GET_MODE (x);
3035
3036 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3037 more applications). The exception to this is "reg != 0" which can
3038 be done in one instruction on v9 (so we do it). */
3039 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3040 {
3041 if (y != const0_rtx)
3042 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3043
3044 rtx pat = gen_rtx_SET (operands[0],
3045 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3046 x, const0_rtx));
3047
3048 /* If we can use addx/subx or addxc, add a clobber for CC. */
3049 if (mode == SImode || (code == NE && TARGET_VIS3))
3050 {
3051 rtx clobber
3052 = gen_rtx_CLOBBER (VOIDmode,
3053 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3054 SPARC_ICC_REG));
3055 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3056 }
3057
3058 emit_insn (pat);
3059 return true;
3060 }
3061
3062 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3063 if (TARGET_ARCH64
3064 && mode == DImode
3065 && !((code == LTU || code == GTU) && TARGET_VIS3)
3066 && gen_v9_scc (operands[0], code, x, y))
3067 return true;
3068
3069 /* We can do LTU and GEU using the addx/subx instructions too. And
3070 for GTU/LEU, if both operands are registers, swap them and fall
3071 back to the easy case. */
3072 if (code == GTU || code == LEU)
3073 {
3074 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3075 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3076 {
3077 tem = x;
3078 x = y;
3079 y = tem;
3080 code = swap_condition (code);
3081 }
3082 }
3083
3084 if (code == LTU || code == GEU)
3085 {
3086 emit_insn (gen_rtx_SET (operands[0],
3087 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3088 gen_compare_reg_1 (code, x, y),
3089 const0_rtx)));
3090 return true;
3091 }
3092
3093 /* All the possibilities to use addx/subx based sequences have been
3094 exhausted, so try for a 3-instruction sequence using v9 conditional
3095 moves. */
3096 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3097 return true;
3098
3099 /* Nope, do branches. */
3100 return false;
3101 }
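
/* As a rough illustration of the addx idiom relied on here (the exact
   sequences come from the machine description), "x <u y" can be
   computed without a branch as

     subcc %o0, %o1, %g0   ! set the carry flag iff x <u y
     addx  %g0, 0, %o2     ! %o2 = 0 + 0 + carry

   and "x != 0" as a subcc subtracting x from %g0 followed by the same
   addx, which is why the SImode pattern above gets a %icc clobber
   attached to it.  */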
3102
3103 /* Emit a conditional jump insn for the v9 architecture using comparison code
3104 CODE and jump target LABEL.
3105 This function exists to take advantage of the v9 brxx insns. */
3106
3107 static void
3108 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3109 {
3110 emit_jump_insn (gen_rtx_SET (pc_rtx,
3111 gen_rtx_IF_THEN_ELSE (VOIDmode,
3112 gen_rtx_fmt_ee (code, GET_MODE (op0),
3113 op0, const0_rtx),
3114 gen_rtx_LABEL_REF (VOIDmode, label),
3115 pc_rtx)));
3116 }
3117
3118 /* Emit a conditional jump insn for the UA2011 architecture using
3119 comparison code CODE and jump target LABEL. This function exists
3120 to take advantage of the UA2011 Compare and Branch insns. */
3121
3122 static void
3123 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3124 {
3125 rtx if_then_else;
3126
3127 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3128 gen_rtx_fmt_ee(code, GET_MODE(op0),
3129 op0, op1),
3130 gen_rtx_LABEL_REF (VOIDmode, label),
3131 pc_rtx);
3132
3133 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3134 }
3135
3136 void
3137 emit_conditional_branch_insn (rtx operands[])
3138 {
3139 /* The quad-word fp compare library routines all return nonzero to indicate
3140 true, which is different from the equivalent libgcc routines, so we must
3141 handle them specially here. */
3142 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3143 {
3144 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3145 GET_CODE (operands[0]));
3146 operands[1] = XEXP (operands[0], 0);
3147 operands[2] = XEXP (operands[0], 1);
3148 }
3149
3150 /* If we can tell early on that the comparison is against a constant
3151 that won't fit in the 5-bit signed immediate field of a cbcond,
3152 use one of the other v9 conditional branch sequences. */
3153 if (TARGET_CBCOND
3154 && GET_CODE (operands[1]) == REG
3155 && (GET_MODE (operands[1]) == SImode
3156 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3157 && (GET_CODE (operands[2]) != CONST_INT
3158 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3159 {
3160 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3161 return;
3162 }
3163
3164 if (TARGET_ARCH64 && operands[2] == const0_rtx
3165 && GET_CODE (operands[1]) == REG
3166 && GET_MODE (operands[1]) == DImode)
3167 {
3168 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3169 return;
3170 }
3171
3172 operands[1] = gen_compare_reg (operands[0]);
3173 operands[2] = const0_rtx;
3174 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3175 operands[1], operands[2]);
3176 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3177 operands[3]));
3178 }
3179
3180
3181 /* Generate a DFmode part of a hard TFmode register.
3182 REG is the TFmode hard register, LOW is 1 for the
3183 low 64 bits of the register and 0 otherwise.
3184 */
3185 rtx
3186 gen_df_reg (rtx reg, int low)
3187 {
3188 int regno = REGNO (reg);
3189
3190 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3191 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3192 return gen_rtx_REG (DFmode, regno);
3193 }
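
/* For example, with the big-endian word order used by SPARC, a TFmode
   value assigned to %f4 keeps its high DFmode half in %f4 and its low
   half in %f6; when the value lives in 64-bit integer registers the
   two halves simply occupy consecutive registers.  */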
3194 \f
3195 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3196 Unlike normal calls, TFmode operands are passed by reference. It is
3197 assumed that no more than 3 operands are required. */
3198
3199 static void
3200 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3201 {
3202 rtx ret_slot = NULL, arg[3], func_sym;
3203 int i;
3204
3205 /* We only expect to be called for conversions, unary, and binary ops. */
3206 gcc_assert (nargs == 2 || nargs == 3);
3207
3208 for (i = 0; i < nargs; ++i)
3209 {
3210 rtx this_arg = operands[i];
3211 rtx this_slot;
3212
3213 /* TFmode arguments and return values are passed by reference. */
3214 if (GET_MODE (this_arg) == TFmode)
3215 {
3216 int force_stack_temp;
3217
3218 force_stack_temp = 0;
3219 if (TARGET_BUGGY_QP_LIB && i == 0)
3220 force_stack_temp = 1;
3221
3222 if (GET_CODE (this_arg) == MEM
3223 && ! force_stack_temp)
3224 {
3225 tree expr = MEM_EXPR (this_arg);
3226 if (expr)
3227 mark_addressable (expr);
3228 this_arg = XEXP (this_arg, 0);
3229 }
3230 else if (CONSTANT_P (this_arg)
3231 && ! force_stack_temp)
3232 {
3233 this_slot = force_const_mem (TFmode, this_arg);
3234 this_arg = XEXP (this_slot, 0);
3235 }
3236 else
3237 {
3238 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3239
3240 /* Operand 0 is the return value. We'll copy it out later. */
3241 if (i > 0)
3242 emit_move_insn (this_slot, this_arg);
3243 else
3244 ret_slot = this_slot;
3245
3246 this_arg = XEXP (this_slot, 0);
3247 }
3248 }
3249
3250 arg[i] = this_arg;
3251 }
3252
3253 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3254
3255 if (GET_MODE (operands[0]) == TFmode)
3256 {
3257 if (nargs == 2)
3258 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3259 arg[0], GET_MODE (arg[0]),
3260 arg[1], GET_MODE (arg[1]));
3261 else
3262 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3263 arg[0], GET_MODE (arg[0]),
3264 arg[1], GET_MODE (arg[1]),
3265 arg[2], GET_MODE (arg[2]));
3266
3267 if (ret_slot)
3268 emit_move_insn (operands[0], ret_slot);
3269 }
3270 else
3271 {
3272 rtx ret;
3273
3274 gcc_assert (nargs == 2);
3275
3276 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3277 GET_MODE (operands[0]), 1,
3278 arg[1], GET_MODE (arg[1]));
3279
3280 if (ret != operands[0])
3281 emit_move_insn (operands[0], ret);
3282 }
3283 }
3284
3285 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3286
3287 static void
3288 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3289 {
3290 const char *func;
3291
3292 switch (code)
3293 {
3294 case PLUS:
3295 func = "_Qp_add";
3296 break;
3297 case MINUS:
3298 func = "_Qp_sub";
3299 break;
3300 case MULT:
3301 func = "_Qp_mul";
3302 break;
3303 case DIV:
3304 func = "_Qp_div";
3305 break;
3306 default:
3307 gcc_unreachable ();
3308 }
3309
3310 emit_soft_tfmode_libcall (func, 3, operands);
3311 }
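
/* For reference, the _Qp_* routines selected above follow the SPARC
   ABI convention of passing quads by address, roughly

     void _Qp_add (long double *result, const long double *a,
                   const long double *b);

   which is why emit_soft_tfmode_libcall passes the address of a stack
   slot (or of the MEM itself) for each TFmode operand and copies the
   result back out of the return slot afterwards.  */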
3312
3313 static void
3314 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3315 {
3316 const char *func;
3317
3318 gcc_assert (code == SQRT);
3319 func = "_Qp_sqrt";
3320
3321 emit_soft_tfmode_libcall (func, 2, operands);
3322 }
3323
3324 static void
3325 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3326 {
3327 const char *func;
3328
3329 switch (code)
3330 {
3331 case FLOAT_EXTEND:
3332 switch (GET_MODE (operands[1]))
3333 {
3334 case SFmode:
3335 func = "_Qp_stoq";
3336 break;
3337 case DFmode:
3338 func = "_Qp_dtoq";
3339 break;
3340 default:
3341 gcc_unreachable ();
3342 }
3343 break;
3344
3345 case FLOAT_TRUNCATE:
3346 switch (GET_MODE (operands[0]))
3347 {
3348 case SFmode:
3349 func = "_Qp_qtos";
3350 break;
3351 case DFmode:
3352 func = "_Qp_qtod";
3353 break;
3354 default:
3355 gcc_unreachable ();
3356 }
3357 break;
3358
3359 case FLOAT:
3360 switch (GET_MODE (operands[1]))
3361 {
3362 case SImode:
3363 func = "_Qp_itoq";
3364 if (TARGET_ARCH64)
3365 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3366 break;
3367 case DImode:
3368 func = "_Qp_xtoq";
3369 break;
3370 default:
3371 gcc_unreachable ();
3372 }
3373 break;
3374
3375 case UNSIGNED_FLOAT:
3376 switch (GET_MODE (operands[1]))
3377 {
3378 case SImode:
3379 func = "_Qp_uitoq";
3380 if (TARGET_ARCH64)
3381 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3382 break;
3383 case DImode:
3384 func = "_Qp_uxtoq";
3385 break;
3386 default:
3387 gcc_unreachable ();
3388 }
3389 break;
3390
3391 case FIX:
3392 switch (GET_MODE (operands[0]))
3393 {
3394 case SImode:
3395 func = "_Qp_qtoi";
3396 break;
3397 case DImode:
3398 func = "_Qp_qtox";
3399 break;
3400 default:
3401 gcc_unreachable ();
3402 }
3403 break;
3404
3405 case UNSIGNED_FIX:
3406 switch (GET_MODE (operands[0]))
3407 {
3408 case SImode:
3409 func = "_Qp_qtoui";
3410 break;
3411 case DImode:
3412 func = "_Qp_qtoux";
3413 break;
3414 default:
3415 gcc_unreachable ();
3416 }
3417 break;
3418
3419 default:
3420 gcc_unreachable ();
3421 }
3422
3423 emit_soft_tfmode_libcall (func, 2, operands);
3424 }
3425
3426 /* Expand a hard-float TFmode operation. All arguments must be in
3427 registers. */
3428
3429 static void
3430 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3431 {
3432 rtx op, dest;
3433
3434 if (GET_RTX_CLASS (code) == RTX_UNARY)
3435 {
3436 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3437 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3438 }
3439 else
3440 {
3441 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3442 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3443 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3444 operands[1], operands[2]);
3445 }
3446
3447 if (register_operand (operands[0], VOIDmode))
3448 dest = operands[0];
3449 else
3450 dest = gen_reg_rtx (GET_MODE (operands[0]));
3451
3452 emit_insn (gen_rtx_SET (dest, op));
3453
3454 if (dest != operands[0])
3455 emit_move_insn (operands[0], dest);
3456 }
3457
3458 void
3459 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3460 {
3461 if (TARGET_HARD_QUAD)
3462 emit_hard_tfmode_operation (code, operands);
3463 else
3464 emit_soft_tfmode_binop (code, operands);
3465 }
3466
3467 void
3468 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3469 {
3470 if (TARGET_HARD_QUAD)
3471 emit_hard_tfmode_operation (code, operands);
3472 else
3473 emit_soft_tfmode_unop (code, operands);
3474 }
3475
3476 void
3477 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3478 {
3479 if (TARGET_HARD_QUAD)
3480 emit_hard_tfmode_operation (code, operands);
3481 else
3482 emit_soft_tfmode_cvt (code, operands);
3483 }
3484 \f
3485 /* Return nonzero if a branch/jump/call instruction will be emitting
3486 a nop into its delay slot. */
3487
3488 int
3489 empty_delay_slot (rtx_insn *insn)
3490 {
3491 rtx seq;
3492
3493 /* If no previous instruction (should not happen), return true. */
3494 if (PREV_INSN (insn) == NULL)
3495 return 1;
3496
3497 seq = NEXT_INSN (PREV_INSN (insn));
3498 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3499 return 0;
3500
3501 return 1;
3502 }
3503
3504 /* Return nonzero if we should emit a nop after a cbcond instruction.
3505 The cbcond instruction does not have a delay slot; however, there is
3506 a severe performance penalty if a control transfer appears right
3507 after a cbcond. Therefore we emit a nop when we detect this
3508 situation. */
3509
3510 int
3511 emit_cbcond_nop (rtx_insn *insn)
3512 {
3513 rtx next = next_active_insn (insn);
3514
3515 if (!next)
3516 return 1;
3517
3518 if (NONJUMP_INSN_P (next)
3519 && GET_CODE (PATTERN (next)) == SEQUENCE)
3520 next = XVECEXP (PATTERN (next), 0, 0);
3521 else if (CALL_P (next)
3522 && GET_CODE (PATTERN (next)) == PARALLEL)
3523 {
3524 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3525
3526 if (GET_CODE (delay) == RETURN)
3527 {
3528 /* It's a sibling call. Do not emit the nop if we're going
3529 to emit something other than the jump itself as the first
3530 instruction of the sibcall sequence. */
3531 if (sparc_leaf_function_p || TARGET_FLAT)
3532 return 0;
3533 }
3534 }
3535
3536 if (NONJUMP_INSN_P (next))
3537 return 0;
3538
3539 return 1;
3540 }
3541
3542 /* Return nonzero if TRIAL can go into the call delay slot. */
3543
3544 int
3545 eligible_for_call_delay (rtx_insn *trial)
3546 {
3547 rtx pat;
3548
3549 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3550 return 0;
3551
3552 /* Binutils allows
3553 call __tls_get_addr, %tgd_call (foo)
3554 add %l7, %o0, %o0, %tgd_add (foo)
3555 while Sun as/ld does not. */
3556 if (TARGET_GNU_TLS || !TARGET_TLS)
3557 return 1;
3558
3559 pat = PATTERN (trial);
3560
3561 /* We must reject tgd_add{32|64}, i.e.
3562 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3563 and tldm_add{32|64}, i.e.
3564 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3565 for Sun as/ld. */
3566 if (GET_CODE (pat) == SET
3567 && GET_CODE (SET_SRC (pat)) == PLUS)
3568 {
3569 rtx unspec = XEXP (SET_SRC (pat), 1);
3570
3571 if (GET_CODE (unspec) == UNSPEC
3572 && (XINT (unspec, 1) == UNSPEC_TLSGD
3573 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3574 return 0;
3575 }
3576
3577 return 1;
3578 }
3579
3580 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3581 instruction. RETURN_P is true if the v9 variant 'return' is to be
3582 considered in the test too.
3583
3584 TRIAL must be a SET whose destination is a REG appropriate for the
3585 'restore' instruction or, if RETURN_P is true, for the 'return'
3586 instruction. */
3587
3588 static int
3589 eligible_for_restore_insn (rtx trial, bool return_p)
3590 {
3591 rtx pat = PATTERN (trial);
3592 rtx src = SET_SRC (pat);
3593 bool src_is_freg = false;
3594 rtx src_reg;
3595
3596 /* Since we now can do moves between float and integer registers when
3597 VIS3 is enabled, we have to catch this case. We can allow such
3598 moves when doing a 'return' however. */
3599 src_reg = src;
3600 if (GET_CODE (src_reg) == SUBREG)
3601 src_reg = SUBREG_REG (src_reg);
3602 if (GET_CODE (src_reg) == REG
3603 && SPARC_FP_REG_P (REGNO (src_reg)))
3604 src_is_freg = true;
3605
3606 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3607 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3608 && arith_operand (src, GET_MODE (src))
3609 && ! src_is_freg)
3610 {
3611 if (TARGET_ARCH64)
3612 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3613 else
3614 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3615 }
3616
3617 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3618 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3619 && arith_double_operand (src, GET_MODE (src))
3620 && ! src_is_freg)
3621 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3622
3623 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3624 else if (! TARGET_FPU && register_operand (src, SFmode))
3625 return 1;
3626
3627 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3628 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3629 return 1;
3630
3631 /* If we have the 'return' instruction, anything that does not use
3632 local or output registers and can go into a delay slot wins. */
3633 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3634 return 1;
3635
3636 /* The 'restore src1,src2,dest' pattern for SImode. */
3637 else if (GET_CODE (src) == PLUS
3638 && register_operand (XEXP (src, 0), SImode)
3639 && arith_operand (XEXP (src, 1), SImode))
3640 return 1;
3641
3642 /* The 'restore src1,src2,dest' pattern for DImode. */
3643 else if (GET_CODE (src) == PLUS
3644 && register_operand (XEXP (src, 0), DImode)
3645 && arith_double_operand (XEXP (src, 1), DImode))
3646 return 1;
3647
3648 /* The 'restore src1,%lo(src2),dest' pattern. */
3649 else if (GET_CODE (src) == LO_SUM
3650 && ! TARGET_CM_MEDMID
3651 && ((register_operand (XEXP (src, 0), SImode)
3652 && immediate_operand (XEXP (src, 1), SImode))
3653 || (TARGET_ARCH64
3654 && register_operand (XEXP (src, 0), DImode)
3655 && immediate_operand (XEXP (src, 1), DImode))))
3656 return 1;
3657
3658 /* The 'restore src,src,dest' pattern. */
3659 else if (GET_CODE (src) == ASHIFT
3660 && (register_operand (XEXP (src, 0), SImode)
3661 || register_operand (XEXP (src, 0), DImode))
3662 && XEXP (src, 1) == const1_rtx)
3663 return 1;
3664
3665 return 0;
3666 }
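
/* Editorial note (hedged, registers illustrative): combining the final
   computation with the register-window restore lets the epilogue come out
   as roughly

	ret
	 restore %o3, %o4, %o0	! add performed while popping the window

   instead of a separate add followed by "ret; restore".  The source
   operands are read in the callee's window and the destination is written
   in the caller's window, so %o0 above names the caller's return-value
   register.  */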
3667
3668 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3669
3670 int
3671 eligible_for_return_delay (rtx_insn *trial)
3672 {
3673 int regno;
3674 rtx pat;
3675
3676 /* If the function uses __builtin_eh_return, the eh_return machinery
3677 occupies the delay slot. */
3678 if (crtl->calls_eh_return)
3679 return 0;
3680
3681 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3682 return 0;
3683
3684 /* In the case of a leaf or flat function, anything can go into the slot. */
3685 if (sparc_leaf_function_p || TARGET_FLAT)
3686 return 1;
3687
3688 if (!NONJUMP_INSN_P (trial))
3689 return 0;
3690
3691 pat = PATTERN (trial);
3692 if (GET_CODE (pat) == PARALLEL)
3693 {
3694 int i;
3695
3696 if (! TARGET_V9)
3697 return 0;
3698 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3699 {
3700 rtx expr = XVECEXP (pat, 0, i);
3701 if (GET_CODE (expr) != SET)
3702 return 0;
3703 if (GET_CODE (SET_DEST (expr)) != REG)
3704 return 0;
3705 regno = REGNO (SET_DEST (expr));
3706 if (regno >= 8 && regno < 24)
3707 return 0;
3708 }
3709 return !epilogue_renumber (&pat, 1);
3710 }
3711
3712 if (GET_CODE (pat) != SET)
3713 return 0;
3714
3715 if (GET_CODE (SET_DEST (pat)) != REG)
3716 return 0;
3717
3718 regno = REGNO (SET_DEST (pat));
3719
3720 /* Otherwise, only operations which can be done in tandem with
3721 a `restore' or `return' insn can go into the delay slot. */
3722 if (regno >= 8 && regno < 24)
3723 return 0;
3724
3725 /* If this instruction sets up a floating-point register and we have a return
3726 instruction, it can probably go in. But a restore will not work
3727 with FP_REGS. */
3728 if (! SPARC_INT_REG_P (regno))
3729 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3730
3731 return eligible_for_restore_insn (trial, true);
3732 }
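
/* Editorial note: the "regno >= 8 && regno < 24" tests above reject the
   %o0-%o7 and %l0-%l7 registers (hard registers 8-23).  Those registers
   belong to the current register window, which the restore discards or
   renames, so an instruction setting one of them cannot be done in tandem
   with a restore or return.  */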
3733
3734 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3735
3736 int
3737 eligible_for_sibcall_delay (rtx_insn *trial)
3738 {
3739 rtx pat;
3740
3741 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3742 return 0;
3743
3744 if (!NONJUMP_INSN_P (trial))
3745 return 0;
3746
3747 pat = PATTERN (trial);
3748
3749 if (sparc_leaf_function_p || TARGET_FLAT)
3750 {
3751 /* If the tail call is done using the call instruction,
3752 we have to restore %o7 in the delay slot. */
3753 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3754 return 0;
3755
3756 /* %g1 is used to build the function address. */
3757 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3758 return 0;
3759
3760 return 1;
3761 }
3762
3763 if (GET_CODE (pat) != SET)
3764 return 0;
3765
3766 /* Otherwise, only operations which can be done in tandem with
3767 a `restore' insn can go into the delay slot. */
3768 if (GET_CODE (SET_DEST (pat)) != REG
3769 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3770 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3771 return 0;
3772
3773 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3774 in most cases. */
3775 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3776 return 0;
3777
3778 return eligible_for_restore_insn (trial, false);
3779 }
3780 \f
3781 /* Determine if it's legal to put X into the constant pool. This
3782 is not possible if X contains the address of a symbol that is
3783 not constant (TLS) or not known at final link time (PIC). */
3784
3785 static bool
3786 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3787 {
3788 switch (GET_CODE (x))
3789 {
3790 case CONST_INT:
3791 case CONST_WIDE_INT:
3792 case CONST_DOUBLE:
3793 case CONST_VECTOR:
3794 /* Accept all non-symbolic constants. */
3795 return false;
3796
3797 case LABEL_REF:
3798 /* Labels are OK iff we are non-PIC. */
3799 return flag_pic != 0;
3800
3801 case SYMBOL_REF:
3802 /* 'Naked' TLS symbol references are never OK,
3803 non-TLS symbols are OK iff we are non-PIC. */
3804 if (SYMBOL_REF_TLS_MODEL (x))
3805 return true;
3806 else
3807 return flag_pic != 0;
3808
3809 case CONST:
3810 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3811 case PLUS:
3812 case MINUS:
3813 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3814 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3815 case UNSPEC:
3816 return true;
3817 default:
3818 gcc_unreachable ();
3819 }
3820 }
3821 \f
3822 /* Global Offset Table support. */
3823 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3824 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3825
3826 /* Return the SYMBOL_REF for the Global Offset Table. */
3827
3828 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3829
3830 static rtx
3831 sparc_got (void)
3832 {
3833 if (!sparc_got_symbol)
3834 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3835
3836 return sparc_got_symbol;
3837 }
3838
3839 /* Ensure that we are not using patterns that are not OK with PIC. */
3840
3841 int
3842 check_pic (int i)
3843 {
3844 rtx op;
3845
3846 switch (flag_pic)
3847 {
3848 case 1:
3849 op = recog_data.operand[i];
3850 gcc_assert (GET_CODE (op) != SYMBOL_REF
3851 && (GET_CODE (op) != CONST
3852 || (GET_CODE (XEXP (op, 0)) == MINUS
3853 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3854 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3855 /* fallthrough */
3856 case 2:
3857 default:
3858 return 1;
3859 }
3860 }
3861
3862 /* Return true if X is an address which needs a temporary register when
3863 reloaded while generating PIC code. */
3864
3865 int
3866 pic_address_needs_scratch (rtx x)
3867 {
3868 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
3869 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3870 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3871 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3872 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3873 return 1;
3874
3875 return 0;
3876 }
3877
3878 /* Determine if a given RTX is a valid constant. We already know this
3879 satisfies CONSTANT_P. */
3880
3881 static bool
3882 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3883 {
3884 switch (GET_CODE (x))
3885 {
3886 case CONST:
3887 case SYMBOL_REF:
3888 if (sparc_tls_referenced_p (x))
3889 return false;
3890 break;
3891
3892 case CONST_DOUBLE:
3893 /* Floating point constants are generally not ok.
3894 The only exception is 0.0 and all-ones in VIS. */
3895 if (TARGET_VIS
3896 && SCALAR_FLOAT_MODE_P (mode)
3897 && (const_zero_operand (x, mode)
3898 || const_all_ones_operand (x, mode)))
3899 return true;
3900
3901 return false;
3902
3903 case CONST_VECTOR:
3904 /* Vector constants are generally not ok.
3905 The only exception is 0 or -1 in VIS. */
3906 if (TARGET_VIS
3907 && (const_zero_operand (x, mode)
3908 || const_all_ones_operand (x, mode)))
3909 return true;
3910
3911 return false;
3912
3913 default:
3914 break;
3915 }
3916
3917 return true;
3918 }
3919
3920 /* Determine if a given RTX is a valid constant address. */
3921
3922 bool
3923 constant_address_p (rtx x)
3924 {
3925 switch (GET_CODE (x))
3926 {
3927 case LABEL_REF:
3928 case CONST_INT:
3929 case HIGH:
3930 return true;
3931
3932 case CONST:
3933 if (flag_pic && pic_address_needs_scratch (x))
3934 return false;
3935 return sparc_legitimate_constant_p (Pmode, x);
3936
3937 case SYMBOL_REF:
3938 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3939
3940 default:
3941 return false;
3942 }
3943 }
3944
3945 /* Nonzero if the constant value X is a legitimate general operand
3946 when generating PIC code. It is given that flag_pic is on and
3947 that X satisfies CONSTANT_P. */
3948
3949 bool
3950 legitimate_pic_operand_p (rtx x)
3951 {
3952 if (pic_address_needs_scratch (x))
3953 return false;
3954 if (sparc_tls_referenced_p (x))
3955 return false;
3956 return true;
3957 }
3958
3959 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3960 (CONST_INT_P (X) \
3961 && INTVAL (X) >= -0x1000 \
3962 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3963
3964 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3965 (CONST_INT_P (X) \
3966 && INTVAL (X) >= -0x1000 \
3967 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
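
/* Editorial worked example (hedged): the simm13 immediate field spans
   [-4096, 4095], and the upper bound is shrunk by the access size so that
   every word of a multi-word access still fits, e.g. for DImode the
   largest accepted offset is 0x1000 - 8 = 4088.  The OLO10 variant also
   leaves room for the up-to-10-bit %lo() part folded into the same field,
   hence the tighter 0xc00 upper bound.  */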
3968
3969 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3970
3971 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3972 ordinarily. This changes a bit when generating PIC. */
3973
3974 static bool
3975 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3976 {
3977 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3978
3979 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3980 rs1 = addr;
3981 else if (GET_CODE (addr) == PLUS)
3982 {
3983 rs1 = XEXP (addr, 0);
3984 rs2 = XEXP (addr, 1);
3985
3986 /* Canonicalize. REG comes first, if there are no regs,
3987 LO_SUM comes first. */
3988 if (!REG_P (rs1)
3989 && GET_CODE (rs1) != SUBREG
3990 && (REG_P (rs2)
3991 || GET_CODE (rs2) == SUBREG
3992 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3993 {
3994 rs1 = XEXP (addr, 1);
3995 rs2 = XEXP (addr, 0);
3996 }
3997
3998 if ((flag_pic == 1
3999 && rs1 == pic_offset_table_rtx
4000 && !REG_P (rs2)
4001 && GET_CODE (rs2) != SUBREG
4002 && GET_CODE (rs2) != LO_SUM
4003 && GET_CODE (rs2) != MEM
4004 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4005 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4006 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4007 || ((REG_P (rs1)
4008 || GET_CODE (rs1) == SUBREG)
4009 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4010 {
4011 imm1 = rs2;
4012 rs2 = NULL;
4013 }
4014 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4015 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4016 {
4017 /* We prohibit REG + REG for TFmode when there are no quad move insns
4018 and we consequently need to split. We do this because REG+REG
4019 is not an offsettable address. If we get the situation in reload
4020 where source and destination of a movtf pattern are both MEMs with
4021 REG+REG address, then only one of them gets converted to an
4022 offsettable address. */
4023 if (mode == TFmode
4024 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4025 return 0;
4026
4027 /* Likewise for TImode, but in all cases. */
4028 if (mode == TImode)
4029 return 0;
4030
4031 /* We prohibit REG + REG on ARCH32 if not optimizing for
4032 DFmode/DImode because then mem_min_alignment is likely to be zero
4033 after reload and the forced split would lack a matching splitter
4034 pattern. */
4035 if (TARGET_ARCH32 && !optimize
4036 && (mode == DFmode || mode == DImode))
4037 return 0;
4038 }
4039 else if (USE_AS_OFFSETABLE_LO10
4040 && GET_CODE (rs1) == LO_SUM
4041 && TARGET_ARCH64
4042 && ! TARGET_CM_MEDMID
4043 && RTX_OK_FOR_OLO10_P (rs2, mode))
4044 {
4045 rs2 = NULL;
4046 imm1 = XEXP (rs1, 1);
4047 rs1 = XEXP (rs1, 0);
4048 if (!CONSTANT_P (imm1)
4049 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4050 return 0;
4051 }
4052 }
4053 else if (GET_CODE (addr) == LO_SUM)
4054 {
4055 rs1 = XEXP (addr, 0);
4056 imm1 = XEXP (addr, 1);
4057
4058 if (!CONSTANT_P (imm1)
4059 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4060 return 0;
4061
4062 /* We can't allow TFmode in 32-bit mode, because an offset greater
4063 than the alignment (8) may cause the LO_SUM to overflow. */
4064 if (mode == TFmode && TARGET_ARCH32)
4065 return 0;
4066
4067 /* During reload, accept the HIGH+LO_SUM construct generated by
4068 sparc_legitimize_reload_address. */
4069 if (reload_in_progress
4070 && GET_CODE (rs1) == HIGH
4071 && XEXP (rs1, 0) == imm1)
4072 return 1;
4073 }
4074 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4075 return 1;
4076 else
4077 return 0;
4078
4079 if (GET_CODE (rs1) == SUBREG)
4080 rs1 = SUBREG_REG (rs1);
4081 if (!REG_P (rs1))
4082 return 0;
4083
4084 if (rs2)
4085 {
4086 if (GET_CODE (rs2) == SUBREG)
4087 rs2 = SUBREG_REG (rs2);
4088 if (!REG_P (rs2))
4089 return 0;
4090 }
4091
4092 if (strict)
4093 {
4094 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4095 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4096 return 0;
4097 }
4098 else
4099 {
4100 if ((! SPARC_INT_REG_P (REGNO (rs1))
4101 && REGNO (rs1) != FRAME_POINTER_REGNUM
4102 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4103 || (rs2
4104 && (! SPARC_INT_REG_P (REGNO (rs2))
4105 && REGNO (rs2) != FRAME_POINTER_REGNUM
4106 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4107 return 0;
4108 }
4109 return 1;
4110 }
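
/* Editorial illustration (not from the original source): the shapes
   accepted above correspond to the hardware addressing modes, e.g.

	ld	[%l1 + %l2], %o0	! REG + REG
	ld	[%l1 + 42], %o0		! REG + simm13 offset
	ld	[%l1 + %lo(sym)], %o0	! LO_SUM, after "sethi %hi(sym), %l1"

   subject to the mode-dependent restrictions (TFmode, TImode, PIC)
   checked above.  */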
4111
4112 /* Return the SYMBOL_REF for the tls_get_addr function. */
4113
4114 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4115
4116 static rtx
4117 sparc_tls_get_addr (void)
4118 {
4119 if (!sparc_tls_symbol)
4120 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4121
4122 return sparc_tls_symbol;
4123 }
4124
4125 /* Return the Global Offset Table to be used in TLS mode. */
4126
4127 static rtx
4128 sparc_tls_got (void)
4129 {
4130 /* In PIC mode, this is just the PIC offset table. */
4131 if (flag_pic)
4132 {
4133 crtl->uses_pic_offset_table = 1;
4134 return pic_offset_table_rtx;
4135 }
4136
4137 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4138 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4139 if (TARGET_SUN_TLS && TARGET_ARCH32)
4140 {
4141 load_got_register ();
4142 return global_offset_table_rtx;
4143 }
4144
4145 /* In all other cases, we load a new pseudo with the GOT symbol. */
4146 return copy_to_reg (sparc_got ());
4147 }
4148
4149 /* Return true if X contains a thread-local symbol. */
4150
4151 static bool
4152 sparc_tls_referenced_p (rtx x)
4153 {
4154 if (!TARGET_HAVE_TLS)
4155 return false;
4156
4157 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4158 x = XEXP (XEXP (x, 0), 0);
4159
4160 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4161 return true;
4162
4163 /* That's all we handle in sparc_legitimize_tls_address for now. */
4164 return false;
4165 }
4166
4167 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4168 this (thread-local) address. */
4169
4170 static rtx
4171 sparc_legitimize_tls_address (rtx addr)
4172 {
4173 rtx temp1, temp2, temp3, ret, o0, got;
4174 rtx_insn *insn;
4175
4176 gcc_assert (can_create_pseudo_p ());
4177
4178 if (GET_CODE (addr) == SYMBOL_REF)
4179 switch (SYMBOL_REF_TLS_MODEL (addr))
4180 {
4181 case TLS_MODEL_GLOBAL_DYNAMIC:
4182 start_sequence ();
4183 temp1 = gen_reg_rtx (SImode);
4184 temp2 = gen_reg_rtx (SImode);
4185 ret = gen_reg_rtx (Pmode);
4186 o0 = gen_rtx_REG (Pmode, 8);
4187 got = sparc_tls_got ();
4188 emit_insn (gen_tgd_hi22 (temp1, addr));
4189 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4190 if (TARGET_ARCH32)
4191 {
4192 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4193 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4194 addr, const1_rtx));
4195 }
4196 else
4197 {
4198 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4199 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4200 addr, const1_rtx));
4201 }
4202 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4203 insn = get_insns ();
4204 end_sequence ();
4205 emit_libcall_block (insn, ret, o0, addr);
4206 break;
4207
4208 case TLS_MODEL_LOCAL_DYNAMIC:
4209 start_sequence ();
4210 temp1 = gen_reg_rtx (SImode);
4211 temp2 = gen_reg_rtx (SImode);
4212 temp3 = gen_reg_rtx (Pmode);
4213 ret = gen_reg_rtx (Pmode);
4214 o0 = gen_rtx_REG (Pmode, 8);
4215 got = sparc_tls_got ();
4216 emit_insn (gen_tldm_hi22 (temp1));
4217 emit_insn (gen_tldm_lo10 (temp2, temp1));
4218 if (TARGET_ARCH32)
4219 {
4220 emit_insn (gen_tldm_add32 (o0, got, temp2));
4221 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4222 const1_rtx));
4223 }
4224 else
4225 {
4226 emit_insn (gen_tldm_add64 (o0, got, temp2));
4227 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4228 const1_rtx));
4229 }
4230 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4231 insn = get_insns ();
4232 end_sequence ();
4233 emit_libcall_block (insn, temp3, o0,
4234 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4235 UNSPEC_TLSLD_BASE));
4236 temp1 = gen_reg_rtx (SImode);
4237 temp2 = gen_reg_rtx (SImode);
4238 emit_insn (gen_tldo_hix22 (temp1, addr));
4239 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4240 if (TARGET_ARCH32)
4241 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4242 else
4243 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4244 break;
4245
4246 case TLS_MODEL_INITIAL_EXEC:
4247 temp1 = gen_reg_rtx (SImode);
4248 temp2 = gen_reg_rtx (SImode);
4249 temp3 = gen_reg_rtx (Pmode);
4250 got = sparc_tls_got ();
4251 emit_insn (gen_tie_hi22 (temp1, addr));
4252 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4253 if (TARGET_ARCH32)
4254 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4255 else
4256 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4257 if (TARGET_SUN_TLS)
4258 {
4259 ret = gen_reg_rtx (Pmode);
4260 if (TARGET_ARCH32)
4261 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4262 temp3, addr));
4263 else
4264 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4265 temp3, addr));
4266 }
4267 else
4268 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4269 break;
4270
4271 case TLS_MODEL_LOCAL_EXEC:
4272 temp1 = gen_reg_rtx (Pmode);
4273 temp2 = gen_reg_rtx (Pmode);
4274 if (TARGET_ARCH32)
4275 {
4276 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4277 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4278 }
4279 else
4280 {
4281 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4282 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4283 }
4284 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4285 break;
4286
4287 default:
4288 gcc_unreachable ();
4289 }
4290
4291 else if (GET_CODE (addr) == CONST)
4292 {
4293 rtx base, offset;
4294
4295 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4296
4297 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4298 offset = XEXP (XEXP (addr, 0), 1);
4299
4300 base = force_operand (base, NULL_RTX);
4301 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4302 offset = force_reg (Pmode, offset);
4303 ret = gen_rtx_PLUS (Pmode, base, offset);
4304 }
4305
4306 else
4307 gcc_unreachable (); /* for now ... */
4308
4309 return ret;
4310 }
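
/* Editorial sketch (hedged; relocation operators assume GNU as syntax):
   for the simplest model, local-exec, the code emitted above boils down to

	sethi	%tle_hix22(sym), %g1
	xor	%g1, %tle_lox10(sym), %g1
	add	%g7, %g1, %o0		! %g7 is the thread pointer

   while the other models go through the GOT and, for global/local
   dynamic, a call to __tls_get_addr as generated in the cases above.  */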
4311
4312 /* Legitimize PIC addresses. If the address is already position-independent,
4313 we return ORIG. Newly generated position-independent addresses go into a
4314 reg. This is REG if nonzero, otherwise we allocate register(s) as
4315 necessary. */
4316
4317 static rtx
4318 sparc_legitimize_pic_address (rtx orig, rtx reg)
4319 {
4320 bool gotdata_op = false;
4321
4322 if (GET_CODE (orig) == SYMBOL_REF
4323 /* See the comment in sparc_expand_move. */
4324 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4325 {
4326 rtx pic_ref, address;
4327 rtx_insn *insn;
4328
4329 if (reg == 0)
4330 {
4331 gcc_assert (can_create_pseudo_p ());
4332 reg = gen_reg_rtx (Pmode);
4333 }
4334
4335 if (flag_pic == 2)
4336 {
4337 /* If not during reload, allocate another temp reg here for loading
4338 in the address, so that these instructions can be optimized
4339 properly. */
4340 rtx temp_reg = (! can_create_pseudo_p ()
4341 ? reg : gen_reg_rtx (Pmode));
4342
4343 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4344 won't get confused into thinking that these two instructions
4345 are loading in the true address of the symbol. If in the
4346 future a PIC rtx exists, that should be used instead. */
4347 if (TARGET_ARCH64)
4348 {
4349 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4350 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4351 }
4352 else
4353 {
4354 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4355 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4356 }
4357 address = temp_reg;
4358 gotdata_op = true;
4359 }
4360 else
4361 address = orig;
4362
4363 crtl->uses_pic_offset_table = 1;
4364 if (gotdata_op)
4365 {
4366 if (TARGET_ARCH64)
4367 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4368 pic_offset_table_rtx,
4369 address, orig));
4370 else
4371 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4372 pic_offset_table_rtx,
4373 address, orig));
4374 }
4375 else
4376 {
4377 pic_ref
4378 = gen_const_mem (Pmode,
4379 gen_rtx_PLUS (Pmode,
4380 pic_offset_table_rtx, address));
4381 insn = emit_move_insn (reg, pic_ref);
4382 }
4383
4384 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4385 by loop. */
4386 set_unique_reg_note (insn, REG_EQUAL, orig);
4387 return reg;
4388 }
4389 else if (GET_CODE (orig) == CONST)
4390 {
4391 rtx base, offset;
4392
4393 if (GET_CODE (XEXP (orig, 0)) == PLUS
4394 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4395 return orig;
4396
4397 if (reg == 0)
4398 {
4399 gcc_assert (can_create_pseudo_p ());
4400 reg = gen_reg_rtx (Pmode);
4401 }
4402
4403 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4404 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4405 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4406 base == reg ? NULL_RTX : reg);
4407
4408 if (GET_CODE (offset) == CONST_INT)
4409 {
4410 if (SMALL_INT (offset))
4411 return plus_constant (Pmode, base, INTVAL (offset));
4412 else if (can_create_pseudo_p ())
4413 offset = force_reg (Pmode, offset);
4414 else
4415 /* If we reach here, then something is seriously wrong. */
4416 gcc_unreachable ();
4417 }
4418 return gen_rtx_PLUS (Pmode, base, offset);
4419 }
4420 else if (GET_CODE (orig) == LABEL_REF)
4421 /* ??? We ought to be checking that the register is live instead, in case
4422 it is eliminated. */
4423 crtl->uses_pic_offset_table = 1;
4424
4425 return orig;
4426 }
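
/* Editorial sketch (hedged; the relocation names assume an assembler with
   GOTDATA support): with -fPIC (flag_pic == 2) the code generated above
   for a global symbol is essentially

	sethi	%gdop_hix22(sym), %g1
	xor	%g1, %gdop_lox10(sym), %g1
	ld	[%l7 + %g1], %o0, %gdop(sym)

   i.e. a full GOT offset built in a scratch register and used to index
   the GOT pointer in %l7; with -fpic a single 13-bit GOT offset load
   suffices.  */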
4427
4428 /* Try machine-dependent ways of modifying an illegitimate address X
4429 to be legitimate. If we find one, return the new, valid address.
4430
4431 OLDX is the address as it was before break_out_memory_refs was called.
4432 In some cases it is useful to look at this to decide what needs to be done.
4433
4434 MODE is the mode of the operand pointed to by X.
4435
4436 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4437
4438 static rtx
4439 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4440 machine_mode mode)
4441 {
4442 rtx orig_x = x;
4443
4444 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4445 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4446 force_operand (XEXP (x, 0), NULL_RTX));
4447 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4448 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4449 force_operand (XEXP (x, 1), NULL_RTX));
4450 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4451 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4452 XEXP (x, 1));
4453 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4454 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4455 force_operand (XEXP (x, 1), NULL_RTX));
4456
4457 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4458 return x;
4459
4460 if (sparc_tls_referenced_p (x))
4461 x = sparc_legitimize_tls_address (x);
4462 else if (flag_pic)
4463 x = sparc_legitimize_pic_address (x, NULL_RTX);
4464 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4465 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4466 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4467 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4468 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4469 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4470 else if (GET_CODE (x) == SYMBOL_REF
4471 || GET_CODE (x) == CONST
4472 || GET_CODE (x) == LABEL_REF)
4473 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4474
4475 return x;
4476 }
4477
4478 /* Delegitimize an address that was legitimized by the above function. */
4479
4480 static rtx
4481 sparc_delegitimize_address (rtx x)
4482 {
4483 x = delegitimize_mem_from_attrs (x);
4484
4485 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4486 switch (XINT (XEXP (x, 1), 1))
4487 {
4488 case UNSPEC_MOVE_PIC:
4489 case UNSPEC_TLSLE:
4490 x = XVECEXP (XEXP (x, 1), 0, 0);
4491 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4492 break;
4493 default:
4494 break;
4495 }
4496
4497 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4498 if (GET_CODE (x) == MINUS
4499 && REG_P (XEXP (x, 0))
4500 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4501 && GET_CODE (XEXP (x, 1)) == LO_SUM
4502 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4503 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4504 {
4505 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4506 gcc_assert (GET_CODE (x) == LABEL_REF);
4507 }
4508
4509 return x;
4510 }
4511
4512 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4513 replace the input X, or the original X if no replacement is called for.
4514 The output parameter *WIN is 1 if the calling macro should goto WIN,
4515 0 if it should not.
4516
4517 For SPARC, we wish to handle addresses by splitting them into
4518 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4519 This cuts the number of extra insns by one.
4520
4521 Do nothing when generating PIC code and the address is a symbolic
4522 operand or requires a scratch register. */
4523
4524 rtx
4525 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4526 int opnum, int type,
4527 int ind_levels ATTRIBUTE_UNUSED, int *win)
4528 {
4529 /* Decompose SImode constants into HIGH+LO_SUM. */
4530 if (CONSTANT_P (x)
4531 && (mode != TFmode || TARGET_ARCH64)
4532 && GET_MODE (x) == SImode
4533 && GET_CODE (x) != LO_SUM
4534 && GET_CODE (x) != HIGH
4535 && sparc_cmodel <= CM_MEDLOW
4536 && !(flag_pic
4537 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4538 {
4539 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4540 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4541 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4542 opnum, (enum reload_type)type);
4543 *win = 1;
4544 return x;
4545 }
4546
4547 /* We have to recognize what we have already generated above. */
4548 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4549 {
4550 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4551 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4552 opnum, (enum reload_type)type);
4553 *win = 1;
4554 return x;
4555 }
4556
4557 *win = 0;
4558 return x;
4559 }
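
/* Editorial example: the HIGH+LO_SUM split performed above produces the
   classic two-instruction absolute access of the medlow code model,

	sethi	%hi(sym), %g1
	ld	[%g1 + %lo(sym)], %o0

   keeping the LO_SUM inside the memory reference so that reload only has
   to find one scratch register for the HIGH part.  */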
4560
4561 /* Return true if ADDR (a legitimate address expression)
4562 has an effect that depends on the machine mode it is used for.
4563
4564 In PIC mode,
4565
4566 (mem:HI [%l7+a])
4567
4568 is not equivalent to
4569
4570 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4571
4572 because [%l7+a+1] is interpreted as the address of (a+1). */
4573
4574
4575 static bool
4576 sparc_mode_dependent_address_p (const_rtx addr,
4577 addr_space_t as ATTRIBUTE_UNUSED)
4578 {
4579 if (flag_pic && GET_CODE (addr) == PLUS)
4580 {
4581 rtx op0 = XEXP (addr, 0);
4582 rtx op1 = XEXP (addr, 1);
4583 if (op0 == pic_offset_table_rtx
4584 && symbolic_operand (op1, VOIDmode))
4585 return true;
4586 }
4587
4588 return false;
4589 }
4590
4591 #ifdef HAVE_GAS_HIDDEN
4592 # define USE_HIDDEN_LINKONCE 1
4593 #else
4594 # define USE_HIDDEN_LINKONCE 0
4595 #endif
4596
4597 static void
4598 get_pc_thunk_name (char name[32], unsigned int regno)
4599 {
4600 const char *reg_name = reg_names[regno];
4601
4602 /* Skip the leading '%' as that cannot be used in a
4603 symbol name. */
4604 reg_name += 1;
4605
4606 if (USE_HIDDEN_LINKONCE)
4607 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4608 else
4609 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4610 }
4611
4612 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4613
4614 static rtx
4615 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4616 {
4617 int orig_flag_pic = flag_pic;
4618 rtx insn;
4619
4620 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4621 flag_pic = 0;
4622 if (TARGET_ARCH64)
4623 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4624 else
4625 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4626 flag_pic = orig_flag_pic;
4627
4628 return insn;
4629 }
4630
4631 /* Emit code to load the GOT register. */
4632
4633 void
4634 load_got_register (void)
4635 {
4636 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4637 if (!global_offset_table_rtx)
4638 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4639
4640 if (TARGET_VXWORKS_RTP)
4641 emit_insn (gen_vxworks_load_got ());
4642 else
4643 {
4644 /* The GOT symbol is subject to a PC-relative relocation so we need a
4645 helper function to add the PC value and thus get the final value. */
4646 if (!got_helper_rtx)
4647 {
4648 char name[32];
4649 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4650 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4651 }
4652
4653 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4654 got_helper_rtx,
4655 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4656 }
4657
4658 /* Need to emit this whether or not we obey regdecls,
4659 since setjmp/longjmp can cause life info to screw up.
4660 ??? In the case where we don't obey regdecls, this is not sufficient
4661 since we may not fall out the bottom. */
4662 emit_use (global_offset_table_rtx);
4663 }
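
/* Editorial sketch (hedged; exact PC-relative offsets elided, see the
   load_pcrel_sym patterns for the real constants): the thunk scheme
   materializes the GOT pointer with something like

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-...), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+...), %l7

   where the thunk itself is just "jmp %o7+8; add %o7, %l7, %l7", adding
   the PC of the call site to the PC-relative GOT displacement.  */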
4664
4665 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4666 address of the call target. */
4667
4668 void
4669 sparc_emit_call_insn (rtx pat, rtx addr)
4670 {
4671 rtx_insn *insn;
4672
4673 insn = emit_call_insn (pat);
4674
4675 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4676 if (TARGET_VXWORKS_RTP
4677 && flag_pic
4678 && GET_CODE (addr) == SYMBOL_REF
4679 && (SYMBOL_REF_DECL (addr)
4680 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4681 : !SYMBOL_REF_LOCAL_P (addr)))
4682 {
4683 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4684 crtl->uses_pic_offset_table = 1;
4685 }
4686 }
4687 \f
4688 /* Return 1 if RTX is a MEM which is known to be aligned to at
4689 least a DESIRED byte boundary. */
4690
4691 int
4692 mem_min_alignment (rtx mem, int desired)
4693 {
4694 rtx addr, base, offset;
4695
4696 /* If it's not a MEM we can't accept it. */
4697 if (GET_CODE (mem) != MEM)
4698 return 0;
4699
4700 /* Obviously... */
4701 if (!TARGET_UNALIGNED_DOUBLES
4702 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4703 return 1;
4704
4705 /* ??? The rest of the function predates MEM_ALIGN so
4706 there is probably a bit of redundancy. */
4707 addr = XEXP (mem, 0);
4708 base = offset = NULL_RTX;
4709 if (GET_CODE (addr) == PLUS)
4710 {
4711 if (GET_CODE (XEXP (addr, 0)) == REG)
4712 {
4713 base = XEXP (addr, 0);
4714
4715 /* What we are saying here is that if the base
4716 REG is aligned properly, the compiler will make
4717 sure any REG based index upon it will be so
4718 as well. */
4719 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4720 offset = XEXP (addr, 1);
4721 else
4722 offset = const0_rtx;
4723 }
4724 }
4725 else if (GET_CODE (addr) == REG)
4726 {
4727 base = addr;
4728 offset = const0_rtx;
4729 }
4730
4731 if (base != NULL_RTX)
4732 {
4733 int regno = REGNO (base);
4734
4735 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4736 {
4737 /* Check if the compiler has recorded some information
4738 about the alignment of the base REG. If reload has
4739 completed, we already matched with proper alignments.
4740 If not running global_alloc, reload might give us
4741 an unaligned pointer to the local stack though. */
4742 if (((cfun != 0
4743 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4744 || (optimize && reload_completed))
4745 && (INTVAL (offset) & (desired - 1)) == 0)
4746 return 1;
4747 }
4748 else
4749 {
4750 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4751 return 1;
4752 }
4753 }
4754 else if (! TARGET_UNALIGNED_DOUBLES
4755 || CONSTANT_P (addr)
4756 || GET_CODE (addr) == LO_SUM)
4757 {
4758 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4759 is true, in which case we can only assume that an access is aligned if
4760 it is to a constant address, or the address involves a LO_SUM. */
4761 return 1;
4762 }
4763
4764 /* An obviously unaligned address. */
4765 return 0;
4766 }
4767
4768 \f
4769 /* Vectors to keep interesting information about registers where it can easily
4770 be got. We used to use the actual mode value as the bit number, but there
4771 are more than 32 modes now. Instead we use two tables: one indexed by
4772 hard register number, and one indexed by mode. */
4773
4774 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4775 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4776 mapped into one sparc_mode_class mode. */
4777
4778 enum sparc_mode_class {
4779 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4780 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4781 CC_MODE, CCFP_MODE
4782 };
4783
4784 /* Modes for single-word and smaller quantities. */
4785 #define S_MODES \
4786 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4787
4788 /* Modes for double-word and smaller quantities. */
4789 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4790
4791 /* Modes for quad-word and smaller quantities. */
4792 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4793
4794 /* Modes for 8-word and smaller quantities. */
4795 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4796
4797 /* Modes for single-float quantities. */
4798 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4799
4800 /* Modes for double-float and smaller quantities. */
4801 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4802
4803 /* Modes for quad-float and smaller quantities. */
4804 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4805
4806 /* Modes for quad-float pairs and smaller quantities. */
4807 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4808
4809 /* Modes for double-float only quantities. */
4810 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4811
4812 /* Modes for quad-float and double-float only quantities. */
4813 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4814
4815 /* Modes for quad-float pairs and double-float only quantities. */
4816 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4817
4818 /* Modes for condition codes. */
4819 #define CC_MODES (1 << (int) CC_MODE)
4820 #define CCFP_MODES (1 << (int) CCFP_MODE)
4821
4822 /* Value is 1 if register/mode pair is acceptable on sparc.
4823
4824 The funny mixture of D and T modes is because integer operations
4825 do not specially operate on tetra quantities, so non-quad-aligned
4826 registers can hold quadword quantities (except %o4 and %i4 because
4827 they cross fixed registers).
4828
4829 ??? Note that, despite the settings, non-double-aligned parameter
4830 registers can hold double-word quantities in 32-bit mode. */
4831
4832 /* This points to either the 32 bit or the 64 bit version. */
4833 const int *hard_regno_mode_classes;
4834
4835 static const int hard_32bit_mode_classes[] = {
4836 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4837 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4838 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4839 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4840
4841 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4842 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4843 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4844 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4845
4846 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4847 and none can hold SFmode/SImode values. */
4848 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4849 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4850 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4851 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4852
4853 /* %fcc[0123] */
4854 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4855
4856 /* %icc, %sfp, %gsr */
4857 CC_MODES, 0, D_MODES
4858 };
4859
4860 static const int hard_64bit_mode_classes[] = {
4861 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4862 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4863 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4864 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4865
4866 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4867 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4868 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4869 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4870
4871 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4872 and none can hold SFmode/SImode values. */
4873 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4874 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4875 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4876 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4877
4878 /* %fcc[0123] */
4879 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4880
4881 /* %icc, %sfp, %gsr */
4882 CC_MODES, 0, D_MODES
4883 };
4884
4885 int sparc_mode_class [NUM_MACHINE_MODES];
4886
4887 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4888
4889 static void
4890 sparc_init_modes (void)
4891 {
4892 int i;
4893
4894 for (i = 0; i < NUM_MACHINE_MODES; i++)
4895 {
4896 machine_mode m = (machine_mode) i;
4897 unsigned int size = GET_MODE_SIZE (m);
4898
4899 switch (GET_MODE_CLASS (m))
4900 {
4901 case MODE_INT:
4902 case MODE_PARTIAL_INT:
4903 case MODE_COMPLEX_INT:
4904 if (size < 4)
4905 sparc_mode_class[i] = 1 << (int) H_MODE;
4906 else if (size == 4)
4907 sparc_mode_class[i] = 1 << (int) S_MODE;
4908 else if (size == 8)
4909 sparc_mode_class[i] = 1 << (int) D_MODE;
4910 else if (size == 16)
4911 sparc_mode_class[i] = 1 << (int) T_MODE;
4912 else if (size == 32)
4913 sparc_mode_class[i] = 1 << (int) O_MODE;
4914 else
4915 sparc_mode_class[i] = 0;
4916 break;
4917 case MODE_VECTOR_INT:
4918 if (size == 4)
4919 sparc_mode_class[i] = 1 << (int) SF_MODE;
4920 else if (size == 8)
4921 sparc_mode_class[i] = 1 << (int) DF_MODE;
4922 else
4923 sparc_mode_class[i] = 0;
4924 break;
4925 case MODE_FLOAT:
4926 case MODE_COMPLEX_FLOAT:
4927 if (size == 4)
4928 sparc_mode_class[i] = 1 << (int) SF_MODE;
4929 else if (size == 8)
4930 sparc_mode_class[i] = 1 << (int) DF_MODE;
4931 else if (size == 16)
4932 sparc_mode_class[i] = 1 << (int) TF_MODE;
4933 else if (size == 32)
4934 sparc_mode_class[i] = 1 << (int) OF_MODE;
4935 else
4936 sparc_mode_class[i] = 0;
4937 break;
4938 case MODE_CC:
4939 if (m == CCFPmode || m == CCFPEmode)
4940 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4941 else
4942 sparc_mode_class[i] = 1 << (int) CC_MODE;
4943 break;
4944 default:
4945 sparc_mode_class[i] = 0;
4946 break;
4947 }
4948 }
4949
4950 if (TARGET_ARCH64)
4951 hard_regno_mode_classes = hard_64bit_mode_classes;
4952 else
4953 hard_regno_mode_classes = hard_32bit_mode_classes;
4954
4955 /* Initialize the array used by REGNO_REG_CLASS. */
4956 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4957 {
4958 if (i < 16 && TARGET_V8PLUS)
4959 sparc_regno_reg_class[i] = I64_REGS;
4960 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4961 sparc_regno_reg_class[i] = GENERAL_REGS;
4962 else if (i < 64)
4963 sparc_regno_reg_class[i] = FP_REGS;
4964 else if (i < 96)
4965 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4966 else if (i < 100)
4967 sparc_regno_reg_class[i] = FPCC_REGS;
4968 else
4969 sparc_regno_reg_class[i] = NO_REGS;
4970 }
4971 }
4972 \f
4973 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4974
4975 static inline bool
4976 save_global_or_fp_reg_p (unsigned int regno,
4977 int leaf_function ATTRIBUTE_UNUSED)
4978 {
4979 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4980 }
4981
4982 /* Return whether the return address register (%i7) is needed. */
4983
4984 static inline bool
4985 return_addr_reg_needed_p (int leaf_function)
4986 {
4987 /* If it is live, for example because of __builtin_return_address (0). */
4988 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4989 return true;
4990
4991 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4992 if (!leaf_function
4993 /* Loading the GOT register clobbers %o7. */
4994 || crtl->uses_pic_offset_table
4995 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4996 return true;
4997
4998 return false;
4999 }
5000
5001 /* Return whether REGNO, a local or in register, must be saved/restored. */
5002
5003 static bool
5004 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5005 {
5006 /* General case: call-saved registers live at some point. */
5007 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5008 return true;
5009
5010 /* Frame pointer register (%fp) if needed. */
5011 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5012 return true;
5013
5014 /* Return address register (%i7) if needed. */
5015 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5016 return true;
5017
5018 /* GOT register (%l7) if needed. */
5019 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5020 return true;
5021
5022 /* If the function accesses prior frames, the frame pointer and the return
5023 address of the previous frame must be saved on the stack. */
5024 if (crtl->accesses_prior_frames
5025 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5026 return true;
5027
5028 return false;
5029 }
5030
5031 /* Compute the frame size required by the function. This function is called
5032 during the reload pass and also by sparc_expand_prologue. */
5033
5034 HOST_WIDE_INT
5035 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5036 {
5037 HOST_WIDE_INT frame_size, apparent_frame_size;
5038 int args_size, n_global_fp_regs = 0;
5039 bool save_local_in_regs_p = false;
5040 unsigned int i;
5041
5042 /* If the function allocates dynamic stack space, the dynamic offset is
5043 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5044 if (leaf_function && !cfun->calls_alloca)
5045 args_size = 0;
5046 else
5047 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5048
5049 /* Calculate space needed for global registers. */
5050 if (TARGET_ARCH64)
5051 {
5052 for (i = 0; i < 8; i++)
5053 if (save_global_or_fp_reg_p (i, 0))
5054 n_global_fp_regs += 2;
5055 }
5056 else
5057 {
5058 for (i = 0; i < 8; i += 2)
5059 if (save_global_or_fp_reg_p (i, 0)
5060 || save_global_or_fp_reg_p (i + 1, 0))
5061 n_global_fp_regs += 2;
5062 }
5063
5064 /* In the flat window model, find out which local and in registers need to
5065 be saved. We don't reserve space in the current frame for them as they
5066 will be spilled into the register window save area of the caller's frame.
5067 However, as soon as we use this register window save area, we must create
5068 that of the current frame to make it the live one. */
5069 if (TARGET_FLAT)
5070 for (i = 16; i < 32; i++)
5071 if (save_local_or_in_reg_p (i, leaf_function))
5072 {
5073 save_local_in_regs_p = true;
5074 break;
5075 }
5076
5077 /* Calculate space needed for FP registers. */
5078 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5079 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5080 n_global_fp_regs += 2;
5081
5082 if (size == 0
5083 && n_global_fp_regs == 0
5084 && args_size == 0
5085 && !save_local_in_regs_p)
5086 frame_size = apparent_frame_size = 0;
5087 else
5088 {
5089 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5090 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5091 apparent_frame_size += n_global_fp_regs * 4;
5092
5093 /* We need to add the size of the outgoing argument area. */
5094 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5095
5096 /* And that of the register window save area. */
5097 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5098
5099 /* Finally, bump to the appropriate alignment. */
5100 frame_size = SPARC_STACK_ALIGN (frame_size);
5101 }
5102
5103 /* Set up values for use in prologue and epilogue. */
5104 sparc_frame_size = frame_size;
5105 sparc_apparent_frame_size = apparent_frame_size;
5106 sparc_n_global_fp_regs = n_global_fp_regs;
5107 sparc_save_local_in_regs_p = save_local_in_regs_p;
5108
5109 return frame_size;
5110 }
5111
5112 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5113
5114 int
5115 sparc_initial_elimination_offset (int to)
5116 {
5117 int offset;
5118
5119 if (to == STACK_POINTER_REGNUM)
5120 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5121 else
5122 offset = 0;
5123
5124 offset += SPARC_STACK_BIAS;
5125 return offset;
5126 }
5127
5128 /* Output any necessary .register pseudo-ops. */
5129
5130 void
5131 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5132 {
5133 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5134 int i;
5135
5136 if (TARGET_ARCH32)
5137 return;
5138
5139 /* Check if %g[2367] were used without
5140 .register being printed for them already. */
5141 for (i = 2; i < 8; i++)
5142 {
5143 if (df_regs_ever_live_p (i)
5144 && ! sparc_hard_reg_printed [i])
5145 {
5146 sparc_hard_reg_printed [i] = 1;
5147 /* %g7 is used as the TLS base register, so use #ignore
5148 for it instead of #scratch. */
5149 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5150 i == 7 ? "ignore" : "scratch");
5151 }
5152 if (i == 3) i = 5;
5153 }
5154 #endif
5155 }
5156
5157 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5158
5159 #if PROBE_INTERVAL > 4096
5160 #error Cannot use indexed addressing mode for stack probing
5161 #endif
5162
5163 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5164 inclusive. These are offsets from the current stack pointer.
5165
5166 Note that we don't use the REG+REG addressing mode for the probes because
5167 of the stack bias in 64-bit mode. And it wouldn't really buy us anything,
5168 so the advantage of having a single code path wins here. */
5169
5170 static void
5171 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5172 {
5173 rtx g1 = gen_rtx_REG (Pmode, 1);
5174
5175 /* See if we have a constant small number of probes to generate. If so,
5176 that's the easy case. */
5177 if (size <= PROBE_INTERVAL)
5178 {
5179 emit_move_insn (g1, GEN_INT (first));
5180 emit_insn (gen_rtx_SET (g1,
5181 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5182 emit_stack_probe (plus_constant (Pmode, g1, -size));
5183 }
5184
5185 /* The run-time loop is made up of 9 insns in the generic case while the
5186 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
5187 else if (size <= 4 * PROBE_INTERVAL)
5188 {
5189 HOST_WIDE_INT i;
5190
5191 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5192 emit_insn (gen_rtx_SET (g1,
5193 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5194 emit_stack_probe (g1);
5195
5196 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5197 it exceeds SIZE. If only two probes are needed, this will not
5198 generate any code. Then probe at FIRST + SIZE. */
5199 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5200 {
5201 emit_insn (gen_rtx_SET (g1,
5202 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5203 emit_stack_probe (g1);
5204 }
5205
5206 emit_stack_probe (plus_constant (Pmode, g1,
5207 (i - PROBE_INTERVAL) - size));
5208 }
5209
5210 /* Otherwise, do the same as above, but in a loop. Note that we must be
5211 extra careful with variables wrapping around because we might be at
5212 the very top (or the very bottom) of the address space and we have
5213 to be able to handle this case properly; in particular, we use an
5214 equality test for the loop condition. */
5215 else
5216 {
5217 HOST_WIDE_INT rounded_size;
5218 rtx g4 = gen_rtx_REG (Pmode, 4);
5219
5220 emit_move_insn (g1, GEN_INT (first));
5221
5222
5223 /* Step 1: round SIZE to the previous multiple of the interval. */
5224
5225 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5226 emit_move_insn (g4, GEN_INT (rounded_size));
5227
5228
5229 /* Step 2: compute initial and final value of the loop counter. */
5230
5231 /* TEST_ADDR = SP + FIRST. */
5232 emit_insn (gen_rtx_SET (g1,
5233 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5234
5235 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5236 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5237
5238
5239 /* Step 3: the loop
5240
5241 while (TEST_ADDR != LAST_ADDR)
5242 {
5243 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5244 probe at TEST_ADDR
5245 }
5246
5247 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5248 until it is equal to ROUNDED_SIZE. */
5249
5250 if (TARGET_ARCH64)
5251 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5252 else
5253 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5254
5255
5256 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5257 that SIZE is equal to ROUNDED_SIZE. */
5258
5259 if (size != rounded_size)
5260 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5261 }
5262
5263 /* Make sure nothing is scheduled before we are done. */
5264 emit_insn (gen_blockage ());
5265 }
5266
5267 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5268 absolute addresses. */
5269
5270 const char *
5271 output_probe_stack_range (rtx reg1, rtx reg2)
5272 {
5273 static int labelno = 0;
5274 char loop_lab[32];
5275 rtx xops[2];
5276
5277 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5278
5279 /* Loop. */
5280 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5281
5282 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5283 xops[0] = reg1;
5284 xops[1] = GEN_INT (-PROBE_INTERVAL);
5285 output_asm_insn ("add\t%0, %1, %0", xops);
5286
5287 /* Test if TEST_ADDR == LAST_ADDR. */
5288 xops[1] = reg2;
5289 output_asm_insn ("cmp\t%0, %1", xops);
5290
5291 /* Probe at TEST_ADDR and branch. */
5292 if (TARGET_ARCH64)
5293 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5294 else
5295 fputs ("\tbne\t", asm_out_file);
5296 assemble_name_raw (asm_out_file, loop_lab);
5297 fputc ('\n', asm_out_file);
5298 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5299 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5300
5301 return "";
5302 }
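
/* Editorial illustration: with a 4096-byte probe interval and no stack
   bias (32-bit mode), the loop printed above comes out as

   .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne	.LPSRL0
	 st	%g0, [%g1+0]

   probing one word per interval until TEST_ADDR reaches LAST_ADDR.  */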
5303
5304 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5305 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5306 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5307 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5308 the action to be performed if it returns false. Return the new offset. */
5309
5310 typedef bool (*sorr_pred_t) (unsigned int, int);
5311 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5312
5313 static int
5314 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5315 int offset, int leaf_function, sorr_pred_t save_p,
5316 sorr_act_t action_true, sorr_act_t action_false)
5317 {
5318 unsigned int i;
5319 rtx mem;
5320 rtx_insn *insn;
5321
5322 if (TARGET_ARCH64 && high <= 32)
5323 {
5324 int fp_offset = -1;
5325
5326 for (i = low; i < high; i++)
5327 {
5328 if (save_p (i, leaf_function))
5329 {
5330 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5331 base, offset));
5332 if (action_true == SORR_SAVE)
5333 {
5334 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5335 RTX_FRAME_RELATED_P (insn) = 1;
5336 }
5337 else /* action_true == SORR_RESTORE */
5338 {
5339 /* The frame pointer must be restored last since its old
5340 value may be used as base address for the frame. This
5341 is problematic in 64-bit mode only because of the lack
5342 of double-word load instruction. */
5343 if (i == HARD_FRAME_POINTER_REGNUM)
5344 fp_offset = offset;
5345 else
5346 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5347 }
5348 offset += 8;
5349 }
5350 else if (action_false == SORR_ADVANCE)
5351 offset += 8;
5352 }
5353
5354 if (fp_offset >= 0)
5355 {
5356 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5357 emit_move_insn (hard_frame_pointer_rtx, mem);
5358 }
5359 }
5360 else
5361 {
5362 for (i = low; i < high; i += 2)
5363 {
5364 bool reg0 = save_p (i, leaf_function);
5365 bool reg1 = save_p (i + 1, leaf_function);
5366 machine_mode mode;
5367 int regno;
5368
5369 if (reg0 && reg1)
5370 {
5371 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5372 regno = i;
5373 }
5374 else if (reg0)
5375 {
5376 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5377 regno = i;
5378 }
5379 else if (reg1)
5380 {
5381 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5382 regno = i + 1;
5383 offset += 4;
5384 }
5385 else
5386 {
5387 if (action_false == SORR_ADVANCE)
5388 offset += 8;
5389 continue;
5390 }
5391
5392 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5393 if (action_true == SORR_SAVE)
5394 {
5395 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5396 RTX_FRAME_RELATED_P (insn) = 1;
5397 if (mode == DImode)
5398 {
5399 rtx set1, set2;
5400 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5401 offset));
5402 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5403 RTX_FRAME_RELATED_P (set1) = 1;
5404 mem
5405 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5406 offset + 4));
5407 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5408 RTX_FRAME_RELATED_P (set2) = 1;
5409 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5410 gen_rtx_PARALLEL (VOIDmode,
5411 gen_rtvec (2, set1, set2)));
5412 }
5413 }
5414 else /* action_true == SORR_RESTORE */
5415 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5416
5417 /* Bump and round down to double word
5418 in case we already bumped by 4. */
5419 offset = ROUND_DOWN (offset + 8, 8);
5420 }
5421 }
5422
5423 return offset;
5424 }
5425
5426 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5427
5428 static rtx
5429 emit_adjust_base_to_offset (rtx base, int offset)
5430 {
5431 /* ??? This might be optimized a little as %g1 might already have a
5432 value close enough that a single add insn will do. */
5433 /* ??? Although, all of this is probably only a temporary fix because
5434 if %g1 can hold a function result, then sparc_expand_epilogue will
5435 lose (the result will be clobbered). */
5436 rtx new_base = gen_rtx_REG (Pmode, 1);
5437 emit_move_insn (new_base, GEN_INT (offset));
5438 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5439 return new_base;
5440 }
5441
5442 /* Emit code to save/restore call-saved global and FP registers. */
5443
5444 static void
5445 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5446 {
5447 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5448 {
5449 base = emit_adjust_base_to_offset (base, offset);
5450 offset = 0;
5451 }
5452
5453 offset
5454 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5455 save_global_or_fp_reg_p, action, SORR_NONE);
5456 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5457 save_global_or_fp_reg_p, action, SORR_NONE);
5458 }
5459
5460 /* Emit code to save/restore call-saved local and in registers. */
5461
5462 static void
5463 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5464 {
5465 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5466 {
5467 base = emit_adjust_base_to_offset (base, offset);
5468 offset = 0;
5469 }
5470
5471 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5472 save_local_or_in_reg_p, action, SORR_ADVANCE);
5473 }
5474
5475 /* Emit a window_save insn. */
5476
5477 static rtx_insn *
5478 emit_window_save (rtx increment)
5479 {
5480 rtx_insn *insn = emit_insn (gen_window_save (increment));
5481 RTX_FRAME_RELATED_P (insn) = 1;
5482
5483 /* The incoming return address (%o7) is saved in %i7. */
5484 add_reg_note (insn, REG_CFA_REGISTER,
5485 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5486 gen_rtx_REG (Pmode,
5487 INCOMING_RETURN_ADDR_REGNUM)));
5488
5489 /* The window save event. */
5490 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5491
5492 /* The CFA is %fp, the hard frame pointer. */
5493 add_reg_note (insn, REG_CFA_DEF_CFA,
5494 plus_constant (Pmode, hard_frame_pointer_rtx,
5495 INCOMING_FRAME_SP_OFFSET));
5496
5497 return insn;
5498 }
5499
5500 /* Generate an increment for the stack pointer. */
5501
5502 static rtx
5503 gen_stack_pointer_inc (rtx increment)
5504 {
5505 return gen_rtx_SET (stack_pointer_rtx,
5506 gen_rtx_PLUS (Pmode,
5507 stack_pointer_rtx,
5508 increment));
5509 }
5510
5511 /* Expand the function prologue. The prologue is responsible for reserving
5512 storage for the frame, saving the call-saved registers and loading the
5513 GOT register if needed. */
5514
5515 void
5516 sparc_expand_prologue (void)
5517 {
5518 HOST_WIDE_INT size;
5519 rtx_insn *insn;
5520
5521 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5522 on the final value of the flag means deferring the prologue/epilogue
5523 expansion until just before the second scheduling pass, which is too
5524 late to emit multiple epilogues or return insns.
5525
5526 Of course we are making the assumption that the value of the flag
5527 will not change between now and its final value. Of the three parts
5528 of the formula, only the last one can reasonably vary. Let's take a
5529 closer look, after assuming that the first two are set to true
5530 (otherwise the last value is effectively silenced).
5531
5532 If only_leaf_regs_used returns false, the global predicate will also
5533 be false so the actual frame size calculated below will be positive.
5534 As a consequence, the save_register_window insn will be emitted in
5535 the instruction stream; now this insn explicitly references %fp
5536 which is not a leaf register so only_leaf_regs_used will always
5537 return false subsequently.
5538
5539 If only_leaf_regs_used returns true, we hope that the subsequent
5540 optimization passes won't cause non-leaf registers to pop up. For
5541 example, the regrename pass has special provisions to not rename to
5542 non-leaf registers in a leaf function. */
5543 sparc_leaf_function_p
5544 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5545
5546 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5547
5548 if (flag_stack_usage_info)
5549 current_function_static_stack_size = size;
5550
5551 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5552 {
5553 if (crtl->is_leaf && !cfun->calls_alloca)
5554 {
5555 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5556 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5557 size - STACK_CHECK_PROTECT);
5558 }
5559 else if (size > 0)
5560 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5561 }
5562
5563 if (size == 0)
5564 ; /* do nothing. */
5565 else if (sparc_leaf_function_p)
5566 {
5567 rtx size_int_rtx = GEN_INT (-size);
5568
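/* Editor's note: the 4096/8192 thresholds below come from the 13-bit
   signed immediate field (simm13, range -4096..4095) of SPARC arithmetic
   instructions: up to 4096 bytes can be allocated with one immediate add,
   up to 8192 bytes with two.  */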
5569 if (size <= 4096)
5570 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5571 else if (size <= 8192)
5572 {
5573 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5574 RTX_FRAME_RELATED_P (insn) = 1;
5575
5576 /* %sp is still the CFA register. */
5577 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5578 }
5579 else
5580 {
5581 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5582 emit_move_insn (size_rtx, size_int_rtx);
5583 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5584 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5585 gen_stack_pointer_inc (size_int_rtx));
5586 }
5587
5588 RTX_FRAME_RELATED_P (insn) = 1;
5589 }
5590 else
5591 {
5592 rtx size_int_rtx = GEN_INT (-size);
5593
5594 if (size <= 4096)
5595 emit_window_save (size_int_rtx);
5596 else if (size <= 8192)
5597 {
5598 emit_window_save (GEN_INT (-4096));
5599
5600 /* %sp is not the CFA register anymore. */
5601 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5602
5603 /* Make sure no %fp-based store is issued until after the frame is
5604 established. The offset between the frame pointer and the stack
5605 pointer is calculated relative to the value of the stack pointer
5606 at the end of the function prologue, and moving instructions that
5607 access the stack via the frame pointer between the instructions
5608 that decrement the stack pointer could result in accessing the
5609 register window save area, which is volatile. */
5610 emit_insn (gen_frame_blockage ());
5611 }
5612 else
5613 {
5614 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5615 emit_move_insn (size_rtx, size_int_rtx);
5616 emit_window_save (size_rtx);
5617 }
5618 }
5619
5620 if (sparc_leaf_function_p)
5621 {
5622 sparc_frame_base_reg = stack_pointer_rtx;
5623 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5624 }
5625 else
5626 {
5627 sparc_frame_base_reg = hard_frame_pointer_rtx;
5628 sparc_frame_base_offset = SPARC_STACK_BIAS;
5629 }
5630
5631 if (sparc_n_global_fp_regs > 0)
5632 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5633 sparc_frame_base_offset
5634 - sparc_apparent_frame_size,
5635 SORR_SAVE);
5636
5637 /* Load the GOT register if needed. */
5638 if (crtl->uses_pic_offset_table)
5639 load_got_register ();
5640
5641 /* Advertise that the data calculated just above are now valid. */
5642 sparc_prologue_data_valid_p = true;
5643 }
5644
5645 /* Expand the function prologue for the flat register window model.  The
5646 prologue is responsible for reserving storage for the frame, saving the
5647 call-saved registers and loading the GOT register if needed. */
5648
5649 void
5650 sparc_flat_expand_prologue (void)
5651 {
5652 HOST_WIDE_INT size;
5653 rtx_insn *insn;
5654
5655 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5656
5657 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5658
5659 if (flag_stack_usage_info)
5660 current_function_static_stack_size = size;
5661
5662 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5663 {
5664 if (crtl->is_leaf && !cfun->calls_alloca)
5665 {
5666 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5667 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5668 size - STACK_CHECK_PROTECT);
5669 }
5670 else if (size > 0)
5671 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5672 }
5673
5674 if (sparc_save_local_in_regs_p)
5675 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5676 SORR_SAVE);
5677
5678 if (size == 0)
5679 ; /* do nothing. */
5680 else
5681 {
5682 rtx size_int_rtx, size_rtx;
5683
5684 size_rtx = size_int_rtx = GEN_INT (-size);
5685
5686 /* We establish the frame (i.e. decrement the stack pointer) first, even
5687 if we use a frame pointer, because, for the sake of ABI compatibility,
5688 we cannot clobber any call-saved registers, including the frame
5689 pointer, before the new register save area has been created. */
5690 if (size <= 4096)
5691 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5692 else if (size <= 8192 && !frame_pointer_needed)
5693 {
5694 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5695 RTX_FRAME_RELATED_P (insn) = 1;
5696 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5697 }
5698 else
5699 {
5700 size_rtx = gen_rtx_REG (Pmode, 1);
5701 emit_move_insn (size_rtx, size_int_rtx);
5702 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5703 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5704 gen_stack_pointer_inc (size_int_rtx));
5705 }
5706 RTX_FRAME_RELATED_P (insn) = 1;
5707
5708 /* Ensure nothing is scheduled until after the frame is established. */
5709 emit_insn (gen_blockage ());
5710
5711 if (frame_pointer_needed)
5712 {
5713 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5714 gen_rtx_MINUS (Pmode,
5715 stack_pointer_rtx,
5716 size_rtx)));
5717 RTX_FRAME_RELATED_P (insn) = 1;
5718
5719 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5720 gen_rtx_SET (hard_frame_pointer_rtx,
5721 plus_constant (Pmode, stack_pointer_rtx,
5722 size)));
5723 }
5724
5725 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5726 {
5727 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5728 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5729
5730 insn = emit_move_insn (i7, o7);
5731 RTX_FRAME_RELATED_P (insn) = 1;
5732
5733 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5734
5735 /* Prevent this instruction from ever being considered dead,
5736 even if this function has no epilogue. */
5737 emit_use (i7);
5738 }
5739 }
5740
5741 if (frame_pointer_needed)
5742 {
5743 sparc_frame_base_reg = hard_frame_pointer_rtx;
5744 sparc_frame_base_offset = SPARC_STACK_BIAS;
5745 }
5746 else
5747 {
5748 sparc_frame_base_reg = stack_pointer_rtx;
5749 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5750 }
5751
5752 if (sparc_n_global_fp_regs > 0)
5753 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5754 sparc_frame_base_offset
5755 - sparc_apparent_frame_size,
5756 SORR_SAVE);
5757
5758 /* Load the GOT register if needed. */
5759 if (crtl->uses_pic_offset_table)
5760 load_got_register ();
5761
5762 /* Advertise that the data calculated just above are now valid. */
5763 sparc_prologue_data_valid_p = true;
5764 }
5765
5766 /* This function generates the assembly code for function entry, which boils
5767 down to emitting the necessary .register directives. */
5768
5769 static void
5770 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5771 {
5772 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5773 if (!TARGET_FLAT)
5774 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5775
5776 sparc_output_scratch_registers (file);
5777 }
5778
5779 /* Expand the function epilogue, either normal or part of a sibcall.
5780 We emit all the instructions except the return or the call. */
5781
5782 void
5783 sparc_expand_epilogue (bool for_eh)
5784 {
5785 HOST_WIDE_INT size = sparc_frame_size;
5786
5787 if (cfun->calls_alloca)
5788 emit_insn (gen_frame_blockage ());
5789
5790 if (sparc_n_global_fp_regs > 0)
5791 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5792 sparc_frame_base_offset
5793 - sparc_apparent_frame_size,
5794 SORR_RESTORE);
5795
5796 if (size == 0 || for_eh)
5797 ; /* do nothing. */
5798 else if (sparc_leaf_function_p)
5799 {
5800 if (size <= 4096)
5801 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5802 else if (size <= 8192)
5803 {
5804 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5805 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5806 }
5807 else
5808 {
5809 rtx reg = gen_rtx_REG (Pmode, 1);
5810 emit_move_insn (reg, GEN_INT (size));
5811 emit_insn (gen_stack_pointer_inc (reg));
5812 }
5813 }
5814 }
5815
5816 /* Expand the function epilogue for the flat model, either normal or part
5817 of a sibcall.  We emit all the instructions except the return or the call. */
5818
5819 void
5820 sparc_flat_expand_epilogue (bool for_eh)
5821 {
5822 HOST_WIDE_INT size = sparc_frame_size;
5823
5824 if (sparc_n_global_fp_regs > 0)
5825 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5826 sparc_frame_base_offset
5827 - sparc_apparent_frame_size,
5828 SORR_RESTORE);
5829
5830 /* If we have a frame pointer, we'll need both to restore it before the
5831 frame is destroyed and to use its current value in destroying the frame.
5832 Since we don't have an atomic way to do that in the flat window model,
5833 we save the current value into a temporary register (%g1). */
5834 if (frame_pointer_needed && !for_eh)
5835 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5836
5837 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5838 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5839 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5840
5841 if (sparc_save_local_in_regs_p)
5842 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5843 sparc_frame_base_offset,
5844 SORR_RESTORE);
5845
5846 if (size == 0 || for_eh)
5847 ; /* do nothing. */
5848 else if (frame_pointer_needed)
5849 {
5850 /* Make sure the frame is destroyed after everything else is done. */
5851 emit_insn (gen_blockage ());
5852
5853 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5854 }
5855 else
5856 {
5857 /* Likewise. */
5858 emit_insn (gen_blockage ());
5859
5860 if (size <= 4096)
5861 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5862 else if (size <= 8192)
5863 {
5864 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5865 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5866 }
5867 else
5868 {
5869 rtx reg = gen_rtx_REG (Pmode, 1);
5870 emit_move_insn (reg, GEN_INT (size));
5871 emit_insn (gen_stack_pointer_inc (reg));
5872 }
5873 }
5874 }
5875
5876 /* Return true if it is appropriate to emit `return' instructions in the
5877 body of a function. */
5878
5879 bool
5880 sparc_can_use_return_insn_p (void)
5881 {
5882 return sparc_prologue_data_valid_p
5883 && sparc_n_global_fp_regs == 0
5884 && TARGET_FLAT
5885 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5886 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5887 }
5888
5889 /* This function generates the assembly code for function exit. */
5890
5891 static void
5892 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5893 {
5894 /* If the last two instructions of a function are "call foo; dslot;"
5895 the return address might point to the first instruction in the next
5896 function and we have to output a dummy nop for the sake of sane
5897 backtraces in such cases. This is pointless for sibling calls since
5898 the return address is explicitly adjusted. */
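/* Editor's illustration (not in the original sources): if the function
   ended with

     call    foo
      nop             ! delay slot

   then %o7+8 would already point past the end of the function, hence the
   dummy nop emitted below.  */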
5899
5900 rtx_insn *insn = get_last_insn ();
5901
5902 rtx last_real_insn = prev_real_insn (insn);
5903 if (last_real_insn
5904 && NONJUMP_INSN_P (last_real_insn)
5905 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5906 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5907
5908 if (last_real_insn
5909 && CALL_P (last_real_insn)
5910 && !SIBLING_CALL_P (last_real_insn))
5911 fputs("\tnop\n", file);
5912
5913 sparc_output_deferred_case_vectors ();
5914 }
5915
5916 /* Output a 'restore' instruction. */
5917
5918 static void
5919 output_restore (rtx pat)
5920 {
5921 rtx operands[3];
5922
5923 if (! pat)
5924 {
5925 fputs ("\t restore\n", asm_out_file);
5926 return;
5927 }
5928
5929 gcc_assert (GET_CODE (pat) == SET);
5930
5931 operands[0] = SET_DEST (pat);
5932 pat = SET_SRC (pat);
5933
5934 switch (GET_CODE (pat))
5935 {
5936 case PLUS:
5937 operands[1] = XEXP (pat, 0);
5938 operands[2] = XEXP (pat, 1);
5939 output_asm_insn (" restore %r1, %2, %Y0", operands);
5940 break;
5941 case LO_SUM:
5942 operands[1] = XEXP (pat, 0);
5943 operands[2] = XEXP (pat, 1);
5944 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5945 break;
5946 case ASHIFT:
5947 operands[1] = XEXP (pat, 0);
5948 gcc_assert (XEXP (pat, 1) == const1_rtx);
5949 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5950 break;
5951 default:
5952 operands[1] = pat;
5953 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5954 break;
5955 }
5956 }
5957
5958 /* Output a return. */
5959
5960 const char *
5961 output_return (rtx_insn *insn)
5962 {
5963 if (crtl->calls_eh_return)
5964 {
5965 /* If the function uses __builtin_eh_return, the eh_return
5966 machinery occupies the delay slot. */
5967 gcc_assert (!final_sequence);
5968
5969 if (flag_delayed_branch)
5970 {
5971 if (!TARGET_FLAT && TARGET_V9)
5972 fputs ("\treturn\t%i7+8\n", asm_out_file);
5973 else
5974 {
5975 if (!TARGET_FLAT)
5976 fputs ("\trestore\n", asm_out_file);
5977
5978 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5979 }
5980
5981 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5982 }
5983 else
5984 {
5985 if (!TARGET_FLAT)
5986 fputs ("\trestore\n", asm_out_file);
5987
5988 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5989 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5990 }
5991 }
5992 else if (sparc_leaf_function_p || TARGET_FLAT)
5993 {
5994 /* This is a leaf or flat function so we don't have to bother restoring
5995 the register window, which frees us from dealing with the convoluted
5996 semantics of restore/return. We simply output the jump to the
5997 return address and the insn in the delay slot (if any). */
5998
5999 return "jmp\t%%o7+%)%#";
6000 }
6001 else
6002 {
6003 /* This is a regular function so we have to restore the register window.
6004 We may have a pending insn for the delay slot, which will be either
6005 combined with the 'restore' instruction or put in the delay slot of
6006 the 'return' instruction. */
6007
6008 if (final_sequence)
6009 {
6010 rtx delay, pat;
6011
6012 delay = NEXT_INSN (insn);
6013 gcc_assert (delay);
6014
6015 pat = PATTERN (delay);
6016
6017 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6018 {
6019 epilogue_renumber (&pat, 0);
6020 return "return\t%%i7+%)%#";
6021 }
6022 else
6023 {
6024 output_asm_insn ("jmp\t%%i7+%)", NULL);
6025 output_restore (pat);
6026 PATTERN (delay) = gen_blockage ();
6027 INSN_CODE (delay) = -1;
6028 }
6029 }
6030 else
6031 {
6032 /* The delay slot is empty. */
6033 if (TARGET_V9)
6034 return "return\t%%i7+%)\n\t nop";
6035 else if (flag_delayed_branch)
6036 return "jmp\t%%i7+%)\n\t restore";
6037 else
6038 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6039 }
6040 }
6041
6042 return "";
6043 }
6044
6045 /* Output a sibling call. */
6046
6047 const char *
6048 output_sibcall (rtx_insn *insn, rtx call_operand)
6049 {
6050 rtx operands[1];
6051
6052 gcc_assert (flag_delayed_branch);
6053
6054 operands[0] = call_operand;
6055
6056 if (sparc_leaf_function_p || TARGET_FLAT)
6057 {
6058 /* This is a leaf or flat function so we don't have to bother restoring
6059 the register window. We simply output the jump to the function and
6060 the insn in the delay slot (if any). */
6061
6062 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6063
6064 if (final_sequence)
6065 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6066 operands);
6067 else
6068 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6069 it into a branch if possible. */
6070 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6071 operands);
6072 }
6073 else
6074 {
6075 /* This is a regular function so we have to restore the register window.
6076 We may have a pending insn for the delay slot, which will be combined
6077 with the 'restore' instruction. */
6078
6079 output_asm_insn ("call\t%a0, 0", operands);
6080
6081 if (final_sequence)
6082 {
6083 rtx_insn *delay = NEXT_INSN (insn);
6084 gcc_assert (delay);
6085
6086 output_restore (PATTERN (delay));
6087
6088 PATTERN (delay) = gen_blockage ();
6089 INSN_CODE (delay) = -1;
6090 }
6091 else
6092 output_restore (NULL_RTX);
6093 }
6094
6095 return "";
6096 }
6097 \f
6098 /* Functions for handling argument passing.
6099
6100 For 32-bit, the first 6 args are normally in registers and the rest are
6101 pushed. Any arg that starts within the first 6 words is at least
6102 partially passed in a register unless its data type forbids.
6103
6104 For 64-bit, the argument registers are laid out as an array of 16 elements
6105 and arguments are added sequentially. The first 6 int args and up to the
6106 first 16 fp args (depending on size) are passed in regs.
6107
6108 Slot Stack Integral Float Float in structure Double Long Double
6109 ---- ----- -------- ----- ------------------ ------ -----------
6110 15 [SP+248] %f31 %f30,%f31 %d30
6111 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6112 13 [SP+232] %f27 %f26,%f27 %d26
6113 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6114 11 [SP+216] %f23 %f22,%f23 %d22
6115 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6116 9 [SP+200] %f19 %f18,%f19 %d18
6117 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6118 7 [SP+184] %f15 %f14,%f15 %d14
6119 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6120 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6121 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6122 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6123 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6124 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6125 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6126
6127 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6128
6129 Integral arguments are always passed as 64-bit quantities appropriately
6130 extended.
6131
6132 Passing of floating point values is handled as follows.
6133 If a prototype is in scope:
6134 If the value is in a named argument (i.e. not a stdarg function or a
6135 value not part of the `...') then the value is passed in the appropriate
6136 fp reg.
6137 If the value is part of the `...' and is passed in one of the first 6
6138 slots then the value is passed in the appropriate int reg.
6139 If the value is part of the `...' and is not passed in one of the first 6
6140 slots then the value is passed in memory.
6141 If a prototype is not in scope:
6142 If the value is one of the first 6 arguments the value is passed in the
6143 appropriate integer reg and the appropriate fp reg.
6144 If the value is not one of the first 6 arguments the value is passed in
6145 the appropriate fp reg and in memory.
6146
6147
6148 Summary of the calling conventions implemented by GCC on the SPARC:
6149
6150 32-bit ABI:
6151 size argument return value
6152
6153 small integer <4 int. reg. int. reg.
6154 word 4 int. reg. int. reg.
6155 double word 8 int. reg. int. reg.
6156
6157 _Complex small integer <8 int. reg. int. reg.
6158 _Complex word 8 int. reg. int. reg.
6159 _Complex double word 16 memory int. reg.
6160
6161 vector integer <=8 int. reg. FP reg.
6162 vector integer >8 memory memory
6163
6164 float 4 int. reg. FP reg.
6165 double 8 int. reg. FP reg.
6166 long double 16 memory memory
6167
6168 _Complex float 8 memory FP reg.
6169 _Complex double 16 memory FP reg.
6170 _Complex long double 32 memory FP reg.
6171
6172 vector float any memory memory
6173
6174 aggregate any memory memory
6175
6176
6177
6178 64-bit ABI:
6179 size argument return value
6180
6181 small integer <8 int. reg. int. reg.
6182 word 8 int. reg. int. reg.
6183 double word 16 int. reg. int. reg.
6184
6185 _Complex small integer <16 int. reg. int. reg.
6186 _Complex word 16 int. reg. int. reg.
6187 _Complex double word 32 memory int. reg.
6188
6189 vector integer <=16 FP reg. FP reg.
6190 vector integer 16<s<=32 memory FP reg.
6191 vector integer >32 memory memory
6192
6193 float 4 FP reg. FP reg.
6194 double 8 FP reg. FP reg.
6195 long double 16 FP reg. FP reg.
6196
6197 _Complex float 8 FP reg. FP reg.
6198 _Complex double 16 FP reg. FP reg.
6199 _Complex long double 32 memory FP reg.
6200
6201 vector float <=16 FP reg. FP reg.
6202 vector float 16<s<=32 memory FP reg.
6203 vector float >32 memory memory
6204
6205 aggregate <=16 reg. reg.
6206 aggregate 16<s<=32 memory reg.
6207 aggregate >32 memory memory
6208
6209
6210
6211 Note #1: complex floating-point types follow the extended SPARC ABIs as
6212 implemented by the Sun compiler.
6213
6214 Note #2: integral vector types follow the scalar floating-point types
6215 conventions to match what is implemented by the Sun VIS SDK.
6216
6217 Note #3: floating-point vector types follow the aggregate types
6218 conventions. */
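/* Editor's worked example (not part of the original sources): for a
   prototyped 64-bit call such as

     extern void f (int i, double d, float s);

   slot 0 goes to I, slot 1 to D and slot 2 to S, so I is passed in %o0,
   D in the double FP register %d2 and S right-justified in %f5, as read
   off the table above.  */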
6219
6220
6221 /* Maximum number of int regs for args. */
6222 #define SPARC_INT_ARG_MAX 6
6223 /* Maximum number of fp regs for args. */
6224 #define SPARC_FP_ARG_MAX 16
6225 /* Number of words (partially) occupied for a given size in units. */
6226 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
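/* Editor's note: UNITS_PER_WORD is 8 for 64-bit and 4 for 32-bit code, so
   CEIL_NWORDS (12), for instance, is 2 and 3 respectively.  */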
6227
6228 /* Handle the INIT_CUMULATIVE_ARGS macro.
6229 Initialize a variable CUM of type CUMULATIVE_ARGS
6230 for a call to a function whose data type is FNTYPE.
6231 For a library call, FNTYPE is 0. */
6232
6233 void
6234 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6235 {
6236 cum->words = 0;
6237 cum->prototype_p = fntype && prototype_p (fntype);
6238 cum->libcall_p = !fntype;
6239 }
6240
6241 /* Handle promotion of pointer and integer arguments. */
6242
6243 static machine_mode
6244 sparc_promote_function_mode (const_tree type, machine_mode mode,
6245 int *punsignedp, const_tree, int)
6246 {
6247 if (type && POINTER_TYPE_P (type))
6248 {
6249 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6250 return Pmode;
6251 }
6252
6253 /* Integral arguments are passed as full words, as per the ABI. */
6254 if (GET_MODE_CLASS (mode) == MODE_INT
6255 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6256 return word_mode;
6257
6258 return mode;
6259 }
6260
6261 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6262
6263 static bool
6264 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6265 {
6266 return TARGET_ARCH64 ? true : false;
6267 }
6268
6269 /* Traverse the record TYPE recursively and call FUNC on its fields.
6270 NAMED is true if this is for a named parameter. DATA is passed
6271 to FUNC for each field. OFFSET is the starting position and
6272 PACKED is true if we are inside a packed record. */
6273
6274 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6275 static void
6276 traverse_record_type (const_tree type, bool named, T *data,
6277 HOST_WIDE_INT offset = 0, bool packed = false)
6278 {
6279 /* The ABI obviously doesn't specify how packed structures are passed.
6280 These are passed in integer regs if possible, otherwise memory. */
6281 if (!packed)
6282 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6283 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6284 {
6285 packed = true;
6286 break;
6287 }
6288
6289 /* Walk the real fields, but skip those with no size or a zero size.
6290 ??? Fields with variable offset are handled as having zero offset. */
6291 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6292 if (TREE_CODE (field) == FIELD_DECL)
6293 {
6294 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6295 continue;
6296
6297 HOST_WIDE_INT bitpos = offset;
6298 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6299 bitpos += int_bit_position (field);
6300
6301 tree field_type = TREE_TYPE (field);
6302 if (TREE_CODE (field_type) == RECORD_TYPE)
6303 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6304 packed);
6305 else
6306 {
6307 const bool fp_type
6308 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6309 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6310 data);
6311 }
6312 }
6313 }
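/* Editor's illustration (assumed example, not from the original sources):
   for a named argument of type

     struct { struct { float f; } inner; int i; };

   traverse_record_type recurses into INNER and calls FUNC once for F with
   bit position 0 and FP true (given TARGET_FPU and no packed field), and
   once for I with bit position 32 and FP false.  */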
6314
6315 /* Handle recursive register classifying for structure layout. */
6316
6317 typedef struct
6318 {
6319 bool fp_regs; /* true if field eligible to FP registers. */
6320 bool fp_regs_in_first_word; /* true if such field in first word. */
6321 } classify_data_t;
6322
6323 /* A subroutine of function_arg_slotno. Classify the field. */
6324
6325 inline void
6326 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6327 classify_data_t *data)
6328 {
6329 if (fp)
6330 {
6331 data->fp_regs = true;
6332 if (bitpos < BITS_PER_WORD)
6333 data->fp_regs_in_first_word = true;
6334 }
6335 }
6336
6337 /* Compute the slot number to pass an argument in.
6338 Return the slot number or -1 if passing on the stack.
6339
6340 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6341 the preceding args and about the function being called.
6342 MODE is the argument's machine mode.
6343 TYPE is the data type of the argument (as a tree).
6344 This is null for libcalls where that information may
6345 not be available.
6346 NAMED is nonzero if this argument is a named parameter
6347 (otherwise it is an extra parameter matching an ellipsis).
6348 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6349 *PREGNO records the register number to use if scalar type.
6350 *PPADDING records the amount of padding needed in words. */
6351
6352 static int
6353 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6354 const_tree type, bool named, bool incoming,
6355 int *pregno, int *ppadding)
6356 {
6357 int regbase = (incoming
6358 ? SPARC_INCOMING_INT_ARG_FIRST
6359 : SPARC_OUTGOING_INT_ARG_FIRST);
6360 int slotno = cum->words;
6361 enum mode_class mclass;
6362 int regno;
6363
6364 *ppadding = 0;
6365
6366 if (type && TREE_ADDRESSABLE (type))
6367 return -1;
6368
6369 if (TARGET_ARCH32
6370 && mode == BLKmode
6371 && type
6372 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6373 return -1;
6374
6375 /* For SPARC64, objects requiring 16-byte alignment get it. */
6376 if (TARGET_ARCH64
6377 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6378 && (slotno & 1) != 0)
6379 slotno++, *ppadding = 1;
6380
6381 mclass = GET_MODE_CLASS (mode);
6382 if (type && TREE_CODE (type) == VECTOR_TYPE)
6383 {
6384 /* Vector types deserve special treatment because they are
6385 polymorphic wrt their mode, depending upon whether VIS
6386 instructions are enabled. */
6387 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6388 {
6389 /* The SPARC port defines no floating-point vector modes. */
6390 gcc_assert (mode == BLKmode);
6391 }
6392 else
6393 {
6394 /* Integral vector types should either have a vector
6395 mode or an integral mode, because we are guaranteed
6396 by pass_by_reference that their size is not greater
6397 than 16 bytes and TImode is 16-byte wide. */
6398 gcc_assert (mode != BLKmode);
6399
6400 /* Vector integers are handled like floats according to
6401 the Sun VIS SDK. */
6402 mclass = MODE_FLOAT;
6403 }
6404 }
6405
6406 switch (mclass)
6407 {
6408 case MODE_FLOAT:
6409 case MODE_COMPLEX_FLOAT:
6410 case MODE_VECTOR_INT:
6411 if (TARGET_ARCH64 && TARGET_FPU && named)
6412 {
6413 /* If all arg slots are filled, then must pass on stack. */
6414 if (slotno >= SPARC_FP_ARG_MAX)
6415 return -1;
6416
6417 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6418 /* Arguments filling only one single FP register are
6419 right-justified in the outer double FP register. */
6420 if (GET_MODE_SIZE (mode) <= 4)
6421 regno++;
6422 break;
6423 }
6424 /* fallthrough */
6425
6426 case MODE_INT:
6427 case MODE_COMPLEX_INT:
6428 /* If all arg slots are filled, then must pass on stack. */
6429 if (slotno >= SPARC_INT_ARG_MAX)
6430 return -1;
6431
6432 regno = regbase + slotno;
6433 break;
6434
6435 case MODE_RANDOM:
6436 if (mode == VOIDmode)
6437 /* MODE is VOIDmode when generating the actual call. */
6438 return -1;
6439
6440 gcc_assert (mode == BLKmode);
6441
6442 if (TARGET_ARCH32
6443 || !type
6444 || (TREE_CODE (type) != RECORD_TYPE
6445 && TREE_CODE (type) != VECTOR_TYPE))
6446 {
6447 /* If all arg slots are filled, then must pass on stack. */
6448 if (slotno >= SPARC_INT_ARG_MAX)
6449 return -1;
6450
6451 regno = regbase + slotno;
6452 }
6453 else /* TARGET_ARCH64 && type */
6454 {
6455 /* If all arg slots are filled, then must pass on stack. */
6456 if (slotno >= SPARC_FP_ARG_MAX)
6457 return -1;
6458
6459 if (TREE_CODE (type) == RECORD_TYPE)
6460 {
6461 classify_data_t data = { false, false };
6462 traverse_record_type<classify_data_t, classify_registers>
6463 (type, named, &data);
6464
6465 if (data.fp_regs)
6466 {
6467 /* If all FP slots are filled except for the last one and
6468 there is no FP field in the first word, then must pass
6469 on stack. */
6470 if (slotno >= SPARC_FP_ARG_MAX - 1
6471 && !data.fp_regs_in_first_word)
6472 return -1;
6473 }
6474 else
6475 {
6476 /* If all int slots are filled, then must pass on stack. */
6477 if (slotno >= SPARC_INT_ARG_MAX)
6478 return -1;
6479 }
6480 }
6481
6482 /* PREGNO isn't set since both int and FP regs can be used. */
6483 return slotno;
6484 }
6485 break;
6486
6487 default :
6488 gcc_unreachable ();
6489 }
6490
6491 *pregno = regno;
6492 return slotno;
6493 }
6494
6495 /* Handle recursive register counting/assigning for structure layout. */
6496
6497 typedef struct
6498 {
6499 int slotno; /* slot number of the argument. */
6500 int regbase; /* regno of the base register. */
6501 int intoffset; /* offset of the first pending integer field. */
6502 int nregs; /* number of words passed in registers. */
6503 bool stack; /* true if part of the argument is on the stack. */
6504 rtx ret; /* return expression being built. */
6505 } assign_data_t;
6506
6507 /* A subroutine of function_arg_record_value. Compute the number of integer
6508 registers to be assigned between PARMS->intoffset and BITPOS. Return
6509 true if at least one integer register is assigned or false otherwise. */
6510
6511 static bool
6512 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6513 {
6514 if (data->intoffset < 0)
6515 return false;
6516
6517 const int intoffset = data->intoffset;
6518 data->intoffset = -1;
6519
6520 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6521 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6522 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6523 int nregs = (endbit - startbit) / BITS_PER_WORD;
6524
6525 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6526 {
6527 nregs = SPARC_INT_ARG_MAX - this_slotno;
6528
6529 /* We need to pass this field (partly) on the stack. */
6530 data->stack = 1;
6531 }
6532
6533 if (nregs <= 0)
6534 return false;
6535
6536 *pnregs = nregs;
6537 return true;
6538 }
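/* Editor's illustration (not in the original sources): for a 64-bit
   struct { long l; double d; } starting at slot 0, the pending integer run
   begins at bit 0 and the FP field D sits at bit position 64, so the
   computation above yields startbit 0, endbit 64 and exactly one integer
   register for L.  */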
6539
6540 /* A subroutine of function_arg_record_value. Compute the number and the mode
6541 of the FP registers to be assigned for FIELD. Return true if at least one
6542 FP register is assigned or false otherwise. */
6543
6544 static bool
6545 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6546 assign_data_t *data,
6547 int *pnregs, machine_mode *pmode)
6548 {
6549 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6550 machine_mode mode = DECL_MODE (field);
6551 int nregs, nslots;
6552
6553 /* Slots are counted as words while regs are counted as having the size of
6554 the (inner) mode. */
6555 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6556 {
6557 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6558 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6559 }
6560 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6561 {
6562 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6563 nregs = 2;
6564 }
6565 else
6566 nregs = 1;
6567
6568 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6569
6570 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6571 {
6572 nslots = SPARC_FP_ARG_MAX - this_slotno;
6573 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6574
6575 /* We need to pass this field (partly) on the stack. */
6576 data->stack = 1;
6577
6578 if (nregs <= 0)
6579 return false;
6580 }
6581
6582 *pnregs = nregs;
6583 *pmode = mode;
6584 return true;
6585 }
6586
6587 /* A subroutine of function_arg_record_value. Count the number of registers
6588 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6589
6590 inline void
6591 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6592 assign_data_t *data)
6593 {
6594 if (fp)
6595 {
6596 int nregs;
6597 machine_mode mode;
6598
6599 if (compute_int_layout (bitpos, data, &nregs))
6600 data->nregs += nregs;
6601
6602 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6603 data->nregs += nregs;
6604 }
6605 else
6606 {
6607 if (data->intoffset < 0)
6608 data->intoffset = bitpos;
6609 }
6610 }
6611
6612 /* A subroutine of function_arg_record_value. Assign the bits of the
6613 structure between PARMS->intoffset and BITPOS to integer registers. */
6614
6615 static void
6616 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6617 {
6618 int intoffset = data->intoffset;
6619 machine_mode mode;
6620 int nregs;
6621
6622 if (!compute_int_layout (bitpos, data, &nregs))
6623 return;
6624
6625 /* If this is the trailing part of a word, only load that much into
6626 the register. Otherwise load the whole register. Note that in
6627 the latter case we may pick up unwanted bits. It's not a problem
6628 at the moment, but we may wish to revisit this. */
6629 if (intoffset % BITS_PER_WORD != 0)
6630 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6631 MODE_INT);
6632 else
6633 mode = word_mode;
6634
6635 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6636 unsigned int regno = data->regbase + this_slotno;
6637 intoffset /= BITS_PER_UNIT;
6638
6639 do
6640 {
6641 rtx reg = gen_rtx_REG (mode, regno);
6642 XVECEXP (data->ret, 0, data->stack + data->nregs)
6643 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6644 data->nregs += 1;
6645 mode = word_mode;
6646 regno += 1;
6647 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6648 }
6649 while (--nregs > 0);
6650 }
6651
6652 /* A subroutine of function_arg_record_value. Assign FIELD at position
6653 BITPOS to FP registers. */
6654
6655 static void
6656 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6657 assign_data_t *data)
6658 {
6659 int nregs;
6660 machine_mode mode;
6661
6662 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6663 return;
6664
6665 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6666 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6667 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6668 regno++;
6669 int pos = bitpos / BITS_PER_UNIT;
6670
6671 do
6672 {
6673 rtx reg = gen_rtx_REG (mode, regno);
6674 XVECEXP (data->ret, 0, data->stack + data->nregs)
6675 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6676 data->nregs += 1;
6677 regno += GET_MODE_SIZE (mode) / 4;
6678 pos += GET_MODE_SIZE (mode);
6679 }
6680 while (--nregs > 0);
6681 }
6682
6683 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6684 the structure between PARMS->intoffset and BITPOS to registers. */
6685
6686 inline void
6687 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6688 assign_data_t *data)
6689 {
6690 if (fp)
6691 {
6692 assign_int_registers (bitpos, data);
6693
6694 assign_fp_registers (field, bitpos, data);
6695 }
6696 else
6697 {
6698 if (data->intoffset < 0)
6699 data->intoffset = bitpos;
6700 }
6701 }
6702
6703 /* Used by function_arg and sparc_function_value_1 to implement the complex
6704 conventions of the 64-bit ABI for passing and returning structures.
6705 Return an expression valid as a return value for the FUNCTION_ARG
6706 and TARGET_FUNCTION_VALUE.
6707
6708 TYPE is the data type of the argument (as a tree).
6709 This is null for libcalls where that information may
6710 not be available.
6711 MODE is the argument's machine mode.
6712 SLOTNO is the index number of the argument's slot in the parameter array.
6713 NAMED is true if this argument is a named parameter
6714 (otherwise it is an extra parameter matching an ellipsis).
6715 REGBASE is the regno of the base register for the parameter array. */
6716
6717 static rtx
6718 function_arg_record_value (const_tree type, machine_mode mode,
6719 int slotno, bool named, int regbase)
6720 {
6721 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6722 assign_data_t data;
6723 int nregs;
6724
6725 data.slotno = slotno;
6726 data.regbase = regbase;
6727
6728 /* Count how many registers we need. */
6729 data.nregs = 0;
6730 data.intoffset = 0;
6731 data.stack = false;
6732 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6733
6734 /* Take into account pending integer fields. */
6735 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6736 data.nregs += nregs;
6737
6738 /* Allocate the vector and handle some annoying special cases. */
6739 nregs = data.nregs;
6740
6741 if (nregs == 0)
6742 {
6743 /* ??? Empty structure has no value? Duh? */
6744 if (typesize <= 0)
6745 {
6746 /* Though there's nothing really to store, return a word register
6747 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6748 leads to breakage due to the fact that there are zero bytes to
6749 load. */
6750 return gen_rtx_REG (mode, regbase);
6751 }
6752
6753 /* ??? C++ has structures with no fields, and yet a size. Give up
6754 for now and pass everything back in integer registers. */
6755 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6756 if (nregs + slotno > SPARC_INT_ARG_MAX)
6757 nregs = SPARC_INT_ARG_MAX - slotno;
6758 }
6759
6760 gcc_assert (nregs > 0);
6761
6762 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6763
6764 /* If at least one field must be passed on the stack, generate
6765 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6766 also be passed on the stack. We can't do much better because the
6767 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6768 of structures for which the fields passed exclusively in registers
6769 are not at the beginning of the structure. */
6770 if (data.stack)
6771 XVECEXP (data.ret, 0, 0)
6772 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6773
6774 /* Assign the registers. */
6775 data.nregs = 0;
6776 data.intoffset = 0;
6777 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6778
6779 /* Assign pending integer fields. */
6780 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6781
6782 gcc_assert (data.nregs == nregs);
6783
6784 return data.ret;
6785 }
6786
6787 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6788 of the 64-bit ABI for passing and returning unions.
6789 Return an expression valid as a return value for the FUNCTION_ARG
6790 and TARGET_FUNCTION_VALUE.
6791
6792 SIZE is the size in bytes of the union.
6793 MODE is the argument's machine mode.
6794 REGNO is the hard register the union will be passed in. */
6795
6796 static rtx
6797 function_arg_union_value (int size, machine_mode mode, int slotno,
6798 int regno)
6799 {
6800 int nwords = CEIL_NWORDS (size), i;
6801 rtx regs;
6802
6803 /* See comment in previous function for empty structures. */
6804 if (nwords == 0)
6805 return gen_rtx_REG (mode, regno);
6806
6807 if (slotno == SPARC_INT_ARG_MAX - 1)
6808 nwords = 1;
6809
6810 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6811
6812 for (i = 0; i < nwords; i++)
6813 {
6814 /* Unions are passed left-justified. */
6815 XVECEXP (regs, 0, i)
6816 = gen_rtx_EXPR_LIST (VOIDmode,
6817 gen_rtx_REG (word_mode, regno),
6818 GEN_INT (UNITS_PER_WORD * i));
6819 regno++;
6820 }
6821
6822 return regs;
6823 }
6824
6825 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6826 for passing and returning BLKmode vectors.
6827 Return an expression valid as a return value for the FUNCTION_ARG
6828 and TARGET_FUNCTION_VALUE.
6829
6830 SIZE is the size in bytes of the vector.
6831 REGNO is the FP hard register the vector will be passed in. */
6832
6833 static rtx
6834 function_arg_vector_value (int size, int regno)
6835 {
6836 const int nregs = MAX (1, size / 8);
6837 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6838
6839 if (size < 8)
6840 XVECEXP (regs, 0, 0)
6841 = gen_rtx_EXPR_LIST (VOIDmode,
6842 gen_rtx_REG (SImode, regno),
6843 const0_rtx);
6844 else
6845 for (int i = 0; i < nregs; i++)
6846 XVECEXP (regs, 0, i)
6847 = gen_rtx_EXPR_LIST (VOIDmode,
6848 gen_rtx_REG (DImode, regno + 2*i),
6849 GEN_INT (i*8));
6850
6851 return regs;
6852 }
6853
6854 /* Determine where to put an argument to a function.
6855 Value is zero to push the argument on the stack,
6856 or a hard register in which to store the argument.
6857
6858 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6859 the preceding args and about the function being called.
6860 MODE is the argument's machine mode.
6861 TYPE is the data type of the argument (as a tree).
6862 This is null for libcalls where that information may
6863 not be available.
6864 NAMED is true if this argument is a named parameter
6865 (otherwise it is an extra parameter matching an ellipsis).
6866 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6867 TARGET_FUNCTION_INCOMING_ARG. */
6868
6869 static rtx
6870 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6871 const_tree type, bool named, bool incoming)
6872 {
6873 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6874
6875 int regbase = (incoming
6876 ? SPARC_INCOMING_INT_ARG_FIRST
6877 : SPARC_OUTGOING_INT_ARG_FIRST);
6878 int slotno, regno, padding;
6879 enum mode_class mclass = GET_MODE_CLASS (mode);
6880
6881 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6882 &regno, &padding);
6883 if (slotno == -1)
6884 return 0;
6885
6886 /* Vector types deserve special treatment because they are polymorphic wrt
6887 their mode, depending upon whether VIS instructions are enabled. */
6888 if (type && TREE_CODE (type) == VECTOR_TYPE)
6889 {
6890 HOST_WIDE_INT size = int_size_in_bytes (type);
6891 gcc_assert ((TARGET_ARCH32 && size <= 8)
6892 || (TARGET_ARCH64 && size <= 16));
6893
6894 if (mode == BLKmode)
6895 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6896
6897 mclass = MODE_FLOAT;
6898 }
6899
6900 if (TARGET_ARCH32)
6901 return gen_rtx_REG (mode, regno);
6902
6903 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6904 and are promoted to registers if possible. */
6905 if (type && TREE_CODE (type) == RECORD_TYPE)
6906 {
6907 HOST_WIDE_INT size = int_size_in_bytes (type);
6908 gcc_assert (size <= 16);
6909
6910 return function_arg_record_value (type, mode, slotno, named, regbase);
6911 }
6912
6913 /* Unions up to 16 bytes in size are passed in integer registers. */
6914 else if (type && TREE_CODE (type) == UNION_TYPE)
6915 {
6916 HOST_WIDE_INT size = int_size_in_bytes (type);
6917 gcc_assert (size <= 16);
6918
6919 return function_arg_union_value (size, mode, slotno, regno);
6920 }
6921
6922 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6923 but also have the slot allocated for them.
6924 If no prototype is in scope, fp values in register slots get passed
6925 in two places: either fp regs and int regs, or fp regs and memory. */
6926 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6927 && SPARC_FP_REG_P (regno))
6928 {
6929 rtx reg = gen_rtx_REG (mode, regno);
6930 if (cum->prototype_p || cum->libcall_p)
6931 return reg;
6932 else
6933 {
6934 rtx v0, v1;
6935
6936 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6937 {
6938 int intreg;
6939
6940 /* On incoming, we don't need to know that the value
6941 is passed in %f0 and %i0; knowing it confuses other parts,
6942 causing needless spillage even in the simplest cases. */
6943 if (incoming)
6944 return reg;
6945
6946 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6947 + (regno - SPARC_FP_ARG_FIRST) / 2);
6948
6949 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6950 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6951 const0_rtx);
6952 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6953 }
6954 else
6955 {
6956 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6957 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6958 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6959 }
6960 }
6961 }
6962
6963 /* All other aggregate types are passed in an integer register in a mode
6964 corresponding to the size of the type. */
6965 else if (type && AGGREGATE_TYPE_P (type))
6966 {
6967 HOST_WIDE_INT size = int_size_in_bytes (type);
6968 gcc_assert (size <= 16);
6969
6970 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6971 }
6972
6973 return gen_rtx_REG (mode, regno);
6974 }
6975
6976 /* Handle the TARGET_FUNCTION_ARG target hook. */
6977
6978 static rtx
6979 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6980 const_tree type, bool named)
6981 {
6982 return sparc_function_arg_1 (cum, mode, type, named, false);
6983 }
6984
6985 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6986
6987 static rtx
6988 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6989 const_tree type, bool named)
6990 {
6991 return sparc_function_arg_1 (cum, mode, type, named, true);
6992 }
6993
6994 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6995
6996 static unsigned int
6997 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6998 {
6999 return ((TARGET_ARCH64
7000 && (GET_MODE_ALIGNMENT (mode) == 128
7001 || (type && TYPE_ALIGN (type) == 128)))
7002 ? 128
7003 : PARM_BOUNDARY);
7004 }
7005
7006 /* For an arg passed partly in registers and partly in memory,
7007 this is the number of bytes of registers used.
7008 For args passed entirely in registers or entirely in memory, zero.
7009
7010 Any arg that starts in the first 6 regs but won't entirely fit in them
7011 needs partial registers on v8. On v9, structures with integer
7012 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7013 values that begin in the last fp reg [where "last fp reg" varies with the
7014 mode] will be split between that reg and memory. */
7015
7016 static int
7017 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7018 tree type, bool named)
7019 {
7020 int slotno, regno, padding;
7021
7022 /* We pass false for incoming here; it doesn't matter. */
7023 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7024 false, &regno, &padding);
7025
7026 if (slotno == -1)
7027 return 0;
7028
7029 if (TARGET_ARCH32)
7030 {
7031 if ((slotno + (mode == BLKmode
7032 ? CEIL_NWORDS (int_size_in_bytes (type))
7033 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7034 > SPARC_INT_ARG_MAX)
7035 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7036 }
7037 else
7038 {
7039 /* We are guaranteed by pass_by_reference that the size of the
7040 argument is not greater than 16 bytes, so we only need to return
7041 one word if the argument is partially passed in registers. */
7042
7043 if (type && AGGREGATE_TYPE_P (type))
7044 {
7045 int size = int_size_in_bytes (type);
7046
7047 if (size > UNITS_PER_WORD
7048 && (slotno == SPARC_INT_ARG_MAX - 1
7049 || slotno == SPARC_FP_ARG_MAX - 1))
7050 return UNITS_PER_WORD;
7051 }
7052 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7053 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7054 && ! (TARGET_FPU && named)))
7055 {
7056 /* The complex types are passed as packed types. */
7057 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7058 && slotno == SPARC_INT_ARG_MAX - 1)
7059 return UNITS_PER_WORD;
7060 }
7061 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7062 {
7063 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7064 > SPARC_FP_ARG_MAX)
7065 return UNITS_PER_WORD;
7066 }
7067 }
7068
7069 return 0;
7070 }
7071
7072 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7073 Specify whether to pass the argument by reference. */
7074
7075 static bool
7076 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7077 machine_mode mode, const_tree type,
7078 bool named ATTRIBUTE_UNUSED)
7079 {
7080 if (TARGET_ARCH32)
7081 /* Original SPARC 32-bit ABI says that structures and unions,
7082 and quad-precision floats are passed by reference. For Pascal,
7083 also pass arrays by reference. All other base types are passed
7084 in registers.
7085
7086 Extended ABI (as implemented by the Sun compiler) says that all
7087 complex floats are passed by reference. Pass complex integers
7088 in registers up to 8 bytes. More generally, enforce the 2-word
7089 cap for passing arguments in registers.
7090
7091 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7092 integers are passed like floats of the same size, that is in
7093 registers up to 8 bytes. Pass all vector floats by reference
7094 like structure and unions. */
7095 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7096 || mode == SCmode
7097 /* Catch CDImode, TFmode, DCmode and TCmode. */
7098 || GET_MODE_SIZE (mode) > 8
7099 || (type
7100 && TREE_CODE (type) == VECTOR_TYPE
7101 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7102 else
7103 /* Original SPARC 64-bit ABI says that structures and unions
7104 smaller than 16 bytes are passed in registers, as well as
7105 all other base types.
7106
7107 Extended ABI (as implemented by the Sun compiler) says that
7108 complex floats are passed in registers up to 16 bytes. Pass
7109 all complex integers in registers up to 16 bytes. More generally,
7110 enforce the 2-word cap for passing arguments in registers.
7111
7112 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7113 integers are passed like floats of the same size, that is in
7114 registers (up to 16 bytes). Pass all vector floats like structure
7115 and unions. */
7116 return ((type
7117 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7118 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7119 /* Catch CTImode and TCmode. */
7120 || GET_MODE_SIZE (mode) > 16);
7121 }
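/* Editor's illustration of the rules above (assumed examples): under the
   32-bit ABI a 'long double', a '_Complex float' and any structure are
   passed by reference, while a '_Complex int' (8 bytes) stays in registers;
   under the 64-bit ABI a 24-byte structure is passed by reference but a
   16-byte one is not.  */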
7122
7123 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7124 Update the data in CUM to advance over an argument
7125 of mode MODE and data type TYPE.
7126 TYPE is null for libcalls where that information may not be available. */
7127
7128 static void
7129 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7130 const_tree type, bool named)
7131 {
7132 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7133 int regno, padding;
7134
7135 /* We pass false for incoming here; it doesn't matter. */
7136 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7137
7138 /* If argument requires leading padding, add it. */
7139 cum->words += padding;
7140
7141 if (TARGET_ARCH32)
7142 cum->words += (mode == BLKmode
7143 ? CEIL_NWORDS (int_size_in_bytes (type))
7144 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7145 else
7146 {
7147 if (type && AGGREGATE_TYPE_P (type))
7148 {
7149 int size = int_size_in_bytes (type);
7150
7151 if (size <= 8)
7152 ++cum->words;
7153 else if (size <= 16)
7154 cum->words += 2;
7155 else /* passed by reference */
7156 ++cum->words;
7157 }
7158 else
7159 cum->words += (mode == BLKmode
7160 ? CEIL_NWORDS (int_size_in_bytes (type))
7161 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7162 }
7163 }
7164
7165 /* Handle the FUNCTION_ARG_PADDING macro.
7166 For the 64-bit ABI, structs are always stored left-justified in their
7167 argument slot. */
7168
7169 enum direction
7170 function_arg_padding (machine_mode mode, const_tree type)
7171 {
7172 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7173 return upward;
7174
7175 /* Fall back to the default. */
7176 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7177 }
7178
7179 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7180 Specify whether to return the return value in memory. */
7181
7182 static bool
7183 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7184 {
7185 if (TARGET_ARCH32)
7186 /* Original SPARC 32-bit ABI says that structures and unions,
7187 and quad-precision floats are returned in memory. All other
7188 base types are returned in registers.
7189
7190 Extended ABI (as implemented by the Sun compiler) says that
7191 all complex floats are returned in registers (8 FP registers
7192 at most for '_Complex long double'). Return all complex integers
7193 in registers (4 at most for '_Complex long long').
7194
7195 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7196 integers are returned like floats of the same size, that is in
7197 registers up to 8 bytes and in memory otherwise. Return all
7198 vector floats in memory like structure and unions; note that
7199 they always have BLKmode like the latter. */
7200 return (TYPE_MODE (type) == BLKmode
7201 || TYPE_MODE (type) == TFmode
7202 || (TREE_CODE (type) == VECTOR_TYPE
7203 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7204 else
7205 /* Original SPARC 64-bit ABI says that structures and unions
7206 smaller than 32 bytes are returned in registers, as well as
7207 all other base types.
7208
7209 Extended ABI (as implemented by the Sun compiler) says that all
7210 complex floats are returned in registers (8 FP registers at most
7211 for '_Complex long double'). Return all complex integers in
7212 registers (4 at most for '_Complex TItype').
7213
7214 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7215 integers are returned like floats of the same size, that is in
7216 registers. Return all vector floats like structure and unions;
7217 note that they always have BLKmode like the latter. */
7218 return (TYPE_MODE (type) == BLKmode
7219 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7220 }
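/* Editor's illustration of the rules above (assumed examples): under the
   32-bit ABI a structure and a 'long double' are returned in memory while
   a '_Complex double' (16 bytes) comes back in registers; under the 64-bit
   ABI a 24-byte structure is returned in registers and a 40-byte one in
   memory.  */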
7221
7222 /* Handle the TARGET_STRUCT_VALUE target hook.
7223 Return where to find the structure return value address. */
7224
7225 static rtx
7226 sparc_struct_value_rtx (tree fndecl, int incoming)
7227 {
7228 if (TARGET_ARCH64)
7229 return 0;
7230 else
7231 {
7232 rtx mem;
7233
7234 if (incoming)
7235 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7236 STRUCT_VALUE_OFFSET));
7237 else
7238 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7239 STRUCT_VALUE_OFFSET));
7240
7241 /* Only follow the SPARC ABI for fixed-size structure returns.
7242 Variable size structure returns are handled per the normal
7243 procedures in GCC. This is enabled by -mstd-struct-return. */
7244 if (incoming == 2
7245 && sparc_std_struct_return
7246 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7247 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7248 {
7249 /* We must check and adjust the return address, as it is optional
7250 as to whether the return object is really provided. */
7251 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7252 rtx scratch = gen_reg_rtx (SImode);
7253 rtx_code_label *endlab = gen_label_rtx ();
7254
7255 /* Calculate the return object size. */
7256 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7257 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7258 /* Construct a temporary return value. */
7259 rtx temp_val
7260 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7261
7262 /* Implement SPARC 32-bit psABI callee return struct checking:
7263
7264 Fetch the instruction where we will return to and see if
7265 it's an unimp instruction (the most significant 10 bits
7266 will be zero). */
7267 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7268 plus_constant (Pmode,
7269 ret_reg, 8)));
7270 /* Assume the size is valid and pre-adjust. */
7271 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7272 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7273 0, endlab);
7274 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7275 /* Write the address of the memory pointed to by temp_val into
7276 the memory pointed to by mem. */
7277 emit_move_insn (mem, XEXP (temp_val, 0));
7278 emit_label (endlab);
7279 }
7280
7281 return mem;
7282 }
7283 }
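
/* For reference, a sketch of the 32-bit struct-return convention that the
   code above is defending against (the caller side is emitted elsewhere):

       call    foo          ! hidden &result slot lives at [%sp+64]
        nop                 ! delay slot
       unimp   8            ! low bits = size of the struct foo must return
       ...                  ! caller resumes here, i.e. at %o7 + 12

   The callee fetches the word at %i7 + 8 and compares it against the size
   of its own return type; only if a matching unimp is present does it keep
   the pre-adjusted return address and store through the incoming pointer,
   otherwise it redirects the store to a local temporary and returns to the
   usual %i7 + 8.  */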
7284
7285 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7286 For v9, function return values are subject to the same rules as arguments,
7287 except that up to 32 bytes may be returned in registers. */
7288
7289 static rtx
7290 sparc_function_value_1 (const_tree type, machine_mode mode,
7291 bool outgoing)
7292 {
7293 /* Beware that the two values are swapped here wrt function_arg. */
7294 int regbase = (outgoing
7295 ? SPARC_INCOMING_INT_ARG_FIRST
7296 : SPARC_OUTGOING_INT_ARG_FIRST);
7297 enum mode_class mclass = GET_MODE_CLASS (mode);
7298 int regno;
7299
7300 /* Vector types deserve special treatment because they are polymorphic wrt
7301 their mode, depending upon whether VIS instructions are enabled. */
7302 if (type && TREE_CODE (type) == VECTOR_TYPE)
7303 {
7304 HOST_WIDE_INT size = int_size_in_bytes (type);
7305 gcc_assert ((TARGET_ARCH32 && size <= 8)
7306 || (TARGET_ARCH64 && size <= 32));
7307
7308 if (mode == BLKmode)
7309 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7310
7311 mclass = MODE_FLOAT;
7312 }
7313
7314 if (TARGET_ARCH64 && type)
7315 {
7316 /* Structures up to 32 bytes in size are returned in registers. */
7317 if (TREE_CODE (type) == RECORD_TYPE)
7318 {
7319 HOST_WIDE_INT size = int_size_in_bytes (type);
7320 gcc_assert (size <= 32);
7321
7322 return function_arg_record_value (type, mode, 0, 1, regbase);
7323 }
7324
7325 /* Unions up to 32 bytes in size are returned in integer registers. */
7326 else if (TREE_CODE (type) == UNION_TYPE)
7327 {
7328 HOST_WIDE_INT size = int_size_in_bytes (type);
7329 gcc_assert (size <= 32);
7330
7331 return function_arg_union_value (size, mode, 0, regbase);
7332 }
7333
7334 /* Objects that require it are returned in FP registers. */
7335 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7336 ;
7337
7338 /* All other aggregate types are returned in an integer register in a
7339 mode corresponding to the size of the type. */
7340 else if (AGGREGATE_TYPE_P (type))
7341 {
7342 /* All other aggregate types are passed in an integer register
7343 in a mode corresponding to the size of the type. */
7344 HOST_WIDE_INT size = int_size_in_bytes (type);
7345 gcc_assert (size <= 32);
7346
7347 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7348
7349 /* ??? We probably should have made the same ABI change in
7350 3.4.0 as the one we made for unions. The latter was
7351 required by the SCD though, while the former is not
7352 specified, so we favored compatibility and efficiency.
7353
7354 Now we're stuck for aggregates larger than 16 bytes,
7355 because OImode vanished in the meantime. Let's not
7356 try to be unduly clever, and simply follow the ABI
7357 for unions in that case. */
7358 if (mode == BLKmode)
7359 return function_arg_union_value (size, mode, 0, regbase);
7360 else
7361 mclass = MODE_INT;
7362 }
7363
7364 /* We should only have pointer and integer types at this point. This
7365 must match sparc_promote_function_mode. */
7366 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7367 mode = word_mode;
7368 }
7369
7370 /* We should only have pointer and integer types at this point, except with
7371 -freg-struct-return. This must match sparc_promote_function_mode. */
7372 else if (TARGET_ARCH32
7373 && !(type && AGGREGATE_TYPE_P (type))
7374 && mclass == MODE_INT
7375 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7376 mode = word_mode;
7377
7378 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7379 regno = SPARC_FP_ARG_FIRST;
7380 else
7381 regno = regbase;
7382
7383 return gen_rtx_REG (mode, regno);
7384 }
7385
7386 /* Handle TARGET_FUNCTION_VALUE.
7387 On the SPARC, the value is found in the first "output" register, but the
7388 called function leaves it in the first "input" register. */
7389
7390 static rtx
7391 sparc_function_value (const_tree valtype,
7392 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7393 bool outgoing)
7394 {
7395 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7396 }
7397
7398 /* Handle TARGET_LIBCALL_VALUE. */
7399
7400 static rtx
7401 sparc_libcall_value (machine_mode mode,
7402 const_rtx fun ATTRIBUTE_UNUSED)
7403 {
7404 return sparc_function_value_1 (NULL_TREE, mode, false);
7405 }
7406
7407 /* Handle FUNCTION_VALUE_REGNO_P.
7408 On the SPARC, the first "output" reg is used for integer values, and the
7409 first floating point register is used for floating point values. */
7410
7411 static bool
7412 sparc_function_value_regno_p (const unsigned int regno)
7413 {
7414 return (regno == 8 || (TARGET_FPU && regno == 32));
7415 }
7416
7417 /* Do what is necessary for `va_start'. We look at the current function
7418 to determine if stdarg or varargs is used and return the address of
7419 the first unnamed parameter. */
7420
7421 static rtx
7422 sparc_builtin_saveregs (void)
7423 {
7424 int first_reg = crtl->args.info.words;
7425 rtx address;
7426 int regno;
7427
7428 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7429 emit_move_insn (gen_rtx_MEM (word_mode,
7430 gen_rtx_PLUS (Pmode,
7431 frame_pointer_rtx,
7432 GEN_INT (FIRST_PARM_OFFSET (0)
7433 + (UNITS_PER_WORD
7434 * regno)))),
7435 gen_rtx_REG (word_mode,
7436 SPARC_INCOMING_INT_ARG_FIRST + regno));
7437
7438 address = gen_rtx_PLUS (Pmode,
7439 frame_pointer_rtx,
7440 GEN_INT (FIRST_PARM_OFFSET (0)
7441 + UNITS_PER_WORD * first_reg));
7442
7443 return address;
7444 }
7445
7446 /* Implement `va_start' for stdarg. */
7447
7448 static void
7449 sparc_va_start (tree valist, rtx nextarg)
7450 {
7451 nextarg = expand_builtin_saveregs ();
7452 std_expand_builtin_va_start (valist, nextarg);
7453 }
7454
7455 /* Implement `va_arg' for stdarg. */
7456
7457 static tree
7458 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7459 gimple_seq *post_p)
7460 {
7461 HOST_WIDE_INT size, rsize, align;
7462 tree addr, incr;
7463 bool indirect;
7464 tree ptrtype = build_pointer_type (type);
7465
7466 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7467 {
7468 indirect = true;
7469 size = rsize = UNITS_PER_WORD;
7470 align = 0;
7471 }
7472 else
7473 {
7474 indirect = false;
7475 size = int_size_in_bytes (type);
7476 rsize = ROUND_UP (size, UNITS_PER_WORD);
7477 align = 0;
7478
7479 if (TARGET_ARCH64)
7480 {
7481 /* For SPARC64, objects requiring 16-byte alignment get it. */
7482 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7483 align = 2 * UNITS_PER_WORD;
7484
7485 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7486 are left-justified in their slots. */
7487 if (AGGREGATE_TYPE_P (type))
7488 {
7489 if (size == 0)
7490 size = rsize = UNITS_PER_WORD;
7491 else
7492 size = rsize;
7493 }
7494 }
7495 }
7496
7497 incr = valist;
7498 if (align)
7499 {
7500 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7501 incr = fold_convert (sizetype, incr);
7502 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7503 size_int (-align));
7504 incr = fold_convert (ptr_type_node, incr);
7505 }
7506
7507 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7508 addr = incr;
7509
7510 if (BYTES_BIG_ENDIAN && size < rsize)
7511 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7512
7513 if (indirect)
7514 {
7515 addr = fold_convert (build_pointer_type (ptrtype), addr);
7516 addr = build_va_arg_indirect_ref (addr);
7517 }
7518
7519 /* If the address isn't aligned properly for the type, we need a temporary.
7520 FIXME: This is inefficient, usually we can do this in registers. */
7521 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7522 {
7523 tree tmp = create_tmp_var (type, "va_arg_tmp");
7524 tree dest_addr = build_fold_addr_expr (tmp);
7525 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7526 3, dest_addr, addr, size_int (rsize));
7527 TREE_ADDRESSABLE (tmp) = 1;
7528 gimplify_and_add (copy, pre_p);
7529 addr = dest_addr;
7530 }
7531
7532 else
7533 addr = fold_convert (ptrtype, addr);
7534
7535 incr = fold_build_pointer_plus_hwi (incr, rsize);
7536 gimplify_assign (valist, incr, post_p);
7537
7538 return build_va_arg_indirect_ref (addr);
7539 }
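
/* For the common (non-indirect) case the gimple built above amounts to this
   pointer arithmetic, sketched as pseudo-C:

     incr = valist;
     if (align)                          -- only 16-byte aligned types, ARCH64
       incr = (incr + align - 1) & -align;
     addr = incr;
     if (BYTES_BIG_ENDIAN && size < rsize)
       addr += rsize - size;             -- small scalars sit in the high part
                                            of their word; aggregates have
                                            size == rsize by this point
     valist = incr + rsize;
     return *(type *) addr;              -- or a copy, for over-aligned types  */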
7540 \f
7541 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7542 Specify whether the vector mode is supported by the hardware. */
7543
7544 static bool
7545 sparc_vector_mode_supported_p (machine_mode mode)
7546 {
7547 return TARGET_VIS && VECTOR_MODE_P (mode);
7548 }
7549 \f
7550 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7551
7552 static machine_mode
7553 sparc_preferred_simd_mode (machine_mode mode)
7554 {
7555 if (TARGET_VIS)
7556 switch (mode)
7557 {
7558 case SImode:
7559 return V2SImode;
7560 case HImode:
7561 return V4HImode;
7562 case QImode:
7563 return V8QImode;
7564
7565 default:;
7566 }
7567
7568 return word_mode;
7569 }
7570 \f
7571 /* Return the string to output an unconditional branch to LABEL, which is
7572 the operand number of the label.
7573
7574 DEST is the destination insn (i.e. the label), INSN is the source. */
7575
7576 const char *
7577 output_ubranch (rtx dest, rtx_insn *insn)
7578 {
7579 static char string[64];
7580 bool v9_form = false;
7581 int delta;
7582 char *p;
7583
7584 /* Even if we are trying to use cbcond for this, evaluate
7585 whether we can use V9 branches as our backup plan. */
7586
7587 delta = 5000000;
7588 if (INSN_ADDRESSES_SET_P ())
7589 delta = (INSN_ADDRESSES (INSN_UID (dest))
7590 - INSN_ADDRESSES (INSN_UID (insn)));
7591
7592 /* Leave some instructions for "slop". */
7593 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7594 v9_form = true;
7595
7596 if (TARGET_CBCOND)
7597 {
7598 bool emit_nop = emit_cbcond_nop (insn);
7599 bool far = false;
7600 const char *rval;
7601
7602 if (delta < -500 || delta > 500)
7603 far = true;
7604
7605 if (far)
7606 {
7607 if (v9_form)
7608 rval = "ba,a,pt\t%%xcc, %l0";
7609 else
7610 rval = "b,a\t%l0";
7611 }
7612 else
7613 {
7614 if (emit_nop)
7615 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7616 else
7617 rval = "cwbe\t%%g0, %%g0, %l0";
7618 }
7619 return rval;
7620 }
7621
7622 if (v9_form)
7623 strcpy (string, "ba%*,pt\t%%xcc, ");
7624 else
7625 strcpy (string, "b%*\t");
7626
7627 p = strchr (string, '\0');
7628 *p++ = '%';
7629 *p++ = 'l';
7630 *p++ = '0';
7631 *p++ = '%';
7632 *p++ = '(';
7633 *p = '\0';
7634
7635 return string;
7636 }
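
/* Examples of what the templates above expand to, assuming a V9 CPU (the
   %* and %( codes are handled by sparc_print_operand below):

       V9 form, empty delay slot:       ba,pt   %xcc, .LL4
                                         nop
       short form, delay slot filled:   b       .LL4
                                         <delay insn>
       cbcond, nearby target:           cwbe    %g0, %g0, .LL4          */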
7637
7638 /* Return the string to output a conditional branch to LABEL, which is
7639 the operand number of the label. OP is the conditional expression.
7640 XEXP (OP, 0) is assumed to be a condition code register (integer or
7641 floating point) and its mode specifies what kind of comparison we made.
7642
7643 DEST is the destination insn (i.e. the label), INSN is the source.
7644
7645 REVERSED is nonzero if we should reverse the sense of the comparison.
7646
7647 ANNUL is nonzero if we should generate an annulling branch. */
7648
7649 const char *
7650 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7651 rtx_insn *insn)
7652 {
7653 static char string[64];
7654 enum rtx_code code = GET_CODE (op);
7655 rtx cc_reg = XEXP (op, 0);
7656 machine_mode mode = GET_MODE (cc_reg);
7657 const char *labelno, *branch;
7658 int spaces = 8, far;
7659 char *p;
7660
7661 /* v9 branches are limited to +-1MB. If it is too far away,
7662 change
7663
7664 bne,pt %xcc, .LC30
7665
7666 to
7667
7668 be,pn %xcc, .+12
7669 nop
7670 ba .LC30
7671
7672 and
7673
7674 fbne,a,pn %fcc2, .LC29
7675
7676 to
7677
7678 fbe,pt %fcc2, .+16
7679 nop
7680 ba .LC29 */
7681
7682 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7683 if (reversed ^ far)
7684 {
7685 /* Reversal of FP compares takes care -- an ordered compare
7686 becomes an unordered compare and vice versa. */
7687 if (mode == CCFPmode || mode == CCFPEmode)
7688 code = reverse_condition_maybe_unordered (code);
7689 else
7690 code = reverse_condition (code);
7691 }
7692
7693 /* Start by writing the branch condition. */
7694 if (mode == CCFPmode || mode == CCFPEmode)
7695 {
7696 switch (code)
7697 {
7698 case NE:
7699 branch = "fbne";
7700 break;
7701 case EQ:
7702 branch = "fbe";
7703 break;
7704 case GE:
7705 branch = "fbge";
7706 break;
7707 case GT:
7708 branch = "fbg";
7709 break;
7710 case LE:
7711 branch = "fble";
7712 break;
7713 case LT:
7714 branch = "fbl";
7715 break;
7716 case UNORDERED:
7717 branch = "fbu";
7718 break;
7719 case ORDERED:
7720 branch = "fbo";
7721 break;
7722 case UNGT:
7723 branch = "fbug";
7724 break;
7725 case UNLT:
7726 branch = "fbul";
7727 break;
7728 case UNEQ:
7729 branch = "fbue";
7730 break;
7731 case UNGE:
7732 branch = "fbuge";
7733 break;
7734 case UNLE:
7735 branch = "fbule";
7736 break;
7737 case LTGT:
7738 branch = "fblg";
7739 break;
7740 default:
7741 gcc_unreachable ();
7742 }
7743
7744 /* ??? !v9: FP branches cannot be preceded by another floating point
7745 insn. Because there is currently no concept of pre-delay slots,
7746 we can fix this only by always emitting a nop before a floating
7747 point branch. */
7748
7749 string[0] = '\0';
7750 if (! TARGET_V9)
7751 strcpy (string, "nop\n\t");
7752 strcat (string, branch);
7753 }
7754 else
7755 {
7756 switch (code)
7757 {
7758 case NE:
7759 if (mode == CCVmode || mode == CCXVmode)
7760 branch = "bvs";
7761 else
7762 branch = "bne";
7763 break;
7764 case EQ:
7765 if (mode == CCVmode || mode == CCXVmode)
7766 branch = "bvc";
7767 else
7768 branch = "be";
7769 break;
7770 case GE:
7771 if (mode == CCNZmode || mode == CCXNZmode)
7772 branch = "bpos";
7773 else
7774 branch = "bge";
7775 break;
7776 case GT:
7777 branch = "bg";
7778 break;
7779 case LE:
7780 branch = "ble";
7781 break;
7782 case LT:
7783 if (mode == CCNZmode || mode == CCXNZmode)
7784 branch = "bneg";
7785 else
7786 branch = "bl";
7787 break;
7788 case GEU:
7789 branch = "bgeu";
7790 break;
7791 case GTU:
7792 branch = "bgu";
7793 break;
7794 case LEU:
7795 branch = "bleu";
7796 break;
7797 case LTU:
7798 branch = "blu";
7799 break;
7800 default:
7801 gcc_unreachable ();
7802 }
7803 strcpy (string, branch);
7804 }
7805 spaces -= strlen (branch);
7806 p = strchr (string, '\0');
7807
7808 /* Now add the annulling, the label, and a possible noop. */
7809 if (annul && ! far)
7810 {
7811 strcpy (p, ",a");
7812 p += 2;
7813 spaces -= 2;
7814 }
7815
7816 if (TARGET_V9)
7817 {
7818 rtx note;
7819 int v8 = 0;
7820
7821 if (! far && insn && INSN_ADDRESSES_SET_P ())
7822 {
7823 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7824 - INSN_ADDRESSES (INSN_UID (insn)));
7825 /* Leave some instructions for "slop". */
7826 if (delta < -260000 || delta >= 260000)
7827 v8 = 1;
7828 }
7829
7830 switch (mode)
7831 {
7832 case CCmode:
7833 case CCNZmode:
7834 case CCCmode:
7835 case CCVmode:
7836 labelno = "%%icc, ";
7837 if (v8)
7838 labelno = "";
7839 break;
7840 case CCXmode:
7841 case CCXNZmode:
7842 case CCXCmode:
7843 case CCXVmode:
7844 labelno = "%%xcc, ";
7845 gcc_assert (!v8);
7846 break;
7847 case CCFPmode:
7848 case CCFPEmode:
7849 {
7850 static char v9_fcc_labelno[] = "%%fccX, ";
7851 /* Set the char indicating the number of the fcc reg to use. */
7852 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7853 labelno = v9_fcc_labelno;
7854 if (v8)
7855 {
7856 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7857 labelno = "";
7858 }
7859 }
7860 break;
7861 default:
7862 gcc_unreachable ();
7863 }
7864
7865 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7866 {
7867 strcpy (p,
7868 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7869 ? ",pt" : ",pn");
7870 p += 3;
7871 spaces -= 3;
7872 }
7873 }
7874 else
7875 labelno = "";
7876
7877 if (spaces > 0)
7878 *p++ = '\t';
7879 else
7880 *p++ = ' ';
7881 strcpy (p, labelno);
7882 p = strchr (p, '\0');
7883 if (far)
7884 {
7885 strcpy (p, ".+12\n\t nop\n\tb\t");
7886 /* Skip the next insn if requested or
7887 if we know that it will be a nop. */
7888 if (annul || ! final_sequence)
7889 p[3] = '6';
7890 p += 14;
7891 }
7892 *p++ = '%';
7893 *p++ = 'l';
7894 *p++ = label + '0';
7895 *p++ = '%';
7896 *p++ = '#';
7897 *p = '\0';
7898
7899 return string;
7900 }
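
/* Putting the pieces together: for an EQ test of %icc on V9, a nearby
   target, no annul bit and a predict-taken REG_BR_PROB note, the string
   built above is roughly "be,pt\t%%icc, %l1%#" (for label operand 1),
   which final then prints as

       be,pt   %icc, .LL3
        <delay insn or nop>                                             */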
7901
7902 /* Emit a library call comparison between floating point X and Y.
7903 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7904 Return the new operator to be used in the comparison sequence.
7905
7906 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7907 values as arguments instead of the TFmode registers themselves,
7908 that's why we cannot call emit_float_lib_cmp. */
7909
7910 rtx
7911 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7912 {
7913 const char *qpfunc;
7914 rtx slot0, slot1, result, tem, tem2, libfunc;
7915 machine_mode mode;
7916 enum rtx_code new_comparison;
7917
7918 switch (comparison)
7919 {
7920 case EQ:
7921 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7922 break;
7923
7924 case NE:
7925 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7926 break;
7927
7928 case GT:
7929 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7930 break;
7931
7932 case GE:
7933 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7934 break;
7935
7936 case LT:
7937 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7938 break;
7939
7940 case LE:
7941 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7942 break;
7943
7944 case ORDERED:
7945 case UNORDERED:
7946 case UNGT:
7947 case UNLT:
7948 case UNEQ:
7949 case UNGE:
7950 case UNLE:
7951 case LTGT:
7952 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7953 break;
7954
7955 default:
7956 gcc_unreachable ();
7957 }
7958
7959 if (TARGET_ARCH64)
7960 {
7961 if (MEM_P (x))
7962 {
7963 tree expr = MEM_EXPR (x);
7964 if (expr)
7965 mark_addressable (expr);
7966 slot0 = x;
7967 }
7968 else
7969 {
7970 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7971 emit_move_insn (slot0, x);
7972 }
7973
7974 if (MEM_P (y))
7975 {
7976 tree expr = MEM_EXPR (y);
7977 if (expr)
7978 mark_addressable (expr);
7979 slot1 = y;
7980 }
7981 else
7982 {
7983 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7984 emit_move_insn (slot1, y);
7985 }
7986
7987 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7988 emit_library_call (libfunc, LCT_NORMAL,
7989 DImode, 2,
7990 XEXP (slot0, 0), Pmode,
7991 XEXP (slot1, 0), Pmode);
7992 mode = DImode;
7993 }
7994 else
7995 {
7996 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7997 emit_library_call (libfunc, LCT_NORMAL,
7998 SImode, 2,
7999 x, TFmode, y, TFmode);
8000 mode = SImode;
8001 }
8002
8003
8004 /* Immediately move the result of the libcall into a pseudo
8005 register so reload doesn't clobber the value if it needs
8006 the return register for a spill reg. */
8007 result = gen_reg_rtx (mode);
8008 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8009
8010 switch (comparison)
8011 {
8012 default:
8013 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8014 case ORDERED:
8015 case UNORDERED:
8016 new_comparison = (comparison == UNORDERED ? EQ : NE);
8017 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8018 case UNGT:
8019 case UNGE:
8020 new_comparison = (comparison == UNGT ? GT : NE);
8021 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8022 case UNLE:
8023 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8024 case UNLT:
8025 tem = gen_reg_rtx (mode);
8026 if (TARGET_ARCH32)
8027 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8028 else
8029 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8030 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8031 case UNEQ:
8032 case LTGT:
8033 tem = gen_reg_rtx (mode);
8034 if (TARGET_ARCH32)
8035 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8036 else
8037 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8038 tem2 = gen_reg_rtx (mode);
8039 if (TARGET_ARCH32)
8040 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8041 else
8042 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8043 new_comparison = (comparison == UNEQ ? EQ : NE);
8044 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8045 }
8046
8047 gcc_unreachable ();
8048 }
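
/* The decoding above relies on the result convention of _Q_cmp / _Qp_cmp:
   0 = equal, 1 = less, 2 = greater, 3 = unordered.  E.g. UNLT tests
   (result & 1) != 0, i.e. less or unordered, and UNGT tests result > 1,
   i.e. greater or unordered.  */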
8049
8050 /* Generate an unsigned DImode to FP conversion. This is the same code
8051 optabs would emit if we didn't have TFmode patterns. */
8052
8053 void
8054 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8055 {
8056 rtx i0, i1, f0, in, out;
8057
8058 out = operands[0];
8059 in = force_reg (DImode, operands[1]);
8060 rtx_code_label *neglab = gen_label_rtx ();
8061 rtx_code_label *donelab = gen_label_rtx ();
8062 i0 = gen_reg_rtx (DImode);
8063 i1 = gen_reg_rtx (DImode);
8064 f0 = gen_reg_rtx (mode);
8065
8066 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8067
8068 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8069 emit_jump_insn (gen_jump (donelab));
8070 emit_barrier ();
8071
8072 emit_label (neglab);
8073
8074 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8075 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8076 emit_insn (gen_iordi3 (i0, i0, i1));
8077 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8078 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8079
8080 emit_label (donelab);
8081 }
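
/* The emitted sequence corresponds roughly to this C (illustrative only):

     double
     floatunsdi (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) x;                   -- fits in a signed conversion
       unsigned long long half = (x >> 1) | (x & 1);   -- keep rounding bit
       double f = (double) half;
       return f + f;
     }  */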
8082
8083 /* Generate an FP to unsigned DImode conversion. This is the same code
8084 optabs would emit if we didn't have TFmode patterns. */
8085
8086 void
8087 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8088 {
8089 rtx i0, i1, f0, in, out, limit;
8090
8091 out = operands[0];
8092 in = force_reg (mode, operands[1]);
8093 rtx_code_label *neglab = gen_label_rtx ();
8094 rtx_code_label *donelab = gen_label_rtx ();
8095 i0 = gen_reg_rtx (DImode);
8096 i1 = gen_reg_rtx (DImode);
8097 limit = gen_reg_rtx (mode);
8098 f0 = gen_reg_rtx (mode);
8099
8100 emit_move_insn (limit,
8101 const_double_from_real_value (
8102 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8103 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8104
8105 emit_insn (gen_rtx_SET (out,
8106 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8107 emit_jump_insn (gen_jump (donelab));
8108 emit_barrier ();
8109
8110 emit_label (neglab);
8111
8112 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8113 emit_insn (gen_rtx_SET (i0,
8114 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8115 emit_insn (gen_movdi (i1, const1_rtx));
8116 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8117 emit_insn (gen_xordi3 (out, i0, i1));
8118
8119 emit_label (donelab);
8120 }
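
/* Likewise, the sequence above is roughly (illustrative only):

     unsigned long long
     fixunsdi (double x)
     {
       if (x < 0x1p63)                        -- within signed DImode range
         return (long long) x;
       return (unsigned long long) (long long) (x - 0x1p63) ^ (1ULL << 63);
     }  */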
8121
8122 /* Return the string to output a compare and branch instruction to DEST.
8123 DEST is the destination insn (i.e. the label), INSN is the source,
8124 and OP is the conditional expression. */
8125
8126 const char *
8127 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8128 {
8129 machine_mode mode = GET_MODE (XEXP (op, 0));
8130 enum rtx_code code = GET_CODE (op);
8131 const char *cond_str, *tmpl;
8132 int far, emit_nop, len;
8133 static char string[64];
8134 char size_char;
8135
8136 /* Compare and Branch is limited to +-2KB. If it is too far away,
8137 change
8138
8139 cxbne X, Y, .LC30
8140
8141 to
8142
8143 cxbe X, Y, .+16
8144 nop
8145 ba,pt xcc, .LC30
8146 nop */
8147
8148 len = get_attr_length (insn);
8149
8150 far = len == 4;
8151 emit_nop = len == 2;
8152
8153 if (far)
8154 code = reverse_condition (code);
8155
8156 size_char = ((mode == SImode) ? 'w' : 'x');
8157
8158 switch (code)
8159 {
8160 case NE:
8161 cond_str = "ne";
8162 break;
8163
8164 case EQ:
8165 cond_str = "e";
8166 break;
8167
8168 case GE:
8169 cond_str = "ge";
8170 break;
8171
8172 case GT:
8173 cond_str = "g";
8174 break;
8175
8176 case LE:
8177 cond_str = "le";
8178 break;
8179
8180 case LT:
8181 cond_str = "l";
8182 break;
8183
8184 case GEU:
8185 cond_str = "cc";
8186 break;
8187
8188 case GTU:
8189 cond_str = "gu";
8190 break;
8191
8192 case LEU:
8193 cond_str = "leu";
8194 break;
8195
8196 case LTU:
8197 cond_str = "cs";
8198 break;
8199
8200 default:
8201 gcc_unreachable ();
8202 }
8203
8204 if (far)
8205 {
8206 int veryfar = 1, delta;
8207
8208 if (INSN_ADDRESSES_SET_P ())
8209 {
8210 delta = (INSN_ADDRESSES (INSN_UID (dest))
8211 - INSN_ADDRESSES (INSN_UID (insn)));
8212 /* Leave some instructions for "slop". */
8213 if (delta >= -260000 && delta < 260000)
8214 veryfar = 0;
8215 }
8216
8217 if (veryfar)
8218 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8219 else
8220 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8221 }
8222 else
8223 {
8224 if (emit_nop)
8225 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8226 else
8227 tmpl = "c%cb%s\t%%1, %%2, %%3";
8228 }
8229
8230 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8231
8232 return string;
8233 }
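
/* E.g. for a nearby DImode not-equal test the string built above is
   "cxbne\t%1, %2, %3", which comes out as something like

       cxbne   %o0, %g1, .LL7                                           */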
8234
8235 /* Return the string to output a conditional branch to LABEL, testing
8236 register REG. LABEL is the operand number of the label; REG is the
8237 operand number of the reg. OP is the conditional expression. The mode
8238 of REG says what kind of comparison we made.
8239
8240 DEST is the destination insn (i.e. the label), INSN is the source.
8241
8242 REVERSED is nonzero if we should reverse the sense of the comparison.
8243
8244 ANNUL is nonzero if we should generate an annulling branch. */
8245
8246 const char *
8247 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8248 int annul, rtx_insn *insn)
8249 {
8250 static char string[64];
8251 enum rtx_code code = GET_CODE (op);
8252 machine_mode mode = GET_MODE (XEXP (op, 0));
8253 rtx note;
8254 int far;
8255 char *p;
8256
8257 /* Branches on registers are limited to +-128KB. If it is too far away,
8258 change
8259
8260 brnz,pt %g1, .LC30
8261
8262 to
8263
8264 brz,pn %g1, .+12
8265 nop
8266 ba,pt %xcc, .LC30
8267
8268 and
8269
8270 brgez,a,pn %o1, .LC29
8271
8272 to
8273
8274 brlz,pt %o1, .+16
8275 nop
8276 ba,pt %xcc, .LC29 */
8277
8278 far = get_attr_length (insn) >= 3;
8279
8280 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8281 if (reversed ^ far)
8282 code = reverse_condition (code);
8283
8284 /* Only 64 bit versions of these instructions exist. */
8285 gcc_assert (mode == DImode);
8286
8287 /* Start by writing the branch condition. */
8288
8289 switch (code)
8290 {
8291 case NE:
8292 strcpy (string, "brnz");
8293 break;
8294
8295 case EQ:
8296 strcpy (string, "brz");
8297 break;
8298
8299 case GE:
8300 strcpy (string, "brgez");
8301 break;
8302
8303 case LT:
8304 strcpy (string, "brlz");
8305 break;
8306
8307 case LE:
8308 strcpy (string, "brlez");
8309 break;
8310
8311 case GT:
8312 strcpy (string, "brgz");
8313 break;
8314
8315 default:
8316 gcc_unreachable ();
8317 }
8318
8319 p = strchr (string, '\0');
8320
8321 /* Now add the annulling, reg, label, and nop. */
8322 if (annul && ! far)
8323 {
8324 strcpy (p, ",a");
8325 p += 2;
8326 }
8327
8328 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8329 {
8330 strcpy (p,
8331 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8332 ? ",pt" : ",pn");
8333 p += 3;
8334 }
8335
8336 *p = p < string + 8 ? '\t' : ' ';
8337 p++;
8338 *p++ = '%';
8339 *p++ = '0' + reg;
8340 *p++ = ',';
8341 *p++ = ' ';
8342 if (far)
8343 {
8344 int veryfar = 1, delta;
8345
8346 if (INSN_ADDRESSES_SET_P ())
8347 {
8348 delta = (INSN_ADDRESSES (INSN_UID (dest))
8349 - INSN_ADDRESSES (INSN_UID (insn)));
8350 /* Leave some instructions for "slop". */
8351 if (delta >= -260000 && delta < 260000)
8352 veryfar = 0;
8353 }
8354
8355 strcpy (p, ".+12\n\t nop\n\t");
8356 /* Skip the next insn if requested or
8357 if we know that it will be a nop. */
8358 if (annul || ! final_sequence)
8359 p[3] = '6';
8360 p += 12;
8361 if (veryfar)
8362 {
8363 strcpy (p, "b\t");
8364 p += 2;
8365 }
8366 else
8367 {
8368 strcpy (p, "ba,pt\t%%xcc, ");
8369 p += 13;
8370 }
8371 }
8372 *p++ = '%';
8373 *p++ = 'l';
8374 *p++ = '0' + label;
8375 *p++ = '%';
8376 *p++ = '#';
8377 *p = '\0';
8378
8379 return string;
8380 }
8381
8382 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7];
8383 such instructions cannot go in the delay slot of a v9 return insn, whose delay
8384 slot executes after the register window has been restored. If TEST is 0, also
8385 rename all %i[0-7] registers to their %o[0-7] counterparts. */
8386
8387 static int
8388 epilogue_renumber (register rtx *where, int test)
8389 {
8390 register const char *fmt;
8391 register int i;
8392 register enum rtx_code code;
8393
8394 if (*where == 0)
8395 return 0;
8396
8397 code = GET_CODE (*where);
8398
8399 switch (code)
8400 {
8401 case REG:
8402 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8403 return 1;
8404 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8405 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8406 /* fallthrough */
8407 case SCRATCH:
8408 case CC0:
8409 case PC:
8410 case CONST_INT:
8411 case CONST_WIDE_INT:
8412 case CONST_DOUBLE:
8413 return 0;
8414
8415 /* Do not replace the frame pointer with the stack pointer because
8416 it can cause the delayed instruction to load below the stack.
8417 This occurs when instructions like:
8418
8419 (set (reg/i:SI 24 %i0)
8420 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8421 (const_int -20 [0xffffffec])) 0))
8422
8423 are in the return delayed slot. */
8424 case PLUS:
8425 if (GET_CODE (XEXP (*where, 0)) == REG
8426 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8427 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8428 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8429 return 1;
8430 break;
8431
8432 case MEM:
8433 if (SPARC_STACK_BIAS
8434 && GET_CODE (XEXP (*where, 0)) == REG
8435 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8436 return 1;
8437 break;
8438
8439 default:
8440 break;
8441 }
8442
8443 fmt = GET_RTX_FORMAT (code);
8444
8445 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8446 {
8447 if (fmt[i] == 'E')
8448 {
8449 register int j;
8450 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8451 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8452 return 1;
8453 }
8454 else if (fmt[i] == 'e'
8455 && epilogue_renumber (&(XEXP (*where, i)), test))
8456 return 1;
8457 }
8458 return 0;
8459 }
8460 \f
8461 /* Leaf functions and non-leaf functions have different needs. */
8462
8463 static const int
8464 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8465
8466 static const int
8467 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8468
8469 static const int *const reg_alloc_orders[] = {
8470 reg_leaf_alloc_order,
8471 reg_nonleaf_alloc_order};
8472
8473 void
8474 order_regs_for_local_alloc (void)
8475 {
8476 static int last_order_nonleaf = 1;
8477
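  /* Hard register 15 is %o7, which call instructions clobber with their
     return address, so it is live exactly in functions that make calls;
     that makes it a convenient test for choosing the non-leaf order.  */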
8478 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8479 {
8480 last_order_nonleaf = !last_order_nonleaf;
8481 memcpy ((char *) reg_alloc_order,
8482 (const char *) reg_alloc_orders[last_order_nonleaf],
8483 FIRST_PSEUDO_REGISTER * sizeof (int));
8484 }
8485 }
8486 \f
8487 /* Return 1 if REG and MEM are legitimate enough to allow the various
8488 MEM<-->REG splits to be run. */
8489
8490 int
8491 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8492 {
8493 /* Punt if we are here by mistake. */
8494 gcc_assert (reload_completed);
8495
8496 /* We must have an offsettable memory reference. */
8497 if (!offsettable_memref_p (mem))
8498 return 0;
8499
8500 /* If we have legitimate args for ldd/std, we do not want
8501 the split to happen. */
8502 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8503 return 0;
8504
8505 /* Success. */
8506 return 1;
8507 }
8508
8509 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8510
8511 void
8512 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8513 {
8514 rtx high_part = gen_highpart (mode, dest);
8515 rtx low_part = gen_lowpart (mode, dest);
8516 rtx word0 = adjust_address (src, mode, 0);
8517 rtx word1 = adjust_address (src, mode, 4);
8518
8519 if (reg_overlap_mentioned_p (high_part, word1))
8520 {
8521 emit_move_insn_1 (low_part, word1);
8522 emit_move_insn_1 (high_part, word0);
8523 }
8524 else
8525 {
8526 emit_move_insn_1 (high_part, word0);
8527 emit_move_insn_1 (low_part, word1);
8528 }
8529 }
8530
8531 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8532
8533 void
8534 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8535 {
8536 rtx word0 = adjust_address (dest, mode, 0);
8537 rtx word1 = adjust_address (dest, mode, 4);
8538 rtx high_part = gen_highpart (mode, src);
8539 rtx low_part = gen_lowpart (mode, src);
8540
8541 emit_move_insn_1 (word0, high_part);
8542 emit_move_insn_1 (word1, low_part);
8543 }
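
/* E.g. a DImode value in %o0/%o1 that fails the ldd/std test above is
   moved as two word accesses:

       st      %o0, [addr]          ! high word
       st      %o1, [addr+4]        ! low word

   and loaded the same way, except that the two loads are swapped when the
   high-part destination register is also used in the second address.  */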
8544
8545 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8546
8547 int
8548 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8549 {
8550 /* Punt if we are here by mistake. */
8551 gcc_assert (reload_completed);
8552
8553 if (GET_CODE (reg1) == SUBREG)
8554 reg1 = SUBREG_REG (reg1);
8555 if (GET_CODE (reg1) != REG)
8556 return 0;
8557 const int regno1 = REGNO (reg1);
8558
8559 if (GET_CODE (reg2) == SUBREG)
8560 reg2 = SUBREG_REG (reg2);
8561 if (GET_CODE (reg2) != REG)
8562 return 0;
8563 const int regno2 = REGNO (reg2);
8564
8565 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8566 return 1;
8567
8568 if (TARGET_VIS3)
8569 {
8570 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8571 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8572 return 1;
8573 }
8574
8575 return 0;
8576 }
8577
8578 /* Split a REG <--> REG move into a pair of moves in MODE. */
8579
8580 void
8581 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8582 {
8583 rtx dest1 = gen_highpart (mode, dest);
8584 rtx dest2 = gen_lowpart (mode, dest);
8585 rtx src1 = gen_highpart (mode, src);
8586 rtx src2 = gen_lowpart (mode, src);
8587
8588 /* Now emit using the real source and destination we found, swapping
8589 the order if we detect overlap. */
8590 if (reg_overlap_mentioned_p (dest1, src2))
8591 {
8592 emit_move_insn_1 (dest2, src2);
8593 emit_move_insn_1 (dest1, src1);
8594 }
8595 else
8596 {
8597 emit_move_insn_1 (dest1, src1);
8598 emit_move_insn_1 (dest2, src2);
8599 }
8600 }
8601
8602 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8603 This makes them candidates for using ldd and std insns.
8604
8605 Note reg1 and reg2 *must* be hard registers. */
8606
8607 int
8608 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8609 {
8610 /* We might have been passed a SUBREG. */
8611 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8612 return 0;
8613
8614 if (REGNO (reg1) % 2 != 0)
8615 return 0;
8616
8617 /* Integer ldd is deprecated in SPARC V9. */
8618 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8619 return 0;
8620
8621 return (REGNO (reg1) == REGNO (reg2) - 1);
8622 }
8623
8624 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8625 an ldd or std insn.
8626
8627 This can only happen when addr1 and addr2, the addresses in mem1
8628 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8629 addr1 must also be aligned on a 64-bit boundary.
8630
8631 Also iff dependent_reg_rtx is not null it should not be used to
8632 compute the address for mem1, i.e. we cannot optimize a sequence
8633 like:
8634 ld [%o0], %o0
8635 ld [%o0 + 4], %o1
8636 to
8637 ldd [%o0], %o0
8638 nor:
8639 ld [%g3 + 4], %g3
8640 ld [%g3], %g2
8641 to
8642 ldd [%g3], %g2
8643
8644 But, note that the transformation from:
8645 ld [%g2 + 4], %g3
8646 ld [%g2], %g2
8647 to
8648 ldd [%g2], %g2
8649 is perfectly fine. Thus, the peephole2 patterns always pass us
8650 the destination register of the first load, never the second one.
8651
8652 For stores we don't have a similar problem, so dependent_reg_rtx is
8653 NULL_RTX. */
8654
8655 int
8656 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8657 {
8658 rtx addr1, addr2;
8659 unsigned int reg1;
8660 HOST_WIDE_INT offset1;
8661
8662 /* The mems cannot be volatile. */
8663 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8664 return 0;
8665
8666 /* MEM1 should be aligned on a 64-bit boundary. */
8667 if (MEM_ALIGN (mem1) < 64)
8668 return 0;
8669
8670 addr1 = XEXP (mem1, 0);
8671 addr2 = XEXP (mem2, 0);
8672
8673 /* Extract a register number and offset (if used) from the first addr. */
8674 if (GET_CODE (addr1) == PLUS)
8675 {
8676 /* If not a REG, return zero. */
8677 if (GET_CODE (XEXP (addr1, 0)) != REG)
8678 return 0;
8679 else
8680 {
8681 reg1 = REGNO (XEXP (addr1, 0));
8682 /* The offset must be constant! */
8683 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8684 return 0;
8685 offset1 = INTVAL (XEXP (addr1, 1));
8686 }
8687 }
8688 else if (GET_CODE (addr1) != REG)
8689 return 0;
8690 else
8691 {
8692 reg1 = REGNO (addr1);
8693 /* This was a simple (mem (reg)) expression. Offset is 0. */
8694 offset1 = 0;
8695 }
8696
8697 /* Make sure the second address is of the form (plus (reg) (const_int)). */
8698 if (GET_CODE (addr2) != PLUS)
8699 return 0;
8700
8701 if (GET_CODE (XEXP (addr2, 0)) != REG
8702 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8703 return 0;
8704
8705 if (reg1 != REGNO (XEXP (addr2, 0)))
8706 return 0;
8707
8708 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8709 return 0;
8710
8711 /* The first offset must be evenly divisible by 8 to ensure the
8712 address is 64 bit aligned. */
8713 if (offset1 % 8 != 0)
8714 return 0;
8715
8716 /* The offset for the second addr must be 4 more than the first addr. */
8717 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8718 return 0;
8719
8720 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8721 instructions. */
8722 return 1;
8723 }
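
/* For example, [%o0+8] and [%o0+12] satisfy the checks above, whereas
   [%o0+4] and [%o0+8] do not (the first offset is not a multiple of 8),
   nor do [%o0] and [%o1+4] (different base registers).  */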
8724
8725 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8726
8727 rtx
8728 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8729 {
8730 rtx x = widen_memory_access (mem1, mode, 0);
8731 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8732 return x;
8733 }
8734
8735 /* Return 1 if reg is a pseudo, or is the first register in
8736 a hard register pair. This makes it suitable for use in
8737 ldd and std insns. */
8738
8739 int
8740 register_ok_for_ldd (rtx reg)
8741 {
8742 /* We might have been passed a SUBREG. */
8743 if (!REG_P (reg))
8744 return 0;
8745
8746 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8747 return (REGNO (reg) % 2 == 0);
8748
8749 return 1;
8750 }
8751
8752 /* Return 1 if OP, a MEM, has an address which is known to be
8753 aligned to an 8-byte boundary. */
8754
8755 int
8756 memory_ok_for_ldd (rtx op)
8757 {
8758 /* In 64-bit mode, we assume that the address is word-aligned. */
8759 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8760 return 0;
8761
8762 if (! can_create_pseudo_p ()
8763 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8764 return 0;
8765
8766 return 1;
8767 }
8768 \f
8769 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8770
8771 static bool
8772 sparc_print_operand_punct_valid_p (unsigned char code)
8773 {
8774 if (code == '#'
8775 || code == '*'
8776 || code == '('
8777 || code == ')'
8778 || code == '_'
8779 || code == '&')
8780 return true;
8781
8782 return false;
8783 }
8784
8785 /* Implement TARGET_PRINT_OPERAND.
8786 Print operand X (an rtx) in assembler syntax to file FILE.
8787 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8788 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8789
8790 static void
8791 sparc_print_operand (FILE *file, rtx x, int code)
8792 {
8793 const char *s;
8794
8795 switch (code)
8796 {
8797 case '#':
8798 /* Output an insn in a delay slot. */
8799 if (final_sequence)
8800 sparc_indent_opcode = 1;
8801 else
8802 fputs ("\n\t nop", file);
8803 return;
8804 case '*':
8805 /* Output an annul flag if there's nothing for the delay slot and we
8806 are optimizing. This is always used with '(' below.
8807 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8808 this is a dbx bug. So, we only do this when optimizing.
8809 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8810 Always emit a nop in case the next instruction is a branch. */
8811 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8812 fputs (",a", file);
8813 return;
8814 case '(':
8815 /* Output a 'nop' if there's nothing for the delay slot and we are
8816 not optimizing. This is always used with '*' above. */
8817 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8818 fputs ("\n\t nop", file);
8819 else if (final_sequence)
8820 sparc_indent_opcode = 1;
8821 return;
8822 case ')':
8823 /* Output the right displacement from the saved PC on function return.
8824 The caller may have placed an "unimp" insn immediately after the call
8825 so we have to account for it. This insn is used in the 32-bit ABI
8826 when calling a function that returns a non zero-sized structure. The
8827 64-bit ABI doesn't have it. Be careful to have this test be the same
8828 as that for the call. The exception is when sparc_std_struct_return
8829 is enabled, the psABI is followed exactly and the adjustment is made
8830 by the code in sparc_struct_value_rtx. The call emitted is the same
8831 when sparc_std_struct_return is enabled. */
8832 if (!TARGET_ARCH64
8833 && cfun->returns_struct
8834 && !sparc_std_struct_return
8835 && DECL_SIZE (DECL_RESULT (current_function_decl))
8836 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8837 == INTEGER_CST
8838 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8839 fputs ("12", file);
8840 else
8841 fputc ('8', file);
8842 return;
8843 case '_':
8844 /* Output the Embedded Medium/Anywhere code model base register. */
8845 fputs (EMBMEDANY_BASE_REG, file);
8846 return;
8847 case '&':
8848 /* Print some local dynamic TLS name. */
8849 if (const char *name = get_some_local_dynamic_name ())
8850 assemble_name (file, name);
8851 else
8852 output_operand_lossage ("'%%&' used without any "
8853 "local dynamic TLS references");
8854 return;
8855
8856 case 'Y':
8857 /* Adjust the operand to take into account a RESTORE operation. */
8858 if (GET_CODE (x) == CONST_INT)
8859 break;
8860 else if (GET_CODE (x) != REG)
8861 output_operand_lossage ("invalid %%Y operand");
8862 else if (REGNO (x) < 8)
8863 fputs (reg_names[REGNO (x)], file);
8864 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8865 fputs (reg_names[REGNO (x)-16], file);
8866 else
8867 output_operand_lossage ("invalid %%Y operand");
8868 return;
8869 case 'L':
8870 /* Print out the low order register name of a register pair. */
8871 if (WORDS_BIG_ENDIAN)
8872 fputs (reg_names[REGNO (x)+1], file);
8873 else
8874 fputs (reg_names[REGNO (x)], file);
8875 return;
8876 case 'H':
8877 /* Print out the high order register name of a register pair. */
8878 if (WORDS_BIG_ENDIAN)
8879 fputs (reg_names[REGNO (x)], file);
8880 else
8881 fputs (reg_names[REGNO (x)+1], file);
8882 return;
8883 case 'R':
8884 /* Print out the second register name of a register pair or quad.
8885 I.e., R (%o0) => %o1. */
8886 fputs (reg_names[REGNO (x)+1], file);
8887 return;
8888 case 'S':
8889 /* Print out the third register name of a register quad.
8890 I.e., S (%o0) => %o2. */
8891 fputs (reg_names[REGNO (x)+2], file);
8892 return;
8893 case 'T':
8894 /* Print out the fourth register name of a register quad.
8895 I.e., T (%o0) => %o3. */
8896 fputs (reg_names[REGNO (x)+3], file);
8897 return;
8898 case 'x':
8899 /* Print a condition code register. */
8900 if (REGNO (x) == SPARC_ICC_REG)
8901 {
8902 switch (GET_MODE (x))
8903 {
8904 case CCmode:
8905 case CCNZmode:
8906 case CCCmode:
8907 case CCVmode:
8908 s = "%icc";
8909 break;
8910 case CCXmode:
8911 case CCXNZmode:
8912 case CCXCmode:
8913 case CCXVmode:
8914 s = "%xcc";
8915 break;
8916 default:
8917 gcc_unreachable ();
8918 }
8919 fputs (s, file);
8920 }
8921 else
8922 /* %fccN register */
8923 fputs (reg_names[REGNO (x)], file);
8924 return;
8925 case 'm':
8926 /* Print the operand's address only. */
8927 output_address (GET_MODE (x), XEXP (x, 0));
8928 return;
8929 case 'r':
8930 /* In this case we need a register. Use %g0 if the
8931 operand is const0_rtx. */
8932 if (x == const0_rtx
8933 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8934 {
8935 fputs ("%g0", file);
8936 return;
8937 }
8938 else
8939 break;
8940
8941 case 'A':
8942 switch (GET_CODE (x))
8943 {
8944 case IOR:
8945 s = "or";
8946 break;
8947 case AND:
8948 s = "and";
8949 break;
8950 case XOR:
8951 s = "xor";
8952 break;
8953 default:
8954 output_operand_lossage ("invalid %%A operand");
8955 s = "";
8956 break;
8957 }
8958 fputs (s, file);
8959 return;
8960
8961 case 'B':
8962 switch (GET_CODE (x))
8963 {
8964 case IOR:
8965 s = "orn";
8966 break;
8967 case AND:
8968 s = "andn";
8969 break;
8970 case XOR:
8971 s = "xnor";
8972 break;
8973 default:
8974 output_operand_lossage ("invalid %%B operand");
8975 s = "";
8976 break;
8977 }
8978 fputs (s, file);
8979 return;
8980
8981 /* This is used by the conditional move instructions. */
8982 case 'C':
8983 {
8984 machine_mode mode = GET_MODE (XEXP (x, 0));
8985 switch (GET_CODE (x))
8986 {
8987 case NE:
8988 if (mode == CCVmode || mode == CCXVmode)
8989 s = "vs";
8990 else
8991 s = "ne";
8992 break;
8993 case EQ:
8994 if (mode == CCVmode || mode == CCXVmode)
8995 s = "vc";
8996 else
8997 s = "e";
8998 break;
8999 case GE:
9000 if (mode == CCNZmode || mode == CCXNZmode)
9001 s = "pos";
9002 else
9003 s = "ge";
9004 break;
9005 case GT:
9006 s = "g";
9007 break;
9008 case LE:
9009 s = "le";
9010 break;
9011 case LT:
9012 if (mode == CCNZmode || mode == CCXNZmode)
9013 s = "neg";
9014 else
9015 s = "l";
9016 break;
9017 case GEU:
9018 s = "geu";
9019 break;
9020 case GTU:
9021 s = "gu";
9022 break;
9023 case LEU:
9024 s = "leu";
9025 break;
9026 case LTU:
9027 s = "lu";
9028 break;
9029 case LTGT:
9030 s = "lg";
9031 break;
9032 case UNORDERED:
9033 s = "u";
9034 break;
9035 case ORDERED:
9036 s = "o";
9037 break;
9038 case UNLT:
9039 s = "ul";
9040 break;
9041 case UNLE:
9042 s = "ule";
9043 break;
9044 case UNGT:
9045 s = "ug";
9046 break;
9047 case UNGE:
9048 s = "uge";
9049 break;
9050 case UNEQ:
9051 s = "ue";
9052 break;
9053 default:
9054 output_operand_lossage ("invalid %%C operand");
9055 s = "";
9056 break;
9057 }
9058 fputs (s, file);
9059 return;
9060 }
9061
9062 /* These are used by the movr instruction pattern. */
9063 case 'D':
9064 {
9065 switch (GET_CODE (x))
9066 {
9067 case NE:
9068 s = "ne";
9069 break;
9070 case EQ:
9071 s = "e";
9072 break;
9073 case GE:
9074 s = "gez";
9075 break;
9076 case LT:
9077 s = "lz";
9078 break;
9079 case LE:
9080 s = "lez";
9081 break;
9082 case GT:
9083 s = "gz";
9084 break;
9085 default:
9086 output_operand_lossage ("invalid %%D operand");
9087 s = "";
9088 break;
9089 }
9090 fputs (s, file);
9091 return;
9092 }
9093
9094 case 'b':
9095 {
9096 /* Print a sign-extended character. */
9097 int i = trunc_int_for_mode (INTVAL (x), QImode);
9098 fprintf (file, "%d", i);
9099 return;
9100 }
9101
9102 case 'f':
9103 /* Operand must be a MEM; write its address. */
9104 if (GET_CODE (x) != MEM)
9105 output_operand_lossage ("invalid %%f operand");
9106 output_address (GET_MODE (x), XEXP (x, 0));
9107 return;
9108
9109 case 's':
9110 {
9111 /* Print a sign-extended 32-bit value. */
9112 HOST_WIDE_INT i;
9113 if (GET_CODE(x) == CONST_INT)
9114 i = INTVAL (x);
9115 else
9116 {
9117 output_operand_lossage ("invalid %%s operand");
9118 return;
9119 }
9120 i = trunc_int_for_mode (i, SImode);
9121 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9122 return;
9123 }
9124
9125 case 0:
9126 /* Do nothing special. */
9127 break;
9128
9129 default:
9130 /* Undocumented flag. */
9131 output_operand_lossage ("invalid operand output code");
9132 }
9133
9134 if (GET_CODE (x) == REG)
9135 fputs (reg_names[REGNO (x)], file);
9136 else if (GET_CODE (x) == MEM)
9137 {
9138 fputc ('[', file);
9139 /* Poor Sun assembler doesn't understand absolute addressing. */
9140 if (CONSTANT_P (XEXP (x, 0)))
9141 fputs ("%g0+", file);
9142 output_address (GET_MODE (x), XEXP (x, 0));
9143 fputc (']', file);
9144 }
9145 else if (GET_CODE (x) == HIGH)
9146 {
9147 fputs ("%hi(", file);
9148 output_addr_const (file, XEXP (x, 0));
9149 fputc (')', file);
9150 }
9151 else if (GET_CODE (x) == LO_SUM)
9152 {
9153 sparc_print_operand (file, XEXP (x, 0), 0);
9154 if (TARGET_CM_MEDMID)
9155 fputs ("+%l44(", file);
9156 else
9157 fputs ("+%lo(", file);
9158 output_addr_const (file, XEXP (x, 1));
9159 fputc (')', file);
9160 }
9161 else if (GET_CODE (x) == CONST_DOUBLE)
9162 output_operand_lossage ("floating-point constant not a valid immediate operand");
9163 else
9164 output_addr_const (file, x);
9165 }
9166
9167 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9168
9169 static void
9170 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9171 {
9172 register rtx base, index = 0;
9173 int offset = 0;
9174 register rtx addr = x;
9175
9176 if (REG_P (addr))
9177 fputs (reg_names[REGNO (addr)], file);
9178 else if (GET_CODE (addr) == PLUS)
9179 {
9180 if (CONST_INT_P (XEXP (addr, 0)))
9181 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9182 else if (CONST_INT_P (XEXP (addr, 1)))
9183 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9184 else
9185 base = XEXP (addr, 0), index = XEXP (addr, 1);
9186 if (GET_CODE (base) == LO_SUM)
9187 {
9188 gcc_assert (USE_AS_OFFSETABLE_LO10
9189 && TARGET_ARCH64
9190 && ! TARGET_CM_MEDMID);
9191 output_operand (XEXP (base, 0), 0);
9192 fputs ("+%lo(", file);
9193 output_address (VOIDmode, XEXP (base, 1));
9194 fprintf (file, ")+%d", offset);
9195 }
9196 else
9197 {
9198 fputs (reg_names[REGNO (base)], file);
9199 if (index == 0)
9200 fprintf (file, "%+d", offset);
9201 else if (REG_P (index))
9202 fprintf (file, "+%s", reg_names[REGNO (index)]);
9203 else if (GET_CODE (index) == SYMBOL_REF
9204 || GET_CODE (index) == LABEL_REF
9205 || GET_CODE (index) == CONST)
9206 fputc ('+', file), output_addr_const (file, index);
9207 else gcc_unreachable ();
9208 }
9209 }
9210 else if (GET_CODE (addr) == MINUS
9211 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9212 {
9213 output_addr_const (file, XEXP (addr, 0));
9214 fputs ("-(", file);
9215 output_addr_const (file, XEXP (addr, 1));
9216 fputs ("-.)", file);
9217 }
9218 else if (GET_CODE (addr) == LO_SUM)
9219 {
9220 output_operand (XEXP (addr, 0), 0);
9221 if (TARGET_CM_MEDMID)
9222 fputs ("+%l44(", file);
9223 else
9224 fputs ("+%lo(", file);
9225 output_address (VOIDmode, XEXP (addr, 1));
9226 fputc (')', file);
9227 }
9228 else if (flag_pic
9229 && GET_CODE (addr) == CONST
9230 && GET_CODE (XEXP (addr, 0)) == MINUS
9231 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9232 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9233 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9234 {
9235 addr = XEXP (addr, 0);
9236 output_addr_const (file, XEXP (addr, 0));
9237 /* Group the args of the second CONST in parentheses. */
9238 fputs ("-(", file);
9239 /* Skip past the second CONST--it does nothing for us. */
9240 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9241 /* Close the parenthesis. */
9242 fputc (')', file);
9243 }
9244 else
9245 {
9246 output_addr_const (file, addr);
9247 }
9248 }
9249 \f
9250 /* Target hook for assembling integer objects. The sparc version has
9251 special handling for aligned DI-mode objects. */
9252
9253 static bool
9254 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9255 {
9256 /* ??? We only output .xword's for symbols and only then in environments
9257 where the assembler can handle them. */
9258 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9259 {
9260 if (TARGET_V9)
9261 {
9262 assemble_integer_with_op ("\t.xword\t", x);
9263 return true;
9264 }
9265 else
9266 {
9267 assemble_aligned_integer (4, const0_rtx);
9268 assemble_aligned_integer (4, x);
9269 return true;
9270 }
9271 }
9272 return default_assemble_integer (x, size, aligned_p);
9273 }
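
/* For instance, an aligned 8-byte reference to symbol "foo" is emitted as
   ".xword foo" on V9 assemblers, and otherwise as a zero upper word
   followed by a 4-byte word holding "foo" (whose value fits in 32 bits on
   such targets).  */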
9274 \f
9275 /* Return the value of a code used in the .proc pseudo-op that says
9276 what kind of result this function returns. For non-C types, we pick
9277 the closest C type. */
9278
9279 #ifndef SHORT_TYPE_SIZE
9280 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9281 #endif
9282
9283 #ifndef INT_TYPE_SIZE
9284 #define INT_TYPE_SIZE BITS_PER_WORD
9285 #endif
9286
9287 #ifndef LONG_TYPE_SIZE
9288 #define LONG_TYPE_SIZE BITS_PER_WORD
9289 #endif
9290
9291 #ifndef LONG_LONG_TYPE_SIZE
9292 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9293 #endif
9294
9295 #ifndef FLOAT_TYPE_SIZE
9296 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9297 #endif
9298
9299 #ifndef DOUBLE_TYPE_SIZE
9300 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9301 #endif
9302
9303 #ifndef LONG_DOUBLE_TYPE_SIZE
9304 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9305 #endif
9306
9307 unsigned long
9308 sparc_type_code (register tree type)
9309 {
9310 register unsigned long qualifiers = 0;
9311 register unsigned shift;
9312
9313 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9314 setting more, since some assemblers will give an error for this. Also,
9315 we must be careful to avoid shifts of 32 bits or more to avoid getting
9316 unpredictable results. */
9317
9318 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9319 {
9320 switch (TREE_CODE (type))
9321 {
9322 case ERROR_MARK:
9323 return qualifiers;
9324
9325 case ARRAY_TYPE:
9326 qualifiers |= (3 << shift);
9327 break;
9328
9329 case FUNCTION_TYPE:
9330 case METHOD_TYPE:
9331 qualifiers |= (2 << shift);
9332 break;
9333
9334 case POINTER_TYPE:
9335 case REFERENCE_TYPE:
9336 case OFFSET_TYPE:
9337 qualifiers |= (1 << shift);
9338 break;
9339
9340 case RECORD_TYPE:
9341 return (qualifiers | 8);
9342
9343 case UNION_TYPE:
9344 case QUAL_UNION_TYPE:
9345 return (qualifiers | 9);
9346
9347 case ENUMERAL_TYPE:
9348 return (qualifiers | 10);
9349
9350 case VOID_TYPE:
9351 return (qualifiers | 16);
9352
9353 case INTEGER_TYPE:
9354 /* If this is a range type, consider it to be the underlying
9355 type. */
9356 if (TREE_TYPE (type) != 0)
9357 break;
9358
9359 /* Carefully distinguish all the standard types of C,
9360 without messing up if the language is not C. We do this by
9361 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9362 look at both the names and the above fields, but that's redundant.
9363 Any type whose size is between two C types will be considered
9364 to be the wider of the two types. Also, we do not have a
9365 special code to use for "long long", so anything wider than
9366 long is treated the same. Note that we can't distinguish
9367 between "int" and "long" in this code if they are the same
9368 size, but that's fine, since neither can the assembler. */
9369
9370 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9371 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9372
9373 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9374 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9375
9376 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9377 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9378
9379 else
9380 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9381
9382 case REAL_TYPE:
9383 /* If this is a range type, consider it to be the underlying
9384 type. */
9385 if (TREE_TYPE (type) != 0)
9386 break;
9387
9388 /* Carefully distinguish all the standard types of C,
9389 without messing up if the language is not C. */
9390
9391 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9392 return (qualifiers | 6);
9393
9394 else
9395 return (qualifiers | 7);
9396
9397 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9398 /* ??? We need to distinguish between double and float complex types,
9399 but I don't know how yet because I can't reach this code from
9400 existing front-ends. */
9401 return (qualifiers | 7); /* Who knows? */
9402
9403 case VECTOR_TYPE:
9404 case BOOLEAN_TYPE: /* Boolean truth value type. */
9405 case LANG_TYPE:
9406 case NULLPTR_TYPE:
9407 return qualifiers;
9408
9409 default:
9410 gcc_unreachable (); /* Not a type! */
9411 }
9412 }
9413
9414 return qualifiers;
9415 }
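/* As a worked example (illustrative only): for a value of type
   "unsigned short *", the loop above first sees the POINTER_TYPE and ORs in
   1 << 6 (qualifiers = 0x40), then reaches the INTEGER_TYPE whose precision
   fits SHORT_TYPE_SIZE with TYPE_UNSIGNED set, so it returns
   0x40 | 13 = 77 as the type code.  */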
9416 \f
9417 /* Nested function support. */
9418
9419 /* Emit RTL insns to initialize the variable parts of a trampoline.
9420 FNADDR is an RTX for the address of the function's pure code.
9421 CXT is an RTX for the static chain value for the function.
9422
9423 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9424 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9425 (to store insns). This is a bit excessive. Perhaps a different
9426 mechanism would be better here.
9427
9428 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9429
9430 static void
9431 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9432 {
9433 /* SPARC 32-bit trampoline:
9434
9435 sethi %hi(fn), %g1
9436 sethi %hi(static), %g2
9437 jmp %g1+%lo(fn)
9438 or %g2, %lo(static), %g2
9439
9440 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9441 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9442 */
9443
9444 emit_move_insn
9445 (adjust_address (m_tramp, SImode, 0),
9446 expand_binop (SImode, ior_optab,
9447 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9448 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9449 NULL_RTX, 1, OPTAB_DIRECT));
9450
9451 emit_move_insn
9452 (adjust_address (m_tramp, SImode, 4),
9453 expand_binop (SImode, ior_optab,
9454 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9455 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9456 NULL_RTX, 1, OPTAB_DIRECT));
9457
9458 emit_move_insn
9459 (adjust_address (m_tramp, SImode, 8),
9460 expand_binop (SImode, ior_optab,
9461 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9462 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9463 NULL_RTX, 1, OPTAB_DIRECT));
9464
9465 emit_move_insn
9466 (adjust_address (m_tramp, SImode, 12),
9467 expand_binop (SImode, ior_optab,
9468 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9469 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9470 NULL_RTX, 1, OPTAB_DIRECT));
9471
9472 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9473 aligned on a 16 byte boundary so one flush clears it all. */
9474 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9475 if (sparc_cpu != PROCESSOR_ULTRASPARC
9476 && sparc_cpu != PROCESSOR_ULTRASPARC3
9477 && sparc_cpu != PROCESSOR_NIAGARA
9478 && sparc_cpu != PROCESSOR_NIAGARA2
9479 && sparc_cpu != PROCESSOR_NIAGARA3
9480 && sparc_cpu != PROCESSOR_NIAGARA4
9481 && sparc_cpu != PROCESSOR_NIAGARA7)
9482 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9483
9484 /* Call __enable_execute_stack after writing onto the stack to make sure
9485 the stack address is accessible. */
9486 #ifdef HAVE_ENABLE_EXECUTE_STACK
9487 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9488 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9489 #endif
9490
9491 }
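/* For illustration, with a hypothetical FNADDR of 0x12345678 the code above
   stores 0x03000000 | (0x12345678 >> 10) = 0x03048d15 at offset 0, i.e.
   "sethi %hi(0x12345678), %g1", and 0x81c06000 | (0x12345678 & 0x3ff)
   = 0x81c06278 at offset 8, i.e. "jmp %g1 + 0x278"; the static chain value
   is split the same way into the words at offsets 4 and 12.  */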
9492
9493 /* The 64-bit version is simpler because it makes more sense to load the
9494 values as "immediate" data out of the trampoline. It's also easier since
9495 we can read the PC without clobbering a register. */
9496
9497 static void
9498 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9499 {
9500 /* SPARC 64-bit trampoline:
9501
9502 rd %pc, %g1
9503 ldx [%g1+24], %g5
9504 jmp %g5
9505 ldx [%g1+16], %g5
9506 +16 bytes data
9507 */
9508
9509 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9510 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9511 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9512 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9513 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9514 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9515 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9516 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9517 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9518 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9519 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9520
9521 if (sparc_cpu != PROCESSOR_ULTRASPARC
9522 && sparc_cpu != PROCESSOR_ULTRASPARC3
9523 && sparc_cpu != PROCESSOR_NIAGARA
9524 && sparc_cpu != PROCESSOR_NIAGARA2
9525 && sparc_cpu != PROCESSOR_NIAGARA3
9526 && sparc_cpu != PROCESSOR_NIAGARA4
9527 && sparc_cpu != PROCESSOR_NIAGARA7)
9528 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9529
9530 /* Call __enable_execute_stack after writing onto the stack to make sure
9531 the stack address is accessible. */
9532 #ifdef HAVE_ENABLE_EXECUTE_STACK
9533 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9534 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9535 #endif
9536 }
9537
9538 /* Worker for TARGET_TRAMPOLINE_INIT. */
9539
9540 static void
9541 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9542 {
9543 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9544 cxt = force_reg (Pmode, cxt);
9545 if (TARGET_ARCH64)
9546 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9547 else
9548 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9549 }
9550 \f
9551 /* Adjust the cost of a scheduling dependency. Return the new cost of
9552 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9553
9554 static int
9555 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9556 int cost)
9557 {
9558 enum attr_type insn_type;
9559
9560 if (recog_memoized (insn) < 0)
9561 return cost;
9562
9563 insn_type = get_attr_type (insn);
9564
9565 if (dep_type == 0)
9566 {
9567 /* Data dependency; DEP_INSN writes a register that INSN reads some
9568 cycles later. */
9569
9570 /* if a load, then the dependence must be on the memory address;
9571 add an extra "cycle". Note that the cost could be two cycles
9572 if the reg was written late in an instruction group; we cannot tell
9573 here. */
9574 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9575 return cost + 3;
9576
9577 /* Get the delay only if the address of the store is the dependence. */
9578 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9579 {
9580 rtx pat = PATTERN (insn);
9581 rtx dep_pat = PATTERN (dep_insn);
9582
9583 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9584 return cost; /* This should not happen! */
9585
9586 /* The dependency between the two instructions was on the data that
9587 is being stored. Assume that this implies that the address of the
9588 store is not dependent. */
9589 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9590 return cost;
9591
9592 return cost + 3; /* An approximation. */
9593 }
9594
9595 /* A shift instruction cannot receive its data from an instruction
9596 in the same cycle; add a one cycle penalty. */
9597 if (insn_type == TYPE_SHIFT)
9598 return cost + 3; /* Split before cascade into shift. */
9599 }
9600 else
9601 {
9602 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9603 INSN writes some cycles later. */
9604
9605 /* These are only significant for the fpu unit; writing a fp reg before
9606 the fpu has finished with it stalls the processor. */
9607
9608 /* Reusing an integer register causes no problems. */
9609 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9610 return 0;
9611 }
9612
9613 return cost;
9614 }
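/* For example (schematically): in the pair "add %o1, %o2, %o3" followed by
   "ld [%o3], %o4", the load's address depends on the result of the add, so
   the hook above makes the dependence 3 units more expensive and thereby
   encourages the scheduler to place independent work between the two.  */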
9615
9616 static int
9617 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9618 int cost)
9619 {
9620 enum attr_type insn_type, dep_type;
9621 rtx pat = PATTERN (insn);
9622 rtx dep_pat = PATTERN (dep_insn);
9623
9624 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9625 return cost;
9626
9627 insn_type = get_attr_type (insn);
9628 dep_type = get_attr_type (dep_insn);
9629
9630 switch (dtype)
9631 {
9632 case 0:
9633 /* Data dependency; DEP_INSN writes a register that INSN reads some
9634 cycles later. */
9635
9636 switch (insn_type)
9637 {
9638 case TYPE_STORE:
9639 case TYPE_FPSTORE:
9640 /* Get the delay iff the address of the store is the dependence. */
9641 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9642 return cost;
9643
9644 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9645 return cost;
9646 return cost + 3;
9647
9648 case TYPE_LOAD:
9649 case TYPE_SLOAD:
9650 case TYPE_FPLOAD:
9651 /* If a load, then the dependence must be on the memory address. If
9652 the addresses aren't equal, then it might be a false dependency. */
9653 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9654 {
9655 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9656 || GET_CODE (SET_DEST (dep_pat)) != MEM
9657 || GET_CODE (SET_SRC (pat)) != MEM
9658 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9659 XEXP (SET_SRC (pat), 0)))
9660 return cost + 2;
9661
9662 return cost + 8;
9663 }
9664 break;
9665
9666 case TYPE_BRANCH:
9667 /* Compare to branch latency is 0. There is no benefit from
9668 separating compare and branch. */
9669 if (dep_type == TYPE_COMPARE)
9670 return 0;
9671 /* Floating point compare to branch latency is less than
9672 compare to conditional move. */
9673 if (dep_type == TYPE_FPCMP)
9674 return cost - 1;
9675 break;
9676 default:
9677 break;
9678 }
9679 break;
9680
9681 case REG_DEP_ANTI:
9682 /* Anti-dependencies only penalize the fpu unit. */
9683 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9684 return 0;
9685 break;
9686
9687 default:
9688 break;
9689 }
9690
9691 return cost;
9692 }
9693
9694 static int
9695 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9696 unsigned int)
9697 {
9698 switch (sparc_cpu)
9699 {
9700 case PROCESSOR_SUPERSPARC:
9701 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9702 break;
9703 case PROCESSOR_HYPERSPARC:
9704 case PROCESSOR_SPARCLITE86X:
9705 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9706 break;
9707 default:
9708 break;
9709 }
9710 return cost;
9711 }
9712
9713 static void
9714 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9715 int sched_verbose ATTRIBUTE_UNUSED,
9716 int max_ready ATTRIBUTE_UNUSED)
9717 {}
9718
9719 static int
9720 sparc_use_sched_lookahead (void)
9721 {
9722 if (sparc_cpu == PROCESSOR_NIAGARA
9723 || sparc_cpu == PROCESSOR_NIAGARA2
9724 || sparc_cpu == PROCESSOR_NIAGARA3)
9725 return 0;
9726 if (sparc_cpu == PROCESSOR_NIAGARA4
9727 || sparc_cpu == PROCESSOR_NIAGARA7)
9728 return 2;
9729 if (sparc_cpu == PROCESSOR_ULTRASPARC
9730 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9731 return 4;
9732 if ((1 << sparc_cpu) &
9733 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9734 (1 << PROCESSOR_SPARCLITE86X)))
9735 return 3;
9736 return 0;
9737 }
9738
9739 static int
9740 sparc_issue_rate (void)
9741 {
9742 switch (sparc_cpu)
9743 {
9744 case PROCESSOR_NIAGARA:
9745 case PROCESSOR_NIAGARA2:
9746 case PROCESSOR_NIAGARA3:
9747 default:
9748 return 1;
9749 case PROCESSOR_NIAGARA4:
9750 case PROCESSOR_NIAGARA7:
9751 case PROCESSOR_V9:
9752 /* Assume V9 processors are capable of at least dual-issue. */
9753 return 2;
9754 case PROCESSOR_SUPERSPARC:
9755 return 3;
9756 case PROCESSOR_HYPERSPARC:
9757 case PROCESSOR_SPARCLITE86X:
9758 return 2;
9759 case PROCESSOR_ULTRASPARC:
9760 case PROCESSOR_ULTRASPARC3:
9761 return 4;
9762 }
9763 }
9764
9765 static int
9766 set_extends (rtx_insn *insn)
9767 {
9768 register rtx pat = PATTERN (insn);
9769
9770 switch (GET_CODE (SET_SRC (pat)))
9771 {
9772 /* Load and some shift instructions zero extend. */
9773 case MEM:
9774 case ZERO_EXTEND:
9775 /* sethi clears the high bits */
9776 case HIGH:
9777 /* LO_SUM is used with sethi. sethi cleared the high
9778 bits and the values used with lo_sum are positive */
9779 case LO_SUM:
9780 /* Store flag stores 0 or 1 */
9781 case LT: case LTU:
9782 case GT: case GTU:
9783 case LE: case LEU:
9784 case GE: case GEU:
9785 case EQ:
9786 case NE:
9787 return 1;
9788 case AND:
9789 {
9790 rtx op0 = XEXP (SET_SRC (pat), 0);
9791 rtx op1 = XEXP (SET_SRC (pat), 1);
9792 if (GET_CODE (op1) == CONST_INT)
9793 return INTVAL (op1) >= 0;
9794 if (GET_CODE (op0) != REG)
9795 return 0;
9796 if (sparc_check_64 (op0, insn) == 1)
9797 return 1;
9798 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9799 }
9800 case IOR:
9801 case XOR:
9802 {
9803 rtx op0 = XEXP (SET_SRC (pat), 0);
9804 rtx op1 = XEXP (SET_SRC (pat), 1);
9805 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9806 return 0;
9807 if (GET_CODE (op1) == CONST_INT)
9808 return INTVAL (op1) >= 0;
9809 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9810 }
9811 case LSHIFTRT:
9812 return GET_MODE (SET_SRC (pat)) == SImode;
9813 /* Positive integers leave the high bits zero. */
9814 case CONST_INT:
9815 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9816 case ASHIFTRT:
9817 case SIGN_EXTEND:
9818 return - (GET_MODE (SET_SRC (pat)) == SImode);
9819 case REG:
9820 return sparc_check_64 (SET_SRC (pat), insn);
9821 default:
9822 return 0;
9823 }
9824 }
9825
9826 /* We _ought_ to have only one kind per function, but... */
9827 static GTY(()) rtx sparc_addr_diff_list;
9828 static GTY(()) rtx sparc_addr_list;
9829
9830 void
9831 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9832 {
9833 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9834 if (diff)
9835 sparc_addr_diff_list
9836 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9837 else
9838 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9839 }
9840
9841 static void
9842 sparc_output_addr_vec (rtx vec)
9843 {
9844 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9845 int idx, vlen = XVECLEN (body, 0);
9846
9847 #ifdef ASM_OUTPUT_ADDR_VEC_START
9848 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9849 #endif
9850
9851 #ifdef ASM_OUTPUT_CASE_LABEL
9852 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9853 NEXT_INSN (lab));
9854 #else
9855 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9856 #endif
9857
9858 for (idx = 0; idx < vlen; idx++)
9859 {
9860 ASM_OUTPUT_ADDR_VEC_ELT
9861 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9862 }
9863
9864 #ifdef ASM_OUTPUT_ADDR_VEC_END
9865 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9866 #endif
9867 }
9868
9869 static void
9870 sparc_output_addr_diff_vec (rtx vec)
9871 {
9872 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9873 rtx base = XEXP (XEXP (body, 0), 0);
9874 int idx, vlen = XVECLEN (body, 1);
9875
9876 #ifdef ASM_OUTPUT_ADDR_VEC_START
9877 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9878 #endif
9879
9880 #ifdef ASM_OUTPUT_CASE_LABEL
9881 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9882 NEXT_INSN (lab));
9883 #else
9884 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9885 #endif
9886
9887 for (idx = 0; idx < vlen; idx++)
9888 {
9889 ASM_OUTPUT_ADDR_DIFF_ELT
9890 (asm_out_file,
9891 body,
9892 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9893 CODE_LABEL_NUMBER (base));
9894 }
9895
9896 #ifdef ASM_OUTPUT_ADDR_VEC_END
9897 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9898 #endif
9899 }
9900
9901 static void
9902 sparc_output_deferred_case_vectors (void)
9903 {
9904 rtx t;
9905 int align;
9906
9907 if (sparc_addr_list == NULL_RTX
9908 && sparc_addr_diff_list == NULL_RTX)
9909 return;
9910
9911 /* Align to cache line in the function's code section. */
9912 switch_to_section (current_function_section ());
9913
9914 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9915 if (align > 0)
9916 ASM_OUTPUT_ALIGN (asm_out_file, align);
9917
9918 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9919 sparc_output_addr_vec (XEXP (t, 0));
9920 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9921 sparc_output_addr_diff_vec (XEXP (t, 0));
9922
9923 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9924 }
9925
9926 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9927 unknown. Return 1 if the high bits are zero, -1 if the register is
9928 sign extended. */
9929 int
9930 sparc_check_64 (rtx x, rtx_insn *insn)
9931 {
9932 /* If a register is set only once it is safe to ignore insns this
9933 code does not know how to handle. The loop will either recognize
9934 the single set and return the correct value or fail to recognize
9935 it and return 0. */
9936 int set_once = 0;
9937 rtx y = x;
9938
9939 gcc_assert (GET_CODE (x) == REG);
9940
9941 if (GET_MODE (x) == DImode)
9942 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9943
9944 if (flag_expensive_optimizations
9945 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9946 set_once = 1;
9947
9948 if (insn == 0)
9949 {
9950 if (set_once)
9951 insn = get_last_insn_anywhere ();
9952 else
9953 return 0;
9954 }
9955
9956 while ((insn = PREV_INSN (insn)))
9957 {
9958 switch (GET_CODE (insn))
9959 {
9960 case JUMP_INSN:
9961 case NOTE:
9962 break;
9963 case CODE_LABEL:
9964 case CALL_INSN:
9965 default:
9966 if (! set_once)
9967 return 0;
9968 break;
9969 case INSN:
9970 {
9971 rtx pat = PATTERN (insn);
9972 if (GET_CODE (pat) != SET)
9973 return 0;
9974 if (rtx_equal_p (x, SET_DEST (pat)))
9975 return set_extends (insn);
9976 if (y && rtx_equal_p (y, SET_DEST (pat)))
9977 return set_extends (insn);
9978 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9979 return 0;
9980 }
9981 }
9982 }
9983 return 0;
9984 }
9985
9986 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9987 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9988
9989 const char *
9990 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9991 {
9992 static char asm_code[60];
9993
9994 /* The scratch register is only required when the destination
9995 register is not a 64-bit global or out register. */
9996 if (which_alternative != 2)
9997 operands[3] = operands[0];
9998
9999 /* We can only shift by constants <= 63. */
10000 if (GET_CODE (operands[2]) == CONST_INT)
10001 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10002
10003 if (GET_CODE (operands[1]) == CONST_INT)
10004 {
10005 output_asm_insn ("mov\t%1, %3", operands);
10006 }
10007 else
10008 {
10009 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10010 if (sparc_check_64 (operands[1], insn) <= 0)
10011 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10012 output_asm_insn ("or\t%L1, %3, %3", operands);
10013 }
10014
10015 strcpy (asm_code, opcode);
10016
10017 if (which_alternative != 2)
10018 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10019 else
10020 return
10021 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10022 }
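/* As an illustration, for a shift with OPCODE "sllx" and the first
   alternative, the sequence produced by the code above has the shape

        sllx  %H1, 32, %0
        srl   %L1, 0, %L1     ! only when the low word may not be clean
        or    %L1, %0, %0
        sllx  %0, %2, %L0
        srlx  %L0, 32, %H0

   i.e. the two 32-bit halves are merged into a single 64-bit register,
   shifted, and split back into a register pair.  */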
10023 \f
10024 /* Output rtl to increment the profiler label LABELNO
10025 for profiling a function entry. */
10026
10027 void
10028 sparc_profile_hook (int labelno)
10029 {
10030 char buf[32];
10031 rtx lab, fun;
10032
10033 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10034 if (NO_PROFILE_COUNTERS)
10035 {
10036 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
10037 }
10038 else
10039 {
10040 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10041 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10042 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
10043 }
10044 }
10045 \f
10046 #ifdef TARGET_SOLARIS
10047 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10048
10049 static void
10050 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10051 tree decl ATTRIBUTE_UNUSED)
10052 {
10053 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10054 {
10055 solaris_elf_asm_comdat_section (name, flags, decl);
10056 return;
10057 }
10058
10059 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10060
10061 if (!(flags & SECTION_DEBUG))
10062 fputs (",#alloc", asm_out_file);
10063 if (flags & SECTION_WRITE)
10064 fputs (",#write", asm_out_file);
10065 if (flags & SECTION_TLS)
10066 fputs (",#tls", asm_out_file);
10067 if (flags & SECTION_CODE)
10068 fputs (",#execinstr", asm_out_file);
10069
10070 if (flags & SECTION_NOTYPE)
10071 ;
10072 else if (flags & SECTION_BSS)
10073 fputs (",#nobits", asm_out_file);
10074 else
10075 fputs (",#progbits", asm_out_file);
10076
10077 fputc ('\n', asm_out_file);
10078 }
10079 #endif /* TARGET_SOLARIS */
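/* For instance, a writable data section named ".data" with none of the
   debug, TLS, code, bss or notype flags set would be emitted by the
   function above as

        .section        ".data",#alloc,#write,#progbits

   i.e. the Solaris assembler's #flag syntax rather than the usual ELF
   "aw"-style flag string.  */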
10080
10081 /* We do not allow indirect calls to be optimized into sibling calls.
10082
10083 We cannot use sibling calls when delayed branches are disabled
10084 because they will likely require the call delay slot to be filled.
10085
10086 Also, on SPARC 32-bit we cannot emit a sibling call when the
10087 current function returns a structure. This is because the "unimp
10088 after call" convention would cause the callee to return to the
10089 wrong place. The generic code already disallows cases where the
10090 function being called returns a structure.
10091
10092 It may seem strange how this last case could occur. Usually there
10093 is code after the call which jumps to epilogue code which dumps the
10094 return value into the struct return area. That ought to invalidate
10095 the sibling call right? Well, in the C++ case we can end up passing
10096 the pointer to the struct return area to a constructor (which returns
10097 void) and then nothing else happens. Such a sibling call would look
10098 valid without the added check here.
10099
10100 VxWorks PIC PLT entries require the global pointer to be initialized
10101 on entry. We therefore can't emit sibling calls to them. */
10102 static bool
10103 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10104 {
10105 return (decl
10106 && flag_delayed_branch
10107 && (TARGET_ARCH64 || ! cfun->returns_struct)
10108 && !(TARGET_VXWORKS_RTP
10109 && flag_pic
10110 && !targetm.binds_local_p (decl)));
10111 }
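/* A minimal sketch of the C++ case described above (hypothetical source):

     struct S { S (int); int v; };
     S f (int i) { return S (i); }

   In f the only remaining call is to the constructor, which returns void,
   so without the returns_struct check it would look like a valid tail call
   even though the 32-bit "unimp after call" convention makes it unsafe.  */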
10112 \f
10113 /* libfunc renaming. */
10114
10115 static void
10116 sparc_init_libfuncs (void)
10117 {
10118 if (TARGET_ARCH32)
10119 {
10120 /* Use the subroutines that Sun's library provides for integer
10121 multiply and divide. The `*' prevents an underscore from
10122 being prepended by the compiler. .umul is a little faster
10123 than .mul. */
10124 set_optab_libfunc (smul_optab, SImode, "*.umul");
10125 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10126 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10127 set_optab_libfunc (smod_optab, SImode, "*.rem");
10128 set_optab_libfunc (umod_optab, SImode, "*.urem");
10129
10130 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10131 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10132 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10133 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10134 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10135 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10136
10137 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10138 is because with soft-float, the SFmode and DFmode sqrt
10139 instructions will be absent, and the compiler will notice and
10140 try to use the TFmode sqrt instruction for calls to the
10141 builtin function sqrt, but this fails. */
10142 if (TARGET_FPU)
10143 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10144
10145 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10146 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10147 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10148 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10149 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10150 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10151
10152 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10153 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10154 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10155 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10156
10157 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10158 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10159 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10160 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10161
10162 if (DITF_CONVERSION_LIBFUNCS)
10163 {
10164 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10165 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10166 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10167 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10168 }
10169
10170 if (SUN_CONVERSION_LIBFUNCS)
10171 {
10172 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10173 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10174 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10175 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10176 }
10177 }
10178 if (TARGET_ARCH64)
10179 {
10180 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10181 do not exist in the library. Make sure the compiler does not
10182 emit calls to them by accident. (It should always use the
10183 hardware instructions.) */
10184 set_optab_libfunc (smul_optab, SImode, 0);
10185 set_optab_libfunc (sdiv_optab, SImode, 0);
10186 set_optab_libfunc (udiv_optab, SImode, 0);
10187 set_optab_libfunc (smod_optab, SImode, 0);
10188 set_optab_libfunc (umod_optab, SImode, 0);
10189
10190 if (SUN_INTEGER_MULTIPLY_64)
10191 {
10192 set_optab_libfunc (smul_optab, DImode, "__mul64");
10193 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10194 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10195 set_optab_libfunc (smod_optab, DImode, "__rem64");
10196 set_optab_libfunc (umod_optab, DImode, "__urem64");
10197 }
10198
10199 if (SUN_CONVERSION_LIBFUNCS)
10200 {
10201 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10202 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10203 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10204 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10205 }
10206 }
10207 }
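/* The practical effect in 32-bit mode (a sketch, assuming long double is
   the 128-bit TFmode type, as on Solaris):

     long double add (long double a, long double b) { return a + b; }

   compiles to a call to _Q_add, the ABI-mandated support routine registered
   above, rather than to inline FPU instructions.  */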
10208 \f
10209 /* SPARC builtins. */
10210 enum sparc_builtins
10211 {
10212 /* FPU builtins. */
10213 SPARC_BUILTIN_LDFSR,
10214 SPARC_BUILTIN_STFSR,
10215
10216 /* VIS 1.0 builtins. */
10217 SPARC_BUILTIN_FPACK16,
10218 SPARC_BUILTIN_FPACK32,
10219 SPARC_BUILTIN_FPACKFIX,
10220 SPARC_BUILTIN_FEXPAND,
10221 SPARC_BUILTIN_FPMERGE,
10222 SPARC_BUILTIN_FMUL8X16,
10223 SPARC_BUILTIN_FMUL8X16AU,
10224 SPARC_BUILTIN_FMUL8X16AL,
10225 SPARC_BUILTIN_FMUL8SUX16,
10226 SPARC_BUILTIN_FMUL8ULX16,
10227 SPARC_BUILTIN_FMULD8SUX16,
10228 SPARC_BUILTIN_FMULD8ULX16,
10229 SPARC_BUILTIN_FALIGNDATAV4HI,
10230 SPARC_BUILTIN_FALIGNDATAV8QI,
10231 SPARC_BUILTIN_FALIGNDATAV2SI,
10232 SPARC_BUILTIN_FALIGNDATADI,
10233 SPARC_BUILTIN_WRGSR,
10234 SPARC_BUILTIN_RDGSR,
10235 SPARC_BUILTIN_ALIGNADDR,
10236 SPARC_BUILTIN_ALIGNADDRL,
10237 SPARC_BUILTIN_PDIST,
10238 SPARC_BUILTIN_EDGE8,
10239 SPARC_BUILTIN_EDGE8L,
10240 SPARC_BUILTIN_EDGE16,
10241 SPARC_BUILTIN_EDGE16L,
10242 SPARC_BUILTIN_EDGE32,
10243 SPARC_BUILTIN_EDGE32L,
10244 SPARC_BUILTIN_FCMPLE16,
10245 SPARC_BUILTIN_FCMPLE32,
10246 SPARC_BUILTIN_FCMPNE16,
10247 SPARC_BUILTIN_FCMPNE32,
10248 SPARC_BUILTIN_FCMPGT16,
10249 SPARC_BUILTIN_FCMPGT32,
10250 SPARC_BUILTIN_FCMPEQ16,
10251 SPARC_BUILTIN_FCMPEQ32,
10252 SPARC_BUILTIN_FPADD16,
10253 SPARC_BUILTIN_FPADD16S,
10254 SPARC_BUILTIN_FPADD32,
10255 SPARC_BUILTIN_FPADD32S,
10256 SPARC_BUILTIN_FPSUB16,
10257 SPARC_BUILTIN_FPSUB16S,
10258 SPARC_BUILTIN_FPSUB32,
10259 SPARC_BUILTIN_FPSUB32S,
10260 SPARC_BUILTIN_ARRAY8,
10261 SPARC_BUILTIN_ARRAY16,
10262 SPARC_BUILTIN_ARRAY32,
10263
10264 /* VIS 2.0 builtins. */
10265 SPARC_BUILTIN_EDGE8N,
10266 SPARC_BUILTIN_EDGE8LN,
10267 SPARC_BUILTIN_EDGE16N,
10268 SPARC_BUILTIN_EDGE16LN,
10269 SPARC_BUILTIN_EDGE32N,
10270 SPARC_BUILTIN_EDGE32LN,
10271 SPARC_BUILTIN_BMASK,
10272 SPARC_BUILTIN_BSHUFFLEV4HI,
10273 SPARC_BUILTIN_BSHUFFLEV8QI,
10274 SPARC_BUILTIN_BSHUFFLEV2SI,
10275 SPARC_BUILTIN_BSHUFFLEDI,
10276
10277 /* VIS 3.0 builtins. */
10278 SPARC_BUILTIN_CMASK8,
10279 SPARC_BUILTIN_CMASK16,
10280 SPARC_BUILTIN_CMASK32,
10281 SPARC_BUILTIN_FCHKSM16,
10282 SPARC_BUILTIN_FSLL16,
10283 SPARC_BUILTIN_FSLAS16,
10284 SPARC_BUILTIN_FSRL16,
10285 SPARC_BUILTIN_FSRA16,
10286 SPARC_BUILTIN_FSLL32,
10287 SPARC_BUILTIN_FSLAS32,
10288 SPARC_BUILTIN_FSRL32,
10289 SPARC_BUILTIN_FSRA32,
10290 SPARC_BUILTIN_PDISTN,
10291 SPARC_BUILTIN_FMEAN16,
10292 SPARC_BUILTIN_FPADD64,
10293 SPARC_BUILTIN_FPSUB64,
10294 SPARC_BUILTIN_FPADDS16,
10295 SPARC_BUILTIN_FPADDS16S,
10296 SPARC_BUILTIN_FPSUBS16,
10297 SPARC_BUILTIN_FPSUBS16S,
10298 SPARC_BUILTIN_FPADDS32,
10299 SPARC_BUILTIN_FPADDS32S,
10300 SPARC_BUILTIN_FPSUBS32,
10301 SPARC_BUILTIN_FPSUBS32S,
10302 SPARC_BUILTIN_FUCMPLE8,
10303 SPARC_BUILTIN_FUCMPNE8,
10304 SPARC_BUILTIN_FUCMPGT8,
10305 SPARC_BUILTIN_FUCMPEQ8,
10306 SPARC_BUILTIN_FHADDS,
10307 SPARC_BUILTIN_FHADDD,
10308 SPARC_BUILTIN_FHSUBS,
10309 SPARC_BUILTIN_FHSUBD,
10310 SPARC_BUILTIN_FNHADDS,
10311 SPARC_BUILTIN_FNHADDD,
10312 SPARC_BUILTIN_UMULXHI,
10313 SPARC_BUILTIN_XMULX,
10314 SPARC_BUILTIN_XMULXHI,
10315
10316 /* VIS 4.0 builtins. */
10317 SPARC_BUILTIN_FPADD8,
10318 SPARC_BUILTIN_FPADDS8,
10319 SPARC_BUILTIN_FPADDUS8,
10320 SPARC_BUILTIN_FPADDUS16,
10321 SPARC_BUILTIN_FPCMPLE8,
10322 SPARC_BUILTIN_FPCMPGT8,
10323 SPARC_BUILTIN_FPCMPULE16,
10324 SPARC_BUILTIN_FPCMPUGT16,
10325 SPARC_BUILTIN_FPCMPULE32,
10326 SPARC_BUILTIN_FPCMPUGT32,
10327 SPARC_BUILTIN_FPMAX8,
10328 SPARC_BUILTIN_FPMAX16,
10329 SPARC_BUILTIN_FPMAX32,
10330 SPARC_BUILTIN_FPMAXU8,
10331 SPARC_BUILTIN_FPMAXU16,
10332 SPARC_BUILTIN_FPMAXU32,
10333 SPARC_BUILTIN_FPMIN8,
10334 SPARC_BUILTIN_FPMIN16,
10335 SPARC_BUILTIN_FPMIN32,
10336 SPARC_BUILTIN_FPMINU8,
10337 SPARC_BUILTIN_FPMINU16,
10338 SPARC_BUILTIN_FPMINU32,
10339 SPARC_BUILTIN_FPSUB8,
10340 SPARC_BUILTIN_FPSUBS8,
10341 SPARC_BUILTIN_FPSUBUS8,
10342 SPARC_BUILTIN_FPSUBUS16,
10343
10344 SPARC_BUILTIN_MAX
10345 };
10346
10347 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10348 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10349
10350 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10351 function decl or NULL_TREE if the builtin was not added. */
10352
10353 static tree
10354 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10355 tree type)
10356 {
10357 tree t
10358 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10359
10360 if (t)
10361 {
10362 sparc_builtins[code] = t;
10363 sparc_builtins_icode[code] = icode;
10364 }
10365
10366 return t;
10367 }
10368
10369 /* Likewise, but also marks the function as "const". */
10370
10371 static tree
10372 def_builtin_const (const char *name, enum insn_code icode,
10373 enum sparc_builtins code, tree type)
10374 {
10375 tree t = def_builtin (name, icode, code, type);
10376
10377 if (t)
10378 TREE_READONLY (t) = 1;
10379
10380 return t;
10381 }
10382
10383 /* Implement the TARGET_INIT_BUILTINS target hook.
10384 Create builtin functions for special SPARC instructions. */
10385
10386 static void
10387 sparc_init_builtins (void)
10388 {
10389 if (TARGET_FPU)
10390 sparc_fpu_init_builtins ();
10391
10392 if (TARGET_VIS)
10393 sparc_vis_init_builtins ();
10394 }
10395
10396 /* Create builtin functions for FPU instructions. */
10397
10398 static void
10399 sparc_fpu_init_builtins (void)
10400 {
10401 tree ftype
10402 = build_function_type_list (void_type_node,
10403 build_pointer_type (unsigned_type_node), 0);
10404 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10405 SPARC_BUILTIN_LDFSR, ftype);
10406 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10407 SPARC_BUILTIN_STFSR, ftype);
10408 }
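/* Usage sketch: both builtins take the address of a 32-bit word, so saving
   and later restoring the FSR looks roughly like

     unsigned int fsr;
     __builtin_store_fsr (&fsr);    store %fsr into the word
     ...
     __builtin_load_fsr (&fsr);     load the word back into %fsr  */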
10409
10410 /* Create builtin functions for VIS instructions. */
10411
10412 static void
10413 sparc_vis_init_builtins (void)
10414 {
10415 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10416 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10417 tree v4hi = build_vector_type (intHI_type_node, 4);
10418 tree v2hi = build_vector_type (intHI_type_node, 2);
10419 tree v2si = build_vector_type (intSI_type_node, 2);
10420 tree v1si = build_vector_type (intSI_type_node, 1);
10421
10422 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10423 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10424 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10425 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10426 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10427 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10428 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10429 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10430 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10431 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10432 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10433 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10434 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10435 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10436 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10437 v8qi, v8qi,
10438 intDI_type_node, 0);
10439 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10440 v8qi, v8qi, 0);
10441 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10442 v8qi, v8qi, 0);
10443 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10444 intDI_type_node,
10445 intDI_type_node, 0);
10446 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10447 intSI_type_node,
10448 intSI_type_node, 0);
10449 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10450 ptr_type_node,
10451 intSI_type_node, 0);
10452 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10453 ptr_type_node,
10454 intDI_type_node, 0);
10455 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10456 ptr_type_node,
10457 ptr_type_node, 0);
10458 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10459 ptr_type_node,
10460 ptr_type_node, 0);
10461 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10462 v4hi, v4hi, 0);
10463 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10464 v2si, v2si, 0);
10465 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10466 v4hi, v4hi, 0);
10467 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10468 v2si, v2si, 0);
10469 tree void_ftype_di = build_function_type_list (void_type_node,
10470 intDI_type_node, 0);
10471 tree di_ftype_void = build_function_type_list (intDI_type_node,
10472 void_type_node, 0);
10473 tree void_ftype_si = build_function_type_list (void_type_node,
10474 intSI_type_node, 0);
10475 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10476 float_type_node,
10477 float_type_node, 0);
10478 tree df_ftype_df_df = build_function_type_list (double_type_node,
10479 double_type_node,
10480 double_type_node, 0);
10481
10482 /* Packing and expanding vectors. */
10483 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10484 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10485 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10486 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10487 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10488 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10489 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10490 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10491 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10492 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10493
10494 /* Multiplications. */
10495 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10496 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10497 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10498 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10499 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10500 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10501 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10502 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10503 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10504 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10505 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10506 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10507 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10508 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10509
10510 /* Data aligning. */
10511 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10512 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10513 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10514 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10515 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10516 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10517 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10518 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10519
10520 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10521 SPARC_BUILTIN_WRGSR, void_ftype_di);
10522 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10523 SPARC_BUILTIN_RDGSR, di_ftype_void);
10524
10525 if (TARGET_ARCH64)
10526 {
10527 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10528 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10529 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10530 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10531 }
10532 else
10533 {
10534 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10535 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10536 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10537 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10538 }
10539
10540 /* Pixel distance. */
10541 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10542 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10543
10544 /* Edge handling. */
10545 if (TARGET_ARCH64)
10546 {
10547 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10548 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10549 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10550 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10551 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10552 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10553 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10554 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10555 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10556 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10557 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10558 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10559 }
10560 else
10561 {
10562 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10563 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10564 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10565 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10566 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10567 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10568 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10569 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10570 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10571 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10572 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10573 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10574 }
10575
10576 /* Pixel compare. */
10577 if (TARGET_ARCH64)
10578 {
10579 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10580 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10581 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10582 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10583 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10584 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10585 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10586 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10587 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10588 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10589 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10590 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10591 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10592 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10593 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10594 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10595 }
10596 else
10597 {
10598 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10599 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10600 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10601 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10602 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10603 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10604 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10605 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10606 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10607 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10608 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10609 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10610 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10611 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10612 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10613 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10614 }
10615
10616 /* Addition and subtraction. */
10617 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10618 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10619 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10620 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10621 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10622 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10623 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10624 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10625 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10626 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10627 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10628 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10629 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10630 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10631 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10632 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10633
10634 /* Three-dimensional array addressing. */
10635 if (TARGET_ARCH64)
10636 {
10637 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10638 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10639 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10640 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10641 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10642 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10643 }
10644 else
10645 {
10646 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10647 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10648 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10649 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10650 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10651 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10652 }
10653
10654 if (TARGET_VIS2)
10655 {
10656 /* Edge handling. */
10657 if (TARGET_ARCH64)
10658 {
10659 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10660 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10661 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10662 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10663 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10664 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10665 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10666 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10667 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10668 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10669 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10670 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10671 }
10672 else
10673 {
10674 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10675 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10676 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10677 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10678 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10679 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10680 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10681 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10682 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10683 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10684 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10685 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10686 }
10687
10688 /* Byte mask and shuffle. */
10689 if (TARGET_ARCH64)
10690 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10691 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10692 else
10693 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10694 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10695 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10696 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10697 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10698 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10699 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10700 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10701 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10702 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10703 }
10704
10705 if (TARGET_VIS3)
10706 {
10707 if (TARGET_ARCH64)
10708 {
10709 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10710 SPARC_BUILTIN_CMASK8, void_ftype_di);
10711 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10712 SPARC_BUILTIN_CMASK16, void_ftype_di);
10713 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10714 SPARC_BUILTIN_CMASK32, void_ftype_di);
10715 }
10716 else
10717 {
10718 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10719 SPARC_BUILTIN_CMASK8, void_ftype_si);
10720 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10721 SPARC_BUILTIN_CMASK16, void_ftype_si);
10722 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10723 SPARC_BUILTIN_CMASK32, void_ftype_si);
10724 }
10725
10726 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10727 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10728
10729 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10730 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10731 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10732 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10733 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10734 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10735 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10736 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10737 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10738 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10739 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10740 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10741 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10742 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10743 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10744 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10745
10746 if (TARGET_ARCH64)
10747 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10748 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10749 else
10750 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10751 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10752
10753 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10754 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10755 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10756 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10757 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10758 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10759
10760 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10761 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10762 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10763 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10764 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10765 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10766 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10767 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10768 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10769 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10770 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10771 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10772 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10773 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10774 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10775 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10776
10777 if (TARGET_ARCH64)
10778 {
10779 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10780 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10781 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10782 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10783 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10784 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10785 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10786 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10787 }
10788 else
10789 {
10790 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10791 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10792 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10793 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10794 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10795 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10796 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10797 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10798 }
10799
10800 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10801 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10802 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10803 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10804 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10805 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10806 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10807 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10808 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10809 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10810 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10811 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10812
10813 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10814 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10815 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10816 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10817 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10818 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10819 }
10820
10821 if (TARGET_VIS4)
10822 {
10823 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10824 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10825 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10826 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10827 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10828 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10829 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10830 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10831
10832
10833 if (TARGET_ARCH64)
10834 {
10835 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10836 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10837 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10838 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10839 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10840 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10841 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10842 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10843 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10844 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10845 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10846 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10847 }
10848 else
10849 {
10850 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10851 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10852 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10853 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10854 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10855 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10856 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10857 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10858 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10859 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10860 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10861 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10862 }
10863
10864 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10865 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10866 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10867 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10868 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10869 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10870 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10871 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10872 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10873 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10874 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10875 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10876 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10877 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10878 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10879 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10880 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10881 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10882 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10883 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10884 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10885 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10886 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10887 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10888 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10889 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10890 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10891 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10892 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10893 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10894 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10895 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10896 }
10897 }
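/* Typical usage of one of the builtins registered above (illustrative):

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi add4 (v4hi a, v4hi b) { return __builtin_vis_fpadd16 (a, b); }

   Compiled with -mvis, the builtin expands to a single fpadd16 instruction
   operating on four 16-bit lanes at once.  */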
10898
10899 /* Implement TARGET_BUILTIN_DECL hook. */
10900
10901 static tree
10902 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10903 {
10904 if (code >= SPARC_BUILTIN_MAX)
10905 return error_mark_node;
10906
10907 return sparc_builtins[code];
10908 }
10909
10910 /* Implement TARGET_EXPAND_BUILTIN hook. */
10911
10912 static rtx
10913 sparc_expand_builtin (tree exp, rtx target,
10914 rtx subtarget ATTRIBUTE_UNUSED,
10915 machine_mode tmode ATTRIBUTE_UNUSED,
10916 int ignore ATTRIBUTE_UNUSED)
10917 {
10918 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10919 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10920 enum insn_code icode = sparc_builtins_icode[code];
10921 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10922 call_expr_arg_iterator iter;
10923 int arg_count = 0;
10924 rtx pat, op[4];
10925 tree arg;
10926
10927 if (nonvoid)
10928 {
10929 machine_mode tmode = insn_data[icode].operand[0].mode;
10930 if (!target
10931 || GET_MODE (target) != tmode
10932 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10933 op[0] = gen_reg_rtx (tmode);
10934 else
10935 op[0] = target;
10936 }
10937
10938 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10939 {
10940 const struct insn_operand_data *insn_op;
10941 int idx;
10942
10943 if (arg == error_mark_node)
10944 return NULL_RTX;
10945
10946 arg_count++;
10947 idx = arg_count - !nonvoid;
10948 insn_op = &insn_data[icode].operand[idx];
10949 op[arg_count] = expand_normal (arg);
10950
10951 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10952 {
10953 if (!address_operand (op[arg_count], SImode))
10954 {
10955 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10956 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10957 }
10958 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10959 }
10960
10961 else if (insn_op->mode == V1DImode
10962 && GET_MODE (op[arg_count]) == DImode)
10963 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10964
10965 else if (insn_op->mode == V1SImode
10966 && GET_MODE (op[arg_count]) == SImode)
10967 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10968
10969 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10970 insn_op->mode))
10971 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10972 }
10973
10974 switch (arg_count)
10975 {
10976 case 0:
10977 pat = GEN_FCN (icode) (op[0]);
10978 break;
10979 case 1:
10980 if (nonvoid)
10981 pat = GEN_FCN (icode) (op[0], op[1]);
10982 else
10983 pat = GEN_FCN (icode) (op[1]);
10984 break;
10985 case 2:
10986 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10987 break;
10988 case 3:
10989 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10990 break;
10991 default:
10992 gcc_unreachable ();
10993 }
10994
10995 if (!pat)
10996 return NULL_RTX;
10997
10998 emit_insn (pat);
10999
11000 return (nonvoid ? op[0] : const0_rtx);
11001 }
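
/* As an illustration only (a sketch, assuming a target with the relevant
   VIS level enabled, e.g. -mvis4 for fpmax16), a user-level call such as

       typedef short v4hi __attribute__ ((vector_size (8)));
       v4hi f (v4hi a, v4hi b)
       {
         return __builtin_vis_fpmax16 (a, b);
       }

   reaches sparc_expand_builtin as a CALL_EXPR: each argument is expanded,
   coerced to the mode and predicate of the insn pattern recorded in
   sparc_builtins_icode, and a single insn is emitted into the stream.  */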
11002
11003 /* Return the upper 16 bits of the 8x16 multiplication. */
11004
11005 static int
11006 sparc_vis_mul8x16 (int e8, int e16)
11007 {
11008 return (e8 * e16 + 128) / 256;
11009 }
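
/* Illustrative arithmetic only: with e8 = 3 and e16 = 150,
   (3 * 150 + 128) / 256 = 578 / 256 = 2, i.e. the exact product
   450 / 256 = 1.76 rounded to the nearest integer, which is the rounding
   obtained by keeping only the upper 16 bits of the 24-bit product.  */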
11010
11011 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11012 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11013
11014 static void
11015 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
11016 tree inner_type, tree cst0, tree cst1)
11017 {
11018 unsigned i, num = VECTOR_CST_NELTS (cst0);
11019 int scale;
11020
11021 switch (fncode)
11022 {
11023 case SPARC_BUILTIN_FMUL8X16:
11024 for (i = 0; i < num; ++i)
11025 {
11026 int val
11027 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11028 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11029 n_elts[i] = build_int_cst (inner_type, val);
11030 }
11031 break;
11032
11033 case SPARC_BUILTIN_FMUL8X16AU:
11034 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11035
11036 for (i = 0; i < num; ++i)
11037 {
11038 int val
11039 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11040 scale);
11041 n_elts[i] = build_int_cst (inner_type, val);
11042 }
11043 break;
11044
11045 case SPARC_BUILTIN_FMUL8X16AL:
11046 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11047
11048 for (i = 0; i < num; ++i)
11049 {
11050 int val
11051 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11052 scale);
11053 n_elts[i] = build_int_cst (inner_type, val);
11054 }
11055 break;
11056
11057 default:
11058 gcc_unreachable ();
11059 }
11060 }
11061
11062 /* Implement TARGET_FOLD_BUILTIN hook.
11063
11064 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11065 result of the function call is ignored. NULL_TREE is returned if the
11066 function could not be folded. */
11067
11068 static tree
11069 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11070 tree *args, bool ignore)
11071 {
11072 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11073 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11074 tree arg0, arg1, arg2;
11075
11076 if (ignore)
11077 switch (code)
11078 {
11079 case SPARC_BUILTIN_LDFSR:
11080 case SPARC_BUILTIN_STFSR:
11081 case SPARC_BUILTIN_ALIGNADDR:
11082 case SPARC_BUILTIN_WRGSR:
11083 case SPARC_BUILTIN_BMASK:
11084 case SPARC_BUILTIN_CMASK8:
11085 case SPARC_BUILTIN_CMASK16:
11086 case SPARC_BUILTIN_CMASK32:
11087 break;
11088
11089 default:
11090 return build_zero_cst (rtype);
11091 }
11092
11093 switch (code)
11094 {
11095 case SPARC_BUILTIN_FEXPAND:
11096 arg0 = args[0];
11097 STRIP_NOPS (arg0);
11098
11099 if (TREE_CODE (arg0) == VECTOR_CST)
11100 {
11101 tree inner_type = TREE_TYPE (rtype);
11102 tree *n_elts;
11103 unsigned i;
11104
11105 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11106 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11107 n_elts[i] = build_int_cst (inner_type,
11108 TREE_INT_CST_LOW
11109 (VECTOR_CST_ELT (arg0, i)) << 4);
11110 return build_vector (rtype, n_elts);
11111 }
11112 break;
11113
11114 case SPARC_BUILTIN_FMUL8X16:
11115 case SPARC_BUILTIN_FMUL8X16AU:
11116 case SPARC_BUILTIN_FMUL8X16AL:
11117 arg0 = args[0];
11118 arg1 = args[1];
11119 STRIP_NOPS (arg0);
11120 STRIP_NOPS (arg1);
11121
11122 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11123 {
11124 tree inner_type = TREE_TYPE (rtype);
11125 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11126 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11127 return build_vector (rtype, n_elts);
11128 }
11129 break;
11130
11131 case SPARC_BUILTIN_FPMERGE:
11132 arg0 = args[0];
11133 arg1 = args[1];
11134 STRIP_NOPS (arg0);
11135 STRIP_NOPS (arg1);
11136
11137 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11138 {
11139 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11140 unsigned i;
11141 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11142 {
11143 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11144 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11145 }
11146
11147 return build_vector (rtype, n_elts);
11148 }
11149 break;
11150
11151 case SPARC_BUILTIN_PDIST:
11152 case SPARC_BUILTIN_PDISTN:
11153 arg0 = args[0];
11154 arg1 = args[1];
11155 STRIP_NOPS (arg0);
11156 STRIP_NOPS (arg1);
11157 if (code == SPARC_BUILTIN_PDIST)
11158 {
11159 arg2 = args[2];
11160 STRIP_NOPS (arg2);
11161 }
11162 else
11163 arg2 = integer_zero_node;
11164
11165 if (TREE_CODE (arg0) == VECTOR_CST
11166 && TREE_CODE (arg1) == VECTOR_CST
11167 && TREE_CODE (arg2) == INTEGER_CST)
11168 {
11169 bool overflow = false;
11170 widest_int result = wi::to_widest (arg2);
11171 widest_int tmp;
11172 unsigned i;
11173
11174 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11175 {
11176 tree e0 = VECTOR_CST_ELT (arg0, i);
11177 tree e1 = VECTOR_CST_ELT (arg1, i);
11178
11179 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11180
11181 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11182 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11183 if (wi::neg_p (tmp))
11184 tmp = wi::neg (tmp, &neg2_ovf);
11185 else
11186 neg2_ovf = false;
11187 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11188 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11189 }
11190
11191 gcc_assert (!overflow);
11192
11193 return wide_int_to_tree (rtype, result);
11194 }
11195
11196 default:
11197 break;
11198 }
11199
11200 return NULL_TREE;
11201 }
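
/* To make the constant folding above concrete: folding FPMERGE on the
   constant vectors { a0, a1, a2, a3 } and { b0, b1, b2, b3 } yields the
   interleaved vector { a0, b0, a1, b1, a2, b2, a3, b3 }, and folding
   PDIST adds the sum of absolute differences of the elements of its two
   vector operands to the accumulator operand; both follow directly from
   the loops above.  */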
11202 \f
11203 /* ??? This duplicates information provided to the compiler by the
11204 ??? scheduler description. Some day, teach genautomata to output
11205 ??? the latencies and then CSE will just use that. */
11206
11207 static bool
11208 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11209 int opno ATTRIBUTE_UNUSED,
11210 int *total, bool speed ATTRIBUTE_UNUSED)
11211 {
11212 int code = GET_CODE (x);
11213 bool float_mode_p = FLOAT_MODE_P (mode);
11214
11215 switch (code)
11216 {
11217 case CONST_INT:
11218 if (SMALL_INT (x))
11219 *total = 0;
11220 else
11221 *total = 2;
11222 return true;
11223
11224 case CONST_WIDE_INT:
11225 *total = 0;
11226 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11227 *total += 2;
11228 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11229 *total += 2;
11230 return true;
11231
11232 case HIGH:
11233 *total = 2;
11234 return true;
11235
11236 case CONST:
11237 case LABEL_REF:
11238 case SYMBOL_REF:
11239 *total = 4;
11240 return true;
11241
11242 case CONST_DOUBLE:
11243 *total = 8;
11244 return true;
11245
11246 case MEM:
11247 /* If outer-code was a sign or zero extension, a cost
11248 of COSTS_N_INSNS (1) was already added in. This is
11249 why we are subtracting it back out. */
11250 if (outer_code == ZERO_EXTEND)
11251 {
11252 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11253 }
11254 else if (outer_code == SIGN_EXTEND)
11255 {
11256 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11257 }
11258 else if (float_mode_p)
11259 {
11260 *total = sparc_costs->float_load;
11261 }
11262 else
11263 {
11264 *total = sparc_costs->int_load;
11265 }
11266
11267 return true;
11268
11269 case PLUS:
11270 case MINUS:
11271 if (float_mode_p)
11272 *total = sparc_costs->float_plusminus;
11273 else
11274 *total = COSTS_N_INSNS (1);
11275 return false;
11276
11277 case FMA:
11278 {
11279 rtx sub;
11280
11281 gcc_assert (float_mode_p);
11282 *total = sparc_costs->float_mul;
11283
11284 sub = XEXP (x, 0);
11285 if (GET_CODE (sub) == NEG)
11286 sub = XEXP (sub, 0);
11287 *total += rtx_cost (sub, mode, FMA, 0, speed);
11288
11289 sub = XEXP (x, 2);
11290 if (GET_CODE (sub) == NEG)
11291 sub = XEXP (sub, 0);
11292 *total += rtx_cost (sub, mode, FMA, 2, speed);
11293 return true;
11294 }
11295
11296 case MULT:
11297 if (float_mode_p)
11298 *total = sparc_costs->float_mul;
11299 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11300 *total = COSTS_N_INSNS (25);
11301 else
11302 {
11303 int bit_cost;
11304
11305 bit_cost = 0;
11306 if (sparc_costs->int_mul_bit_factor)
11307 {
11308 int nbits;
11309
11310 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11311 {
11312 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11313 for (nbits = 0; value != 0; value &= value - 1)
11314 nbits++;
11315 }
11316 else
11317 nbits = 7;
11318
11319 if (nbits < 3)
11320 nbits = 3;
11321 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11322 bit_cost = COSTS_N_INSNS (bit_cost);
11323 }
11324
11325 if (mode == DImode || !TARGET_HARD_MUL)
11326 *total = sparc_costs->int_mulX + bit_cost;
11327 else
11328 *total = sparc_costs->int_mul + bit_cost;
11329 }
11330 return false;
11331
11332 case ASHIFT:
11333 case ASHIFTRT:
11334 case LSHIFTRT:
11335 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11336 return false;
11337
11338 case DIV:
11339 case UDIV:
11340 case MOD:
11341 case UMOD:
11342 if (float_mode_p)
11343 {
11344 if (mode == DFmode)
11345 *total = sparc_costs->float_div_df;
11346 else
11347 *total = sparc_costs->float_div_sf;
11348 }
11349 else
11350 {
11351 if (mode == DImode)
11352 *total = sparc_costs->int_divX;
11353 else
11354 *total = sparc_costs->int_div;
11355 }
11356 return false;
11357
11358 case NEG:
11359 if (! float_mode_p)
11360 {
11361 *total = COSTS_N_INSNS (1);
11362 return false;
11363 }
11364 /* FALLTHRU */
11365
11366 case ABS:
11367 case FLOAT:
11368 case UNSIGNED_FLOAT:
11369 case FIX:
11370 case UNSIGNED_FIX:
11371 case FLOAT_EXTEND:
11372 case FLOAT_TRUNCATE:
11373 *total = sparc_costs->float_move;
11374 return false;
11375
11376 case SQRT:
11377 if (mode == DFmode)
11378 *total = sparc_costs->float_sqrt_df;
11379 else
11380 *total = sparc_costs->float_sqrt_sf;
11381 return false;
11382
11383 case COMPARE:
11384 if (float_mode_p)
11385 *total = sparc_costs->float_cmp;
11386 else
11387 *total = COSTS_N_INSNS (1);
11388 return false;
11389
11390 case IF_THEN_ELSE:
11391 if (float_mode_p)
11392 *total = sparc_costs->float_cmove;
11393 else
11394 *total = sparc_costs->int_cmove;
11395 return false;
11396
11397 case IOR:
11398 /* Handle the NAND vector patterns. */
11399 if (sparc_vector_mode_supported_p (mode)
11400 && GET_CODE (XEXP (x, 0)) == NOT
11401 && GET_CODE (XEXP (x, 1)) == NOT)
11402 {
11403 *total = COSTS_N_INSNS (1);
11404 return true;
11405 }
11406 else
11407 return false;
11408
11409 default:
11410 return false;
11411 }
11412 }
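
/* A worked example of the MULT cost above (numbers are illustrative and
   depend on the selected processor_costs table): multiplying by the
   constant 0xff sets nbits = 8 (eight one bits), so with an
   int_mul_bit_factor of 2 the extra cost is
   COSTS_N_INSNS ((8 - 3) / 2) = COSTS_N_INSNS (2), added on top of
   int_mul or int_mulX.  A non-constant multiplier is charged as if
   7 bits were set.  */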
11413
11414 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11415
11416 static inline bool
11417 general_or_i64_p (reg_class_t rclass)
11418 {
11419 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11420 }
11421
11422 /* Implement TARGET_REGISTER_MOVE_COST. */
11423
11424 static int
11425 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11426 reg_class_t from, reg_class_t to)
11427 {
11428 bool need_memory = false;
11429
11430 /* This helps postreload CSE to eliminate redundant comparisons. */
11431 if (from == NO_REGS || to == NO_REGS)
11432 return 100;
11433
11434 if (from == FPCC_REGS || to == FPCC_REGS)
11435 need_memory = true;
11436 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11437 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11438 {
11439 if (TARGET_VIS3)
11440 {
11441 int size = GET_MODE_SIZE (mode);
11442 if (size == 8 || size == 4)
11443 {
11444 if (! TARGET_ARCH32 || size == 4)
11445 return 4;
11446 else
11447 return 6;
11448 }
11449 }
11450 need_memory = true;
11451 }
11452
11453 if (need_memory)
11454 {
11455 if (sparc_cpu == PROCESSOR_ULTRASPARC
11456 || sparc_cpu == PROCESSOR_ULTRASPARC3
11457 || sparc_cpu == PROCESSOR_NIAGARA
11458 || sparc_cpu == PROCESSOR_NIAGARA2
11459 || sparc_cpu == PROCESSOR_NIAGARA3
11460 || sparc_cpu == PROCESSOR_NIAGARA4
11461 || sparc_cpu == PROCESSOR_NIAGARA7)
11462 return 12;
11463
11464 return 6;
11465 }
11466
11467 return 2;
11468 }
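
/* For instance, with VIS3 a 4-byte value moves directly between the
   integer and FP register files at cost 4, and an 8-byte value on 32-bit
   at cost 6; without that path the move goes through memory, which is
   costed at 12 on the UltraSPARC/Niagara variants listed above and 6
   otherwise.  This is just a restatement of the code above.  */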
11469
11470 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11471 This is achieved by means of a manual dynamic stack space allocation in
11472 the current frame. We make the assumption that SEQ doesn't contain any
11473 function calls, with the possible exception of calls to the GOT helper. */
11474
11475 static void
11476 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11477 {
11478 /* We must preserve the lowest 16 words for the register save area. */
11479 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11480 /* We really need only 2 words of fresh stack space. */
11481 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11482
11483 rtx slot
11484 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11485 SPARC_STACK_BIAS + offset));
11486
11487 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11488 emit_insn (gen_rtx_SET (slot, reg));
11489 if (reg2)
11490 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11491 reg2));
11492 emit_insn (seq);
11493 if (reg2)
11494 emit_insn (gen_rtx_SET (reg2,
11495 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11496 emit_insn (gen_rtx_SET (reg, slot));
11497 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11498 }
11499
11500 /* Output the assembler code for a thunk function. THUNK_DECL is the
11501 declaration for the thunk function itself, FUNCTION is the decl for
11502 the target function. DELTA is an immediate constant offset to be
11503 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11504 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11505
11506 static void
11507 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11508 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11509 tree function)
11510 {
11511 rtx this_rtx, funexp;
11512 rtx_insn *insn;
11513 unsigned int int_arg_first;
11514
11515 reload_completed = 1;
11516 epilogue_completed = 1;
11517
11518 emit_note (NOTE_INSN_PROLOGUE_END);
11519
11520 if (TARGET_FLAT)
11521 {
11522 sparc_leaf_function_p = 1;
11523
11524 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11525 }
11526 else if (flag_delayed_branch)
11527 {
11528 /* We will emit a regular sibcall below, so we need to instruct
11529 output_sibcall that we are in a leaf function. */
11530 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11531
11532 /* This will cause final.c to invoke leaf_renumber_regs so we
11533 must behave as if we were in a not-yet-leafified function. */
11534 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11535 }
11536 else
11537 {
11538 /* We will emit the sibcall manually below, so we will need to
11539 manually spill non-leaf registers. */
11540 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11541
11542 /* We really are in a leaf function. */
11543 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11544 }
11545
11546 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11547 returns a structure, the structure return pointer is there instead. */
11548 if (TARGET_ARCH64
11549 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11550 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11551 else
11552 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11553
11554 /* Add DELTA. When possible use a plain add, otherwise load it into
11555 a register first. */
11556 if (delta)
11557 {
11558 rtx delta_rtx = GEN_INT (delta);
11559
11560 if (! SPARC_SIMM13_P (delta))
11561 {
11562 rtx scratch = gen_rtx_REG (Pmode, 1);
11563 emit_move_insn (scratch, delta_rtx);
11564 delta_rtx = scratch;
11565 }
11566
11567 /* THIS_RTX += DELTA. */
11568 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11569 }
11570
11571 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11572 if (vcall_offset)
11573 {
11574 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11575 rtx scratch = gen_rtx_REG (Pmode, 1);
11576
11577 gcc_assert (vcall_offset < 0);
11578
11579 /* SCRATCH = *THIS_RTX. */
11580 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11581
11582 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11583 may not have any available scratch register at this point. */
11584 if (SPARC_SIMM13_P (vcall_offset))
11585 ;
11586 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11587 else if (! fixed_regs[5]
11588 /* The below sequence is made up of at least 2 insns,
11589 while the default method may need only one. */
11590 && vcall_offset < -8192)
11591 {
11592 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11593 emit_move_insn (scratch2, vcall_offset_rtx);
11594 vcall_offset_rtx = scratch2;
11595 }
11596 else
11597 {
11598 rtx increment = GEN_INT (-4096);
11599
11600 /* VCALL_OFFSET is a negative number whose typical range can be
11601 estimated as -32768..0 in 32-bit mode. In almost all cases
11602 it is therefore cheaper to emit multiple add insns than
11603 spilling and loading the constant into a register (at least
11604 6 insns). */
11605 while (! SPARC_SIMM13_P (vcall_offset))
11606 {
11607 emit_insn (gen_add2_insn (scratch, increment));
11608 vcall_offset += 4096;
11609 }
11610 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11611 }
11612
11613 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11614 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11615 gen_rtx_PLUS (Pmode,
11616 scratch,
11617 vcall_offset_rtx)));
11618
11619 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11620 emit_insn (gen_add2_insn (this_rtx, scratch));
11621 }
11622
11623 /* Generate a tail call to the target function. */
11624 if (! TREE_USED (function))
11625 {
11626 assemble_external (function);
11627 TREE_USED (function) = 1;
11628 }
11629 funexp = XEXP (DECL_RTL (function), 0);
11630
11631 if (flag_delayed_branch)
11632 {
11633 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11634 insn = emit_call_insn (gen_sibcall (funexp));
11635 SIBLING_CALL_P (insn) = 1;
11636 }
11637 else
11638 {
11639 /* The hoops we have to jump through in order to generate a sibcall
11640 without using delay slots... */
11641 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11642
11643 if (flag_pic)
11644 {
11645 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11646 start_sequence ();
11647 load_got_register (); /* clobbers %o7 */
11648 scratch = sparc_legitimize_pic_address (funexp, scratch);
11649 seq = get_insns ();
11650 end_sequence ();
11651 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11652 }
11653 else if (TARGET_ARCH32)
11654 {
11655 emit_insn (gen_rtx_SET (scratch,
11656 gen_rtx_HIGH (SImode, funexp)));
11657 emit_insn (gen_rtx_SET (scratch,
11658 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11659 }
11660 else /* TARGET_ARCH64 */
11661 {
11662 switch (sparc_cmodel)
11663 {
11664 case CM_MEDLOW:
11665 case CM_MEDMID:
11666 /* The destination can serve as a temporary. */
11667 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11668 break;
11669
11670 case CM_MEDANY:
11671 case CM_EMBMEDANY:
11672 /* The destination cannot serve as a temporary. */
11673 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11674 start_sequence ();
11675 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11676 seq = get_insns ();
11677 end_sequence ();
11678 emit_and_preserve (seq, spill_reg, 0);
11679 break;
11680
11681 default:
11682 gcc_unreachable ();
11683 }
11684 }
11685
11686 emit_jump_insn (gen_indirect_jump (scratch));
11687 }
11688
11689 emit_barrier ();
11690
11691 /* Run just enough of rest_of_compilation to get the insns emitted.
11692 There's not really enough bulk here to make other passes such as
11693 instruction scheduling worth while. Note that use_thunk calls
11694 assemble_start_function and assemble_end_function. */
11695 insn = get_insns ();
11696 shorten_branches (insn);
11697 final_start_function (insn, file, 1);
11698 final (insn, file, 1);
11699 final_end_function ();
11700
11701 reload_completed = 0;
11702 epilogue_completed = 0;
11703 }
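
/* A concrete (hypothetical) example of the VCALL_OFFSET handling above:
   for vcall_offset = -10000 with %g5 unavailable, the loop emits two
   "add scratch, -4096" instructions, leaving -1808, which fits in a
   signed 13-bit immediate and is used as the displacement of the final
   load.  */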
11704
11705 /* Return true if sparc_output_mi_thunk would be able to output the
11706 assembler code for the thunk function specified by the arguments
11707 it is passed, and false otherwise. */
11708 static bool
11709 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11710 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11711 HOST_WIDE_INT vcall_offset,
11712 const_tree function ATTRIBUTE_UNUSED)
11713 {
11714 /* Bound the loop used in the default method above. */
11715 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11716 }
11717
11718 /* How to allocate a 'struct machine_function'. */
11719
11720 static struct machine_function *
11721 sparc_init_machine_status (void)
11722 {
11723 return ggc_cleared_alloc<machine_function> ();
11724 }
11725
11726 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11727 We need to emit DTP-relative relocations. */
11728
11729 static void
11730 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11731 {
11732 switch (size)
11733 {
11734 case 4:
11735 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11736 break;
11737 case 8:
11738 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11739 break;
11740 default:
11741 gcc_unreachable ();
11742 }
11743 output_addr_const (file, x);
11744 fputs (")", file);
11745 }
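
/* For example, a 4-byte entry referring to a symbol x emits
   "\t.word\t%r_tls_dtpoff32(x)", and the 8-byte variant uses .xword with
   %r_tls_dtpoff64 -- simply the two fputs branches above applied to X.  */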
11746
11747 /* Do whatever processing is required at the end of a file. */
11748
11749 static void
11750 sparc_file_end (void)
11751 {
11752 /* If we need to emit the special GOT helper function, do so now. */
11753 if (got_helper_rtx)
11754 {
11755 const char *name = XSTR (got_helper_rtx, 0);
11756 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11757 #ifdef DWARF2_UNWIND_INFO
11758 bool do_cfi;
11759 #endif
11760
11761 if (USE_HIDDEN_LINKONCE)
11762 {
11763 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11764 get_identifier (name),
11765 build_function_type_list (void_type_node,
11766 NULL_TREE));
11767 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11768 NULL_TREE, void_type_node);
11769 TREE_PUBLIC (decl) = 1;
11770 TREE_STATIC (decl) = 1;
11771 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11772 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11773 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11774 resolve_unique_section (decl, 0, flag_function_sections);
11775 allocate_struct_function (decl, true);
11776 cfun->is_thunk = 1;
11777 current_function_decl = decl;
11778 init_varasm_status ();
11779 assemble_start_function (decl, name);
11780 }
11781 else
11782 {
11783 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11784 switch_to_section (text_section);
11785 if (align > 0)
11786 ASM_OUTPUT_ALIGN (asm_out_file, align);
11787 ASM_OUTPUT_LABEL (asm_out_file, name);
11788 }
11789
11790 #ifdef DWARF2_UNWIND_INFO
11791 do_cfi = dwarf2out_do_cfi_asm ();
11792 if (do_cfi)
11793 fprintf (asm_out_file, "\t.cfi_startproc\n");
11794 #endif
11795 if (flag_delayed_branch)
11796 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11797 reg_name, reg_name);
11798 else
11799 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11800 reg_name, reg_name);
11801 #ifdef DWARF2_UNWIND_INFO
11802 if (do_cfi)
11803 fprintf (asm_out_file, "\t.cfi_endproc\n");
11804 #endif
11805 }
11806
11807 if (NEED_INDICATE_EXEC_STACK)
11808 file_end_indicate_exec_stack ();
11809
11810 #ifdef TARGET_SOLARIS
11811 solaris_file_end ();
11812 #endif
11813 }
11814
11815 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11816 /* Implement TARGET_MANGLE_TYPE. */
11817
11818 static const char *
11819 sparc_mangle_type (const_tree type)
11820 {
11821 if (TARGET_ARCH32
11822 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11823 && TARGET_LONG_DOUBLE_128)
11824 return "g";
11825
11826 /* For all other types, use normal C++ mangling. */
11827 return NULL;
11828 }
11829 #endif
11830
11831 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11832 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11833 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11834
11835 void
11836 sparc_emit_membar_for_model (enum memmodel model,
11837 int load_store, int before_after)
11838 {
11839 /* Bits for the MEMBAR mmask field. */
11840 const int LoadLoad = 1;
11841 const int StoreLoad = 2;
11842 const int LoadStore = 4;
11843 const int StoreStore = 8;
11844
11845 int mm = 0, implied = 0;
11846
11847 switch (sparc_memory_model)
11848 {
11849 case SMM_SC:
11850 /* Sequential Consistency. All memory transactions are immediately
11851 visible in sequential execution order. No barriers needed. */
11852 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11853 break;
11854
11855 case SMM_TSO:
11856 /* Total Store Ordering: all memory transactions with store semantics
11857 are followed by an implied StoreStore. */
11858 implied |= StoreStore;
11859
11860 /* If we're not looking for a raw barrier (before+after), then atomic
11861 operations get the benefit of being both load and store. */
11862 if (load_store == 3 && before_after == 1)
11863 implied |= StoreLoad;
11864 /* FALLTHRU */
11865
11866 case SMM_PSO:
11867 /* Partial Store Ordering: all memory transactions with load semantics
11868 are followed by an implied LoadLoad | LoadStore. */
11869 implied |= LoadLoad | LoadStore;
11870
11871 /* If we're not looking for a raw barrier (before+after), then atomic
11872 operations get the benefit of being both load and store. */
11873 if (load_store == 3 && before_after == 2)
11874 implied |= StoreLoad | StoreStore;
11875 /* FALLTHRU */
11876
11877 case SMM_RMO:
11878 /* Relaxed Memory Ordering: no implicit bits. */
11879 break;
11880
11881 default:
11882 gcc_unreachable ();
11883 }
11884
11885 if (before_after & 1)
11886 {
11887 if (is_mm_release (model) || is_mm_acq_rel (model)
11888 || is_mm_seq_cst (model))
11889 {
11890 if (load_store & 1)
11891 mm |= LoadLoad | StoreLoad;
11892 if (load_store & 2)
11893 mm |= LoadStore | StoreStore;
11894 }
11895 }
11896 if (before_after & 2)
11897 {
11898 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11899 || is_mm_seq_cst (model))
11900 {
11901 if (load_store & 1)
11902 mm |= LoadLoad | LoadStore;
11903 if (load_store & 2)
11904 mm |= StoreLoad | StoreStore;
11905 }
11906 }
11907
11908 /* Remove the bits implied by the system memory model. */
11909 mm &= ~implied;
11910
11911 /* For raw barriers (before+after), always emit a barrier.
11912 This will become a compile-time barrier if needed. */
11913 if (mm || before_after == 3)
11914 emit_insn (gen_membar (GEN_INT (mm)));
11915 }
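
/* Tracing the logic above for a raw fence with load_store == 3 and
   before_after == 3 under a seq_cst model: the model-independent mask is
   LoadLoad | StoreLoad | LoadStore | StoreStore.  Under TSO everything
   except StoreLoad is already implied, so only "membar #StoreLoad" is
   emitted; under RMO the full four-bit mask survives; under SC the mask
   becomes empty but a membar is still emitted because before_after == 3
   requests at least a compile-time barrier.  */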
11916
11917 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing 32-bit
11918 compare and swap on the word containing the byte or half-word. */
11919
11920 static void
11921 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11922 rtx oldval, rtx newval)
11923 {
11924 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11925 rtx addr = gen_reg_rtx (Pmode);
11926 rtx off = gen_reg_rtx (SImode);
11927 rtx oldv = gen_reg_rtx (SImode);
11928 rtx newv = gen_reg_rtx (SImode);
11929 rtx oldvalue = gen_reg_rtx (SImode);
11930 rtx newvalue = gen_reg_rtx (SImode);
11931 rtx res = gen_reg_rtx (SImode);
11932 rtx resv = gen_reg_rtx (SImode);
11933 rtx memsi, val, mask, cc;
11934
11935 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11936
11937 if (Pmode != SImode)
11938 addr1 = gen_lowpart (SImode, addr1);
11939 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11940
11941 memsi = gen_rtx_MEM (SImode, addr);
11942 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11943 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11944
11945 val = copy_to_reg (memsi);
11946
11947 emit_insn (gen_rtx_SET (off,
11948 gen_rtx_XOR (SImode, off,
11949 GEN_INT (GET_MODE (mem) == QImode
11950 ? 3 : 2))));
11951
11952 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11953
11954 if (GET_MODE (mem) == QImode)
11955 mask = force_reg (SImode, GEN_INT (0xff));
11956 else
11957 mask = force_reg (SImode, GEN_INT (0xffff));
11958
11959 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11960
11961 emit_insn (gen_rtx_SET (val,
11962 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11963 val)));
11964
11965 oldval = gen_lowpart (SImode, oldval);
11966 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11967
11968 newval = gen_lowpart_common (SImode, newval);
11969 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11970
11971 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11972
11973 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11974
11975 rtx_code_label *end_label = gen_label_rtx ();
11976 rtx_code_label *loop_label = gen_label_rtx ();
11977 emit_label (loop_label);
11978
11979 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11980
11981 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11982
11983 emit_move_insn (bool_result, const1_rtx);
11984
11985 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11986
11987 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11988
11989 emit_insn (gen_rtx_SET (resv,
11990 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11991 res)));
11992
11993 emit_move_insn (bool_result, const0_rtx);
11994
11995 cc = gen_compare_reg_1 (NE, resv, val);
11996 emit_insn (gen_rtx_SET (val, resv));
11997
11998 /* Use cbranchcc4 to separate the compare and branch! */
11999 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12000 cc, const0_rtx, loop_label));
12001
12002 emit_label (end_label);
12003
12004 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12005
12006 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12007
12008 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12009 }
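
/* To illustrate the word-based CAS above with a hypothetical QImode
   access at address 0x1001: ADDR is rounded down to 0x1000, OFF becomes
   ((0x1001 & 3) ^ 3) << 3 = 16, so MASK = 0xff << 16 picks the second
   byte of the big-endian word; OLDVAL and NEWVAL are shifted into that
   byte position and merged with the unchanged bytes of the containing
   word before the 32-bit compare-and-swap loop runs.  */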
12010
12011 /* Expand code to perform a compare-and-swap. */
12012
12013 void
12014 sparc_expand_compare_and_swap (rtx operands[])
12015 {
12016 rtx bval, retval, mem, oldval, newval;
12017 machine_mode mode;
12018 enum memmodel model;
12019
12020 bval = operands[0];
12021 retval = operands[1];
12022 mem = operands[2];
12023 oldval = operands[3];
12024 newval = operands[4];
12025 model = (enum memmodel) INTVAL (operands[6]);
12026 mode = GET_MODE (mem);
12027
12028 sparc_emit_membar_for_model (model, 3, 1);
12029
12030 if (reg_overlap_mentioned_p (retval, oldval))
12031 oldval = copy_to_reg (oldval);
12032
12033 if (mode == QImode || mode == HImode)
12034 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12035 else
12036 {
12037 rtx (*gen) (rtx, rtx, rtx, rtx);
12038 rtx x;
12039
12040 if (mode == SImode)
12041 gen = gen_atomic_compare_and_swapsi_1;
12042 else
12043 gen = gen_atomic_compare_and_swapdi_1;
12044 emit_insn (gen (retval, mem, oldval, newval));
12045
12046 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12047 if (x != bval)
12048 convert_move (bval, x, 1);
12049 }
12050
12051 sparc_emit_membar_for_model (model, 3, 2);
12052 }
12053
12054 void
12055 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12056 {
12057 rtx t_1, t_2, t_3;
12058
12059 sel = gen_lowpart (DImode, sel);
12060 switch (vmode)
12061 {
12062 case V2SImode:
12063 /* inp = xxxxxxxAxxxxxxxB */
12064 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12065 NULL_RTX, 1, OPTAB_DIRECT);
12066 /* t_1 = ....xxxxxxxAxxx. */
12067 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12068 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12069 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12070 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12071 /* sel = .......B */
12072 /* t_1 = ...A.... */
12073 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12074 /* sel = ...A...B */
12075 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12076 /* sel = AAAABBBB * 4 */
12077 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12078 /* sel = { A*4, A*4+1, A*4+2, ... } */
12079 break;
12080
12081 case V4HImode:
12082 /* inp = xxxAxxxBxxxCxxxD */
12083 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12084 NULL_RTX, 1, OPTAB_DIRECT);
12085 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12086 NULL_RTX, 1, OPTAB_DIRECT);
12087 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12088 NULL_RTX, 1, OPTAB_DIRECT);
12089 /* t_1 = ..xxxAxxxBxxxCxx */
12090 /* t_2 = ....xxxAxxxBxxxC */
12091 /* t_3 = ......xxxAxxxBxx */
12092 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12093 GEN_INT (0x07),
12094 NULL_RTX, 1, OPTAB_DIRECT);
12095 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12096 GEN_INT (0x0700),
12097 NULL_RTX, 1, OPTAB_DIRECT);
12098 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12099 GEN_INT (0x070000),
12100 NULL_RTX, 1, OPTAB_DIRECT);
12101 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12102 GEN_INT (0x07000000),
12103 NULL_RTX, 1, OPTAB_DIRECT);
12104 /* sel = .......D */
12105 /* t_1 = .....C.. */
12106 /* t_2 = ...B.... */
12107 /* t_3 = .A...... */
12108 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12109 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12110 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12111 /* sel = .A.B.C.D */
12112 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12113 /* sel = AABBCCDD * 2 */
12114 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12115 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12116 break;
12117
12118 case V8QImode:
12119 /* input = xAxBxCxDxExFxGxH */
12120 sel = expand_simple_binop (DImode, AND, sel,
12121 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12122 | 0x0f0f0f0f),
12123 NULL_RTX, 1, OPTAB_DIRECT);
12124 /* sel = .A.B.C.D.E.F.G.H */
12125 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12126 NULL_RTX, 1, OPTAB_DIRECT);
12127 /* t_1 = ..A.B.C.D.E.F.G. */
12128 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12129 NULL_RTX, 1, OPTAB_DIRECT);
12130 /* sel = .AABBCCDDEEFFGGH */
12131 sel = expand_simple_binop (DImode, AND, sel,
12132 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12133 | 0xff00ff),
12134 NULL_RTX, 1, OPTAB_DIRECT);
12135 /* sel = ..AB..CD..EF..GH */
12136 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12137 NULL_RTX, 1, OPTAB_DIRECT);
12138 /* t_1 = ....AB..CD..EF.. */
12139 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12140 NULL_RTX, 1, OPTAB_DIRECT);
12141 /* sel = ..ABABCDCDEFEFGH */
12142 sel = expand_simple_binop (DImode, AND, sel,
12143 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12144 NULL_RTX, 1, OPTAB_DIRECT);
12145 /* sel = ....ABCD....EFGH */
12146 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12147 NULL_RTX, 1, OPTAB_DIRECT);
12148 /* t_1 = ........ABCD.... */
12149 sel = gen_lowpart (SImode, sel);
12150 t_1 = gen_lowpart (SImode, t_1);
12151 break;
12152
12153 default:
12154 gcc_unreachable ();
12155 }
12156
12157 /* Always perform the final addition/merge within the bmask insn. */
12158 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12159 }
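
/* A worked (illustrative) V4HImode case of the selector construction
   above: a permutation selecting halfwords { 1, 0, 3, 2 } is first
   packed into the SImode value 0x01000302, scaled by 0x22 to 0x22006644,
   and the bmask addition of 0x01010101 leaves 0x23016745 in the GSR mask
   field -- nibble byte indices 2,3,0,1,6,7,4,5, i.e. the byte pairs of
   halfwords 1, 0, 3 and 2 for the subsequent bshuffle.  */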
12160
12161 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12162
12163 static bool
12164 sparc_frame_pointer_required (void)
12165 {
12166 /* If the stack pointer is dynamically modified in the function, it cannot
12167 serve as the frame pointer. */
12168 if (cfun->calls_alloca)
12169 return true;
12170
12171 /* If the function receives nonlocal gotos, it needs to save the frame
12172 pointer in the nonlocal_goto_save_area object. */
12173 if (cfun->has_nonlocal_label)
12174 return true;
12175
12176 /* In flat mode, that's it. */
12177 if (TARGET_FLAT)
12178 return false;
12179
12180 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12181 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12182 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12183 }
12184
12185 /* The way this is structured, we can't eliminate SFP in favor of SP
12186 if the frame pointer is required: we want to use the SFP->HFP elimination
12187 in that case. But the test in update_eliminables doesn't know we are
12188 assuming below that we only do the former elimination. */
12189
12190 static bool
12191 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12192 {
12193 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12194 }
12195
12196 /* Return the hard frame pointer directly to bypass the stack bias. */
12197
12198 static rtx
12199 sparc_builtin_setjmp_frame_value (void)
12200 {
12201 return hard_frame_pointer_rtx;
12202 }
12203
12204 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12205 they won't be allocated. */
12206
12207 static void
12208 sparc_conditional_register_usage (void)
12209 {
12210 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12211 {
12212 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12213 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12214 }
12215 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12216 then honor it. */
12217 if (TARGET_ARCH32 && fixed_regs[5])
12218 fixed_regs[5] = 1;
12219 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12220 fixed_regs[5] = 0;
12221 if (! TARGET_V9)
12222 {
12223 int regno;
12224 for (regno = SPARC_FIRST_V9_FP_REG;
12225 regno <= SPARC_LAST_V9_FP_REG;
12226 regno++)
12227 fixed_regs[regno] = 1;
12228 /* %fcc0 is used by v8 and v9. */
12229 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12230 regno <= SPARC_LAST_V9_FCC_REG;
12231 regno++)
12232 fixed_regs[regno] = 1;
12233 }
12234 if (! TARGET_FPU)
12235 {
12236 int regno;
12237 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12238 fixed_regs[regno] = 1;
12239 }
12240 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12241 then honor it. Likewise with g3 and g4. */
12242 if (fixed_regs[2] == 2)
12243 fixed_regs[2] = ! TARGET_APP_REGS;
12244 if (fixed_regs[3] == 2)
12245 fixed_regs[3] = ! TARGET_APP_REGS;
12246 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12247 fixed_regs[4] = ! TARGET_APP_REGS;
12248 else if (TARGET_CM_EMBMEDANY)
12249 fixed_regs[4] = 1;
12250 else if (fixed_regs[4] == 2)
12251 fixed_regs[4] = 0;
12252 if (TARGET_FLAT)
12253 {
12254 int regno;
12255 /* Disable leaf functions. */
12256 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12257 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12258 leaf_reg_remap [regno] = regno;
12259 }
12260 if (TARGET_VIS)
12261 global_regs[SPARC_GSR_REG] = 1;
12262 }
12263
12264 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12265
12266 - We can't load constants into FP registers.
12267 - We can't load FP constants into integer registers when soft-float,
12268 because there is no soft-float pattern with a r/F constraint.
12269 - We can't load FP constants into integer registers for TFmode unless
12270 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12271 - Try and reload integer constants (symbolic or otherwise) back into
12272 registers directly, rather than having them dumped to memory. */
12273
12274 static reg_class_t
12275 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12276 {
12277 machine_mode mode = GET_MODE (x);
12278 if (CONSTANT_P (x))
12279 {
12280 if (FP_REG_CLASS_P (rclass)
12281 || rclass == GENERAL_OR_FP_REGS
12282 || rclass == GENERAL_OR_EXTRA_FP_REGS
12283 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12284 || (mode == TFmode && ! const_zero_operand (x, mode)))
12285 return NO_REGS;
12286
12287 if (GET_MODE_CLASS (mode) == MODE_INT)
12288 return GENERAL_REGS;
12289
12290 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12291 {
12292 if (! FP_REG_CLASS_P (rclass)
12293 || !(const_zero_operand (x, mode)
12294 || const_all_ones_operand (x, mode)))
12295 return NO_REGS;
12296 }
12297 }
12298
12299 if (TARGET_VIS3
12300 && ! TARGET_ARCH64
12301 && (rclass == EXTRA_FP_REGS
12302 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12303 {
12304 int regno = true_regnum (x);
12305
12306 if (SPARC_INT_REG_P (regno))
12307 return (rclass == EXTRA_FP_REGS
12308 ? FP_REGS : GENERAL_OR_FP_REGS);
12309 }
12310
12311 return rclass;
12312 }
12313
12314 /* Return true if we use LRA instead of reload pass. */
12315
12316 static bool
12317 sparc_lra_p (void)
12318 {
12319 return TARGET_LRA;
12320 }
12321
12322 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12323 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12324
12325 const char *
12326 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12327 {
12328 char mulstr[32];
12329
12330 gcc_assert (! TARGET_ARCH64);
12331
12332 if (sparc_check_64 (operands[1], insn) <= 0)
12333 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12334 if (which_alternative == 1)
12335 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12336 if (GET_CODE (operands[2]) == CONST_INT)
12337 {
12338 if (which_alternative == 1)
12339 {
12340 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12341 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12342 output_asm_insn (mulstr, operands);
12343 return "srlx\t%L0, 32, %H0";
12344 }
12345 else
12346 {
12347 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12348 output_asm_insn ("or\t%L1, %3, %3", operands);
12349 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12350 output_asm_insn (mulstr, operands);
12351 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12352 return "mov\t%3, %L0";
12353 }
12354 }
12355 else if (rtx_equal_p (operands[1], operands[2]))
12356 {
12357 if (which_alternative == 1)
12358 {
12359 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12360 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12361 output_asm_insn (mulstr, operands);
12362 return "srlx\t%L0, 32, %H0";
12363 }
12364 else
12365 {
12366 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12367 output_asm_insn ("or\t%L1, %3, %3", operands);
12368 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12369 output_asm_insn (mulstr, operands);
12370 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12371 return "mov\t%3, %L0";
12372 }
12373 }
12374 if (sparc_check_64 (operands[2], insn) <= 0)
12375 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12376 if (which_alternative == 1)
12377 {
12378 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12379 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12380 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12381 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12382 output_asm_insn (mulstr, operands);
12383 return "srlx\t%L0, 32, %H0";
12384 }
12385 else
12386 {
12387 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12388 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12389 output_asm_insn ("or\t%L1, %3, %3", operands);
12390 output_asm_insn ("or\t%L2, %4, %4", operands);
12391 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12392 output_asm_insn (mulstr, operands);
12393 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12394 return "mov\t%3, %L0";
12395 }
12396 }
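
/* Sketch of the code emitted by output_v8plus_mult in the simplest case
   (constant multiplier, second alternative, 64-bit-clean low part), with
   "mulx" as OPCODE:

       sllx  %H1, 32, %H1
       or    %L1, %H1, %H1
       mulx  %H1, %2, %L0
       srlx  %L0, 32, %H0

   i.e. the 64-bit operand is reassembled in a single V8+ register, the
   multiply is done there, and the high word of the result is split back
   out.  */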
12397
12398 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12399 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12400 and INNER_MODE are the modes describing TARGET. */
12401
12402 static void
12403 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12404 machine_mode inner_mode)
12405 {
12406 rtx t1, final_insn, sel;
12407 int bmask;
12408
12409 t1 = gen_reg_rtx (mode);
12410
12411 elt = convert_modes (SImode, inner_mode, elt, true);
12412 emit_move_insn (gen_lowpart(SImode, t1), elt);
12413
12414 switch (mode)
12415 {
12416 case V2SImode:
12417 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12418 bmask = 0x45674567;
12419 break;
12420 case V4HImode:
12421 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12422 bmask = 0x67676767;
12423 break;
12424 case V8QImode:
12425 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12426 bmask = 0x77777777;
12427 break;
12428 default:
12429 gcc_unreachable ();
12430 }
12431
12432 sel = force_reg (SImode, GEN_INT (bmask));
12433 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12434 emit_insn (final_insn);
12435 }
12436
12437 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12438 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12439
12440 static void
12441 vector_init_fpmerge (rtx target, rtx elt)
12442 {
12443 rtx t1, t2, t2_low, t3, t3_low;
12444
12445 t1 = gen_reg_rtx (V4QImode);
12446 elt = convert_modes (SImode, QImode, elt, true);
12447 emit_move_insn (gen_lowpart (SImode, t1), elt);
12448
12449 t2 = gen_reg_rtx (V8QImode);
12450 t2_low = gen_lowpart (V4QImode, t2);
12451 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12452
12453 t3 = gen_reg_rtx (V8QImode);
12454 t3_low = gen_lowpart (V4QImode, t3);
12455 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12456
12457 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12458 }
12459
12460 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12461 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12462
12463 static void
12464 vector_init_faligndata (rtx target, rtx elt)
12465 {
12466 rtx t1 = gen_reg_rtx (V4HImode);
12467 int i;
12468
12469 elt = convert_modes (SImode, HImode, elt, true);
12470 emit_move_insn (gen_lowpart (SImode, t1), elt);
12471
12472 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12473 force_reg (SImode, GEN_INT (6)),
12474 const0_rtx));
12475
12476 for (i = 0; i < 4; i++)
12477 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12478 }
12479
12480 /* Emit code to initialize TARGET to values for individual fields VALS. */
12481
12482 void
12483 sparc_expand_vector_init (rtx target, rtx vals)
12484 {
12485 const machine_mode mode = GET_MODE (target);
12486 const machine_mode inner_mode = GET_MODE_INNER (mode);
12487 const int n_elts = GET_MODE_NUNITS (mode);
12488 int i, n_var = 0;
12489 bool all_same = true;
12490 rtx mem;
12491
12492 for (i = 0; i < n_elts; i++)
12493 {
12494 rtx x = XVECEXP (vals, 0, i);
12495 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12496 n_var++;
12497
12498 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12499 all_same = false;
12500 }
12501
12502 if (n_var == 0)
12503 {
12504 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12505 return;
12506 }
12507
12508 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12509 {
12510 if (GET_MODE_SIZE (inner_mode) == 4)
12511 {
12512 emit_move_insn (gen_lowpart (SImode, target),
12513 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12514 return;
12515 }
12516 else if (GET_MODE_SIZE (inner_mode) == 8)
12517 {
12518 emit_move_insn (gen_lowpart (DImode, target),
12519 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12520 return;
12521 }
12522 }
12523 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12524 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12525 {
12526 emit_move_insn (gen_highpart (word_mode, target),
12527 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12528 emit_move_insn (gen_lowpart (word_mode, target),
12529 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12530 return;
12531 }
12532
12533 if (all_same && GET_MODE_SIZE (mode) == 8)
12534 {
12535 if (TARGET_VIS2)
12536 {
12537 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12538 return;
12539 }
12540 if (mode == V8QImode)
12541 {
12542 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12543 return;
12544 }
12545 if (mode == V4HImode)
12546 {
12547 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12548 return;
12549 }
12550 }
12551
12552 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12553 for (i = 0; i < n_elts; i++)
12554 emit_move_insn (adjust_address_nv (mem, inner_mode,
12555 i * GET_MODE_SIZE (inner_mode)),
12556 XVECEXP (vals, 0, i));
12557 emit_move_insn (target, mem);
12558 }
12559
12560 /* Implement TARGET_SECONDARY_RELOAD. */
12561
12562 static reg_class_t
12563 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12564 machine_mode mode, secondary_reload_info *sri)
12565 {
12566 enum reg_class rclass = (enum reg_class) rclass_i;
12567
12568 sri->icode = CODE_FOR_nothing;
12569 sri->extra_cost = 0;
12570
12571 /* We need a temporary when loading/storing a HImode/QImode value
12572 between memory and the FPU registers. This can happen when combine puts
12573 a paradoxical subreg in a float/fix conversion insn. */
12574 if (FP_REG_CLASS_P (rclass)
12575 && (mode == HImode || mode == QImode)
12576 && (GET_CODE (x) == MEM
12577 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12578 && true_regnum (x) == -1)))
12579 return GENERAL_REGS;
12580
12581 /* On 32-bit we need a temporary when loading/storing a DFmode value
12582 between unaligned memory and the upper FPU registers. */
12583 if (TARGET_ARCH32
12584 && rclass == EXTRA_FP_REGS
12585 && mode == DFmode
12586 && GET_CODE (x) == MEM
12587 && ! mem_min_alignment (x, 8))
12588 return FP_REGS;
12589
12590 if (((TARGET_CM_MEDANY
12591 && symbolic_operand (x, mode))
12592 || (TARGET_CM_EMBMEDANY
12593 && text_segment_operand (x, mode)))
12594 && ! flag_pic)
12595 {
12596 if (in_p)
12597 sri->icode = direct_optab_handler (reload_in_optab, mode);
12598 else
12599 sri->icode = direct_optab_handler (reload_out_optab, mode);
12600 return NO_REGS;
12601 }
12602
12603 if (TARGET_VIS3 && TARGET_ARCH32)
12604 {
12605 int regno = true_regnum (x);
12606
12607 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12608 to move 8-byte values in 4-byte pieces. This only works via
12609 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12610 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12611 an FP_REGS intermediate move. */
12612 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12613 || ((general_or_i64_p (rclass)
12614 || rclass == GENERAL_OR_FP_REGS)
12615 && SPARC_FP_REG_P (regno)))
12616 {
12617 sri->extra_cost = 2;
12618 return FP_REGS;
12619 }
12620 }
12621
12622 return NO_REGS;
12623 }
12624
12625 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12626 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12627
12628 bool
12629 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12630 {
12631 enum rtx_code rc = GET_CODE (operands[1]);
12632 machine_mode cmp_mode;
12633 rtx cc_reg, dst, cmp;
12634
12635 cmp = operands[1];
12636 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12637 return false;
12638
12639 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12640 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12641
12642 cmp_mode = GET_MODE (XEXP (cmp, 0));
12643 rc = GET_CODE (cmp);
12644
12645 dst = operands[0];
12646 if (! rtx_equal_p (operands[2], dst)
12647 && ! rtx_equal_p (operands[3], dst))
12648 {
12649 if (reg_overlap_mentioned_p (dst, cmp))
12650 dst = gen_reg_rtx (mode);
12651
12652 emit_move_insn (dst, operands[3]);
12653 }
12654 else if (operands[2] == dst)
12655 {
12656 operands[2] = operands[3];
12657
12658 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12659 rc = reverse_condition_maybe_unordered (rc);
12660 else
12661 rc = reverse_condition (rc);
12662 }
12663
12664 if (XEXP (cmp, 1) == const0_rtx
12665 && GET_CODE (XEXP (cmp, 0)) == REG
12666 && cmp_mode == DImode
12667 && v9_regcmp_p (rc))
12668 cc_reg = XEXP (cmp, 0);
12669 else
12670 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12671
12672 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12673
12674 emit_insn (gen_rtx_SET (dst,
12675 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12676
12677 if (dst != operands[0])
12678 emit_move_insn (operands[0], dst);
12679
12680 return true;
12681 }
12682
12683 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12684 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12685 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12686 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12687 code to be used for the condition mask. */
12688
12689 void
12690 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12691 {
12692 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12693 enum rtx_code code = GET_CODE (operands[3]);
12694
12695 mask = gen_reg_rtx (Pmode);
12696 cop0 = operands[4];
12697 cop1 = operands[5];
12698 if (code == LT || code == GE)
12699 {
12700 rtx t;
12701
12702 code = swap_condition (code);
12703 t = cop0; cop0 = cop1; cop1 = t;
12704 }
12705
12706 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12707
12708 fcmp = gen_rtx_UNSPEC (Pmode,
12709 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12710 fcode);
12711
12712 cmask = gen_rtx_UNSPEC (DImode,
12713 gen_rtvec (2, mask, gsr),
12714 ccode);
12715
12716 bshuf = gen_rtx_UNSPEC (mode,
12717 gen_rtvec (3, operands[1], operands[2], gsr),
12718 UNSPEC_BSHUFFLE);
12719
12720 emit_insn (gen_rtx_SET (mask, fcmp));
12721 emit_insn (gen_rtx_SET (gsr, cmask));
12722
12723 emit_insn (gen_rtx_SET (operands[0], bshuf));
12724 }
12725
12726 /* On sparc, any mode which naturally allocates into the float
12727 registers should return 4 here. */
12728
12729 unsigned int
12730 sparc_regmode_natural_size (machine_mode mode)
12731 {
12732 int size = UNITS_PER_WORD;
12733
12734 if (TARGET_ARCH64)
12735 {
12736 enum mode_class mclass = GET_MODE_CLASS (mode);
12737
12738 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12739 size = 4;
12740 }
12741
12742 return size;
12743 }
12744
12745 /* Return TRUE if it is a good idea to tie two pseudo registers
12746 when one has mode MODE1 and one has mode MODE2.
12747 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12748 for any hard reg, then this must be FALSE for correct output.
12749
12750 For V9 we have to deal with the fact that only the lower 32 floating
12751 point registers are 32-bit addressable. */
12752
12753 bool
12754 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12755 {
12756 enum mode_class mclass1, mclass2;
12757 unsigned short size1, size2;
12758
12759 if (mode1 == mode2)
12760 return true;
12761
12762 mclass1 = GET_MODE_CLASS (mode1);
12763 mclass2 = GET_MODE_CLASS (mode2);
12764 if (mclass1 != mclass2)
12765 return false;
12766
12767 if (! TARGET_V9)
12768 return true;
12769
12770 /* The classes are the same and we are on V9, so we have to deal with upper
12771 vs. lower floating point registers. If one of the modes is a
12772 4-byte mode, and the other is not, we have to mark them as not
12773 tieable, because only the lower 32 floating point registers are
12774 addressable 32 bits at a time.
12775
12776 We can't just test explicitly for SFmode, otherwise we won't
12777 cover the vector mode cases properly. */
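/* For example, under V9 an SFmode pseudo must not be tied to a DFmode pseudo:
   the DFmode value could land in one of the upper float registers
   (%f32-%f62), which have no 4-byte access.  */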
12778
12779 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12780 return true;
12781
12782 size1 = GET_MODE_SIZE (mode1);
12783 size2 = GET_MODE_SIZE (mode2);
12784 if ((size1 > 4 && size2 == 4)
12785 || (size2 > 4 && size1 == 4))
12786 return false;
12787
12788 return true;
12789 }
12790
12791 /* Implement TARGET_CSTORE_MODE. */
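/* (The 0/1 result of a compare-and-store (scc) pattern is produced in the
   full register width: DImode under -m64, SImode under -m32.)  */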
12792
12793 static machine_mode
12794 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12795 {
12796 return (TARGET_ARCH64 ? DImode : SImode);
12797 }
12798
12799 /* Return the compound expression made of T1 and T2. */
12800
12801 static inline tree
12802 compound_expr (tree t1, tree t2)
12803 {
12804 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12805 }
12806
12807 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12808
12809 static void
12810 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12811 {
12812 if (!TARGET_FPU)
12813 return;
12814
12815 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12816 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
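/* These are the accrued-exception (aexc, bits 9:5) and trap-enable (TEM,
   bits 27:23) fields of the %fsr register.  */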
12817
12818 /* We generate the equivalent of feholdexcept (&fenv_var):
12819
12820 unsigned int fenv_var;
12821 __builtin_store_fsr (&fenv_var);
12822
12823 unsigned int tmp1_var;
12824 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12825
12826 __builtin_load_fsr (&tmp1_var); */
12827
12828 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12829 TREE_ADDRESSABLE (fenv_var) = 1;
12830 tree fenv_addr = build_fold_addr_expr (fenv_var);
12831 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12832 tree hold_stfsr
12833 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12834 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12835
12836 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12837 TREE_ADDRESSABLE (tmp1_var) = 1;
12838 tree masked_fenv_var
12839 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12840 build_int_cst (unsigned_type_node,
12841 ~(accrued_exception_mask | trap_enable_mask)));
12842 tree hold_mask
12843 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12844 NULL_TREE, NULL_TREE);
12845
12846 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12847 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12848 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12849
12850 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12851
12852 /* We reload the value of tmp1_var to clear the exceptions:
12853
12854 __builtin_load_fsr (&tmp1_var); */
12855
12856 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12857
12858 /* We generate the equivalent of feupdateenv (&fenv_var):
12859
12860 unsigned int tmp2_var;
12861 __builtin_store_fsr (&tmp2_var);
12862
12863 __builtin_load_fsr (&fenv_var);
12864
12865 if (SPARC_LOW_FE_EXCEPT_VALUES)
12866 tmp2_var >>= 5;
12867 __atomic_feraiseexcept ((int) tmp2_var); */
12868
12869 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12870 TREE_ADDRESSABLE (tmp2_var) = 1;
12871 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12872 tree update_stfsr
12873 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12874 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12875
12876 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12877
12878 tree atomic_feraiseexcept
12879 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12880 tree update_call
12881 = build_call_expr (atomic_feraiseexcept, 1,
12882 fold_convert (integer_type_node, tmp2_var));
12883
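/* Where the OS defines the FE_* exception macros with low-order values rather
   than at the FSR accrued-exception bit positions, shift the saved FSR down
   by 5 so that __atomic_feraiseexcept sees the values it expects.  */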
12884 if (SPARC_LOW_FE_EXCEPT_VALUES)
12885 {
12886 tree shifted_tmp2_var
12887 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12888 build_int_cst (unsigned_type_node, 5));
12889 tree update_shift
12890 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12891 update_call = compound_expr (update_shift, update_call);
12892 }
12893
12894 *update
12895 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12896 }
12897
12898 #include "gt-sparc.h"