1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2016 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "debug.h"
51 #include "common/common-target.h"
52 #include "gimplify.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "context.h"
58 #include "builtins.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Processor costs */
64
65 struct processor_costs {
66 /* Integer load */
67 const int int_load;
68
69 /* Integer signed load */
70 const int int_sload;
71
72 /* Integer zeroed load */
73 const int int_zload;
74
75 /* Float load */
76 const int float_load;
77
78 /* fmov, fneg, fabs */
79 const int float_move;
80
81 /* fadd, fsub */
82 const int float_plusminus;
83
84 /* fcmp */
85 const int float_cmp;
86
87 /* fmov, fmovr */
88 const int float_cmove;
89
90 /* fmul */
91 const int float_mul;
92
93 /* fdivs */
94 const int float_div_sf;
95
96 /* fdivd */
97 const int float_div_df;
98
99 /* fsqrts */
100 const int float_sqrt_sf;
101
102 /* fsqrtd */
103 const int float_sqrt_df;
104
105 /* umul/smul */
106 const int int_mul;
107
108 /* mulX */
109 const int int_mulX;
110
111 /* integer multiply cost for each bit set past the most
112 significant 3, so the formula for multiply cost becomes:
113
114 if (rs1 < 0)
115 highest_bit = highest_clear_bit(rs1);
116 else
117 highest_bit = highest_set_bit(rs1);
118 if (highest_bit < 3)
119 highest_bit = 3;
120 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
121
122 A value of zero indicates that the multiply cost is fixed and
123 not variable (see the worked example after this struct).  */
124 const int int_mul_bit_factor;
125
126 /* udiv/sdiv */
127 const int int_div;
128
129 /* divX */
130 const int int_divX;
131
132 /* movcc, movr */
133 const int int_cmove;
134
135 /* penalty for shifts, due to scheduling rules etc. */
136 const int shift_penalty;
137 };
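/* Worked example of the int_mul_bit_factor formula above, using the
   ultrasparc_costs entry below (int_mul = COSTS_N_INSNS (4),
   int_mul_bit_factor = 2): a multiply whose rs1 operand has its highest
   set bit at position 11 is costed as COSTS_N_INSNS (4) + (11 - 3) / 2,
   i.e. 4 units above the base multiply cost.  */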
138
139 static const
140 struct processor_costs cypress_costs = {
141 COSTS_N_INSNS (2), /* int load */
142 COSTS_N_INSNS (2), /* int signed load */
143 COSTS_N_INSNS (2), /* int zeroed load */
144 COSTS_N_INSNS (2), /* float load */
145 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
146 COSTS_N_INSNS (5), /* fadd, fsub */
147 COSTS_N_INSNS (1), /* fcmp */
148 COSTS_N_INSNS (1), /* fmov, fmovr */
149 COSTS_N_INSNS (7), /* fmul */
150 COSTS_N_INSNS (37), /* fdivs */
151 COSTS_N_INSNS (37), /* fdivd */
152 COSTS_N_INSNS (63), /* fsqrts */
153 COSTS_N_INSNS (63), /* fsqrtd */
154 COSTS_N_INSNS (1), /* imul */
155 COSTS_N_INSNS (1), /* imulX */
156 0, /* imul bit factor */
157 COSTS_N_INSNS (1), /* idiv */
158 COSTS_N_INSNS (1), /* idivX */
159 COSTS_N_INSNS (1), /* movcc/movr */
160 0, /* shift penalty */
161 };
162
163 static const
164 struct processor_costs supersparc_costs = {
165 COSTS_N_INSNS (1), /* int load */
166 COSTS_N_INSNS (1), /* int signed load */
167 COSTS_N_INSNS (1), /* int zeroed load */
168 COSTS_N_INSNS (0), /* float load */
169 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
170 COSTS_N_INSNS (3), /* fadd, fsub */
171 COSTS_N_INSNS (3), /* fcmp */
172 COSTS_N_INSNS (1), /* fmov, fmovr */
173 COSTS_N_INSNS (3), /* fmul */
174 COSTS_N_INSNS (6), /* fdivs */
175 COSTS_N_INSNS (9), /* fdivd */
176 COSTS_N_INSNS (12), /* fsqrts */
177 COSTS_N_INSNS (12), /* fsqrtd */
178 COSTS_N_INSNS (4), /* imul */
179 COSTS_N_INSNS (4), /* imulX */
180 0, /* imul bit factor */
181 COSTS_N_INSNS (4), /* idiv */
182 COSTS_N_INSNS (4), /* idivX */
183 COSTS_N_INSNS (1), /* movcc/movr */
184 1, /* shift penalty */
185 };
186
187 static const
188 struct processor_costs hypersparc_costs = {
189 COSTS_N_INSNS (1), /* int load */
190 COSTS_N_INSNS (1), /* int signed load */
191 COSTS_N_INSNS (1), /* int zeroed load */
192 COSTS_N_INSNS (1), /* float load */
193 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
194 COSTS_N_INSNS (1), /* fadd, fsub */
195 COSTS_N_INSNS (1), /* fcmp */
196 COSTS_N_INSNS (1), /* fmov, fmovr */
197 COSTS_N_INSNS (1), /* fmul */
198 COSTS_N_INSNS (8), /* fdivs */
199 COSTS_N_INSNS (12), /* fdivd */
200 COSTS_N_INSNS (17), /* fsqrts */
201 COSTS_N_INSNS (17), /* fsqrtd */
202 COSTS_N_INSNS (17), /* imul */
203 COSTS_N_INSNS (17), /* imulX */
204 0, /* imul bit factor */
205 COSTS_N_INSNS (17), /* idiv */
206 COSTS_N_INSNS (17), /* idivX */
207 COSTS_N_INSNS (1), /* movcc/movr */
208 0, /* shift penalty */
209 };
210
211 static const
212 struct processor_costs leon_costs = {
213 COSTS_N_INSNS (1), /* int load */
214 COSTS_N_INSNS (1), /* int signed load */
215 COSTS_N_INSNS (1), /* int zeroed load */
216 COSTS_N_INSNS (1), /* float load */
217 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
218 COSTS_N_INSNS (1), /* fadd, fsub */
219 COSTS_N_INSNS (1), /* fcmp */
220 COSTS_N_INSNS (1), /* fmov, fmovr */
221 COSTS_N_INSNS (1), /* fmul */
222 COSTS_N_INSNS (15), /* fdivs */
223 COSTS_N_INSNS (15), /* fdivd */
224 COSTS_N_INSNS (23), /* fsqrts */
225 COSTS_N_INSNS (23), /* fsqrtd */
226 COSTS_N_INSNS (5), /* imul */
227 COSTS_N_INSNS (5), /* imulX */
228 0, /* imul bit factor */
229 COSTS_N_INSNS (5), /* idiv */
230 COSTS_N_INSNS (5), /* idivX */
231 COSTS_N_INSNS (1), /* movcc/movr */
232 0, /* shift penalty */
233 };
234
235 static const
236 struct processor_costs leon3_costs = {
237 COSTS_N_INSNS (1), /* int load */
238 COSTS_N_INSNS (1), /* int signed load */
239 COSTS_N_INSNS (1), /* int zeroed load */
240 COSTS_N_INSNS (1), /* float load */
241 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
242 COSTS_N_INSNS (1), /* fadd, fsub */
243 COSTS_N_INSNS (1), /* fcmp */
244 COSTS_N_INSNS (1), /* fmov, fmovr */
245 COSTS_N_INSNS (1), /* fmul */
246 COSTS_N_INSNS (14), /* fdivs */
247 COSTS_N_INSNS (15), /* fdivd */
248 COSTS_N_INSNS (22), /* fsqrts */
249 COSTS_N_INSNS (23), /* fsqrtd */
250 COSTS_N_INSNS (5), /* imul */
251 COSTS_N_INSNS (5), /* imulX */
252 0, /* imul bit factor */
253 COSTS_N_INSNS (35), /* idiv */
254 COSTS_N_INSNS (35), /* idivX */
255 COSTS_N_INSNS (1), /* movcc/movr */
256 0, /* shift penalty */
257 };
258
259 static const
260 struct processor_costs sparclet_costs = {
261 COSTS_N_INSNS (3), /* int load */
262 COSTS_N_INSNS (3), /* int signed load */
263 COSTS_N_INSNS (1), /* int zeroed load */
264 COSTS_N_INSNS (1), /* float load */
265 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
266 COSTS_N_INSNS (1), /* fadd, fsub */
267 COSTS_N_INSNS (1), /* fcmp */
268 COSTS_N_INSNS (1), /* fmov, fmovr */
269 COSTS_N_INSNS (1), /* fmul */
270 COSTS_N_INSNS (1), /* fdivs */
271 COSTS_N_INSNS (1), /* fdivd */
272 COSTS_N_INSNS (1), /* fsqrts */
273 COSTS_N_INSNS (1), /* fsqrtd */
274 COSTS_N_INSNS (5), /* imul */
275 COSTS_N_INSNS (5), /* imulX */
276 0, /* imul bit factor */
277 COSTS_N_INSNS (5), /* idiv */
278 COSTS_N_INSNS (5), /* idivX */
279 COSTS_N_INSNS (1), /* movcc/movr */
280 0, /* shift penalty */
281 };
282
283 static const
284 struct processor_costs ultrasparc_costs = {
285 COSTS_N_INSNS (2), /* int load */
286 COSTS_N_INSNS (3), /* int signed load */
287 COSTS_N_INSNS (2), /* int zeroed load */
288 COSTS_N_INSNS (2), /* float load */
289 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
290 COSTS_N_INSNS (4), /* fadd, fsub */
291 COSTS_N_INSNS (1), /* fcmp */
292 COSTS_N_INSNS (2), /* fmov, fmovr */
293 COSTS_N_INSNS (4), /* fmul */
294 COSTS_N_INSNS (13), /* fdivs */
295 COSTS_N_INSNS (23), /* fdivd */
296 COSTS_N_INSNS (13), /* fsqrts */
297 COSTS_N_INSNS (23), /* fsqrtd */
298 COSTS_N_INSNS (4), /* imul */
299 COSTS_N_INSNS (4), /* imulX */
300 2, /* imul bit factor */
301 COSTS_N_INSNS (37), /* idiv */
302 COSTS_N_INSNS (68), /* idivX */
303 COSTS_N_INSNS (2), /* movcc/movr */
304 2, /* shift penalty */
305 };
306
307 static const
308 struct processor_costs ultrasparc3_costs = {
309 COSTS_N_INSNS (2), /* int load */
310 COSTS_N_INSNS (3), /* int signed load */
311 COSTS_N_INSNS (3), /* int zeroed load */
312 COSTS_N_INSNS (2), /* float load */
313 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
314 COSTS_N_INSNS (4), /* fadd, fsub */
315 COSTS_N_INSNS (5), /* fcmp */
316 COSTS_N_INSNS (3), /* fmov, fmovr */
317 COSTS_N_INSNS (4), /* fmul */
318 COSTS_N_INSNS (17), /* fdivs */
319 COSTS_N_INSNS (20), /* fdivd */
320 COSTS_N_INSNS (20), /* fsqrts */
321 COSTS_N_INSNS (29), /* fsqrtd */
322 COSTS_N_INSNS (6), /* imul */
323 COSTS_N_INSNS (6), /* imulX */
324 0, /* imul bit factor */
325 COSTS_N_INSNS (40), /* idiv */
326 COSTS_N_INSNS (71), /* idivX */
327 COSTS_N_INSNS (2), /* movcc/movr */
328 0, /* shift penalty */
329 };
330
331 static const
332 struct processor_costs niagara_costs = {
333 COSTS_N_INSNS (3), /* int load */
334 COSTS_N_INSNS (3), /* int signed load */
335 COSTS_N_INSNS (3), /* int zeroed load */
336 COSTS_N_INSNS (9), /* float load */
337 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
338 COSTS_N_INSNS (8), /* fadd, fsub */
339 COSTS_N_INSNS (26), /* fcmp */
340 COSTS_N_INSNS (8), /* fmov, fmovr */
341 COSTS_N_INSNS (29), /* fmul */
342 COSTS_N_INSNS (54), /* fdivs */
343 COSTS_N_INSNS (83), /* fdivd */
344 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
345 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
346 COSTS_N_INSNS (11), /* imul */
347 COSTS_N_INSNS (11), /* imulX */
348 0, /* imul bit factor */
349 COSTS_N_INSNS (72), /* idiv */
350 COSTS_N_INSNS (72), /* idivX */
351 COSTS_N_INSNS (1), /* movcc/movr */
352 0, /* shift penalty */
353 };
354
355 static const
356 struct processor_costs niagara2_costs = {
357 COSTS_N_INSNS (3), /* int load */
358 COSTS_N_INSNS (3), /* int signed load */
359 COSTS_N_INSNS (3), /* int zeroed load */
360 COSTS_N_INSNS (3), /* float load */
361 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
362 COSTS_N_INSNS (6), /* fadd, fsub */
363 COSTS_N_INSNS (6), /* fcmp */
364 COSTS_N_INSNS (6), /* fmov, fmovr */
365 COSTS_N_INSNS (6), /* fmul */
366 COSTS_N_INSNS (19), /* fdivs */
367 COSTS_N_INSNS (33), /* fdivd */
368 COSTS_N_INSNS (19), /* fsqrts */
369 COSTS_N_INSNS (33), /* fsqrtd */
370 COSTS_N_INSNS (5), /* imul */
371 COSTS_N_INSNS (5), /* imulX */
372 0, /* imul bit factor */
373 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
374 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (1), /* movcc/movr */
376 0, /* shift penalty */
377 };
378
379 static const
380 struct processor_costs niagara3_costs = {
381 COSTS_N_INSNS (3), /* int load */
382 COSTS_N_INSNS (3), /* int signed load */
383 COSTS_N_INSNS (3), /* int zeroed load */
384 COSTS_N_INSNS (3), /* float load */
385 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
386 COSTS_N_INSNS (9), /* fadd, fsub */
387 COSTS_N_INSNS (9), /* fcmp */
388 COSTS_N_INSNS (9), /* fmov, fmovr */
389 COSTS_N_INSNS (9), /* fmul */
390 COSTS_N_INSNS (23), /* fdivs */
391 COSTS_N_INSNS (37), /* fdivd */
392 COSTS_N_INSNS (23), /* fsqrts */
393 COSTS_N_INSNS (37), /* fsqrtd */
394 COSTS_N_INSNS (9), /* imul */
395 COSTS_N_INSNS (9), /* imulX */
396 0, /* imul bit factor */
397 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
398 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
399 COSTS_N_INSNS (1), /* movcc/movr */
400 0, /* shift penalty */
401 };
402
403 static const
404 struct processor_costs niagara4_costs = {
405 COSTS_N_INSNS (5), /* int load */
406 COSTS_N_INSNS (5), /* int signed load */
407 COSTS_N_INSNS (5), /* int zeroed load */
408 COSTS_N_INSNS (5), /* float load */
409 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
410 COSTS_N_INSNS (11), /* fadd, fsub */
411 COSTS_N_INSNS (11), /* fcmp */
412 COSTS_N_INSNS (11), /* fmov, fmovr */
413 COSTS_N_INSNS (11), /* fmul */
414 COSTS_N_INSNS (24), /* fdivs */
415 COSTS_N_INSNS (37), /* fdivd */
416 COSTS_N_INSNS (24), /* fsqrts */
417 COSTS_N_INSNS (37), /* fsqrtd */
418 COSTS_N_INSNS (12), /* imul */
419 COSTS_N_INSNS (12), /* imulX */
420 0, /* imul bit factor */
421 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
422 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
423 COSTS_N_INSNS (1), /* movcc/movr */
424 0, /* shift penalty */
425 };
426
427 static const
428 struct processor_costs niagara7_costs = {
429 COSTS_N_INSNS (5), /* int load */
430 COSTS_N_INSNS (5), /* int signed load */
431 COSTS_N_INSNS (5), /* int zeroed load */
432 COSTS_N_INSNS (5), /* float load */
433 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
434 COSTS_N_INSNS (11), /* fadd, fsub */
435 COSTS_N_INSNS (11), /* fcmp */
436 COSTS_N_INSNS (11), /* fmov, fmovr */
437 COSTS_N_INSNS (11), /* fmul */
438 COSTS_N_INSNS (24), /* fdivs */
439 COSTS_N_INSNS (37), /* fdivd */
440 COSTS_N_INSNS (24), /* fsqrts */
441 COSTS_N_INSNS (37), /* fsqrtd */
442 COSTS_N_INSNS (12), /* imul */
443 COSTS_N_INSNS (12), /* imulX */
444 0, /* imul bit factor */
445 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
446 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
447 COSTS_N_INSNS (1), /* movcc/movr */
448 0, /* shift penalty */
449 };
450
451 static const struct processor_costs *sparc_costs = &cypress_costs;
452
453 #ifdef HAVE_AS_RELAX_OPTION
454 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
455 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
456 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
457 whether somebody branches between the sethi and the jmp.  */
458 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
459 #else
460 #define LEAF_SIBCALL_SLOT_RESERVED_P \
461 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
462 #endif
463
464 /* Vector to say how input registers are mapped to output registers.
465 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
466 eliminate it. You must use -fomit-frame-pointer to get that. */
467 char leaf_reg_remap[] =
468 { 0, 1, 2, 3, 4, 5, 6, 7,
469 -1, -1, -1, -1, -1, -1, 14, -1,
470 -1, -1, -1, -1, -1, -1, -1, -1,
471 8, 9, 10, 11, 12, 13, -1, 15,
472
473 32, 33, 34, 35, 36, 37, 38, 39,
474 40, 41, 42, 43, 44, 45, 46, 47,
475 48, 49, 50, 51, 52, 53, 54, 55,
476 56, 57, 58, 59, 60, 61, 62, 63,
477 64, 65, 66, 67, 68, 69, 70, 71,
478 72, 73, 74, 75, 76, 77, 78, 79,
479 80, 81, 82, 83, 84, 85, 86, 87,
480 88, 89, 90, 91, 92, 93, 94, 95,
481 96, 97, 98, 99, 100, 101, 102};
482
483 /* Vector, indexed by hard register number, which contains 1
484 for a register that is allowable in a candidate for leaf
485 function treatment. */
486 char sparc_leaf_regs[] =
487 { 1, 1, 1, 1, 1, 1, 1, 1,
488 0, 0, 0, 0, 0, 0, 1, 0,
489 0, 0, 0, 0, 0, 0, 0, 0,
490 1, 1, 1, 1, 1, 1, 0, 1,
491 1, 1, 1, 1, 1, 1, 1, 1,
492 1, 1, 1, 1, 1, 1, 1, 1,
493 1, 1, 1, 1, 1, 1, 1, 1,
494 1, 1, 1, 1, 1, 1, 1, 1,
495 1, 1, 1, 1, 1, 1, 1, 1,
496 1, 1, 1, 1, 1, 1, 1, 1,
497 1, 1, 1, 1, 1, 1, 1, 1,
498 1, 1, 1, 1, 1, 1, 1, 1,
499 1, 1, 1, 1, 1, 1, 1};
500
501 struct GTY(()) machine_function
502 {
503 /* Size of the frame of the function. */
504 HOST_WIDE_INT frame_size;
505
506 /* Size of the frame of the function minus the register window save area
507 and the outgoing argument area. */
508 HOST_WIDE_INT apparent_frame_size;
509
510 /* Register we pretend the frame pointer is allocated to. Normally, this
511 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
512 record "offset" separately as it may be too big for (reg + disp). */
513 rtx frame_base_reg;
514 HOST_WIDE_INT frame_base_offset;
515
516 /* Number of global or FP registers to be saved (as 4-byte quantities). */
517 int n_global_fp_regs;
518
519 /* True if the current function is leaf and uses only leaf regs,
520 so that the SPARC leaf function optimization can be applied.
521 Private version of crtl->uses_only_leaf_regs, see
522 sparc_expand_prologue for the rationale. */
523 int leaf_function_p;
524
525 /* True if the prologue saves local or in registers. */
526 bool save_local_in_regs_p;
527
528 /* True if the data calculated by sparc_expand_prologue are valid. */
529 bool prologue_data_valid_p;
530 };
531
532 #define sparc_frame_size cfun->machine->frame_size
533 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
534 #define sparc_frame_base_reg cfun->machine->frame_base_reg
535 #define sparc_frame_base_offset cfun->machine->frame_base_offset
536 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
537 #define sparc_leaf_function_p cfun->machine->leaf_function_p
538 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
539 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
540
541 /* 1 if the next opcode is to be specially indented. */
542 int sparc_indent_opcode = 0;
543
544 static void sparc_option_override (void);
545 static void sparc_init_modes (void);
546 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
547 const_tree, bool, bool, int *, int *);
548
549 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
550 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
551
552 static void sparc_emit_set_const32 (rtx, rtx);
553 static void sparc_emit_set_const64 (rtx, rtx);
554 static void sparc_output_addr_vec (rtx);
555 static void sparc_output_addr_diff_vec (rtx);
556 static void sparc_output_deferred_case_vectors (void);
557 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
558 static bool sparc_legitimate_constant_p (machine_mode, rtx);
559 static rtx sparc_builtin_saveregs (void);
560 static int epilogue_renumber (rtx *, int);
561 static bool sparc_assemble_integer (rtx, unsigned int, int);
562 static int set_extends (rtx_insn *);
563 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
564 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
565 #ifdef TARGET_SOLARIS
566 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
567 tree) ATTRIBUTE_UNUSED;
568 #endif
569 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
570 static int sparc_issue_rate (void);
571 static void sparc_sched_init (FILE *, int, int);
572 static int sparc_use_sched_lookahead (void);
573
574 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
575 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
576 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
577 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
578 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
579
580 static bool sparc_function_ok_for_sibcall (tree, tree);
581 static void sparc_init_libfuncs (void);
582 static void sparc_init_builtins (void);
583 static void sparc_fpu_init_builtins (void);
584 static void sparc_vis_init_builtins (void);
585 static tree sparc_builtin_decl (unsigned, bool);
586 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
587 static tree sparc_fold_builtin (tree, int, tree *, bool);
588 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
589 HOST_WIDE_INT, tree);
590 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
591 HOST_WIDE_INT, const_tree);
592 static struct machine_function * sparc_init_machine_status (void);
593 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
594 static rtx sparc_tls_get_addr (void);
595 static rtx sparc_tls_got (void);
596 static int sparc_register_move_cost (machine_mode,
597 reg_class_t, reg_class_t);
598 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
599 static rtx sparc_function_value (const_tree, const_tree, bool);
600 static rtx sparc_libcall_value (machine_mode, const_rtx);
601 static bool sparc_function_value_regno_p (const unsigned int);
602 static rtx sparc_struct_value_rtx (tree, int);
603 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
604 int *, const_tree, int);
605 static bool sparc_return_in_memory (const_tree, const_tree);
606 static bool sparc_strict_argument_naming (cumulative_args_t);
607 static void sparc_va_start (tree, rtx);
608 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
609 static bool sparc_vector_mode_supported_p (machine_mode);
610 static bool sparc_tls_referenced_p (rtx);
611 static rtx sparc_legitimize_tls_address (rtx);
612 static rtx sparc_legitimize_pic_address (rtx, rtx);
613 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
614 static rtx sparc_delegitimize_address (rtx);
615 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
616 static bool sparc_pass_by_reference (cumulative_args_t,
617 machine_mode, const_tree, bool);
618 static void sparc_function_arg_advance (cumulative_args_t,
619 machine_mode, const_tree, bool);
620 static rtx sparc_function_arg_1 (cumulative_args_t,
621 machine_mode, const_tree, bool, bool);
622 static rtx sparc_function_arg (cumulative_args_t,
623 machine_mode, const_tree, bool);
624 static rtx sparc_function_incoming_arg (cumulative_args_t,
625 machine_mode, const_tree, bool);
626 static unsigned int sparc_function_arg_boundary (machine_mode,
627 const_tree);
628 static int sparc_arg_partial_bytes (cumulative_args_t,
629 machine_mode, tree, bool);
630 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
631 static void sparc_file_end (void);
632 static bool sparc_frame_pointer_required (void);
633 static bool sparc_can_eliminate (const int, const int);
634 static rtx sparc_builtin_setjmp_frame_value (void);
635 static void sparc_conditional_register_usage (void);
636 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
637 static const char *sparc_mangle_type (const_tree);
638 #endif
639 static void sparc_trampoline_init (rtx, tree, rtx);
640 static machine_mode sparc_preferred_simd_mode (machine_mode);
641 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
642 static bool sparc_print_operand_punct_valid_p (unsigned char);
643 static void sparc_print_operand (FILE *, rtx, int);
644 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
645 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
646 machine_mode,
647 secondary_reload_info *);
648 static machine_mode sparc_cstore_mode (enum insn_code icode);
649 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
650 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
651 \f
652 #ifdef SUBTARGET_ATTRIBUTE_TABLE
653 /* Table of valid machine attributes. */
654 static const struct attribute_spec sparc_attribute_table[] =
655 {
656 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
657 do_diagnostic } */
658 SUBTARGET_ATTRIBUTE_TABLE,
659 { NULL, 0, 0, false, false, false, NULL, false }
660 };
661 #endif
662 \f
663 /* Option handling. */
664
665 /* Parsed value. */
666 enum cmodel sparc_cmodel;
667
668 char sparc_hard_reg_printed[8];
669
670 /* Initialize the GCC target structure. */
671
672 /* The default is to use .half rather than .short for aligned HI objects. */
673 #undef TARGET_ASM_ALIGNED_HI_OP
674 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
675
676 #undef TARGET_ASM_UNALIGNED_HI_OP
677 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
678 #undef TARGET_ASM_UNALIGNED_SI_OP
679 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
680 #undef TARGET_ASM_UNALIGNED_DI_OP
681 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
682
683 /* The target hook has to handle DI-mode values. */
684 #undef TARGET_ASM_INTEGER
685 #define TARGET_ASM_INTEGER sparc_assemble_integer
686
687 #undef TARGET_ASM_FUNCTION_PROLOGUE
688 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
689 #undef TARGET_ASM_FUNCTION_EPILOGUE
690 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
691
692 #undef TARGET_SCHED_ADJUST_COST
693 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
694 #undef TARGET_SCHED_ISSUE_RATE
695 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
696 #undef TARGET_SCHED_INIT
697 #define TARGET_SCHED_INIT sparc_sched_init
698 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
699 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
700
701 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
702 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
703
704 #undef TARGET_INIT_LIBFUNCS
705 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
706
707 #undef TARGET_LEGITIMIZE_ADDRESS
708 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
709 #undef TARGET_DELEGITIMIZE_ADDRESS
710 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
711 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
712 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
713
714 #undef TARGET_INIT_BUILTINS
715 #define TARGET_INIT_BUILTINS sparc_init_builtins
716 #undef TARGET_BUILTIN_DECL
717 #define TARGET_BUILTIN_DECL sparc_builtin_decl
718 #undef TARGET_EXPAND_BUILTIN
719 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
720 #undef TARGET_FOLD_BUILTIN
721 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
722
723 #if TARGET_TLS
724 #undef TARGET_HAVE_TLS
725 #define TARGET_HAVE_TLS true
726 #endif
727
728 #undef TARGET_CANNOT_FORCE_CONST_MEM
729 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
730
731 #undef TARGET_ASM_OUTPUT_MI_THUNK
732 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
733 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
734 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
735
736 #undef TARGET_RTX_COSTS
737 #define TARGET_RTX_COSTS sparc_rtx_costs
738 #undef TARGET_ADDRESS_COST
739 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
740 #undef TARGET_REGISTER_MOVE_COST
741 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
742
743 #undef TARGET_PROMOTE_FUNCTION_MODE
744 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
745
746 #undef TARGET_FUNCTION_VALUE
747 #define TARGET_FUNCTION_VALUE sparc_function_value
748 #undef TARGET_LIBCALL_VALUE
749 #define TARGET_LIBCALL_VALUE sparc_libcall_value
750 #undef TARGET_FUNCTION_VALUE_REGNO_P
751 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
752
753 #undef TARGET_STRUCT_VALUE_RTX
754 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
755 #undef TARGET_RETURN_IN_MEMORY
756 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
757 #undef TARGET_MUST_PASS_IN_STACK
758 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
759 #undef TARGET_PASS_BY_REFERENCE
760 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
761 #undef TARGET_ARG_PARTIAL_BYTES
762 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
763 #undef TARGET_FUNCTION_ARG_ADVANCE
764 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
765 #undef TARGET_FUNCTION_ARG
766 #define TARGET_FUNCTION_ARG sparc_function_arg
767 #undef TARGET_FUNCTION_INCOMING_ARG
768 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
769 #undef TARGET_FUNCTION_ARG_BOUNDARY
770 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
771
772 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
773 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
774 #undef TARGET_STRICT_ARGUMENT_NAMING
775 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
776
777 #undef TARGET_EXPAND_BUILTIN_VA_START
778 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
779 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
780 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
781
782 #undef TARGET_VECTOR_MODE_SUPPORTED_P
783 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
784
785 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
786 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
787
788 #ifdef SUBTARGET_INSERT_ATTRIBUTES
789 #undef TARGET_INSERT_ATTRIBUTES
790 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
791 #endif
792
793 #ifdef SUBTARGET_ATTRIBUTE_TABLE
794 #undef TARGET_ATTRIBUTE_TABLE
795 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
796 #endif
797
798 #undef TARGET_OPTION_OVERRIDE
799 #define TARGET_OPTION_OVERRIDE sparc_option_override
800
801 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
802 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
803 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
804 #endif
805
806 #undef TARGET_ASM_FILE_END
807 #define TARGET_ASM_FILE_END sparc_file_end
808
809 #undef TARGET_FRAME_POINTER_REQUIRED
810 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
811
812 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
813 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
814
815 #undef TARGET_CAN_ELIMINATE
816 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
817
818 #undef TARGET_PREFERRED_RELOAD_CLASS
819 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
820
821 #undef TARGET_SECONDARY_RELOAD
822 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
823
824 #undef TARGET_CONDITIONAL_REGISTER_USAGE
825 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
826
827 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
828 #undef TARGET_MANGLE_TYPE
829 #define TARGET_MANGLE_TYPE sparc_mangle_type
830 #endif
831
832 #undef TARGET_LRA_P
833 #define TARGET_LRA_P hook_bool_void_false
834
835 #undef TARGET_LEGITIMATE_ADDRESS_P
836 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
837
838 #undef TARGET_LEGITIMATE_CONSTANT_P
839 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
840
841 #undef TARGET_TRAMPOLINE_INIT
842 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
843
844 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
845 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
846 #undef TARGET_PRINT_OPERAND
847 #define TARGET_PRINT_OPERAND sparc_print_operand
848 #undef TARGET_PRINT_OPERAND_ADDRESS
849 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
850
851 /* The value stored by LDSTUB. */
852 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
853 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
854
855 #undef TARGET_CSTORE_MODE
856 #define TARGET_CSTORE_MODE sparc_cstore_mode
857
858 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
859 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
860
861 #undef TARGET_FIXED_CONDITION_CODE_REGS
862 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
863
864 struct gcc_target targetm = TARGET_INITIALIZER;
865
866 /* Return the memory reference contained in X if any, zero otherwise. */
867
868 static rtx
869 mem_ref (rtx x)
870 {
871 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
872 x = XEXP (x, 0);
873
874 if (MEM_P (x))
875 return x;
876
877 return NULL_RTX;
878 }
879
880 /* We use a machine specific pass to enable workarounds for errata.
881 We need to have the (essentially) final form of the insn stream in order
882 to properly detect the various hazards. Therefore, this machine specific
883 pass runs as late as possible. The pass is inserted in the pass pipeline
884 at the end of sparc_option_override. */
885
886 static unsigned int
887 sparc_do_work_around_errata (void)
888 {
889 rtx_insn *insn, *next;
890
891 /* Force all instructions to be split into their final form. */
892 split_all_insns_noflow ();
893
894 /* Now look for specific patterns in the insn stream. */
895 for (insn = get_insns (); insn; insn = next)
896 {
897 bool insert_nop = false;
898 rtx set;
899
900 /* Look into the instruction in a delay slot. */
901 if (NONJUMP_INSN_P (insn))
902 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
903 insn = seq->insn (1);
904
905 /* Look for a single-word load into an odd-numbered FP register. */
906 if (sparc_fix_at697f
907 && NONJUMP_INSN_P (insn)
908 && (set = single_set (insn)) != NULL_RTX
909 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
910 && MEM_P (SET_SRC (set))
911 && REG_P (SET_DEST (set))
912 && REGNO (SET_DEST (set)) > 31
913 && REGNO (SET_DEST (set)) % 2 != 0)
914 {
915 /* The wrong dependency is on the enclosing double register. */
916 const unsigned int x = REGNO (SET_DEST (set)) - 1;
917 unsigned int src1, src2, dest;
918 int code;
919
920 next = next_active_insn (insn);
921 if (!next)
922 break;
923 /* If the insn is a branch, then it cannot be problematic. */
924 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
925 continue;
926
927 extract_insn (next);
928 code = INSN_CODE (next);
929
930 switch (code)
931 {
932 case CODE_FOR_adddf3:
933 case CODE_FOR_subdf3:
934 case CODE_FOR_muldf3:
935 case CODE_FOR_divdf3:
936 dest = REGNO (recog_data.operand[0]);
937 src1 = REGNO (recog_data.operand[1]);
938 src2 = REGNO (recog_data.operand[2]);
939 if (src1 != src2)
940 {
941 /* Case [1-4]:
942 ld [address], %fx+1
943 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
944 if ((src1 == x || src2 == x)
945 && (dest == src1 || dest == src2))
946 insert_nop = true;
947 }
948 else
949 {
950 /* Case 5:
951 ld [address], %fx+1
952 FPOPd %fx, %fx, %fx */
953 if (src1 == x
954 && dest == src1
955 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
956 insert_nop = true;
957 }
958 break;
959
960 case CODE_FOR_sqrtdf2:
961 dest = REGNO (recog_data.operand[0]);
962 src1 = REGNO (recog_data.operand[1]);
963 /* Case 6:
964 ld [address], %fx+1
965 fsqrtd %fx, %fx */
966 if (src1 == x && dest == src1)
967 insert_nop = true;
968 break;
969
970 default:
971 break;
972 }
973 }
974
975 /* Look for a single-word load into an integer register. */
976 else if (sparc_fix_ut699
977 && NONJUMP_INSN_P (insn)
978 && (set = single_set (insn)) != NULL_RTX
979 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
980 && mem_ref (SET_SRC (set)) != NULL_RTX
981 && REG_P (SET_DEST (set))
982 && REGNO (SET_DEST (set)) < 32)
983 {
984 /* There is no problem if the second memory access has a data
985 dependency on the first single-cycle load. */
986 rtx x = SET_DEST (set);
987
988 next = next_active_insn (insn);
989 if (!next)
990 break;
991 /* If the insn is a branch, then it cannot be problematic. */
992 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
993 continue;
994
995 /* Look for a second memory access to/from an integer register. */
996 if ((set = single_set (next)) != NULL_RTX)
997 {
998 rtx src = SET_SRC (set);
999 rtx dest = SET_DEST (set);
1000 rtx mem;
1001
1002 /* LDD is affected. */
1003 if ((mem = mem_ref (src)) != NULL_RTX
1004 && REG_P (dest)
1005 && REGNO (dest) < 32
1006 && !reg_mentioned_p (x, XEXP (mem, 0)))
1007 insert_nop = true;
1008
1009 /* STD is *not* affected. */
1010 else if (MEM_P (dest)
1011 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1012 && (src == CONST0_RTX (GET_MODE (dest))
1013 || (REG_P (src)
1014 && REGNO (src) < 32
1015 && REGNO (src) != REGNO (x)))
1016 && !reg_mentioned_p (x, XEXP (dest, 0)))
1017 insert_nop = true;
1018 }
1019 }
1020
1021 /* Look for a single-word load/operation into an FP register. */
1022 else if (sparc_fix_ut699
1023 && NONJUMP_INSN_P (insn)
1024 && (set = single_set (insn)) != NULL_RTX
1025 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1026 && REG_P (SET_DEST (set))
1027 && REGNO (SET_DEST (set)) > 31)
1028 {
1029 /* Number of instructions in the problematic window. */
1030 const int n_insns = 4;
1031 /* The problematic combination is with the sibling FP register. */
1032 const unsigned int x = REGNO (SET_DEST (set));
1033 const unsigned int y = x ^ 1;
1034 rtx_insn *after;
1035 int i;
1036
1037 next = next_active_insn (insn);
1038 if (!next)
1039 break;
1040 /* If the insn is a branch, then it cannot be problematic. */
1041 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1042 continue;
1043
1044 /* Look for a second load/operation into the sibling FP register. */
1045 if (!((set = single_set (next)) != NULL_RTX
1046 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1047 && REG_P (SET_DEST (set))
1048 && REGNO (SET_DEST (set)) == y))
1049 continue;
1050
1051 /* Look for a (possible) store from the FP register in the next N
1052 instructions, but bail out if it is again modified or if there
1053 is a store from the sibling FP register before this store. */
1054 for (after = next, i = 0; i < n_insns; i++)
1055 {
1056 bool branch_p;
1057
1058 after = next_active_insn (after);
1059 if (!after)
1060 break;
1061
1062 /* This is a branch with an empty delay slot. */
1063 if (!NONJUMP_INSN_P (after))
1064 {
1065 if (++i == n_insns)
1066 break;
1067 branch_p = true;
1068 after = NULL;
1069 }
1070 /* This is a branch with a filled delay slot. */
1071 else if (rtx_sequence *seq =
1072 dyn_cast <rtx_sequence *> (PATTERN (after)))
1073 {
1074 if (++i == n_insns)
1075 break;
1076 branch_p = true;
1077 after = seq->insn (1);
1078 }
1079 /* This is a regular instruction. */
1080 else
1081 branch_p = false;
1082
1083 if (after && (set = single_set (after)) != NULL_RTX)
1084 {
1085 const rtx src = SET_SRC (set);
1086 const rtx dest = SET_DEST (set);
1087 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1088
1089 /* If the FP register is again modified before the store,
1090 then the store isn't affected. */
1091 if (REG_P (dest)
1092 && (REGNO (dest) == x
1093 || (REGNO (dest) == y && size == 8)))
1094 break;
1095
1096 if (MEM_P (dest) && REG_P (src))
1097 {
1098 /* If there is a store from the sibling FP register
1099 before the store, then the store is not affected. */
1100 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1101 break;
1102
1103 /* Otherwise, the store is affected. */
1104 if (REGNO (src) == x && size == 4)
1105 {
1106 insert_nop = true;
1107 break;
1108 }
1109 }
1110 }
1111
1112 /* If we have a branch in the first M instructions, then we
1113 cannot see the (M+2)th instruction so we play safe. */
1114 if (branch_p && i <= (n_insns - 2))
1115 {
1116 insert_nop = true;
1117 break;
1118 }
1119 }
1120 }
1121
1122 else
1123 next = NEXT_INSN (insn);
1124
1125 if (insert_nop)
1126 emit_insn_before (gen_nop (), next);
1127 }
1128
1129 return 0;
1130 }
1131
1132 namespace {
1133
1134 const pass_data pass_data_work_around_errata =
1135 {
1136 RTL_PASS, /* type */
1137 "errata", /* name */
1138 OPTGROUP_NONE, /* optinfo_flags */
1139 TV_MACH_DEP, /* tv_id */
1140 0, /* properties_required */
1141 0, /* properties_provided */
1142 0, /* properties_destroyed */
1143 0, /* todo_flags_start */
1144 0, /* todo_flags_finish */
1145 };
1146
1147 class pass_work_around_errata : public rtl_opt_pass
1148 {
1149 public:
1150 pass_work_around_errata(gcc::context *ctxt)
1151 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1152 {}
1153
1154 /* opt_pass methods: */
1155 virtual bool gate (function *)
1156 {
1157 /* The only errata we handle are those of the AT697F and UT699. */
1158 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1159 }
1160
1161 virtual unsigned int execute (function *)
1162 {
1163 return sparc_do_work_around_errata ();
1164 }
1165
1166 }; // class pass_work_around_errata
1167
1168 } // anon namespace
1169
1170 rtl_opt_pass *
1171 make_pass_work_around_errata (gcc::context *ctxt)
1172 {
1173 return new pass_work_around_errata (ctxt);
1174 }
1175
1176 /* Helpers for TARGET_DEBUG_OPTIONS. */
1177 static void
1178 dump_target_flag_bits (const int flags)
1179 {
1180 if (flags & MASK_64BIT)
1181 fprintf (stderr, "64BIT ");
1182 if (flags & MASK_APP_REGS)
1183 fprintf (stderr, "APP_REGS ");
1184 if (flags & MASK_FASTER_STRUCTS)
1185 fprintf (stderr, "FASTER_STRUCTS ");
1186 if (flags & MASK_FLAT)
1187 fprintf (stderr, "FLAT ");
1188 if (flags & MASK_FMAF)
1189 fprintf (stderr, "FMAF ");
1190 if (flags & MASK_FPU)
1191 fprintf (stderr, "FPU ");
1192 if (flags & MASK_HARD_QUAD)
1193 fprintf (stderr, "HARD_QUAD ");
1194 if (flags & MASK_POPC)
1195 fprintf (stderr, "POPC ");
1196 if (flags & MASK_PTR64)
1197 fprintf (stderr, "PTR64 ");
1198 if (flags & MASK_STACK_BIAS)
1199 fprintf (stderr, "STACK_BIAS ");
1200 if (flags & MASK_UNALIGNED_DOUBLES)
1201 fprintf (stderr, "UNALIGNED_DOUBLES ");
1202 if (flags & MASK_V8PLUS)
1203 fprintf (stderr, "V8PLUS ");
1204 if (flags & MASK_VIS)
1205 fprintf (stderr, "VIS ");
1206 if (flags & MASK_VIS2)
1207 fprintf (stderr, "VIS2 ");
1208 if (flags & MASK_VIS3)
1209 fprintf (stderr, "VIS3 ");
1210 if (flags & MASK_VIS4)
1211 fprintf (stderr, "VIS4 ");
1212 if (flags & MASK_CBCOND)
1213 fprintf (stderr, "CBCOND ");
1214 if (flags & MASK_DEPRECATED_V8_INSNS)
1215 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1216 if (flags & MASK_SPARCLET)
1217 fprintf (stderr, "SPARCLET ");
1218 if (flags & MASK_SPARCLITE)
1219 fprintf (stderr, "SPARCLITE ");
1220 if (flags & MASK_V8)
1221 fprintf (stderr, "V8 ");
1222 if (flags & MASK_V9)
1223 fprintf (stderr, "V9 ");
1224 }
1225
1226 static void
1227 dump_target_flags (const char *prefix, const int flags)
1228 {
1229 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1230 dump_target_flag_bits (flags);
1231 fprintf (stderr, "]\n");
1232 }
1233
1234 /* Validate and override various options, and do some machine dependent
1235 initialization. */
1236
1237 static void
1238 sparc_option_override (void)
1239 {
1240 static struct code_model {
1241 const char *const name;
1242 const enum cmodel value;
1243 } const cmodels[] = {
1244 { "32", CM_32 },
1245 { "medlow", CM_MEDLOW },
1246 { "medmid", CM_MEDMID },
1247 { "medany", CM_MEDANY },
1248 { "embmedany", CM_EMBMEDANY },
1249 { NULL, (enum cmodel) 0 }
1250 };
1251 const struct code_model *cmodel;
1252 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1253 static struct cpu_default {
1254 const int cpu;
1255 const enum processor_type processor;
1256 } const cpu_default[] = {
1257 /* There must be one entry here for each TARGET_CPU value. */
1258 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1259 { TARGET_CPU_v8, PROCESSOR_V8 },
1260 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1261 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1262 { TARGET_CPU_leon, PROCESSOR_LEON },
1263 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1264 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1265 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1266 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1267 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1268 { TARGET_CPU_v9, PROCESSOR_V9 },
1269 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1270 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1271 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1272 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1273 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1274 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1275 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1276 { -1, PROCESSOR_V7 }
1277 };
1278 const struct cpu_default *def;
1279 /* Table of values for -m{cpu,tune}=. This must match the order of
1280 the enum processor_type in sparc-opts.h. */
1281 static struct cpu_table {
1282 const char *const name;
1283 const int disable;
1284 const int enable;
1285 } const cpu_table[] = {
1286 { "v7", MASK_ISA, 0 },
1287 { "cypress", MASK_ISA, 0 },
1288 { "v8", MASK_ISA, MASK_V8 },
1289 /* TI TMS390Z55 supersparc */
1290 { "supersparc", MASK_ISA, MASK_V8 },
1291 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1292 { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1293 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1294 { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
1295 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1296 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1297 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1298 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1299 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1300 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1301 { "sparclet", MASK_ISA, MASK_SPARCLET },
1302 /* TEMIC sparclet */
1303 { "tsc701", MASK_ISA, MASK_SPARCLET },
1304 { "v9", MASK_ISA, MASK_V9 },
1305 /* UltraSPARC I, II, IIi */
1306 { "ultrasparc", MASK_ISA,
1307 /* Although insns using %y are deprecated, they are a clear win.  */
1308 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1309 /* UltraSPARC III */
1310 /* ??? Check if %y issue still holds true. */
1311 { "ultrasparc3", MASK_ISA,
1312 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1313 /* UltraSPARC T1 */
1314 { "niagara", MASK_ISA,
1315 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1316 /* UltraSPARC T2 */
1317 { "niagara2", MASK_ISA,
1318 MASK_V9|MASK_POPC|MASK_VIS2 },
1319 /* UltraSPARC T3 */
1320 { "niagara3", MASK_ISA,
1321 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1322 /* UltraSPARC T4 */
1323 { "niagara4", MASK_ISA,
1324 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1325 /* UltraSPARC M7 */
1326 { "niagara7", MASK_ISA,
1327 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1328 };
1329 const struct cpu_table *cpu;
1330 unsigned int i;
1331 int fpu;
1332
1333 if (sparc_debug_string != NULL)
1334 {
1335 const char *q;
1336 char *p;
1337
1338 p = ASTRDUP (sparc_debug_string);
1339 while ((q = strtok (p, ",")) != NULL)
1340 {
1341 bool invert;
1342 int mask;
1343
1344 p = NULL;
1345 if (*q == '!')
1346 {
1347 invert = true;
1348 q++;
1349 }
1350 else
1351 invert = false;
1352
1353 if (! strcmp (q, "all"))
1354 mask = MASK_DEBUG_ALL;
1355 else if (! strcmp (q, "options"))
1356 mask = MASK_DEBUG_OPTIONS;
1357 else
1358 error ("unknown -mdebug-%s switch", q);
1359
1360 if (invert)
1361 sparc_debug &= ~mask;
1362 else
1363 sparc_debug |= mask;
1364 }
1365 }
1366
1367 if (TARGET_DEBUG_OPTIONS)
1368 {
1369 dump_target_flags ("Initial target_flags", target_flags);
1370 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1371 }
1372
1373 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1374 SUBTARGET_OVERRIDE_OPTIONS;
1375 #endif
1376
1377 #ifndef SPARC_BI_ARCH
1378 /* Check for unsupported architecture size. */
1379 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1380 error ("%s is not supported by this configuration",
1381 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1382 #endif
1383
1384 /* We force all 64-bit archs to use a 128-bit long double.  */
1385 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1386 {
1387 error ("-mlong-double-64 not allowed with -m64");
1388 target_flags |= MASK_LONG_DOUBLE_128;
1389 }
1390
1391 /* Code model selection. */
1392 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1393
1394 #ifdef SPARC_BI_ARCH
1395 if (TARGET_ARCH32)
1396 sparc_cmodel = CM_32;
1397 #endif
1398
1399 if (sparc_cmodel_string != NULL)
1400 {
1401 if (TARGET_ARCH64)
1402 {
1403 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1404 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1405 break;
1406 if (cmodel->name == NULL)
1407 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1408 else
1409 sparc_cmodel = cmodel->value;
1410 }
1411 else
1412 error ("-mcmodel= is not supported on 32 bit systems");
1413 }
1414
1415 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1416 for (i = 8; i < 16; i++)
1417 if (!call_used_regs [i])
1418 {
1419 error ("-fcall-saved-REG is not supported for out registers");
1420 call_used_regs [i] = 1;
1421 }
1422
1423 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1424
1425 /* Set the default CPU. */
1426 if (!global_options_set.x_sparc_cpu_and_features)
1427 {
1428 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1429 if (def->cpu == TARGET_CPU_DEFAULT)
1430 break;
1431 gcc_assert (def->cpu != -1);
1432 sparc_cpu_and_features = def->processor;
1433 }
1434
1435 if (!global_options_set.x_sparc_cpu)
1436 sparc_cpu = sparc_cpu_and_features;
1437
1438 cpu = &cpu_table[(int) sparc_cpu_and_features];
1439
1440 if (TARGET_DEBUG_OPTIONS)
1441 {
1442 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1443 fprintf (stderr, "sparc_cpu: %s\n",
1444 cpu_table[(int) sparc_cpu].name);
1445 dump_target_flags ("cpu->disable", cpu->disable);
1446 dump_target_flags ("cpu->enable", cpu->enable);
1447 }
1448
1449 target_flags &= ~cpu->disable;
1450 target_flags |= (cpu->enable
1451 #ifndef HAVE_AS_FMAF_HPC_VIS3
1452 & ~(MASK_FMAF | MASK_VIS3)
1453 #endif
1454 #ifndef HAVE_AS_SPARC4
1455 & ~MASK_CBCOND
1456 #endif
1457 #ifndef HAVE_AS_SPARC5_VIS4
1458 & ~(MASK_VIS4 | MASK_SUBXC)
1459 #endif
1460 #ifndef HAVE_AS_LEON
1461 & ~(MASK_LEON | MASK_LEON3)
1462 #endif
1463 );
1464
1465 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1466 the processor default. */
1467 if (target_flags_explicit & MASK_FPU)
1468 target_flags = (target_flags & ~MASK_FPU) | fpu;
1469
1470 /* -mvis2 implies -mvis */
1471 if (TARGET_VIS2)
1472 target_flags |= MASK_VIS;
1473
1474 /* -mvis3 implies -mvis2 and -mvis */
1475 if (TARGET_VIS3)
1476 target_flags |= MASK_VIS2 | MASK_VIS;
1477
1478 /* -mvis4 implies -mvis3, -mvis2 and -mvis */
1479 if (TARGET_VIS4)
1480 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1481
1482 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
1483 disabled. */
1484 if (! TARGET_FPU)
1485 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1486 | MASK_FMAF);
1487
1488 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1489 are available.
1490 -m64 also implies v9. */
1491 if (TARGET_VIS || TARGET_ARCH64)
1492 {
1493 target_flags |= MASK_V9;
1494 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1495 }
1496
1497 /* -mvis also implies -mv8plus on 32-bit */
1498 if (TARGET_VIS && ! TARGET_ARCH64)
1499 target_flags |= MASK_V8PLUS;
1500
1501 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1502 if (TARGET_V9 && TARGET_ARCH32)
1503 target_flags |= MASK_DEPRECATED_V8_INSNS;
1504
1505 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1506 if (! TARGET_V9 || TARGET_ARCH64)
1507 target_flags &= ~MASK_V8PLUS;
1508
1509 /* Don't use stack biasing in 32 bit mode. */
1510 if (TARGET_ARCH32)
1511 target_flags &= ~MASK_STACK_BIAS;
1512
1513 /* Supply a default value for align_functions. */
1514 if (align_functions == 0
1515 && (sparc_cpu == PROCESSOR_ULTRASPARC
1516 || sparc_cpu == PROCESSOR_ULTRASPARC3
1517 || sparc_cpu == PROCESSOR_NIAGARA
1518 || sparc_cpu == PROCESSOR_NIAGARA2
1519 || sparc_cpu == PROCESSOR_NIAGARA3
1520 || sparc_cpu == PROCESSOR_NIAGARA4
1521 || sparc_cpu == PROCESSOR_NIAGARA7))
1522 align_functions = 32;
1523
1524 /* Validate PCC_STRUCT_RETURN. */
1525 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1526 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1527
1528 /* Only use .uaxword when compiling for a 64-bit target. */
1529 if (!TARGET_ARCH64)
1530 targetm.asm_out.unaligned_op.di = NULL;
1531
1532 /* Do various machine dependent initializations. */
1533 sparc_init_modes ();
1534
1535 /* Set up function hooks. */
1536 init_machine_status = sparc_init_machine_status;
1537
1538 switch (sparc_cpu)
1539 {
1540 case PROCESSOR_V7:
1541 case PROCESSOR_CYPRESS:
1542 sparc_costs = &cypress_costs;
1543 break;
1544 case PROCESSOR_V8:
1545 case PROCESSOR_SPARCLITE:
1546 case PROCESSOR_SUPERSPARC:
1547 sparc_costs = &supersparc_costs;
1548 break;
1549 case PROCESSOR_F930:
1550 case PROCESSOR_F934:
1551 case PROCESSOR_HYPERSPARC:
1552 case PROCESSOR_SPARCLITE86X:
1553 sparc_costs = &hypersparc_costs;
1554 break;
1555 case PROCESSOR_LEON:
1556 sparc_costs = &leon_costs;
1557 break;
1558 case PROCESSOR_LEON3:
1559 case PROCESSOR_LEON3V7:
1560 sparc_costs = &leon3_costs;
1561 break;
1562 case PROCESSOR_SPARCLET:
1563 case PROCESSOR_TSC701:
1564 sparc_costs = &sparclet_costs;
1565 break;
1566 case PROCESSOR_V9:
1567 case PROCESSOR_ULTRASPARC:
1568 sparc_costs = &ultrasparc_costs;
1569 break;
1570 case PROCESSOR_ULTRASPARC3:
1571 sparc_costs = &ultrasparc3_costs;
1572 break;
1573 case PROCESSOR_NIAGARA:
1574 sparc_costs = &niagara_costs;
1575 break;
1576 case PROCESSOR_NIAGARA2:
1577 sparc_costs = &niagara2_costs;
1578 break;
1579 case PROCESSOR_NIAGARA3:
1580 sparc_costs = &niagara3_costs;
1581 break;
1582 case PROCESSOR_NIAGARA4:
1583 sparc_costs = &niagara4_costs;
1584 break;
1585 case PROCESSOR_NIAGARA7:
1586 sparc_costs = &niagara7_costs;
1587 break;
1588 case PROCESSOR_NATIVE:
1589 gcc_unreachable ();
1590 };
1591
1592 if (sparc_memory_model == SMM_DEFAULT)
1593 {
1594 /* Choose the memory model for the operating system. */
1595 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1596 if (os_default != SMM_DEFAULT)
1597 sparc_memory_model = os_default;
1598 /* Choose the most relaxed model for the processor. */
1599 else if (TARGET_V9)
1600 sparc_memory_model = SMM_RMO;
1601 else if (TARGET_LEON3)
1602 sparc_memory_model = SMM_TSO;
1603 else if (TARGET_LEON)
1604 sparc_memory_model = SMM_SC;
1605 else if (TARGET_V8)
1606 sparc_memory_model = SMM_PSO;
1607 else
1608 sparc_memory_model = SMM_SC;
1609 }
1610
1611 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1612 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1613 target_flags |= MASK_LONG_DOUBLE_128;
1614 #endif
1615
1616 if (TARGET_DEBUG_OPTIONS)
1617 dump_target_flags ("Final target_flags", target_flags);
1618
1619 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1620 can run at the same time. More important, it is the threshold
1621 defining when additional prefetches will be dropped by the
1622 hardware.
1623
1624 The UltraSPARC-III features a documented prefetch queue with a
1625 size of 8. Additional prefetches issued in the cpu are
1626 dropped.
1627
1628 Niagara processors are different. In these processors prefetches
1629 are handled much like regular loads. The L1 miss buffer is 32
1630 entries, but prefetches start getting affected when 30 entries
1631 become occupied. That occupation could be a mix of regular loads
1632 and prefetches though. And that buffer is shared by all threads.
1633 Once the threshold is reached, if the core is running a single
1634 thread the prefetch will retry. If more than one thread is
1635 running, the prefetch will be dropped.
1636
1637 All this makes it very difficult to determine how many prefetches
1638 can be issued simultaneously, even in a
1639 single-threaded program. Experimental results show that setting
1640 this parameter to 32 works well when the number of threads is not
1641 high. */
1642 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1643 ((sparc_cpu == PROCESSOR_ULTRASPARC
1644 || sparc_cpu == PROCESSOR_NIAGARA
1645 || sparc_cpu == PROCESSOR_NIAGARA2
1646 || sparc_cpu == PROCESSOR_NIAGARA3
1647 || sparc_cpu == PROCESSOR_NIAGARA4)
1648 ? 2
1649 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1650 ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
1651 ? 32 : 3))),
1652 global_options.x_param_values,
1653 global_options_set.x_param_values);
1654
1655 /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
1656 params.def), so no maybe_set_param_value is needed.
1657
1658 The Oracle SPARC Architecture (previously the UltraSPARC
1659 Architecture) specification states that when a PREFETCH[A]
1660 instruction is executed an implementation-specific amount of data
1661 is prefetched, and that it is at least 64 bytes long (aligned to
1662 at least 64 bytes).
1663
1664 However, this is not correct. The M7 (and implementations prior
1665 to that) does not guarantee a 64B prefetch into a cache if the
1666 line size is smaller. A single cache line is all that is ever
1667 prefetched. So for the M7, where the L1D$ has 32B lines and the
1668 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1669 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1670 is a read_n prefetch, which is the only type that allocates to
1671 the L1.)  */
1672
1673 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1674 Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
1675 Niagara processors feature an L1D$ of 16KB.  */
1676 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1677 ((sparc_cpu == PROCESSOR_ULTRASPARC
1678 || sparc_cpu == PROCESSOR_ULTRASPARC3
1679 || sparc_cpu == PROCESSOR_NIAGARA
1680 || sparc_cpu == PROCESSOR_NIAGARA2
1681 || sparc_cpu == PROCESSOR_NIAGARA3
1682 || sparc_cpu == PROCESSOR_NIAGARA4
1683 || sparc_cpu == PROCESSOR_NIAGARA7)
1684 ? 16 : 64),
1685 global_options.x_param_values,
1686 global_options_set.x_param_values);
1687
1688
1689 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1690 that 512 is the default in params.def. */
1691 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1692 (sparc_cpu == PROCESSOR_NIAGARA4
1693 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1694 ? 256 : 512)),
1695 global_options.x_param_values,
1696 global_options_set.x_param_values);
1697
1698
1699 /* Disable save slot sharing for call-clobbered registers by default.
1700 The IRA sharing algorithm works on single registers only and this
1701 pessimizes for double floating-point registers. */
1702 if (!global_options_set.x_flag_ira_share_save_slots)
1703 flag_ira_share_save_slots = 0;
1704
1705 /* We register a machine-specific pass to work around errata, if any.
1706 The pass must be scheduled as late as possible so that we have the
1707 (essentially) final form of the insn stream to work on.
1708 Registering the pass must be done at start up. It's convenient to
1709 do it here. */
1710 opt_pass *errata_pass = make_pass_work_around_errata (g);
1711 struct register_pass_info insert_pass_work_around_errata =
1712 {
1713 errata_pass, /* pass */
1714 "dbr", /* reference_pass_name */
1715 1, /* ref_pass_instance_number */
1716 PASS_POS_INSERT_AFTER /* pos_op */
1717 };
1718 register_pass (&insert_pass_work_around_errata);
1719 }
1720 \f
1721 /* Miscellaneous utilities. */
1722
1723 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1724 or branch on register contents instructions. */
1725
1726 int
1727 v9_regcmp_p (enum rtx_code code)
1728 {
1729 return (code == EQ || code == NE || code == GE || code == LT
1730 || code == LE || code == GT);
1731 }
1732
1733 /* Nonzero if OP is a floating point constant which can
1734 be loaded into an integer register using a single
1735 sethi instruction. */
1736
1737 int
1738 fp_sethi_p (rtx op)
1739 {
1740 if (GET_CODE (op) == CONST_DOUBLE)
1741 {
1742 long i;
1743
1744 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1745 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1746 }
1747
1748 return 0;
1749 }
1750
1751 /* Nonzero if OP is a floating point constant which can
1752 be loaded into an integer register using a single
1753 mov instruction. */
1754
1755 int
1756 fp_mov_p (rtx op)
1757 {
1758 if (GET_CODE (op) == CONST_DOUBLE)
1759 {
1760 long i;
1761
1762 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1763 return SPARC_SIMM13_P (i);
1764 }
1765
1766 return 0;
1767 }
1768
1769 /* Nonzero if OP is a floating point constant which can
1770 be loaded into an integer register using a high/losum
1771 instruction sequence. */
1772
1773 int
1774 fp_high_losum_p (rtx op)
1775 {
1776 /* The constraints calling this should only be in
1777 SFmode move insns, so any constant which cannot
1778 be moved using a single insn will do. */
1779 if (GET_CODE (op) == CONST_DOUBLE)
1780 {
1781 long i;
1782
1783 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1784 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1785 }
1786
1787 return 0;
1788 }
1789
1790 /* Return true if the address of LABEL can be loaded by means of the
1791 mov{si,di}_pic_label_ref patterns in PIC mode. */
1792
1793 static bool
1794 can_use_mov_pic_label_ref (rtx label)
1795 {
1796 /* VxWorks does not impose a fixed gap between segments; the run-time
1797 gap can be different from the object-file gap. We therefore can't
1798 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1799 are absolutely sure that X is in the same segment as the GOT.
1800 Unfortunately, the flexibility of linker scripts means that we
1801 can't be sure of that in general, so assume that GOT-relative
1802 accesses are never valid on VxWorks. */
1803 if (TARGET_VXWORKS_RTP)
1804 return false;
1805
1806 /* Similarly, if the label is non-local, it might end up being placed
1807 in a different section than the current one; now mov_pic_label_ref
1808 requires the label and the code to be in the same section. */
1809 if (LABEL_REF_NONLOCAL_P (label))
1810 return false;
1811
1812 /* Finally, if we are reordering basic blocks and partitioning into hot
1813 and cold sections, this might happen for any label. */
1814 if (flag_reorder_blocks_and_partition)
1815 return false;
1816
1817 return true;
1818 }
1819
1820 /* Expand a move instruction. Return true if all work is done. */
1821
1822 bool
1823 sparc_expand_move (machine_mode mode, rtx *operands)
1824 {
1825 /* Handle sets of MEM first. */
1826 if (GET_CODE (operands[0]) == MEM)
1827 {
1828 /* 0 is a register (or a pair of registers) on SPARC. */
1829 if (register_or_zero_operand (operands[1], mode))
1830 return false;
1831
1832 if (!reload_in_progress)
1833 {
1834 operands[0] = validize_mem (operands[0]);
1835 operands[1] = force_reg (mode, operands[1]);
1836 }
1837 }
1838
1839 /* Fixup TLS cases. */
1840 if (TARGET_HAVE_TLS
1841 && CONSTANT_P (operands[1])
1842 && sparc_tls_referenced_p (operands [1]))
1843 {
1844 operands[1] = sparc_legitimize_tls_address (operands[1]);
1845 return false;
1846 }
1847
1848 /* Fixup PIC cases. */
1849 if (flag_pic && CONSTANT_P (operands[1]))
1850 {
1851 if (pic_address_needs_scratch (operands[1]))
1852 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1853
1854 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1855 if (GET_CODE (operands[1]) == LABEL_REF
1856 && can_use_mov_pic_label_ref (operands[1]))
1857 {
1858 if (mode == SImode)
1859 {
1860 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1861 return true;
1862 }
1863
1864 if (mode == DImode)
1865 {
1866 gcc_assert (TARGET_ARCH64);
1867 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1868 return true;
1869 }
1870 }
1871
1872 if (symbolic_operand (operands[1], mode))
1873 {
1874 operands[1]
1875 = sparc_legitimize_pic_address (operands[1],
1876 reload_in_progress
1877 ? operands[0] : NULL_RTX);
1878 return false;
1879 }
1880 }
1881
1882 /* If we are trying to toss an integer constant into FP registers,
1883 or loading a FP or vector constant, force it into memory. */
1884 if (CONSTANT_P (operands[1])
1885 && REG_P (operands[0])
1886 && (SPARC_FP_REG_P (REGNO (operands[0]))
1887 || SCALAR_FLOAT_MODE_P (mode)
1888 || VECTOR_MODE_P (mode)))
1889 {
1890 /* emit_group_store will send such bogosity to us when it is
1891 not storing directly into memory. So fix this up to avoid
1892 crashes in output_constant_pool. */
1893 if (operands [1] == const0_rtx)
1894 operands[1] = CONST0_RTX (mode);
1895
1896 /* If TARGET_VIS, we can clear FP registers or set them to all-ones;
1897 other registers can always be cleared or set this way. */
1898 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1899 && (const_zero_operand (operands[1], mode)
1900 || const_all_ones_operand (operands[1], mode)))
1901 return false;
1902
1903 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1904 /* We are able to build any SF constant in integer registers
1905 with at most 2 instructions. */
1906 && (mode == SFmode
1907 /* And any DF constant in integer registers. */
1908 || (mode == DFmode
1909 && ! can_create_pseudo_p ())))
1910 return false;
1911
1912 operands[1] = force_const_mem (mode, operands[1]);
1913 if (!reload_in_progress)
1914 operands[1] = validize_mem (operands[1]);
1915 return false;
1916 }
1917
1918 /* Accept non-constants and valid constants unmodified. */
1919 if (!CONSTANT_P (operands[1])
1920 || GET_CODE (operands[1]) == HIGH
1921 || input_operand (operands[1], mode))
1922 return false;
1923
1924 switch (mode)
1925 {
1926 case QImode:
1927 /* All QImode constants require only one insn, so proceed. */
1928 break;
1929
1930 case HImode:
1931 case SImode:
1932 sparc_emit_set_const32 (operands[0], operands[1]);
1933 return true;
1934
1935 case DImode:
1936 /* input_operand should have filtered out 32-bit mode. */
1937 sparc_emit_set_const64 (operands[0], operands[1]);
1938 return true;
1939
1940 case TImode:
1941 {
1942 rtx high, low;
1943 /* TImode isn't available in 32-bit mode. */
1944 split_double (operands[1], &high, &low);
1945 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1946 high));
1947 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1948 low));
1949 }
1950 return true;
1951
1952 default:
1953 gcc_unreachable ();
1954 }
1955
1956 return false;
1957 }
1958
1959 /* Load OP1, a 32-bit constant, into OP0, a register.
1960 We know it can't be done in one insn when we get
1961 here; the move expander guarantees this. */
1962
1963 static void
1964 sparc_emit_set_const32 (rtx op0, rtx op1)
1965 {
1966 machine_mode mode = GET_MODE (op0);
1967 rtx temp = op0;
1968
1969 if (can_create_pseudo_p ())
1970 temp = gen_reg_rtx (mode);
1971
1972 if (GET_CODE (op1) == CONST_INT)
1973 {
1974 gcc_assert (!small_int_operand (op1, mode)
1975 && !const_high_operand (op1, mode));
1976
1977 /* Emit them as real moves instead of a HIGH/LO_SUM;
1978 this way CSE can see everything and reuse intermediate
1979 values if it wants. */
1980 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1981 & ~(HOST_WIDE_INT) 0x3ff)));
1982
1983 emit_insn (gen_rtx_SET (op0,
1984 gen_rtx_IOR (mode, temp,
1985 GEN_INT (INTVAL (op1) & 0x3ff))));
1986 }
1987 else
1988 {
1989 /* A symbol, emit in the traditional way. */
1990 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1991 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1992 }
1993 }
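/* Editorial illustration, not part of the original sources: for a
   hypothetical op1 of 0x12345678, the CONST_INT path above emits the
   equivalent of
     sethi %hi(0x12345678), %tmp  ! tmp = 0x12345400 (op1 & ~0x3ff)
     or %tmp, 0x278, %dst         ! dst = 0x12345678 (tmp | (op1 & 0x3ff))
   as two plain SETs, so that CSE can reuse the intermediate value.  */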
1994
1995 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1996 If TEMP is nonzero, we are forbidden to use any other scratch
1997 registers. Otherwise, we are allowed to generate them as needed.
1998
1999 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2000 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2001
2002 void
2003 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2004 {
2005 rtx temp1, temp2, temp3, temp4, temp5;
2006 rtx ti_temp = 0;
2007
2008 if (temp && GET_MODE (temp) == TImode)
2009 {
2010 ti_temp = temp;
2011 temp = gen_rtx_REG (DImode, REGNO (temp));
2012 }
2013
2014 /* SPARC-V9 code-model support. */
2015 switch (sparc_cmodel)
2016 {
2017 case CM_MEDLOW:
2018 /* The range spanned by all instructions in the object is less
2019 than 2^31 bytes (2GB) and the distance from any instruction
2020 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2021 than 2^31 bytes (2GB).
2022
2023 The executable must be in the low 4TB of the virtual address
2024 space.
2025
2026 sethi %hi(symbol), %temp1
2027 or %temp1, %lo(symbol), %reg */
2028 if (temp)
2029 temp1 = temp; /* op0 is allowed. */
2030 else
2031 temp1 = gen_reg_rtx (DImode);
2032
2033 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2034 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2035 break;
2036
2037 case CM_MEDMID:
2038 /* The range spanned by all instructions in the object is less
2039 than 2^31 bytes (2GB) and the distance from any instruction
2040 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2041 than 2^31 bytes (2GB).
2042
2043 The executable must be in the low 16TB of the virtual address
2044 space.
2045
2046 sethi %h44(symbol), %temp1
2047 or %temp1, %m44(symbol), %temp2
2048 sllx %temp2, 12, %temp3
2049 or %temp3, %l44(symbol), %reg */
2050 if (temp)
2051 {
2052 temp1 = op0;
2053 temp2 = op0;
2054 temp3 = temp; /* op0 is allowed. */
2055 }
2056 else
2057 {
2058 temp1 = gen_reg_rtx (DImode);
2059 temp2 = gen_reg_rtx (DImode);
2060 temp3 = gen_reg_rtx (DImode);
2061 }
2062
2063 emit_insn (gen_seth44 (temp1, op1));
2064 emit_insn (gen_setm44 (temp2, temp1, op1));
2065 emit_insn (gen_rtx_SET (temp3,
2066 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2067 emit_insn (gen_setl44 (op0, temp3, op1));
2068 break;
2069
2070 case CM_MEDANY:
2071 /* The range spanned by all instructions in the object is less
2072 than 2^31 bytes (2GB) and the distance from any instruction
2073 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2074 than 2^31 bytes (2GB).
2075
2076 The executable can be placed anywhere in the virtual address
2077 space.
2078
2079 sethi %hh(symbol), %temp1
2080 sethi %lm(symbol), %temp2
2081 or %temp1, %hm(symbol), %temp3
2082 sllx %temp3, 32, %temp4
2083 or %temp4, %temp2, %temp5
2084 or %temp5, %lo(symbol), %reg */
2085 if (temp)
2086 {
2087 /* It is possible that one of the registers we got for operands[2]
2088 might coincide with that of operands[0] (which is why we made
2089 it TImode). Pick the other one to use as our scratch. */
2090 if (rtx_equal_p (temp, op0))
2091 {
2092 gcc_assert (ti_temp);
2093 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2094 }
2095 temp1 = op0;
2096 temp2 = temp; /* op0 is _not_ allowed, see above. */
2097 temp3 = op0;
2098 temp4 = op0;
2099 temp5 = op0;
2100 }
2101 else
2102 {
2103 temp1 = gen_reg_rtx (DImode);
2104 temp2 = gen_reg_rtx (DImode);
2105 temp3 = gen_reg_rtx (DImode);
2106 temp4 = gen_reg_rtx (DImode);
2107 temp5 = gen_reg_rtx (DImode);
2108 }
2109
2110 emit_insn (gen_sethh (temp1, op1));
2111 emit_insn (gen_setlm (temp2, op1));
2112 emit_insn (gen_sethm (temp3, temp1, op1));
2113 emit_insn (gen_rtx_SET (temp4,
2114 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2115 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2116 emit_insn (gen_setlo (op0, temp5, op1));
2117 break;
2118
2119 case CM_EMBMEDANY:
2120 /* Old old old backwards compatibility kruft here.
2121 Essentially it is MEDLOW with a fixed 64-bit
2122 virtual base added to all data segment addresses.
2123 Text-segment stuff is computed like MEDANY, we can't
2124 reuse the code above because the relocation knobs
2125 look different.
2126
2127 Data segment: sethi %hi(symbol), %temp1
2128 add %temp1, EMBMEDANY_BASE_REG, %temp2
2129 or %temp2, %lo(symbol), %reg */
2130 if (data_segment_operand (op1, GET_MODE (op1)))
2131 {
2132 if (temp)
2133 {
2134 temp1 = temp; /* op0 is allowed. */
2135 temp2 = op0;
2136 }
2137 else
2138 {
2139 temp1 = gen_reg_rtx (DImode);
2140 temp2 = gen_reg_rtx (DImode);
2141 }
2142
2143 emit_insn (gen_embmedany_sethi (temp1, op1));
2144 emit_insn (gen_embmedany_brsum (temp2, temp1));
2145 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2146 }
2147
2148 /* Text segment: sethi %uhi(symbol), %temp1
2149 sethi %hi(symbol), %temp2
2150 or %temp1, %ulo(symbol), %temp3
2151 sllx %temp3, 32, %temp4
2152 or %temp4, %temp2, %temp5
2153 or %temp5, %lo(symbol), %reg */
2154 else
2155 {
2156 if (temp)
2157 {
2158 /* It is possible that one of the registers we got for operands[2]
2159 might coincide with that of operands[0] (which is why we made
2160 it TImode). Pick the other one to use as our scratch. */
2161 if (rtx_equal_p (temp, op0))
2162 {
2163 gcc_assert (ti_temp);
2164 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2165 }
2166 temp1 = op0;
2167 temp2 = temp; /* op0 is _not_ allowed, see above. */
2168 temp3 = op0;
2169 temp4 = op0;
2170 temp5 = op0;
2171 }
2172 else
2173 {
2174 temp1 = gen_reg_rtx (DImode);
2175 temp2 = gen_reg_rtx (DImode);
2176 temp3 = gen_reg_rtx (DImode);
2177 temp4 = gen_reg_rtx (DImode);
2178 temp5 = gen_reg_rtx (DImode);
2179 }
2180
2181 emit_insn (gen_embmedany_textuhi (temp1, op1));
2182 emit_insn (gen_embmedany_texthi (temp2, op1));
2183 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2184 emit_insn (gen_rtx_SET (temp4,
2185 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2186 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2187 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2188 }
2189 break;
2190
2191 default:
2192 gcc_unreachable ();
2193 }
2194 }
2195
2196 /* These avoid problems when cross compiling. If we do not
2197 go through all this hair then the optimizer will see
2198 invalid REG_EQUAL notes or in some cases none at all. */
2199 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2200 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2201 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2202 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2203
2204 /* The optimizer is not to assume anything about exactly
2205 which bits are set for a HIGH; they are unspecified.
2206 Unfortunately this leads to many missed optimizations
2207 during CSE. We mask out the non-HIGH bits and match
2208 a plain movdi to alleviate this problem. */
2209 static rtx
2210 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2211 {
2212 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2213 }
2214
2215 static rtx
2216 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2217 {
2218 return gen_rtx_SET (dest, GEN_INT (val));
2219 }
2220
2221 static rtx
2222 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2223 {
2224 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2225 }
2226
2227 static rtx
2228 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2229 {
2230 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2231 }
2232
2233 /* Worker routines for 64-bit constant formation on arch64.
2234 One of the key things to do in these emissions is
2235 to create as many temp REGs as possible. This makes it
2236 possible for half-built constants to be reused when
2237 similar values are required later on.
2238 Without doing this, the optimizer cannot see such
2239 opportunities. */
2240
2241 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2242 unsigned HOST_WIDE_INT, int);
2243
2244 static void
2245 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2246 unsigned HOST_WIDE_INT low_bits, int is_neg)
2247 {
2248 unsigned HOST_WIDE_INT high_bits;
2249
2250 if (is_neg)
2251 high_bits = (~low_bits) & 0xffffffff;
2252 else
2253 high_bits = low_bits;
2254
2255 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2256 if (!is_neg)
2257 {
2258 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2259 }
2260 else
2261 {
2262 /* If we are XOR'ing with -1, then we should emit a one's complement
2263 instead. This way the combiner will notice logical operations
2264 such as ANDN later on and substitute. */
2265 if ((low_bits & 0x3ff) == 0x3ff)
2266 {
2267 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2268 }
2269 else
2270 {
2271 emit_insn (gen_rtx_SET (op0,
2272 gen_safe_XOR64 (temp,
2273 (-(HOST_WIDE_INT)0x400
2274 | (low_bits & 0x3ff)))));
2275 }
2276 }
2277 }
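/* Editorial illustration, not part of the original sources: for the
   hypothetical constant 0xffffffff12345678 (high_bits == 0xffffffff,
   so is_neg is set), the routine above emits the equivalent of
     sethi %hi(0xedcba800), %tmp  ! tmp = ~low_bits with the low 10 bits cleared
     xor %tmp, -0x188, %dst       ! -0x188 == -0x400 | (low_bits & 0x3ff)
   which yields 0x00000000edcba800 ^ 0xfffffffffffffe78
   = 0xffffffff12345678.  */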
2278
2279 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2280 unsigned HOST_WIDE_INT, int);
2281
2282 static void
2283 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2284 unsigned HOST_WIDE_INT high_bits,
2285 unsigned HOST_WIDE_INT low_immediate,
2286 int shift_count)
2287 {
2288 rtx temp2 = op0;
2289
2290 if ((high_bits & 0xfffffc00) != 0)
2291 {
2292 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2293 if ((high_bits & ~0xfffffc00) != 0)
2294 emit_insn (gen_rtx_SET (op0,
2295 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2296 else
2297 temp2 = temp;
2298 }
2299 else
2300 {
2301 emit_insn (gen_safe_SET64 (temp, high_bits));
2302 temp2 = temp;
2303 }
2304
2305 /* Now shift it up into place. */
2306 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2307 GEN_INT (shift_count))));
2308
2309 /* If there is a low immediate piece, finish up by
2310 putting that in as well. */
2311 if (low_immediate != 0)
2312 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2313 }
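/* Editorial illustration, not part of the original sources: called from
   sparc_emit_set_const64 for the hypothetical constant 0x1234567800000234
   (high_bits = 0x12345678, low_immediate = 0x234, shift_count = 32), the
   routine above emits the equivalent of
     sethi %hi(0x12345678), %tmp  ! 0x12345400
     or %tmp, 0x278, %dst         ! 0x12345678
     sllx %dst, 32, %dst          ! 0x1234567800000000
     or %dst, 0x234, %dst         ! 0x1234567800000234  */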
2314
2315 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2316 unsigned HOST_WIDE_INT);
2317
2318 /* Full 64-bit constant decomposition. Even though this is the
2319 'worst' case, we still optimize a few things away. */
2320 static void
2321 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2322 unsigned HOST_WIDE_INT high_bits,
2323 unsigned HOST_WIDE_INT low_bits)
2324 {
2325 rtx sub_temp = op0;
2326
2327 if (can_create_pseudo_p ())
2328 sub_temp = gen_reg_rtx (DImode);
2329
2330 if ((high_bits & 0xfffffc00) != 0)
2331 {
2332 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2333 if ((high_bits & ~0xfffffc00) != 0)
2334 emit_insn (gen_rtx_SET (sub_temp,
2335 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2336 else
2337 sub_temp = temp;
2338 }
2339 else
2340 {
2341 emit_insn (gen_safe_SET64 (temp, high_bits));
2342 sub_temp = temp;
2343 }
2344
2345 if (can_create_pseudo_p ())
2346 {
2347 rtx temp2 = gen_reg_rtx (DImode);
2348 rtx temp3 = gen_reg_rtx (DImode);
2349 rtx temp4 = gen_reg_rtx (DImode);
2350
2351 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2352 GEN_INT (32))));
2353
2354 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2355 if ((low_bits & ~0xfffffc00) != 0)
2356 {
2357 emit_insn (gen_rtx_SET (temp3,
2358 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2359 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2360 }
2361 else
2362 {
2363 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2364 }
2365 }
2366 else
2367 {
2368 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2369 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2370 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2371 int to_shift = 12;
2372
2373 /* We are in the middle of reload, so this is really
2374 painful. However we do still make an attempt to
2375 avoid emitting truly stupid code. */
2376 if (low1 != const0_rtx)
2377 {
2378 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2379 GEN_INT (to_shift))));
2380 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2381 sub_temp = op0;
2382 to_shift = 12;
2383 }
2384 else
2385 {
2386 to_shift += 12;
2387 }
2388 if (low2 != const0_rtx)
2389 {
2390 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2391 GEN_INT (to_shift))));
2392 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2393 sub_temp = op0;
2394 to_shift = 8;
2395 }
2396 else
2397 {
2398 to_shift += 8;
2399 }
2400 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2401 GEN_INT (to_shift))));
2402 if (low3 != const0_rtx)
2403 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2404 /* phew... */
2405 }
2406 }
2407
2408 /* Analyze a 64-bit constant for certain properties. */
2409 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2410 unsigned HOST_WIDE_INT,
2411 int *, int *, int *);
2412
2413 static void
2414 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2415 unsigned HOST_WIDE_INT low_bits,
2416 int *hbsp, int *lbsp, int *abbasp)
2417 {
2418 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2419 int i;
2420
2421 lowest_bit_set = highest_bit_set = -1;
2422 i = 0;
2423 do
2424 {
2425 if ((lowest_bit_set == -1)
2426 && ((low_bits >> i) & 1))
2427 lowest_bit_set = i;
2428 if ((highest_bit_set == -1)
2429 && ((high_bits >> (32 - i - 1)) & 1))
2430 highest_bit_set = (64 - i - 1);
2431 }
2432 while (++i < 32
2433 && ((highest_bit_set == -1)
2434 || (lowest_bit_set == -1)));
2435 if (i == 32)
2436 {
2437 i = 0;
2438 do
2439 {
2440 if ((lowest_bit_set == -1)
2441 && ((high_bits >> i) & 1))
2442 lowest_bit_set = i + 32;
2443 if ((highest_bit_set == -1)
2444 && ((low_bits >> (32 - i - 1)) & 1))
2445 highest_bit_set = 32 - i - 1;
2446 }
2447 while (++i < 32
2448 && ((highest_bit_set == -1)
2449 || (lowest_bit_set == -1)));
2450 }
2451 /* If there are no bits set this should have gone out
2452 as one instruction! */
2453 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2454 all_bits_between_are_set = 1;
2455 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2456 {
2457 if (i < 32)
2458 {
2459 if ((low_bits & (1 << i)) != 0)
2460 continue;
2461 }
2462 else
2463 {
2464 if ((high_bits & (1 << (i - 32))) != 0)
2465 continue;
2466 }
2467 all_bits_between_are_set = 0;
2468 break;
2469 }
2470 *hbsp = highest_bit_set;
2471 *lbsp = lowest_bit_set;
2472 *abbasp = all_bits_between_are_set;
2473 }
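/* Editorial illustration, not part of the original sources: for the
   hypothetical constant 0x00000fffff000000 (high_bits = 0x00000fff,
   low_bits = 0xff000000), the routine above computes lowest_bit_set = 24,
   highest_bit_set = 43 and all_bits_between_are_set = 1, since bits
   24..43 form a single contiguous run of ones.  */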
2474
2475 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2476
2477 static int
2478 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2479 unsigned HOST_WIDE_INT low_bits)
2480 {
2481 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2482
2483 if (high_bits == 0
2484 || high_bits == 0xffffffff)
2485 return 1;
2486
2487 analyze_64bit_constant (high_bits, low_bits,
2488 &highest_bit_set, &lowest_bit_set,
2489 &all_bits_between_are_set);
2490
2491 if ((highest_bit_set == 63
2492 || lowest_bit_set == 0)
2493 && all_bits_between_are_set != 0)
2494 return 1;
2495
2496 if ((highest_bit_set - lowest_bit_set) < 21)
2497 return 1;
2498
2499 return 0;
2500 }
2501
2502 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2503 unsigned HOST_WIDE_INT,
2504 int, int);
2505
2506 static unsigned HOST_WIDE_INT
2507 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2508 unsigned HOST_WIDE_INT low_bits,
2509 int lowest_bit_set, int shift)
2510 {
2511 HOST_WIDE_INT hi, lo;
2512
2513 if (lowest_bit_set < 32)
2514 {
2515 lo = (low_bits >> lowest_bit_set) << shift;
2516 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2517 }
2518 else
2519 {
2520 lo = 0;
2521 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2522 }
2523 gcc_assert (! (hi & lo));
2524 return (hi | lo);
2525 }
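/* Editorial illustration, not part of the original sources: continuing
   the 0x00000fffff000000 example, the "22 or less bits" case of
   sparc_emit_set_const64 below calls
   create_simple_focus_bits (0x00000fff, 0xff000000, 24, 10), which
   returns 0x3ffffc00, and then emits the equivalent of
     sethi %hi(0x3ffffc00), %tmp  ! tmp = 0x3ffffc00
     sllx %tmp, 14, %dst          ! 24 - 10 = 14; dst = 0x00000fffff000000  */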
2526
2527 /* Here we are sure to be arch64 and this is an integer constant
2528 being loaded into a register. Emit the most efficient
2529 insn sequence possible. Detection of all the 1-insn cases
2530 has been done already. */
2531 static void
2532 sparc_emit_set_const64 (rtx op0, rtx op1)
2533 {
2534 unsigned HOST_WIDE_INT high_bits, low_bits;
2535 int lowest_bit_set, highest_bit_set;
2536 int all_bits_between_are_set;
2537 rtx temp = 0;
2538
2539 /* Sanity check that we know what we are working with. */
2540 gcc_assert (TARGET_ARCH64
2541 && (GET_CODE (op0) == SUBREG
2542 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2543
2544 if (! can_create_pseudo_p ())
2545 temp = op0;
2546
2547 if (GET_CODE (op1) != CONST_INT)
2548 {
2549 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2550 return;
2551 }
2552
2553 if (! temp)
2554 temp = gen_reg_rtx (DImode);
2555
2556 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2557 low_bits = (INTVAL (op1) & 0xffffffff);
2558
2559 /* low_bits bits 0 --> 31
2560 high_bits bits 32 --> 63 */
2561
2562 analyze_64bit_constant (high_bits, low_bits,
2563 &highest_bit_set, &lowest_bit_set,
2564 &all_bits_between_are_set);
2565
2566 /* First try for a 2-insn sequence. */
2567
2568 /* These situations are preferred because the optimizer can
2569 * do more things with them:
2570 * 1) mov -1, %reg
2571 * sllx %reg, shift, %reg
2572 * 2) mov -1, %reg
2573 * srlx %reg, shift, %reg
2574 * 3) mov some_small_const, %reg
2575 * sllx %reg, shift, %reg
2576 */
2577 if (((highest_bit_set == 63
2578 || lowest_bit_set == 0)
2579 && all_bits_between_are_set != 0)
2580 || ((highest_bit_set - lowest_bit_set) < 12))
2581 {
2582 HOST_WIDE_INT the_const = -1;
2583 int shift = lowest_bit_set;
2584
2585 if ((highest_bit_set != 63
2586 && lowest_bit_set != 0)
2587 || all_bits_between_are_set == 0)
2588 {
2589 the_const =
2590 create_simple_focus_bits (high_bits, low_bits,
2591 lowest_bit_set, 0);
2592 }
2593 else if (lowest_bit_set == 0)
2594 shift = -(63 - highest_bit_set);
2595
2596 gcc_assert (SPARC_SIMM13_P (the_const));
2597 gcc_assert (shift != 0);
2598
2599 emit_insn (gen_safe_SET64 (temp, the_const));
2600 if (shift > 0)
2601 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2602 GEN_INT (shift))));
2603 else if (shift < 0)
2604 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2605 GEN_INT (-shift))));
2606 return;
2607 }
2608
2609 /* Now a range of 22 or less bits set somewhere.
2610 * 1) sethi %hi(focus_bits), %reg
2611 * sllx %reg, shift, %reg
2612 * 2) sethi %hi(focus_bits), %reg
2613 * srlx %reg, shift, %reg
2614 */
2615 if ((highest_bit_set - lowest_bit_set) < 21)
2616 {
2617 unsigned HOST_WIDE_INT focus_bits =
2618 create_simple_focus_bits (high_bits, low_bits,
2619 lowest_bit_set, 10);
2620
2621 gcc_assert (SPARC_SETHI_P (focus_bits));
2622 gcc_assert (lowest_bit_set != 10);
2623
2624 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2625
2626 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2627 if (lowest_bit_set < 10)
2628 emit_insn (gen_rtx_SET (op0,
2629 gen_rtx_LSHIFTRT (DImode, temp,
2630 GEN_INT (10 - lowest_bit_set))));
2631 else if (lowest_bit_set > 10)
2632 emit_insn (gen_rtx_SET (op0,
2633 gen_rtx_ASHIFT (DImode, temp,
2634 GEN_INT (lowest_bit_set - 10))));
2635 return;
2636 }
2637
2638 /* 1) sethi %hi(low_bits), %reg
2639 * or %reg, %lo(low_bits), %reg
2640 * 2) sethi %hi(~low_bits), %reg
2641 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2642 */
2643 if (high_bits == 0
2644 || high_bits == 0xffffffff)
2645 {
2646 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2647 (high_bits == 0xffffffff));
2648 return;
2649 }
2650
2651 /* Now, try 3-insn sequences. */
2652
2653 /* 1) sethi %hi(high_bits), %reg
2654 * or %reg, %lo(high_bits), %reg
2655 * sllx %reg, 32, %reg
2656 */
2657 if (low_bits == 0)
2658 {
2659 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2660 return;
2661 }
2662
2663 /* We may be able to do something quick
2664 when the constant is negated, so try that. */
2665 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2666 (~low_bits) & 0xfffffc00))
2667 {
2668 /* NOTE: The trailing bits get XOR'd so we need the
2669 non-negated bits, not the negated ones. */
2670 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2671
2672 if ((((~high_bits) & 0xffffffff) == 0
2673 && ((~low_bits) & 0x80000000) == 0)
2674 || (((~high_bits) & 0xffffffff) == 0xffffffff
2675 && ((~low_bits) & 0x80000000) != 0))
2676 {
2677 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2678
2679 if ((SPARC_SETHI_P (fast_int)
2680 && (~high_bits & 0xffffffff) == 0)
2681 || SPARC_SIMM13_P (fast_int))
2682 emit_insn (gen_safe_SET64 (temp, fast_int));
2683 else
2684 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2685 }
2686 else
2687 {
2688 rtx negated_const;
2689 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2690 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2691 sparc_emit_set_const64 (temp, negated_const);
2692 }
2693
2694 /* If we are XOR'ing with -1, then we should emit a one's complement
2695 instead. This way the combiner will notice logical operations
2696 such as ANDN later on and substitute. */
2697 if (trailing_bits == 0x3ff)
2698 {
2699 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2700 }
2701 else
2702 {
2703 emit_insn (gen_rtx_SET (op0,
2704 gen_safe_XOR64 (temp,
2705 (-0x400 | trailing_bits))));
2706 }
2707 return;
2708 }
2709
2710 /* 1) sethi %hi(xxx), %reg
2711 * or %reg, %lo(xxx), %reg
2712 * sllx %reg, yyy, %reg
2713 *
2714 * ??? This is just a generalized version of the low_bits==0
2715 * thing above, FIXME...
2716 */
2717 if ((highest_bit_set - lowest_bit_set) < 32)
2718 {
2719 unsigned HOST_WIDE_INT focus_bits =
2720 create_simple_focus_bits (high_bits, low_bits,
2721 lowest_bit_set, 0);
2722
2723 /* We can't get here in this state. */
2724 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2725
2726 /* So what we know is that the set bits straddle the
2727 middle of the 64-bit word. */
2728 sparc_emit_set_const64_quick2 (op0, temp,
2729 focus_bits, 0,
2730 lowest_bit_set);
2731 return;
2732 }
2733
2734 /* 1) sethi %hi(high_bits), %reg
2735 * or %reg, %lo(high_bits), %reg
2736 * sllx %reg, 32, %reg
2737 * or %reg, low_bits, %reg
2738 */
2739 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2740 {
2741 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2742 return;
2743 }
2744
2745 /* The easiest way when all else fails is full decomposition. */
2746 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2747 }
2748
2749 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2750
2751 static bool
2752 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2753 {
2754 *p1 = SPARC_ICC_REG;
2755 *p2 = SPARC_FCC_REG;
2756 return true;
2757 }
2758
2759 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2760 return the mode to be used for the comparison. For floating-point,
2761 CCFP[E]mode is used. CCNZmode should be used when the first operand
2762 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2763 processing is needed. */
2764
2765 machine_mode
2766 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2767 {
2768 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2769 {
2770 switch (op)
2771 {
2772 case EQ:
2773 case NE:
2774 case UNORDERED:
2775 case ORDERED:
2776 case UNLT:
2777 case UNLE:
2778 case UNGT:
2779 case UNGE:
2780 case UNEQ:
2781 case LTGT:
2782 return CCFPmode;
2783
2784 case LT:
2785 case LE:
2786 case GT:
2787 case GE:
2788 return CCFPEmode;
2789
2790 default:
2791 gcc_unreachable ();
2792 }
2793 }
2794 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2795 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2796 {
2797 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2798 return CCXNZmode;
2799 else
2800 return CCNZmode;
2801 }
2802 else
2803 {
2804 /* This is for the cmp<mode>_sne pattern. */
2805 if (GET_CODE (x) == NOT && y == constm1_rtx)
2806 {
2807 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2808 return CCXCmode;
2809 else
2810 return CCCmode;
2811 }
2812
2813 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2814 return CCXmode;
2815 else
2816 return CCmode;
2817 }
2818 }
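/* Editorial note, not part of the original sources: for example, a
   comparison whose first operand is (plus:SI x y) selects CCNZmode
   (CCXNZmode for DImode on 64-bit), an EQ/NE comparison of SFmode
   values selects CCFPmode, and a plain SImode register comparison
   selects CCmode.  */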
2819
2820 /* Emit the compare insn and return the CC reg for a CODE comparison
2821 with operands X and Y. */
2822
2823 static rtx
2824 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2825 {
2826 machine_mode mode;
2827 rtx cc_reg;
2828
2829 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2830 return x;
2831
2832 mode = SELECT_CC_MODE (code, x, y);
2833
2834 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2835 fcc regs (cse can't tell they're really call clobbered regs and will
2836 remove a duplicate comparison even if there is an intervening function
2837 call - it will then try to reload the cc reg via an int reg which is why
2838 we need the movcc patterns). It is possible to provide the movcc
2839 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2840 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2841 to tell cse that CCFPE mode registers (even pseudos) are call
2842 clobbered. */
2843
2844 /* ??? This is an experiment. Rather than making changes to cse which may
2845 or may not be easy/clean, we do our own cse. This is possible because
2846 we will generate hard registers. Cse knows they're call clobbered (it
2847 doesn't know the same thing about pseudos). If we guess wrong, no big
2848 deal, but if we win, great! */
2849
2850 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2851 #if 1 /* experiment */
2852 {
2853 int reg;
2854 /* We cycle through the registers to ensure they're all exercised. */
2855 static int next_fcc_reg = 0;
2856 /* Previous x,y for each fcc reg. */
2857 static rtx prev_args[4][2];
2858
2859 /* Scan prev_args for x,y. */
2860 for (reg = 0; reg < 4; reg++)
2861 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2862 break;
2863 if (reg == 4)
2864 {
2865 reg = next_fcc_reg;
2866 prev_args[reg][0] = x;
2867 prev_args[reg][1] = y;
2868 next_fcc_reg = (next_fcc_reg + 1) & 3;
2869 }
2870 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2871 }
2872 #else
2873 cc_reg = gen_reg_rtx (mode);
2874 #endif /* ! experiment */
2875 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2876 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2877 else
2878 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2879
2880 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2881 will only result in an unrecognizable insn so no point in asserting. */
2882 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2883
2884 return cc_reg;
2885 }
2886
2887
2888 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2889
2890 rtx
2891 gen_compare_reg (rtx cmp)
2892 {
2893 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2894 }
2895
2896 /* This function is used for v9 only.
2897 DEST is the target of the Scc insn.
2898 CODE is the code for an Scc's comparison.
2899 X and Y are the values we compare.
2900
2901 This function is needed to turn
2902
2903 (set (reg:SI 110)
2904 (gt (reg:CCX 100 %icc)
2905 (const_int 0)))
2906 into
2907 (set (reg:SI 110)
2908 (gt:DI (reg:CCX 100 %icc)
2909 (const_int 0)))
2910
2911 IE: The instruction recognizer needs to see the mode of the comparison to
2912 find the right instruction. We could use "gt:DI" right in the
2913 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2914
2915 static int
2916 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2917 {
2918 if (! TARGET_ARCH64
2919 && (GET_MODE (x) == DImode
2920 || GET_MODE (dest) == DImode))
2921 return 0;
2922
2923 /* Try to use the movrCC insns. */
2924 if (TARGET_ARCH64
2925 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2926 && y == const0_rtx
2927 && v9_regcmp_p (compare_code))
2928 {
2929 rtx op0 = x;
2930 rtx temp;
2931
2932 /* Special case for op0 != 0. This can be done with one instruction if
2933 dest == x. */
2934
2935 if (compare_code == NE
2936 && GET_MODE (dest) == DImode
2937 && rtx_equal_p (op0, dest))
2938 {
2939 emit_insn (gen_rtx_SET (dest,
2940 gen_rtx_IF_THEN_ELSE (DImode,
2941 gen_rtx_fmt_ee (compare_code, DImode,
2942 op0, const0_rtx),
2943 const1_rtx,
2944 dest)));
2945 return 1;
2946 }
2947
2948 if (reg_overlap_mentioned_p (dest, op0))
2949 {
2950 /* Handle the case where dest == x.
2951 We "early clobber" the result. */
2952 op0 = gen_reg_rtx (GET_MODE (x));
2953 emit_move_insn (op0, x);
2954 }
2955
2956 emit_insn (gen_rtx_SET (dest, const0_rtx));
2957 if (GET_MODE (op0) != DImode)
2958 {
2959 temp = gen_reg_rtx (DImode);
2960 convert_move (temp, op0, 0);
2961 }
2962 else
2963 temp = op0;
2964 emit_insn (gen_rtx_SET (dest,
2965 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2966 gen_rtx_fmt_ee (compare_code, DImode,
2967 temp, const0_rtx),
2968 const1_rtx,
2969 dest)));
2970 return 1;
2971 }
2972 else
2973 {
2974 x = gen_compare_reg_1 (compare_code, x, y);
2975 y = const0_rtx;
2976
2977 emit_insn (gen_rtx_SET (dest, const0_rtx));
2978 emit_insn (gen_rtx_SET (dest,
2979 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2980 gen_rtx_fmt_ee (compare_code,
2981 GET_MODE (x), x, y),
2982 const1_rtx, dest)));
2983 return 1;
2984 }
2985 }
2986
2987
2988 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2989 without jumps using the addx/subx instructions. */
2990
2991 bool
2992 emit_scc_insn (rtx operands[])
2993 {
2994 rtx tem, x, y;
2995 enum rtx_code code;
2996 machine_mode mode;
2997
2998 /* The quad-word fp compare library routines all return nonzero to indicate
2999 true, which is different from the equivalent libgcc routines, so we must
3000 handle them specially here. */
3001 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3002 {
3003 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3004 GET_CODE (operands[1]));
3005 operands[2] = XEXP (operands[1], 0);
3006 operands[3] = XEXP (operands[1], 1);
3007 }
3008
3009 code = GET_CODE (operands[1]);
3010 x = operands[2];
3011 y = operands[3];
3012 mode = GET_MODE (x);
3013
3014 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3015 more applications). The exception to this is "reg != 0" which can
3016 be done in one instruction on v9 (so we do it). */
3017 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3018 {
3019 if (y != const0_rtx)
3020 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3021
3022 rtx pat = gen_rtx_SET (operands[0],
3023 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3024 x, const0_rtx));
3025
3026 /* If we can use addx/subx or addxc/subxc, add a clobber for CC. */
3027 if (mode == SImode
3028 || (code == NE && TARGET_VIS3)
3029 || (code == EQ && TARGET_SUBXC))
3030 {
3031 rtx clobber
3032 = gen_rtx_CLOBBER (VOIDmode,
3033 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3034 SPARC_ICC_REG));
3035 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3036 }
3037
3038 emit_insn (pat);
3039 return true;
3040 }
3041
3042 /* We can do LTU in DImode using the addxc instruction with VIS3
3043 and GEU in DImode using the subxc instruction with SUBXC. */
3044 if (TARGET_ARCH64
3045 && mode == DImode
3046 && !((code == LTU || code == GTU) && TARGET_VIS3)
3047 && !((code == GEU || code == LEU) && TARGET_SUBXC)
3048 && gen_v9_scc (operands[0], code, x, y))
3049 return true;
3050
3051 /* We can do LTU and GEU using the addx/subx instructions too. And
3052 for GTU/LEU, if both operands are registers, swap them and fall
3053 back to the easy case. */
3054 if (code == GTU || code == LEU)
3055 {
3056 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3057 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3058 {
3059 tem = x;
3060 x = y;
3061 y = tem;
3062 code = swap_condition (code);
3063 }
3064 }
3065
3066 if (code == LTU || code == GEU)
3067 {
3068 emit_insn (gen_rtx_SET (operands[0],
3069 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3070 gen_compare_reg_1 (code, x, y),
3071 const0_rtx)));
3072 return true;
3073 }
3074
3075 /* All the possibilities to use addx/subx-based sequences have been
3076 exhausted; try for a 3-instruction sequence using v9 conditional
3077 moves. */
3078 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3079 return true;
3080
3081 /* Nope, do branches. */
3082 return false;
3083 }
3084
3085 /* Emit a conditional jump insn for the v9 architecture using comparison code
3086 CODE and jump target LABEL.
3087 This function exists to take advantage of the v9 brxx insns. */
3088
3089 static void
3090 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3091 {
3092 emit_jump_insn (gen_rtx_SET (pc_rtx,
3093 gen_rtx_IF_THEN_ELSE (VOIDmode,
3094 gen_rtx_fmt_ee (code, GET_MODE (op0),
3095 op0, const0_rtx),
3096 gen_rtx_LABEL_REF (VOIDmode, label),
3097 pc_rtx)));
3098 }
3099
3100 /* Emit a conditional jump insn for the UA2011 architecture using
3101 comparison code CODE and jump target LABEL. This function exists
3102 to take advantage of the UA2011 Compare and Branch insns. */
3103
3104 static void
3105 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3106 {
3107 rtx if_then_else;
3108
3109 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3110 gen_rtx_fmt_ee (code, GET_MODE (op0),
3111 op0, op1),
3112 gen_rtx_LABEL_REF (VOIDmode, label),
3113 pc_rtx);
3114
3115 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3116 }
3117
3118 void
3119 emit_conditional_branch_insn (rtx operands[])
3120 {
3121 /* The quad-word fp compare library routines all return nonzero to indicate
3122 true, which is different from the equivalent libgcc routines, so we must
3123 handle them specially here. */
3124 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3125 {
3126 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3127 GET_CODE (operands[0]));
3128 operands[1] = XEXP (operands[0], 0);
3129 operands[2] = XEXP (operands[0], 1);
3130 }
3131
3132 /* If we can tell early on that the comparison is against a constant
3133 that won't fit in the 5-bit signed immediate field of a cbcond,
3134 use one of the other v9 conditional branch sequences. */
3135 if (TARGET_CBCOND
3136 && GET_CODE (operands[1]) == REG
3137 && (GET_MODE (operands[1]) == SImode
3138 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3139 && (GET_CODE (operands[2]) != CONST_INT
3140 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3141 {
3142 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3143 return;
3144 }
3145
3146 if (TARGET_ARCH64 && operands[2] == const0_rtx
3147 && GET_CODE (operands[1]) == REG
3148 && GET_MODE (operands[1]) == DImode)
3149 {
3150 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3151 return;
3152 }
3153
3154 operands[1] = gen_compare_reg (operands[0]);
3155 operands[2] = const0_rtx;
3156 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3157 operands[1], operands[2]);
3158 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3159 operands[3]));
3160 }
3161
3162
3163 /* Generate a DFmode part of a hard TFmode register.
3164 REG is the TFmode hard register, LOW is 1 for the
3165 low 64 bits of the register and 0 otherwise.
3166 */
3167 rtx
3168 gen_df_reg (rtx reg, int low)
3169 {
3170 int regno = REGNO (reg);
3171
3172 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3173 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3174 return gen_rtx_REG (DFmode, regno);
3175 }
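/* Editorial note, not part of the original sources: assuming the usual
   big-endian SPARC word order (WORDS_BIG_ENDIAN), for a TFmode value
   held in the hard register pair starting at %f0, gen_df_reg (reg, 0)
   returns %f0 (the high 64 bits) and gen_df_reg (reg, 1) returns %f2
   (the low 64 bits).  */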
3176 \f
3177 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3178 Unlike normal calls, TFmode operands are passed by reference. It is
3179 assumed that no more than 3 operands are required. */
3180
3181 static void
3182 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3183 {
3184 rtx ret_slot = NULL, arg[3], func_sym;
3185 int i;
3186
3187 /* We only expect to be called for conversions, unary, and binary ops. */
3188 gcc_assert (nargs == 2 || nargs == 3);
3189
3190 for (i = 0; i < nargs; ++i)
3191 {
3192 rtx this_arg = operands[i];
3193 rtx this_slot;
3194
3195 /* TFmode arguments and return values are passed by reference. */
3196 if (GET_MODE (this_arg) == TFmode)
3197 {
3198 int force_stack_temp;
3199
3200 force_stack_temp = 0;
3201 if (TARGET_BUGGY_QP_LIB && i == 0)
3202 force_stack_temp = 1;
3203
3204 if (GET_CODE (this_arg) == MEM
3205 && ! force_stack_temp)
3206 {
3207 tree expr = MEM_EXPR (this_arg);
3208 if (expr)
3209 mark_addressable (expr);
3210 this_arg = XEXP (this_arg, 0);
3211 }
3212 else if (CONSTANT_P (this_arg)
3213 && ! force_stack_temp)
3214 {
3215 this_slot = force_const_mem (TFmode, this_arg);
3216 this_arg = XEXP (this_slot, 0);
3217 }
3218 else
3219 {
3220 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3221
3222 /* Operand 0 is the return value. We'll copy it out later. */
3223 if (i > 0)
3224 emit_move_insn (this_slot, this_arg);
3225 else
3226 ret_slot = this_slot;
3227
3228 this_arg = XEXP (this_slot, 0);
3229 }
3230 }
3231
3232 arg[i] = this_arg;
3233 }
3234
3235 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3236
3237 if (GET_MODE (operands[0]) == TFmode)
3238 {
3239 if (nargs == 2)
3240 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3241 arg[0], GET_MODE (arg[0]),
3242 arg[1], GET_MODE (arg[1]));
3243 else
3244 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3245 arg[0], GET_MODE (arg[0]),
3246 arg[1], GET_MODE (arg[1]),
3247 arg[2], GET_MODE (arg[2]));
3248
3249 if (ret_slot)
3250 emit_move_insn (operands[0], ret_slot);
3251 }
3252 else
3253 {
3254 rtx ret;
3255
3256 gcc_assert (nargs == 2);
3257
3258 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3259 GET_MODE (operands[0]), 1,
3260 arg[1], GET_MODE (arg[1]));
3261
3262 if (ret != operands[0])
3263 emit_move_insn (operands[0], ret);
3264 }
3265 }
3266
3267 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3268
3269 static void
3270 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3271 {
3272 const char *func;
3273
3274 switch (code)
3275 {
3276 case PLUS:
3277 func = "_Qp_add";
3278 break;
3279 case MINUS:
3280 func = "_Qp_sub";
3281 break;
3282 case MULT:
3283 func = "_Qp_mul";
3284 break;
3285 case DIV:
3286 func = "_Qp_div";
3287 break;
3288 default:
3289 gcc_unreachable ();
3290 }
3291
3292 emit_soft_tfmode_libcall (func, 3, operands);
3293 }
3294
3295 static void
3296 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3297 {
3298 const char *func;
3299
3300 gcc_assert (code == SQRT);
3301 func = "_Qp_sqrt";
3302
3303 emit_soft_tfmode_libcall (func, 2, operands);
3304 }
3305
3306 static void
3307 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3308 {
3309 const char *func;
3310
3311 switch (code)
3312 {
3313 case FLOAT_EXTEND:
3314 switch (GET_MODE (operands[1]))
3315 {
3316 case SFmode:
3317 func = "_Qp_stoq";
3318 break;
3319 case DFmode:
3320 func = "_Qp_dtoq";
3321 break;
3322 default:
3323 gcc_unreachable ();
3324 }
3325 break;
3326
3327 case FLOAT_TRUNCATE:
3328 switch (GET_MODE (operands[0]))
3329 {
3330 case SFmode:
3331 func = "_Qp_qtos";
3332 break;
3333 case DFmode:
3334 func = "_Qp_qtod";
3335 break;
3336 default:
3337 gcc_unreachable ();
3338 }
3339 break;
3340
3341 case FLOAT:
3342 switch (GET_MODE (operands[1]))
3343 {
3344 case SImode:
3345 func = "_Qp_itoq";
3346 if (TARGET_ARCH64)
3347 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3348 break;
3349 case DImode:
3350 func = "_Qp_xtoq";
3351 break;
3352 default:
3353 gcc_unreachable ();
3354 }
3355 break;
3356
3357 case UNSIGNED_FLOAT:
3358 switch (GET_MODE (operands[1]))
3359 {
3360 case SImode:
3361 func = "_Qp_uitoq";
3362 if (TARGET_ARCH64)
3363 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3364 break;
3365 case DImode:
3366 func = "_Qp_uxtoq";
3367 break;
3368 default:
3369 gcc_unreachable ();
3370 }
3371 break;
3372
3373 case FIX:
3374 switch (GET_MODE (operands[0]))
3375 {
3376 case SImode:
3377 func = "_Qp_qtoi";
3378 break;
3379 case DImode:
3380 func = "_Qp_qtox";
3381 break;
3382 default:
3383 gcc_unreachable ();
3384 }
3385 break;
3386
3387 case UNSIGNED_FIX:
3388 switch (GET_MODE (operands[0]))
3389 {
3390 case SImode:
3391 func = "_Qp_qtoui";
3392 break;
3393 case DImode:
3394 func = "_Qp_qtoux";
3395 break;
3396 default:
3397 gcc_unreachable ();
3398 }
3399 break;
3400
3401 default:
3402 gcc_unreachable ();
3403 }
3404
3405 emit_soft_tfmode_libcall (func, 2, operands);
3406 }
3407
3408 /* Expand a hard-float tfmode operation. All arguments must be in
3409 registers. */
3410
3411 static void
3412 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3413 {
3414 rtx op, dest;
3415
3416 if (GET_RTX_CLASS (code) == RTX_UNARY)
3417 {
3418 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3419 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3420 }
3421 else
3422 {
3423 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3424 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3425 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3426 operands[1], operands[2]);
3427 }
3428
3429 if (register_operand (operands[0], VOIDmode))
3430 dest = operands[0];
3431 else
3432 dest = gen_reg_rtx (GET_MODE (operands[0]));
3433
3434 emit_insn (gen_rtx_SET (dest, op));
3435
3436 if (dest != operands[0])
3437 emit_move_insn (operands[0], dest);
3438 }
3439
3440 void
3441 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3442 {
3443 if (TARGET_HARD_QUAD)
3444 emit_hard_tfmode_operation (code, operands);
3445 else
3446 emit_soft_tfmode_binop (code, operands);
3447 }
3448
3449 void
3450 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3451 {
3452 if (TARGET_HARD_QUAD)
3453 emit_hard_tfmode_operation (code, operands);
3454 else
3455 emit_soft_tfmode_unop (code, operands);
3456 }
3457
3458 void
3459 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3460 {
3461 if (TARGET_HARD_QUAD)
3462 emit_hard_tfmode_operation (code, operands);
3463 else
3464 emit_soft_tfmode_cvt (code, operands);
3465 }
3466 \f
3467 /* Return nonzero if a branch/jump/call instruction will be emitting
3468 a nop into its delay slot. */
3469
3470 int
3471 empty_delay_slot (rtx_insn *insn)
3472 {
3473 rtx seq;
3474
3475 /* If no previous instruction (should not happen), return true. */
3476 if (PREV_INSN (insn) == NULL)
3477 return 1;
3478
3479 seq = NEXT_INSN (PREV_INSN (insn));
3480 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3481 return 0;
3482
3483 return 1;
3484 }
3485
3486 /* Return nonzero if we should emit a nop after a cbcond instruction.
3487 The cbcond instruction does not have a delay slot; however, there is
3488 a severe performance penalty if a control transfer appears right
3489 after a cbcond. Therefore we emit a nop when we detect this
3490 situation. */
3491
3492 int
3493 emit_cbcond_nop (rtx_insn *insn)
3494 {
3495 rtx next = next_active_insn (insn);
3496
3497 if (!next)
3498 return 1;
3499
3500 if (NONJUMP_INSN_P (next)
3501 && GET_CODE (PATTERN (next)) == SEQUENCE)
3502 next = XVECEXP (PATTERN (next), 0, 0);
3503 else if (CALL_P (next)
3504 && GET_CODE (PATTERN (next)) == PARALLEL)
3505 {
3506 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3507
3508 if (GET_CODE (delay) == RETURN)
3509 {
3510 /* It's a sibling call. Do not emit the nop if we're going
3511 to emit something other than the jump itself as the first
3512 instruction of the sibcall sequence. */
3513 if (sparc_leaf_function_p || TARGET_FLAT)
3514 return 0;
3515 }
3516 }
3517
3518 if (NONJUMP_INSN_P (next))
3519 return 0;
3520
3521 return 1;
3522 }
3523
3524 /* Return nonzero if TRIAL can go into the call delay slot. */
3525
3526 int
3527 eligible_for_call_delay (rtx_insn *trial)
3528 {
3529 rtx pat;
3530
3531 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3532 return 0;
3533
3534 /* Binutils allows
3535 call __tls_get_addr, %tgd_call (foo)
3536 add %l7, %o0, %o0, %tgd_add (foo)
3537 while Sun as/ld does not. */
3538 if (TARGET_GNU_TLS || !TARGET_TLS)
3539 return 1;
3540
3541 pat = PATTERN (trial);
3542
3543 /* We must reject tgd_add{32|64}, i.e.
3544 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3545 and tldm_add{32|64}, i.e.
3546 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3547 for Sun as/ld. */
3548 if (GET_CODE (pat) == SET
3549 && GET_CODE (SET_SRC (pat)) == PLUS)
3550 {
3551 rtx unspec = XEXP (SET_SRC (pat), 1);
3552
3553 if (GET_CODE (unspec) == UNSPEC
3554 && (XINT (unspec, 1) == UNSPEC_TLSGD
3555 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3556 return 0;
3557 }
3558
3559 return 1;
3560 }
3561
3562 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3563 instruction. RETURN_P is true if the v9 variant 'return' is to be
3564 considered in the test too.
3565
3566 TRIAL must be a SET whose destination is a REG appropriate for the
3567 'restore' instruction or, if RETURN_P is true, for the 'return'
3568 instruction. */
3569
3570 static int
3571 eligible_for_restore_insn (rtx trial, bool return_p)
3572 {
3573 rtx pat = PATTERN (trial);
3574 rtx src = SET_SRC (pat);
3575 bool src_is_freg = false;
3576 rtx src_reg;
3577
3578 /* Since we now can do moves between float and integer registers when
3579 VIS3 is enabled, we have to catch this case. We can allow such
3580 moves when doing a 'return' however. */
3581 src_reg = src;
3582 if (GET_CODE (src_reg) == SUBREG)
3583 src_reg = SUBREG_REG (src_reg);
3584 if (GET_CODE (src_reg) == REG
3585 && SPARC_FP_REG_P (REGNO (src_reg)))
3586 src_is_freg = true;
3587
3588 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3589 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3590 && arith_operand (src, GET_MODE (src))
3591 && ! src_is_freg)
3592 {
3593 if (TARGET_ARCH64)
3594 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3595 else
3596 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3597 }
3598
3599 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3600 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3601 && arith_double_operand (src, GET_MODE (src))
3602 && ! src_is_freg)
3603 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3604
3605 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3606 else if (! TARGET_FPU && register_operand (src, SFmode))
3607 return 1;
3608
3609 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3610 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3611 return 1;
3612
3613 /* If we have the 'return' instruction, anything that does not use
3614 local or output registers and can go into a delay slot wins. */
3615 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3616 return 1;
3617
3618 /* The 'restore src1,src2,dest' pattern for SImode. */
3619 else if (GET_CODE (src) == PLUS
3620 && register_operand (XEXP (src, 0), SImode)
3621 && arith_operand (XEXP (src, 1), SImode))
3622 return 1;
3623
3624 /* The 'restore src1,src2,dest' pattern for DImode. */
3625 else if (GET_CODE (src) == PLUS
3626 && register_operand (XEXP (src, 0), DImode)
3627 && arith_double_operand (XEXP (src, 1), DImode))
3628 return 1;
3629
3630 /* The 'restore src1,%lo(src2),dest' pattern. */
3631 else if (GET_CODE (src) == LO_SUM
3632 && ! TARGET_CM_MEDMID
3633 && ((register_operand (XEXP (src, 0), SImode)
3634 && immediate_operand (XEXP (src, 1), SImode))
3635 || (TARGET_ARCH64
3636 && register_operand (XEXP (src, 0), DImode)
3637 && immediate_operand (XEXP (src, 1), DImode))))
3638 return 1;
3639
3640 /* The 'restore src,src,dest' pattern. */
3641 else if (GET_CODE (src) == ASHIFT
3642 && (register_operand (XEXP (src, 0), SImode)
3643 || register_operand (XEXP (src, 0), DImode))
3644 && XEXP (src, 1) == const1_rtx)
3645 return 1;
3646
3647 return 0;
3648 }
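
/* For illustration: under the 'restore src,%g0,dest' pattern above, an insn
   like
     mov  %i1, %i0
   preceding the return can typically be combined by the output routines into
     restore  %i1, %g0, %o0
   and a final
     add  %i1, %i2, %i0
   into
     restore  %i1, %i2, %o0
   since 'restore' reads its sources in the callee's window and writes its
   destination in the caller's window.  */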
3649
3650 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3651
3652 int
3653 eligible_for_return_delay (rtx_insn *trial)
3654 {
3655 int regno;
3656 rtx pat;
3657
3658 /* If the function uses __builtin_eh_return, the eh_return machinery
3659 occupies the delay slot. */
3660 if (crtl->calls_eh_return)
3661 return 0;
3662
3663 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3664 return 0;
3665
3666 /* In the case of a leaf or flat function, anything can go into the slot. */
3667 if (sparc_leaf_function_p || TARGET_FLAT)
3668 return 1;
3669
3670 if (!NONJUMP_INSN_P (trial))
3671 return 0;
3672
3673 pat = PATTERN (trial);
3674 if (GET_CODE (pat) == PARALLEL)
3675 {
3676 int i;
3677
3678 if (! TARGET_V9)
3679 return 0;
3680 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3681 {
3682 rtx expr = XVECEXP (pat, 0, i);
3683 if (GET_CODE (expr) != SET)
3684 return 0;
3685 if (GET_CODE (SET_DEST (expr)) != REG)
3686 return 0;
3687 regno = REGNO (SET_DEST (expr));
3688 if (regno >= 8 && regno < 24)
3689 return 0;
3690 }
3691 return !epilogue_renumber (&pat, 1);
3692 }
3693
3694 if (GET_CODE (pat) != SET)
3695 return 0;
3696
3697 if (GET_CODE (SET_DEST (pat)) != REG)
3698 return 0;
3699
3700 regno = REGNO (SET_DEST (pat));
3701
3702 /* Otherwise, only operations which can be done in tandem with
3703 a `restore' or `return' insn can go into the delay slot. */
3704 if (regno >= 8 && regno < 24)
3705 return 0;
3706
3707 /* If this instruction sets up a floating point register and we have a return
3708 instruction, it can probably go in. But restore will not work
3709 with FP_REGS. */
3710 if (! SPARC_INT_REG_P (regno))
3711 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3712
3713 return eligible_for_restore_insn (trial, true);
3714 }
3715
3716 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3717
3718 int
3719 eligible_for_sibcall_delay (rtx_insn *trial)
3720 {
3721 rtx pat;
3722
3723 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3724 return 0;
3725
3726 if (!NONJUMP_INSN_P (trial))
3727 return 0;
3728
3729 pat = PATTERN (trial);
3730
3731 if (sparc_leaf_function_p || TARGET_FLAT)
3732 {
3733 /* If the tail call is done using the call instruction,
3734 we have to restore %o7 in the delay slot. */
3735 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3736 return 0;
3737
3738 /* %g1 is used to build the function address */
3739 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3740 return 0;
3741
3742 return 1;
3743 }
3744
3745 if (GET_CODE (pat) != SET)
3746 return 0;
3747
3748 /* Otherwise, only operations which can be done in tandem with
3749 a `restore' insn can go into the delay slot. */
3750 if (GET_CODE (SET_DEST (pat)) != REG
3751 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3752 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3753 return 0;
3754
3755 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3756 in most cases. */
3757 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3758 return 0;
3759
3760 return eligible_for_restore_insn (trial, false);
3761 }
3762 \f
3763 /* Determine if it's legal to put X into the constant pool. This
3764 is not possible if X contains the address of a symbol that is
3765 not constant (TLS) or not known at final link time (PIC). */
3766
3767 static bool
3768 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3769 {
3770 switch (GET_CODE (x))
3771 {
3772 case CONST_INT:
3773 case CONST_WIDE_INT:
3774 case CONST_DOUBLE:
3775 case CONST_VECTOR:
3776 /* Accept all non-symbolic constants. */
3777 return false;
3778
3779 case LABEL_REF:
3780 /* Labels are OK iff we are non-PIC. */
3781 return flag_pic != 0;
3782
3783 case SYMBOL_REF:
3784 /* 'Naked' TLS symbol references are never OK,
3785 non-TLS symbols are OK iff we are non-PIC. */
3786 if (SYMBOL_REF_TLS_MODEL (x))
3787 return true;
3788 else
3789 return flag_pic != 0;
3790
3791 case CONST:
3792 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3793 case PLUS:
3794 case MINUS:
3795 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3796 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3797 case UNSPEC:
3798 return true;
3799 default:
3800 gcc_unreachable ();
3801 }
3802 }
3803 \f
3804 /* Global Offset Table support. */
3805 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3806 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3807
3808 /* Return the SYMBOL_REF for the Global Offset Table. */
3809
3810 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3811
3812 static rtx
3813 sparc_got (void)
3814 {
3815 if (!sparc_got_symbol)
3816 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3817
3818 return sparc_got_symbol;
3819 }
3820
3821 /* Ensure that we are not using patterns that are not OK with PIC. */
3822
3823 int
3824 check_pic (int i)
3825 {
3826 rtx op;
3827
3828 switch (flag_pic)
3829 {
3830 case 1:
3831 op = recog_data.operand[i];
3832 gcc_assert (GET_CODE (op) != SYMBOL_REF
3833 && (GET_CODE (op) != CONST
3834 || (GET_CODE (XEXP (op, 0)) == MINUS
3835 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3836 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3837 /* fallthrough */
3838 case 2:
3839 default:
3840 return 1;
3841 }
3842 }
3843
3844 /* Return true if X is an address which needs a temporary register when
3845 reloaded while generating PIC code. */
3846
3847 int
3848 pic_address_needs_scratch (rtx x)
3849 {
3850 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3851 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3853 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3854 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3855 return 1;
3856
3857 return 0;
3858 }
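
/* For example, with -fpic the address `sym + 0x2000' needs a scratch
   register, since 0x2000 does not fit in the 13-bit signed immediate
   field checked by SMALL_INT, whereas `sym + 8' does not need one.  */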
3859
3860 /* Determine if a given RTX is a valid constant. We already know this
3861 satisfies CONSTANT_P. */
3862
3863 static bool
3864 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3865 {
3866 switch (GET_CODE (x))
3867 {
3868 case CONST:
3869 case SYMBOL_REF:
3870 if (sparc_tls_referenced_p (x))
3871 return false;
3872 break;
3873
3874 case CONST_DOUBLE:
3875 /* Floating point constants are generally not ok.
3876 The only exception is 0.0 and all-ones in VIS. */
3877 if (TARGET_VIS
3878 && SCALAR_FLOAT_MODE_P (mode)
3879 && (const_zero_operand (x, mode)
3880 || const_all_ones_operand (x, mode)))
3881 return true;
3882
3883 return false;
3884
3885 case CONST_VECTOR:
3886 /* Vector constants are generally not ok.
3887 The only exception is 0 or -1 in VIS. */
3888 if (TARGET_VIS
3889 && (const_zero_operand (x, mode)
3890 || const_all_ones_operand (x, mode)))
3891 return true;
3892
3893 return false;
3894
3895 default:
3896 break;
3897 }
3898
3899 return true;
3900 }
3901
3902 /* Determine if a given RTX is a valid constant address. */
3903
3904 bool
3905 constant_address_p (rtx x)
3906 {
3907 switch (GET_CODE (x))
3908 {
3909 case LABEL_REF:
3910 case CONST_INT:
3911 case HIGH:
3912 return true;
3913
3914 case CONST:
3915 if (flag_pic && pic_address_needs_scratch (x))
3916 return false;
3917 return sparc_legitimate_constant_p (Pmode, x);
3918
3919 case SYMBOL_REF:
3920 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3921
3922 default:
3923 return false;
3924 }
3925 }
3926
3927 /* Nonzero if the constant value X is a legitimate general operand
3928 when generating PIC code. It is given that flag_pic is on and
3929 that X satisfies CONSTANT_P. */
3930
3931 bool
3932 legitimate_pic_operand_p (rtx x)
3933 {
3934 if (pic_address_needs_scratch (x))
3935 return false;
3936 if (sparc_tls_referenced_p (x))
3937 return false;
3938 return true;
3939 }
3940
3941 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3942 (CONST_INT_P (X) \
3943 && INTVAL (X) >= -0x1000 \
3944 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3945
3946 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3947 (CONST_INT_P (X) \
3948 && INTVAL (X) >= -0x1000 \
3949 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
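
/* Both macros describe the 13-bit signed immediate range of the memory
   instructions, shrunk so that the whole access stays inside it.  The
   OLO10 variant applies when the offset is added to a %lo() term, which
   can itself contribute up to 0x3ff, hence the tighter 0xc00 bound.  */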
3950
3951 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3952
3953 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3954 ordinarily. This changes a bit when generating PIC. */
3955
3956 static bool
3957 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3958 {
3959 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3960
3961 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3962 rs1 = addr;
3963 else if (GET_CODE (addr) == PLUS)
3964 {
3965 rs1 = XEXP (addr, 0);
3966 rs2 = XEXP (addr, 1);
3967
3968 /* Canonicalize. REG comes first, if there are no regs,
3969 LO_SUM comes first. */
3970 if (!REG_P (rs1)
3971 && GET_CODE (rs1) != SUBREG
3972 && (REG_P (rs2)
3973 || GET_CODE (rs2) == SUBREG
3974 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3975 {
3976 rs1 = XEXP (addr, 1);
3977 rs2 = XEXP (addr, 0);
3978 }
3979
3980 if ((flag_pic == 1
3981 && rs1 == pic_offset_table_rtx
3982 && !REG_P (rs2)
3983 && GET_CODE (rs2) != SUBREG
3984 && GET_CODE (rs2) != LO_SUM
3985 && GET_CODE (rs2) != MEM
3986 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3987 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3988 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3989 || ((REG_P (rs1)
3990 || GET_CODE (rs1) == SUBREG)
3991 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3992 {
3993 imm1 = rs2;
3994 rs2 = NULL;
3995 }
3996 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3997 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3998 {
3999 /* We prohibit REG + REG for TFmode when there are no quad move insns
4000 and we consequently need to split. We do this because REG+REG
4001 is not an offsettable address. If we get the situation in reload
4002 where source and destination of a movtf pattern are both MEMs with
4003 REG+REG address, then only one of them gets converted to an
4004 offsettable address. */
4005 if (mode == TFmode
4006 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4007 return 0;
4008
4009 /* Likewise for TImode, but in all cases. */
4010 if (mode == TImode)
4011 return 0;
4012
4013 /* We prohibit REG + REG on ARCH32 if not optimizing for
4014 DFmode/DImode because then mem_min_alignment is likely to be zero
4015 after reload and the forced split would lack a matching splitter
4016 pattern. */
4017 if (TARGET_ARCH32 && !optimize
4018 && (mode == DFmode || mode == DImode))
4019 return 0;
4020 }
4021 else if (USE_AS_OFFSETABLE_LO10
4022 && GET_CODE (rs1) == LO_SUM
4023 && TARGET_ARCH64
4024 && ! TARGET_CM_MEDMID
4025 && RTX_OK_FOR_OLO10_P (rs2, mode))
4026 {
4027 rs2 = NULL;
4028 imm1 = XEXP (rs1, 1);
4029 rs1 = XEXP (rs1, 0);
4030 if (!CONSTANT_P (imm1)
4031 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4032 return 0;
4033 }
4034 }
4035 else if (GET_CODE (addr) == LO_SUM)
4036 {
4037 rs1 = XEXP (addr, 0);
4038 imm1 = XEXP (addr, 1);
4039
4040 if (!CONSTANT_P (imm1)
4041 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4042 return 0;
4043
4044 /* We can't allow TFmode in 32-bit mode, because an offset greater
4045 than the alignment (8) may cause the LO_SUM to overflow. */
4046 if (mode == TFmode && TARGET_ARCH32)
4047 return 0;
4048
4049 /* During reload, accept the HIGH+LO_SUM construct generated by
4050 sparc_legitimize_reload_address. */
4051 if (reload_in_progress
4052 && GET_CODE (rs1) == HIGH
4053 && XEXP (rs1, 0) == imm1)
4054 return 1;
4055 }
4056 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4057 return 1;
4058 else
4059 return 0;
4060
4061 if (GET_CODE (rs1) == SUBREG)
4062 rs1 = SUBREG_REG (rs1);
4063 if (!REG_P (rs1))
4064 return 0;
4065
4066 if (rs2)
4067 {
4068 if (GET_CODE (rs2) == SUBREG)
4069 rs2 = SUBREG_REG (rs2);
4070 if (!REG_P (rs2))
4071 return 0;
4072 }
4073
4074 if (strict)
4075 {
4076 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4077 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4078 return 0;
4079 }
4080 else
4081 {
4082 if ((! SPARC_INT_REG_P (REGNO (rs1))
4083 && REGNO (rs1) != FRAME_POINTER_REGNUM
4084 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4085 || (rs2
4086 && (! SPARC_INT_REG_P (REGNO (rs2))
4087 && REGNO (rs2) != FRAME_POINTER_REGNUM
4088 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4089 return 0;
4090 }
4091 return 1;
4092 }
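
/* For instance, this accepts addresses such as
     [%l1 + %l2]         (REG + REG)
     [%fp - 8]           (REG + SMALLINT)
     [%l1 + %lo(sym)]    (LO_SUM with a non-TLS symbol)
   but rejects REG + REG for TFmode without hard quad support and for
   TImode, for the reasons given above.  */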
4093
4094 /* Return the SYMBOL_REF for the tls_get_addr function. */
4095
4096 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4097
4098 static rtx
4099 sparc_tls_get_addr (void)
4100 {
4101 if (!sparc_tls_symbol)
4102 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4103
4104 return sparc_tls_symbol;
4105 }
4106
4107 /* Return the Global Offset Table to be used in TLS mode. */
4108
4109 static rtx
4110 sparc_tls_got (void)
4111 {
4112 /* In PIC mode, this is just the PIC offset table. */
4113 if (flag_pic)
4114 {
4115 crtl->uses_pic_offset_table = 1;
4116 return pic_offset_table_rtx;
4117 }
4118
4119 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4120 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4121 if (TARGET_SUN_TLS && TARGET_ARCH32)
4122 {
4123 load_got_register ();
4124 return global_offset_table_rtx;
4125 }
4126
4127 /* In all other cases, we load a new pseudo with the GOT symbol. */
4128 return copy_to_reg (sparc_got ());
4129 }
4130
4131 /* Return true if X contains a thread-local symbol. */
4132
4133 static bool
4134 sparc_tls_referenced_p (rtx x)
4135 {
4136 if (!TARGET_HAVE_TLS)
4137 return false;
4138
4139 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4140 x = XEXP (XEXP (x, 0), 0);
4141
4142 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4143 return true;
4144
4145 /* That's all we handle in sparc_legitimize_tls_address for now. */
4146 return false;
4147 }
4148
4149 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4150 this (thread-local) address. */
4151
4152 static rtx
4153 sparc_legitimize_tls_address (rtx addr)
4154 {
4155 rtx temp1, temp2, temp3, ret, o0, got;
4156 rtx_insn *insn;
4157
4158 gcc_assert (can_create_pseudo_p ());
4159
4160 if (GET_CODE (addr) == SYMBOL_REF)
4161 switch (SYMBOL_REF_TLS_MODEL (addr))
4162 {
4163 case TLS_MODEL_GLOBAL_DYNAMIC:
4164 start_sequence ();
4165 temp1 = gen_reg_rtx (SImode);
4166 temp2 = gen_reg_rtx (SImode);
4167 ret = gen_reg_rtx (Pmode);
4168 o0 = gen_rtx_REG (Pmode, 8);
4169 got = sparc_tls_got ();
4170 emit_insn (gen_tgd_hi22 (temp1, addr));
4171 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4172 if (TARGET_ARCH32)
4173 {
4174 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4175 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4176 addr, const1_rtx));
4177 }
4178 else
4179 {
4180 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4181 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4182 addr, const1_rtx));
4183 }
4184 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4185 insn = get_insns ();
4186 end_sequence ();
4187 emit_libcall_block (insn, ret, o0, addr);
4188 break;
4189
4190 case TLS_MODEL_LOCAL_DYNAMIC:
4191 start_sequence ();
4192 temp1 = gen_reg_rtx (SImode);
4193 temp2 = gen_reg_rtx (SImode);
4194 temp3 = gen_reg_rtx (Pmode);
4195 ret = gen_reg_rtx (Pmode);
4196 o0 = gen_rtx_REG (Pmode, 8);
4197 got = sparc_tls_got ();
4198 emit_insn (gen_tldm_hi22 (temp1));
4199 emit_insn (gen_tldm_lo10 (temp2, temp1));
4200 if (TARGET_ARCH32)
4201 {
4202 emit_insn (gen_tldm_add32 (o0, got, temp2));
4203 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4204 const1_rtx));
4205 }
4206 else
4207 {
4208 emit_insn (gen_tldm_add64 (o0, got, temp2));
4209 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4210 const1_rtx));
4211 }
4212 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4213 insn = get_insns ();
4214 end_sequence ();
4215 emit_libcall_block (insn, temp3, o0,
4216 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4217 UNSPEC_TLSLD_BASE));
4218 temp1 = gen_reg_rtx (SImode);
4219 temp2 = gen_reg_rtx (SImode);
4220 emit_insn (gen_tldo_hix22 (temp1, addr));
4221 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4222 if (TARGET_ARCH32)
4223 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4224 else
4225 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4226 break;
4227
4228 case TLS_MODEL_INITIAL_EXEC:
4229 temp1 = gen_reg_rtx (SImode);
4230 temp2 = gen_reg_rtx (SImode);
4231 temp3 = gen_reg_rtx (Pmode);
4232 got = sparc_tls_got ();
4233 emit_insn (gen_tie_hi22 (temp1, addr));
4234 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4235 if (TARGET_ARCH32)
4236 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4237 else
4238 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4239 if (TARGET_SUN_TLS)
4240 {
4241 ret = gen_reg_rtx (Pmode);
4242 if (TARGET_ARCH32)
4243 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4244 temp3, addr));
4245 else
4246 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4247 temp3, addr));
4248 }
4249 else
4250 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4251 break;
4252
4253 case TLS_MODEL_LOCAL_EXEC:
4254 temp1 = gen_reg_rtx (Pmode);
4255 temp2 = gen_reg_rtx (Pmode);
4256 if (TARGET_ARCH32)
4257 {
4258 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4259 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4260 }
4261 else
4262 {
4263 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4264 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4265 }
4266 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4267 break;
4268
4269 default:
4270 gcc_unreachable ();
4271 }
4272
4273 else if (GET_CODE (addr) == CONST)
4274 {
4275 rtx base, offset;
4276
4277 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4278
4279 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4280 offset = XEXP (XEXP (addr, 0), 1);
4281
4282 base = force_operand (base, NULL_RTX);
4283 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4284 offset = force_reg (Pmode, offset);
4285 ret = gen_rtx_PLUS (Pmode, base, offset);
4286 }
4287
4288 else
4289 gcc_unreachable (); /* for now ... */
4290
4291 return ret;
4292 }
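
/* As an illustration, the 32-bit global dynamic sequence emitted above is
   expected to assemble to something like
     sethi  %tgd_hi22(sym), %t1
     add    %t1, %tgd_lo10(sym), %t2
     add    %l7, %t2, %o0, %tgd_add(sym)
     call   __tls_get_addr, %tgd_call(sym)
      nop
   where %t1/%t2 stand for the pseudos allocated above; the exec models
   instead compute an offset from the thread pointer in %g7.  */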
4293
4294 /* Legitimize PIC addresses. If the address is already position-independent,
4295 we return ORIG. Newly generated position-independent addresses go into a
4296 reg. This is REG if nonzero, otherwise we allocate register(s) as
4297 necessary. */
4298
4299 static rtx
4300 sparc_legitimize_pic_address (rtx orig, rtx reg)
4301 {
4302 bool gotdata_op = false;
4303
4304 if (GET_CODE (orig) == SYMBOL_REF
4305 /* See the comment in sparc_expand_move. */
4306 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4307 {
4308 rtx pic_ref, address;
4309 rtx_insn *insn;
4310
4311 if (reg == 0)
4312 {
4313 gcc_assert (can_create_pseudo_p ());
4314 reg = gen_reg_rtx (Pmode);
4315 }
4316
4317 if (flag_pic == 2)
4318 {
4319 /* If not during reload, allocate another temp reg here for loading
4320 in the address, so that these instructions can be optimized
4321 properly. */
4322 rtx temp_reg = (! can_create_pseudo_p ()
4323 ? reg : gen_reg_rtx (Pmode));
4324
4325 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4326 won't get confused into thinking that these two instructions
4327 are loading in the true address of the symbol. If in the
4328 future a PIC rtx exists, that should be used instead. */
4329 if (TARGET_ARCH64)
4330 {
4331 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4332 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4333 }
4334 else
4335 {
4336 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4337 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4338 }
4339 address = temp_reg;
4340 gotdata_op = true;
4341 }
4342 else
4343 address = orig;
4344
4345 crtl->uses_pic_offset_table = 1;
4346 if (gotdata_op)
4347 {
4348 if (TARGET_ARCH64)
4349 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4350 pic_offset_table_rtx,
4351 address, orig));
4352 else
4353 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4354 pic_offset_table_rtx,
4355 address, orig));
4356 }
4357 else
4358 {
4359 pic_ref
4360 = gen_const_mem (Pmode,
4361 gen_rtx_PLUS (Pmode,
4362 pic_offset_table_rtx, address));
4363 insn = emit_move_insn (reg, pic_ref);
4364 }
4365
4366 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4367 by loop. */
4368 set_unique_reg_note (insn, REG_EQUAL, orig);
4369 return reg;
4370 }
4371 else if (GET_CODE (orig) == CONST)
4372 {
4373 rtx base, offset;
4374
4375 if (GET_CODE (XEXP (orig, 0)) == PLUS
4376 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4377 return orig;
4378
4379 if (reg == 0)
4380 {
4381 gcc_assert (can_create_pseudo_p ());
4382 reg = gen_reg_rtx (Pmode);
4383 }
4384
4385 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4386 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4387 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4388 base == reg ? NULL_RTX : reg);
4389
4390 if (GET_CODE (offset) == CONST_INT)
4391 {
4392 if (SMALL_INT (offset))
4393 return plus_constant (Pmode, base, INTVAL (offset));
4394 else if (can_create_pseudo_p ())
4395 offset = force_reg (Pmode, offset);
4396 else
4397 /* If we reach here, then something is seriously wrong. */
4398 gcc_unreachable ();
4399 }
4400 return gen_rtx_PLUS (Pmode, base, offset);
4401 }
4402 else if (GET_CODE (orig) == LABEL_REF)
4403 /* ??? We ought to be checking that the register is live instead, in case
4404 it is eliminated. */
4405 crtl->uses_pic_offset_table = 1;
4406
4407 return orig;
4408 }
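
/* Roughly, with -fPIC (flag_pic == 2) and an assembler that supports the
   GOTDATA relocations, the code above expands to something like
     sethi  %gdop_hix22(sym), %t
     xor    %t, %gdop_lox10(sym), %t
     ld     [%l7 + %t], %reg, %gdop(sym)
   which the linker may relax into a direct address computation, while
   -fpic (flag_pic == 1) loads the GOT slot with a single
     ld     [%l7 + sym], %reg  */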
4409
4410 /* Try machine-dependent ways of modifying an illegitimate address X
4411 to be legitimate. If we find one, return the new, valid address.
4412
4413 OLDX is the address as it was before break_out_memory_refs was called.
4414 In some cases it is useful to look at this to decide what needs to be done.
4415
4416 MODE is the mode of the operand pointed to by X.
4417
4418 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4419
4420 static rtx
4421 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4422 machine_mode mode)
4423 {
4424 rtx orig_x = x;
4425
4426 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4427 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4428 force_operand (XEXP (x, 0), NULL_RTX));
4429 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4430 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4431 force_operand (XEXP (x, 1), NULL_RTX));
4432 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4433 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4434 XEXP (x, 1));
4435 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4436 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4437 force_operand (XEXP (x, 1), NULL_RTX));
4438
4439 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4440 return x;
4441
4442 if (sparc_tls_referenced_p (x))
4443 x = sparc_legitimize_tls_address (x);
4444 else if (flag_pic)
4445 x = sparc_legitimize_pic_address (x, NULL_RTX);
4446 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4447 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4448 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4449 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4450 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4451 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4452 else if (GET_CODE (x) == SYMBOL_REF
4453 || GET_CODE (x) == CONST
4454 || GET_CODE (x) == LABEL_REF)
4455 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4456
4457 return x;
4458 }
4459
4460 /* Delegitimize an address that was legitimized by the above function. */
4461
4462 static rtx
4463 sparc_delegitimize_address (rtx x)
4464 {
4465 x = delegitimize_mem_from_attrs (x);
4466
4467 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4468 switch (XINT (XEXP (x, 1), 1))
4469 {
4470 case UNSPEC_MOVE_PIC:
4471 case UNSPEC_TLSLE:
4472 x = XVECEXP (XEXP (x, 1), 0, 0);
4473 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4474 break;
4475 default:
4476 break;
4477 }
4478
4479 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4480 if (GET_CODE (x) == MINUS
4481 && REG_P (XEXP (x, 0))
4482 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4483 && GET_CODE (XEXP (x, 1)) == LO_SUM
4484 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4485 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4486 {
4487 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4488 gcc_assert (GET_CODE (x) == LABEL_REF);
4489 }
4490
4491 return x;
4492 }
4493
4494 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4495 replace the input X, or the original X if no replacement is called for.
4496 The output parameter *WIN is 1 if the calling macro should goto WIN,
4497 0 if it should not.
4498
4499 For SPARC, we wish to handle addresses by splitting them into
4500 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4501 This cuts the number of extra insns by one.
4502
4503 Do nothing when generating PIC code and the address is a symbolic
4504 operand or requires a scratch register. */
4505
4506 rtx
4507 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4508 int opnum, int type,
4509 int ind_levels ATTRIBUTE_UNUSED, int *win)
4510 {
4511 /* Decompose SImode constants into HIGH+LO_SUM. */
4512 if (CONSTANT_P (x)
4513 && (mode != TFmode || TARGET_ARCH64)
4514 && GET_MODE (x) == SImode
4515 && GET_CODE (x) != LO_SUM
4516 && GET_CODE (x) != HIGH
4517 && sparc_cmodel <= CM_MEDLOW
4518 && !(flag_pic
4519 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4520 {
4521 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4522 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4523 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4524 opnum, (enum reload_type)type);
4525 *win = 1;
4526 return x;
4527 }
4528
4529 /* We have to recognize what we have already generated above. */
4530 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4531 {
4532 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4533 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4534 opnum, (enum reload_type)type);
4535 *win = 1;
4536 return x;
4537 }
4538
4539 *win = 0;
4540 return x;
4541 }
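
/* For example, a constant address like `foo + 40' is decomposed here into
   (lo_sum (high (const ...)) (const ...)), with the HIGH part reloaded
   into a base register, so the final code is roughly
     sethi  %hi(foo+40), %g1
     ld     [%g1 + %lo(foo+40)], %reg  */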
4542
4543 /* Return true if ADDR (a legitimate address expression)
4544 has an effect that depends on the machine mode it is used for.
4545
4546 In PIC mode,
4547
4548 (mem:HI [%l7+a])
4549
4550 is not equivalent to
4551
4552 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4553
4554 because [%l7+a+1] is interpreted as the address of (a+1). */
4555
4556
4557 static bool
4558 sparc_mode_dependent_address_p (const_rtx addr,
4559 addr_space_t as ATTRIBUTE_UNUSED)
4560 {
4561 if (flag_pic && GET_CODE (addr) == PLUS)
4562 {
4563 rtx op0 = XEXP (addr, 0);
4564 rtx op1 = XEXP (addr, 1);
4565 if (op0 == pic_offset_table_rtx
4566 && symbolic_operand (op1, VOIDmode))
4567 return true;
4568 }
4569
4570 return false;
4571 }
4572
4573 #ifdef HAVE_GAS_HIDDEN
4574 # define USE_HIDDEN_LINKONCE 1
4575 #else
4576 # define USE_HIDDEN_LINKONCE 0
4577 #endif
4578
4579 static void
4580 get_pc_thunk_name (char name[32], unsigned int regno)
4581 {
4582 const char *reg_name = reg_names[regno];
4583
4584 /* Skip the leading '%' as that cannot be used in a
4585 symbol name. */
4586 reg_name += 1;
4587
4588 if (USE_HIDDEN_LINKONCE)
4589 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4590 else
4591 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4592 }
4593
4594 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4595
4596 static rtx
4597 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4598 {
4599 int orig_flag_pic = flag_pic;
4600 rtx insn;
4601
4602 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4603 flag_pic = 0;
4604 if (TARGET_ARCH64)
4605 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4606 else
4607 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4608 flag_pic = orig_flag_pic;
4609
4610 return insn;
4611 }
4612
4613 /* Emit code to load the GOT register. */
4614
4615 void
4616 load_got_register (void)
4617 {
4618 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4619 if (!global_offset_table_rtx)
4620 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4621
4622 if (TARGET_VXWORKS_RTP)
4623 emit_insn (gen_vxworks_load_got ());
4624 else
4625 {
4626 /* The GOT symbol is subject to a PC-relative relocation so we need a
4627 helper function to add the PC value and thus get the final value. */
4628 if (!got_helper_rtx)
4629 {
4630 char name[32];
4631 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4632 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4633 }
4634
4635 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4636 got_helper_rtx,
4637 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4638 }
4639
4640 /* Need to emit this whether or not we obey regdecls,
4641 since setjmp/longjmp can invalidate the register liveness info.
4642 ??? In the case where we don't obey regdecls, this is not sufficient
4643 since we may not fall out the bottom. */
4644 emit_use (global_offset_table_rtx);
4645 }
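
/* Outside of VxWorks RTP, the sequence emitted above typically assembles to
     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
   where the thunk adds the PC of the call (%o7) to %l7 and returns,
   leaving the GOT address in %l7.  */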
4646
4647 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4648 address of the call target. */
4649
4650 void
4651 sparc_emit_call_insn (rtx pat, rtx addr)
4652 {
4653 rtx_insn *insn;
4654
4655 insn = emit_call_insn (pat);
4656
4657 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4658 if (TARGET_VXWORKS_RTP
4659 && flag_pic
4660 && GET_CODE (addr) == SYMBOL_REF
4661 && (SYMBOL_REF_DECL (addr)
4662 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4663 : !SYMBOL_REF_LOCAL_P (addr)))
4664 {
4665 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4666 crtl->uses_pic_offset_table = 1;
4667 }
4668 }
4669 \f
4670 /* Return 1 if RTX is a MEM which is known to be aligned to at
4671 least a DESIRED byte boundary. */
4672
4673 int
4674 mem_min_alignment (rtx mem, int desired)
4675 {
4676 rtx addr, base, offset;
4677
4678 /* If it's not a MEM we can't accept it. */
4679 if (GET_CODE (mem) != MEM)
4680 return 0;
4681
4682 /* Obviously... */
4683 if (!TARGET_UNALIGNED_DOUBLES
4684 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4685 return 1;
4686
4687 /* ??? The rest of the function predates MEM_ALIGN so
4688 there is probably a bit of redundancy. */
4689 addr = XEXP (mem, 0);
4690 base = offset = NULL_RTX;
4691 if (GET_CODE (addr) == PLUS)
4692 {
4693 if (GET_CODE (XEXP (addr, 0)) == REG)
4694 {
4695 base = XEXP (addr, 0);
4696
4697 /* The point here is that if the base
4698 REG is properly aligned, the compiler makes
4699 sure that any REG-based index off of it is
4700 aligned as well. */
4701 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4702 offset = XEXP (addr, 1);
4703 else
4704 offset = const0_rtx;
4705 }
4706 }
4707 else if (GET_CODE (addr) == REG)
4708 {
4709 base = addr;
4710 offset = const0_rtx;
4711 }
4712
4713 if (base != NULL_RTX)
4714 {
4715 int regno = REGNO (base);
4716
4717 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4718 {
4719 /* Check if the compiler has recorded some information
4720 about the alignment of the base REG. If reload has
4721 completed, we already matched with proper alignments.
4722 If not running global_alloc, reload might give us
4723 unaligned pointer to local stack though. */
4724 if (((cfun != 0
4725 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4726 || (optimize && reload_completed))
4727 && (INTVAL (offset) & (desired - 1)) == 0)
4728 return 1;
4729 }
4730 else
4731 {
4732 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4733 return 1;
4734 }
4735 }
4736 else if (! TARGET_UNALIGNED_DOUBLES
4737 || CONSTANT_P (addr)
4738 || GET_CODE (addr) == LO_SUM)
4739 {
4740 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4741 is true, in which case we can only assume that an access is aligned if
4742 it is to a constant address, or the address involves a LO_SUM. */
4743 return 1;
4744 }
4745
4746 /* An obviously unaligned address. */
4747 return 0;
4748 }
4749
4750 \f
4751 /* Vectors to keep interesting information about registers where it can easily
4752 be got. We used to use the actual mode value as the bit number, but there
4753 are more than 32 modes now. Instead we use two tables: one indexed by
4754 hard register number, and one indexed by mode. */
4755
4756 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4757 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4758 mapped into one sparc_mode_class mode. */
4759
4760 enum sparc_mode_class {
4761 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4762 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4763 CC_MODE, CCFP_MODE
4764 };
4765
4766 /* Modes for single-word and smaller quantities. */
4767 #define S_MODES \
4768 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4769
4770 /* Modes for double-word and smaller quantities. */
4771 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4772
4773 /* Modes for quad-word and smaller quantities. */
4774 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4775
4776 /* Modes for 8-word and smaller quantities. */
4777 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4778
4779 /* Modes for single-float quantities. */
4780 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4781
4782 /* Modes for double-float and smaller quantities. */
4783 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4784
4785 /* Modes for quad-float and smaller quantities. */
4786 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4787
4788 /* Modes for quad-float pairs and smaller quantities. */
4789 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4790
4791 /* Modes for double-float only quantities. */
4792 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4793
4794 /* Modes for quad-float and double-float only quantities. */
4795 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4796
4797 /* Modes for quad-float pairs and double-float only quantities. */
4798 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4799
4800 /* Modes for condition codes. */
4801 #define CC_MODES (1 << (int) CC_MODE)
4802 #define CCFP_MODES (1 << (int) CCFP_MODE)
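
/* As a worked example, D_MODES expands to the bit set
   { H_MODE, S_MODE, SF_MODE, D_MODE, DF_MODE }, so a register whose table
   entry below is D_MODES accepts any integer or float quantity of at most
   8 bytes, while an SF_MODES entry only accepts 4-byte quantities.  */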
4803
4804 /* Value is 1 if register/mode pair is acceptable on sparc.
4805
4806 The funny mixture of D and T modes is because integer operations
4807 do not specially operate on tetra quantities, so non-quad-aligned
4808 registers can hold quadword quantities (except %o4 and %i4 because
4809 they cross fixed registers).
4810
4811 ??? Note that, despite the settings, non-double-aligned parameter
4812 registers can hold double-word quantities in 32-bit mode. */
4813
4814 /* This points to either the 32 bit or the 64 bit version. */
4815 const int *hard_regno_mode_classes;
4816
4817 static const int hard_32bit_mode_classes[] = {
4818 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4819 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4820 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4821 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4822
4823 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4824 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4825 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4826 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4827
4828 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4829 and none can hold SFmode/SImode values. */
4830 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4831 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4832 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4833 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4834
4835 /* %fcc[0123] */
4836 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4837
4838 /* %icc, %sfp, %gsr */
4839 CC_MODES, 0, D_MODES
4840 };
4841
4842 static const int hard_64bit_mode_classes[] = {
4843 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4844 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4845 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4846 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4847
4848 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4849 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4850 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4851 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4852
4853 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4854 and none can hold SFmode/SImode values. */
4855 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4856 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4857 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4858 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4859
4860 /* %fcc[0123] */
4861 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4862
4863 /* %icc, %sfp, %gsr */
4864 CC_MODES, 0, D_MODES
4865 };
4866
4867 int sparc_mode_class [NUM_MACHINE_MODES];
4868
4869 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4870
4871 static void
4872 sparc_init_modes (void)
4873 {
4874 int i;
4875
4876 for (i = 0; i < NUM_MACHINE_MODES; i++)
4877 {
4878 machine_mode m = (machine_mode) i;
4879 unsigned int size = GET_MODE_SIZE (m);
4880
4881 switch (GET_MODE_CLASS (m))
4882 {
4883 case MODE_INT:
4884 case MODE_PARTIAL_INT:
4885 case MODE_COMPLEX_INT:
4886 if (size < 4)
4887 sparc_mode_class[i] = 1 << (int) H_MODE;
4888 else if (size == 4)
4889 sparc_mode_class[i] = 1 << (int) S_MODE;
4890 else if (size == 8)
4891 sparc_mode_class[i] = 1 << (int) D_MODE;
4892 else if (size == 16)
4893 sparc_mode_class[i] = 1 << (int) T_MODE;
4894 else if (size == 32)
4895 sparc_mode_class[i] = 1 << (int) O_MODE;
4896 else
4897 sparc_mode_class[i] = 0;
4898 break;
4899 case MODE_VECTOR_INT:
4900 if (size == 4)
4901 sparc_mode_class[i] = 1 << (int) SF_MODE;
4902 else if (size == 8)
4903 sparc_mode_class[i] = 1 << (int) DF_MODE;
4904 else
4905 sparc_mode_class[i] = 0;
4906 break;
4907 case MODE_FLOAT:
4908 case MODE_COMPLEX_FLOAT:
4909 if (size == 4)
4910 sparc_mode_class[i] = 1 << (int) SF_MODE;
4911 else if (size == 8)
4912 sparc_mode_class[i] = 1 << (int) DF_MODE;
4913 else if (size == 16)
4914 sparc_mode_class[i] = 1 << (int) TF_MODE;
4915 else if (size == 32)
4916 sparc_mode_class[i] = 1 << (int) OF_MODE;
4917 else
4918 sparc_mode_class[i] = 0;
4919 break;
4920 case MODE_CC:
4921 if (m == CCFPmode || m == CCFPEmode)
4922 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4923 else
4924 sparc_mode_class[i] = 1 << (int) CC_MODE;
4925 break;
4926 default:
4927 sparc_mode_class[i] = 0;
4928 break;
4929 }
4930 }
4931
4932 if (TARGET_ARCH64)
4933 hard_regno_mode_classes = hard_64bit_mode_classes;
4934 else
4935 hard_regno_mode_classes = hard_32bit_mode_classes;
4936
4937 /* Initialize the array used by REGNO_REG_CLASS. */
4938 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4939 {
4940 if (i < 16 && TARGET_V8PLUS)
4941 sparc_regno_reg_class[i] = I64_REGS;
4942 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4943 sparc_regno_reg_class[i] = GENERAL_REGS;
4944 else if (i < 64)
4945 sparc_regno_reg_class[i] = FP_REGS;
4946 else if (i < 96)
4947 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4948 else if (i < 100)
4949 sparc_regno_reg_class[i] = FPCC_REGS;
4950 else
4951 sparc_regno_reg_class[i] = NO_REGS;
4952 }
4953 }
4954 \f
4955 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4956
4957 static inline bool
4958 save_global_or_fp_reg_p (unsigned int regno,
4959 int leaf_function ATTRIBUTE_UNUSED)
4960 {
4961 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4962 }
4963
4964 /* Return whether the return address register (%i7) is needed. */
4965
4966 static inline bool
4967 return_addr_reg_needed_p (int leaf_function)
4968 {
4969 /* If it is live, for example because of __builtin_return_address (0). */
4970 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4971 return true;
4972
4973 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4974 if (!leaf_function
4975 /* Loading the GOT register clobbers %o7. */
4976 || crtl->uses_pic_offset_table
4977 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4978 return true;
4979
4980 return false;
4981 }
4982
4983 /* Return whether REGNO, a local or in register, must be saved/restored. */
4984
4985 static bool
4986 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4987 {
4988 /* General case: call-saved registers live at some point. */
4989 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4990 return true;
4991
4992 /* Frame pointer register (%fp) if needed. */
4993 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4994 return true;
4995
4996 /* Return address register (%i7) if needed. */
4997 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4998 return true;
4999
5000 /* GOT register (%l7) if needed. */
5001 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5002 return true;
5003
5004 /* If the function accesses prior frames, the frame pointer and the return
5005 address of the previous frame must be saved on the stack. */
5006 if (crtl->accesses_prior_frames
5007 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5008 return true;
5009
5010 return false;
5011 }
5012
5013 /* Compute the frame size required by the function. This function is called
5014 during the reload pass and also by sparc_expand_prologue. */
5015
5016 HOST_WIDE_INT
5017 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5018 {
5019 HOST_WIDE_INT frame_size, apparent_frame_size;
5020 int args_size, n_global_fp_regs = 0;
5021 bool save_local_in_regs_p = false;
5022 unsigned int i;
5023
5024 /* If the function allocates dynamic stack space, the dynamic offset is
5025 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5026 if (leaf_function && !cfun->calls_alloca)
5027 args_size = 0;
5028 else
5029 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5030
5031 /* Calculate space needed for global registers. */
5032 if (TARGET_ARCH64)
5033 {
5034 for (i = 0; i < 8; i++)
5035 if (save_global_or_fp_reg_p (i, 0))
5036 n_global_fp_regs += 2;
5037 }
5038 else
5039 {
5040 for (i = 0; i < 8; i += 2)
5041 if (save_global_or_fp_reg_p (i, 0)
5042 || save_global_or_fp_reg_p (i + 1, 0))
5043 n_global_fp_regs += 2;
5044 }
5045
5046 /* In the flat window model, find out which local and in registers need to
5047 be saved. We don't reserve space in the current frame for them as they
5048 will be spilled into the register window save area of the caller's frame.
5049 However, as soon as we use this register window save area, we must create
5050 that of the current frame to make it the live one. */
5051 if (TARGET_FLAT)
5052 for (i = 16; i < 32; i++)
5053 if (save_local_or_in_reg_p (i, leaf_function))
5054 {
5055 save_local_in_regs_p = true;
5056 break;
5057 }
5058
5059 /* Calculate space needed for FP registers. */
5060 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5061 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5062 n_global_fp_regs += 2;
5063
5064 if (size == 0
5065 && n_global_fp_regs == 0
5066 && args_size == 0
5067 && !save_local_in_regs_p)
5068 frame_size = apparent_frame_size = 0;
5069 else
5070 {
5071 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5072 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5073 apparent_frame_size += n_global_fp_regs * 4;
5074
5075 /* We need to add the size of the outgoing argument area. */
5076 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5077
5078 /* And that of the register window save area. */
5079 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5080
5081 /* Finally, bump to the appropriate alignment. */
5082 frame_size = SPARC_STACK_ALIGN (frame_size);
5083 }
5084
5085 /* Set up values for use in prologue and epilogue. */
5086 sparc_frame_size = frame_size;
5087 sparc_apparent_frame_size = apparent_frame_size;
5088 sparc_n_global_fp_regs = n_global_fp_regs;
5089 sparc_save_local_in_regs_p = save_local_in_regs_p;
5090
5091 return frame_size;
5092 }
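
/* Sketch of the 32-bit case: for a non-leaf function with no locals and no
   saved global/FP registers, args_size is at least REG_PARM_STACK_SPACE,
   FIRST_PARM_OFFSET adds the register window save area on top, and
   SPARC_STACK_ALIGN rounds the total up, which is roughly how the
   conventional minimal 96-byte SPARC/32 frame arises.  */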
5093
5094 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5095
5096 int
5097 sparc_initial_elimination_offset (int to)
5098 {
5099 int offset;
5100
5101 if (to == STACK_POINTER_REGNUM)
5102 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5103 else
5104 offset = 0;
5105
5106 offset += SPARC_STACK_BIAS;
5107 return offset;
5108 }
5109
5110 /* Output any necessary .register pseudo-ops. */
5111
5112 void
5113 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5114 {
5115 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5116 int i;
5117
5118 if (TARGET_ARCH32)
5119 return;
5120
5121 /* Check if %g[2367] were used without
5122 .register being printed for them already. */
5123 for (i = 2; i < 8; i++)
5124 {
5125 if (df_regs_ever_live_p (i)
5126 && ! sparc_hard_reg_printed [i])
5127 {
5128 sparc_hard_reg_printed [i] = 1;
5129 /* %g7 is used as TLS base register, use #ignore
5130 for it instead of #scratch. */
5131 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5132 i == 7 ? "ignore" : "scratch");
5133 }
5134 if (i == 3) i = 5;
5135 }
5136 #endif
5137 }
5138
5139 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5140
5141 #if PROBE_INTERVAL > 4096
5142 #error Cannot use indexed addressing mode for stack probing
5143 #endif
5144
5145 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5146 inclusive. These are offsets from the current stack pointer.
5147
5148 Note that we don't use the REG+REG addressing mode for the probes because
5149 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5150 so the advantage of having a single code path wins here. */
5151
5152 static void
5153 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5154 {
5155 rtx g1 = gen_rtx_REG (Pmode, 1);
5156
5157 /* See if we have a constant small number of probes to generate. If so,
5158 that's the easy case. */
5159 if (size <= PROBE_INTERVAL)
5160 {
5161 emit_move_insn (g1, GEN_INT (first));
5162 emit_insn (gen_rtx_SET (g1,
5163 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5164 emit_stack_probe (plus_constant (Pmode, g1, -size));
5165 }
5166
5167 /* The run-time loop is made up of 9 insns in the generic case while the
5168 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5169 else if (size <= 4 * PROBE_INTERVAL)
5170 {
5171 HOST_WIDE_INT i;
5172
5173 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5174 emit_insn (gen_rtx_SET (g1,
5175 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5176 emit_stack_probe (g1);
5177
5178 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5179 it exceeds SIZE. If only two probes are needed, this will not
5180 generate any code. Then probe at FIRST + SIZE. */
5181 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5182 {
5183 emit_insn (gen_rtx_SET (g1,
5184 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5185 emit_stack_probe (g1);
5186 }
5187
5188 emit_stack_probe (plus_constant (Pmode, g1,
5189 (i - PROBE_INTERVAL) - size));
5190 }
5191
5192 /* Otherwise, do the same as above, but in a loop. Note that we must be
5193 extra careful with variables wrapping around because we might be at
5194 the very top (or the very bottom) of the address space and we have
5195 to be able to handle this case properly; in particular, we use an
5196 equality test for the loop condition. */
5197 else
5198 {
5199 HOST_WIDE_INT rounded_size;
5200 rtx g4 = gen_rtx_REG (Pmode, 4);
5201
5202 emit_move_insn (g1, GEN_INT (first));
5203
5204
5205 /* Step 1: round SIZE to the previous multiple of the interval. */
5206
5207 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5208 emit_move_insn (g4, GEN_INT (rounded_size));
5209
5210
5211 /* Step 2: compute initial and final value of the loop counter. */
5212
5213 /* TEST_ADDR = SP + FIRST. */
5214 emit_insn (gen_rtx_SET (g1,
5215 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5216
5217 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5218 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5219
5220
5221 /* Step 3: the loop
5222
5223 while (TEST_ADDR != LAST_ADDR)
5224 {
5225 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5226 probe at TEST_ADDR
5227 }
5228
5229 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5230 until it is equal to ROUNDED_SIZE. */
5231
5232 if (TARGET_ARCH64)
5233 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5234 else
5235 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5236
5237
5238 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5239 that SIZE is equal to ROUNDED_SIZE. */
5240
5241 if (size != rounded_size)
5242 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5243 }
5244
5245 /* Make sure nothing is scheduled before we are done. */
5246 emit_insn (gen_blockage ());
5247 }
5248
5249 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5250 absolute addresses. */
5251
5252 const char *
5253 output_probe_stack_range (rtx reg1, rtx reg2)
5254 {
5255 static int labelno = 0;
5256 char loop_lab[32];
5257 rtx xops[2];
5258
5259 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5260
5261 /* Loop. */
5262 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5263
5264 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5265 xops[0] = reg1;
5266 xops[1] = GEN_INT (-PROBE_INTERVAL);
5267 output_asm_insn ("add\t%0, %1, %0", xops);
5268
5269 /* Test if TEST_ADDR == LAST_ADDR. */
5270 xops[1] = reg2;
5271 output_asm_insn ("cmp\t%0, %1", xops);
5272
5273 /* Probe at TEST_ADDR and branch. */
5274 if (TARGET_ARCH64)
5275 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5276 else
5277 fputs ("\tbne\t", asm_out_file);
5278 assemble_name_raw (asm_out_file, loop_lab);
5279 fputc ('\n', asm_out_file);
5280 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5281 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5282
5283 return "";
5284 }
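
/* With the default PROBE_INTERVAL of 4096 and the %g1/%g4 registers used by
   the caller above, the loop output here is
     .LPSRL0:
       add   %g1, -4096, %g1
       cmp   %g1, %g4
       bne   .LPSRL0          ! bne,pt %xcc,.LPSRL0 on 64-bit
        st   %g0, [%g1+0]     ! [%g1+2047] with the 64-bit stack bias
   probing one word per interval until TEST_ADDR reaches LAST_ADDR.  */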
5285
5286 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5287 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5288 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5289 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5290 the action to be performed if it returns false. Return the new offset. */
5291
5292 typedef bool (*sorr_pred_t) (unsigned int, int);
5293 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5294
5295 static int
5296 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5297 int offset, int leaf_function, sorr_pred_t save_p,
5298 sorr_act_t action_true, sorr_act_t action_false)
5299 {
5300 unsigned int i;
5301 rtx mem;
5302 rtx_insn *insn;
5303
5304 if (TARGET_ARCH64 && high <= 32)
5305 {
5306 int fp_offset = -1;
5307
5308 for (i = low; i < high; i++)
5309 {
5310 if (save_p (i, leaf_function))
5311 {
5312 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5313 base, offset));
5314 if (action_true == SORR_SAVE)
5315 {
5316 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5317 RTX_FRAME_RELATED_P (insn) = 1;
5318 }
5319 else /* action_true == SORR_RESTORE */
5320 {
5321 /* The frame pointer must be restored last since its old
5322 value may be used as base address for the frame. This
5323 is problematic in 64-bit mode only because of the lack
5324 of double-word load instruction. */
5325 if (i == HARD_FRAME_POINTER_REGNUM)
5326 fp_offset = offset;
5327 else
5328 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5329 }
5330 offset += 8;
5331 }
5332 else if (action_false == SORR_ADVANCE)
5333 offset += 8;
5334 }
5335
5336 if (fp_offset >= 0)
5337 {
5338 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5339 emit_move_insn (hard_frame_pointer_rtx, mem);
5340 }
5341 }
5342 else
5343 {
5344 for (i = low; i < high; i += 2)
5345 {
5346 bool reg0 = save_p (i, leaf_function);
5347 bool reg1 = save_p (i + 1, leaf_function);
5348 machine_mode mode;
5349 int regno;
5350
5351 if (reg0 && reg1)
5352 {
5353 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5354 regno = i;
5355 }
5356 else if (reg0)
5357 {
5358 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5359 regno = i;
5360 }
5361 else if (reg1)
5362 {
5363 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5364 regno = i + 1;
5365 offset += 4;
5366 }
5367 else
5368 {
5369 if (action_false == SORR_ADVANCE)
5370 offset += 8;
5371 continue;
5372 }
5373
5374 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5375 if (action_true == SORR_SAVE)
5376 {
5377 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5378 RTX_FRAME_RELATED_P (insn) = 1;
5379 if (mode == DImode)
5380 {
5381 rtx set1, set2;
5382 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5383 offset));
5384 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5385 RTX_FRAME_RELATED_P (set1) = 1;
5386 mem
5387 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5388 offset + 4));
5389 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5390 RTX_FRAME_RELATED_P (set2) = 1;
5391 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5392 gen_rtx_PARALLEL (VOIDmode,
5393 gen_rtvec (2, set1, set2)));
5394 }
5395 }
5396 else /* action_true == SORR_RESTORE */
5397 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5398
5399 /* Bump and round down to double word
5400 in case we already bumped by 4. */
5401 offset = ROUND_DOWN (offset + 8, 8);
5402 }
5403 }
5404
5405 return offset;
5406 }
5407
5408 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5409
5410 static rtx
5411 emit_adjust_base_to_offset (rtx base, int offset)
5412 {
5413 /* ??? This might be optimized a little as %g1 might already have a
5414 value close enough that a single add insn will do. */
5415 /* ??? Although, all of this is probably only a temporary fix because
5416 if %g1 can hold a function result, then sparc_expand_epilogue will
5417 lose (the result will be clobbered). */
5418 rtx new_base = gen_rtx_REG (Pmode, 1);
5419 emit_move_insn (new_base, GEN_INT (offset));
5420 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5421 return new_base;
5422 }
5423
5424 /* Emit code to save/restore call-saved global and FP registers. */
5425
5426 static void
5427 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5428 {
5429 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5430 {
5431 base = emit_adjust_base_to_offset (base, offset);
5432 offset = 0;
5433 }
5434
5435 offset
5436 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5437 save_global_or_fp_reg_p, action, SORR_NONE);
5438 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5439 save_global_or_fp_reg_p, action, SORR_NONE);
5440 }
5441
5442 /* Emit code to save/restore call-saved local and in registers. */
5443
5444 static void
5445 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5446 {
5447 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5448 {
5449 base = emit_adjust_base_to_offset (base, offset);
5450 offset = 0;
5451 }
5452
5453 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5454 save_local_or_in_reg_p, action, SORR_ADVANCE);
5455 }
5456
5457 /* Emit a window_save insn. */
5458
5459 static rtx_insn *
5460 emit_window_save (rtx increment)
5461 {
5462 rtx_insn *insn = emit_insn (gen_window_save (increment));
5463 RTX_FRAME_RELATED_P (insn) = 1;
5464
5465 /* The incoming return address (%o7) is saved in %i7. */
5466 add_reg_note (insn, REG_CFA_REGISTER,
5467 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5468 gen_rtx_REG (Pmode,
5469 INCOMING_RETURN_ADDR_REGNUM)));
5470
5471 /* The window save event. */
5472 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5473
5474 /* The CFA is %fp, the hard frame pointer. */
5475 add_reg_note (insn, REG_CFA_DEF_CFA,
5476 plus_constant (Pmode, hard_frame_pointer_rtx,
5477 INCOMING_FRAME_SP_OFFSET));
5478
5479 return insn;
5480 }
5481
5482 /* Generate an increment for the stack pointer. */
5483
5484 static rtx
5485 gen_stack_pointer_inc (rtx increment)
5486 {
5487 return gen_rtx_SET (stack_pointer_rtx,
5488 gen_rtx_PLUS (Pmode,
5489 stack_pointer_rtx,
5490 increment));
5491 }
5492
5493 /* Expand the function prologue. The prologue is responsible for reserving
5494 storage for the frame, saving the call-saved registers and loading the
5495 GOT register if needed. */
5496
5497 void
5498 sparc_expand_prologue (void)
5499 {
5500 HOST_WIDE_INT size;
5501 rtx_insn *insn;
5502
5503 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5504 on the final value of the flag means deferring the prologue/epilogue
5505 expansion until just before the second scheduling pass, which is too
5506 late to emit multiple epilogues or return insns.
5507
5508 Of course we are making the assumption that the value of the flag
5509 will not change between now and its final value. Of the three parts
5510 of the formula, only the last one can reasonably vary. Let's take a
5511 closer look, after assuming that the first two are true (otherwise
5512 the value of the last one is irrelevant).
5513
5514 If only_leaf_regs_used returns false, the global predicate will also
5515 be false so the actual frame size calculated below will be positive.
5516 As a consequence, the save_register_window insn will be emitted in
5517 the instruction stream; now this insn explicitly references %fp
5518 which is not a leaf register so only_leaf_regs_used will always
5519 return false subsequently.
5520
5521 If only_leaf_regs_used returns true, we hope that the subsequent
5522 optimization passes won't cause non-leaf registers to pop up. For
5523 example, the regrename pass has special provisions to not rename to
5524 non-leaf registers in a leaf function. */
5525 sparc_leaf_function_p
5526 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5527
5528 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5529
5530 if (flag_stack_usage_info)
5531 current_function_static_stack_size = size;
5532
5533 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5534 {
5535 if (crtl->is_leaf && !cfun->calls_alloca)
5536 {
5537 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5538 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5539 size - STACK_CHECK_PROTECT);
5540 }
5541 else if (size > 0)
5542 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5543 }
5544
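/* To make the size thresholds below concrete (an informal note): the stack
   pointer adjustments use 13-bit signed immediates (-4096..4095), so a
   6000-byte frame is carved out as -4096 followed by -1904, while any
   frame larger than 8192 bytes goes through the %g1 temporary.  */
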
5545 if (size == 0)
5546 ; /* do nothing. */
5547 else if (sparc_leaf_function_p)
5548 {
5549 rtx size_int_rtx = GEN_INT (-size);
5550
5551 if (size <= 4096)
5552 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5553 else if (size <= 8192)
5554 {
5555 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5557
5558 /* %sp is still the CFA register. */
5559 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5560 }
5561 else
5562 {
5563 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5564 emit_move_insn (size_rtx, size_int_rtx);
5565 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5566 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5567 gen_stack_pointer_inc (size_int_rtx));
5568 }
5569
5570 RTX_FRAME_RELATED_P (insn) = 1;
5571 }
5572 else
5573 {
5574 rtx size_int_rtx = GEN_INT (-size);
5575
5576 if (size <= 4096)
5577 emit_window_save (size_int_rtx);
5578 else if (size <= 8192)
5579 {
5580 emit_window_save (GEN_INT (-4096));
5581
5582 /* %sp is not the CFA register anymore. */
5583 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5584
5585 /* Make sure no %fp-based store is issued until after the frame is
5586 established. The offset between the frame pointer and the stack
5587 pointer is calculated relative to the value of the stack pointer
5588 at the end of the function prologue, and moving instructions that
5589 access the stack via the frame pointer between the instructions
5590 that decrement the stack pointer could result in accessing the
5591 register window save area, which is volatile. */
5592 emit_insn (gen_frame_blockage ());
5593 }
5594 else
5595 {
5596 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5597 emit_move_insn (size_rtx, size_int_rtx);
5598 emit_window_save (size_rtx);
5599 }
5600 }
5601
5602 if (sparc_leaf_function_p)
5603 {
5604 sparc_frame_base_reg = stack_pointer_rtx;
5605 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5606 }
5607 else
5608 {
5609 sparc_frame_base_reg = hard_frame_pointer_rtx;
5610 sparc_frame_base_offset = SPARC_STACK_BIAS;
5611 }
5612
5613 if (sparc_n_global_fp_regs > 0)
5614 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5615 sparc_frame_base_offset
5616 - sparc_apparent_frame_size,
5617 SORR_SAVE);
5618
5619 /* Load the GOT register if needed. */
5620 if (crtl->uses_pic_offset_table)
5621 load_got_register ();
5622
5623 /* Advertise that the data calculated just above are now valid. */
5624 sparc_prologue_data_valid_p = true;
5625 }
5626
5627 /* Expand the function prologue for the flat register window model. The
5628 prologue is responsible for reserving storage for the frame, saving the
5629 call-saved registers and loading the GOT register if needed. */
5630
5631 void
5632 sparc_flat_expand_prologue (void)
5633 {
5634 HOST_WIDE_INT size;
5635 rtx_insn *insn;
5636
5637 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5638
5639 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5640
5641 if (flag_stack_usage_info)
5642 current_function_static_stack_size = size;
5643
5644 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5645 {
5646 if (crtl->is_leaf && !cfun->calls_alloca)
5647 {
5648 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5649 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5650 size - STACK_CHECK_PROTECT);
5651 }
5652 else if (size > 0)
5653 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5654 }
5655
5656 if (sparc_save_local_in_regs_p)
5657 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5658 SORR_SAVE);
5659
5660 if (size == 0)
5661 ; /* do nothing. */
5662 else
5663 {
5664 rtx size_int_rtx, size_rtx;
5665
5666 size_rtx = size_int_rtx = GEN_INT (-size);
5667
5668 /* We establish the frame (i.e. decrement the stack pointer) first, even
5669 if we use a frame pointer, because we cannot clobber any call-saved
5670 registers, including the frame pointer, if we haven't created a new
5671 register save area, for the sake of compatibility with the ABI. */
5672 if (size <= 4096)
5673 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5674 else if (size <= 8192 && !frame_pointer_needed)
5675 {
5676 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5677 RTX_FRAME_RELATED_P (insn) = 1;
5678 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5679 }
5680 else
5681 {
5682 size_rtx = gen_rtx_REG (Pmode, 1);
5683 emit_move_insn (size_rtx, size_int_rtx);
5684 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5685 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5686 gen_stack_pointer_inc (size_int_rtx));
5687 }
5688 RTX_FRAME_RELATED_P (insn) = 1;
5689
5690 /* Ensure nothing is scheduled until after the frame is established. */
5691 emit_insn (gen_blockage ());
5692
5693 if (frame_pointer_needed)
5694 {
5695 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5696 gen_rtx_MINUS (Pmode,
5697 stack_pointer_rtx,
5698 size_rtx)));
5699 RTX_FRAME_RELATED_P (insn) = 1;
5700
5701 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5702 gen_rtx_SET (hard_frame_pointer_rtx,
5703 plus_constant (Pmode, stack_pointer_rtx,
5704 size)));
5705 }
5706
5707 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5708 {
5709 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5710 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5711
5712 insn = emit_move_insn (i7, o7);
5713 RTX_FRAME_RELATED_P (insn) = 1;
5714
5715 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5716
5717 /* Prevent this instruction from ever being considered dead,
5718 even if this function has no epilogue. */
5719 emit_use (i7);
5720 }
5721 }
5722
5723 if (frame_pointer_needed)
5724 {
5725 sparc_frame_base_reg = hard_frame_pointer_rtx;
5726 sparc_frame_base_offset = SPARC_STACK_BIAS;
5727 }
5728 else
5729 {
5730 sparc_frame_base_reg = stack_pointer_rtx;
5731 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5732 }
5733
5734 if (sparc_n_global_fp_regs > 0)
5735 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5736 sparc_frame_base_offset
5737 - sparc_apparent_frame_size,
5738 SORR_SAVE);
5739
5740 /* Load the GOT register if needed. */
5741 if (crtl->uses_pic_offset_table)
5742 load_got_register ();
5743
5744 /* Advertise that the data calculated just above are now valid. */
5745 sparc_prologue_data_valid_p = true;
5746 }
5747
5748 /* This function generates the assembly code for function entry, which boils
5749 down to emitting the necessary .register directives. */
5750
5751 static void
5752 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5753 {
5754 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5755 if (!TARGET_FLAT)
5756 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5757
5758 sparc_output_scratch_registers (file);
5759 }
5760
5761 /* Expand the function epilogue, either normal or part of a sibcall.
5762 We emit all the instructions except the return or the call. */
5763
5764 void
5765 sparc_expand_epilogue (bool for_eh)
5766 {
5767 HOST_WIDE_INT size = sparc_frame_size;
5768
5769 if (sparc_n_global_fp_regs > 0)
5770 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5771 sparc_frame_base_offset
5772 - sparc_apparent_frame_size,
5773 SORR_RESTORE);
5774
5775 if (size == 0 || for_eh)
5776 ; /* do nothing. */
5777 else if (sparc_leaf_function_p)
5778 {
5779 if (size <= 4096)
5780 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5781 else if (size <= 8192)
5782 {
5783 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5784 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5785 }
5786 else
5787 {
5788 rtx reg = gen_rtx_REG (Pmode, 1);
5789 emit_move_insn (reg, GEN_INT (size));
5790 emit_insn (gen_stack_pointer_inc (reg));
5791 }
5792 }
5793 }
5794
5795 /* Expand the function epilogue for the flat register window model, either
5796 normal or part of a sibcall. We emit all the instructions except the return or the call. */
5797
5798 void
5799 sparc_flat_expand_epilogue (bool for_eh)
5800 {
5801 HOST_WIDE_INT size = sparc_frame_size;
5802
5803 if (sparc_n_global_fp_regs > 0)
5804 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5805 sparc_frame_base_offset
5806 - sparc_apparent_frame_size,
5807 SORR_RESTORE);
5808
5809 /* If we have a frame pointer, we'll need both to restore it before the
5810 frame is destroyed and to use its current value while destroying the frame.
5811 Since we don't have an atomic way to do that in the flat window model,
5812 we save the current value into a temporary register (%g1). */
5813 if (frame_pointer_needed && !for_eh)
5814 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5815
5816 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5817 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5818 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5819
5820 if (sparc_save_local_in_regs_p)
5821 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5822 sparc_frame_base_offset,
5823 SORR_RESTORE);
5824
5825 if (size == 0 || for_eh)
5826 ; /* do nothing. */
5827 else if (frame_pointer_needed)
5828 {
5829 /* Make sure the frame is destroyed after everything else is done. */
5830 emit_insn (gen_blockage ());
5831
5832 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5833 }
5834 else
5835 {
5836 /* Likewise. */
5837 emit_insn (gen_blockage ());
5838
5839 if (size <= 4096)
5840 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5841 else if (size <= 8192)
5842 {
5843 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5844 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5845 }
5846 else
5847 {
5848 rtx reg = gen_rtx_REG (Pmode, 1);
5849 emit_move_insn (reg, GEN_INT (size));
5850 emit_insn (gen_stack_pointer_inc (reg));
5851 }
5852 }
5853 }
5854
5855 /* Return true if it is appropriate to emit `return' instructions in the
5856 body of a function. */
5857
5858 bool
5859 sparc_can_use_return_insn_p (void)
5860 {
5861 return sparc_prologue_data_valid_p
5862 && sparc_n_global_fp_regs == 0
5863 && TARGET_FLAT
5864 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5865 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5866 }
5867
5868 /* This function generates the assembly code for function exit. */
5869
5870 static void
5871 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5872 {
5873 /* If the last two instructions of a function are "call foo; dslot;",
5874 the return address might point to the first instruction in the next
5875 function, so we have to output a dummy nop for the sake of sane
5876 backtraces in such cases. This is pointless for sibling calls since
5877 the return address is explicitly adjusted. */
5878
5879 rtx_insn *insn = get_last_insn ();
5880
5881 rtx last_real_insn = prev_real_insn (insn);
5882 if (last_real_insn
5883 && NONJUMP_INSN_P (last_real_insn)
5884 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5885 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5886
5887 if (last_real_insn
5888 && CALL_P (last_real_insn)
5889 && !SIBLING_CALL_P (last_real_insn))
5890 fputs("\tnop\n", file);
5891
5892 sparc_output_deferred_case_vectors ();
5893 }
5894
5895 /* Output a 'restore' instruction. */
5896
5897 static void
5898 output_restore (rtx pat)
5899 {
5900 rtx operands[3];
5901
5902 if (! pat)
5903 {
5904 fputs ("\t restore\n", asm_out_file);
5905 return;
5906 }
5907
5908 gcc_assert (GET_CODE (pat) == SET);
5909
5910 operands[0] = SET_DEST (pat);
5911 pat = SET_SRC (pat);
5912
5913 switch (GET_CODE (pat))
5914 {
5915 case PLUS:
5916 operands[1] = XEXP (pat, 0);
5917 operands[2] = XEXP (pat, 1);
5918 output_asm_insn (" restore %r1, %2, %Y0", operands);
5919 break;
5920 case LO_SUM:
5921 operands[1] = XEXP (pat, 0);
5922 operands[2] = XEXP (pat, 1);
5923 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5924 break;
5925 case ASHIFT:
5926 operands[1] = XEXP (pat, 0);
5927 gcc_assert (XEXP (pat, 1) == const1_rtx);
5928 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5929 break;
5930 default:
5931 operands[1] = pat;
5932 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5933 break;
5934 }
5935 }
5936
5937 /* Output a return. */
5938
5939 const char *
5940 output_return (rtx_insn *insn)
5941 {
5942 if (crtl->calls_eh_return)
5943 {
5944 /* If the function uses __builtin_eh_return, the eh_return
5945 machinery occupies the delay slot. */
5946 gcc_assert (!final_sequence);
5947
5948 if (flag_delayed_branch)
5949 {
5950 if (!TARGET_FLAT && TARGET_V9)
5951 fputs ("\treturn\t%i7+8\n", asm_out_file);
5952 else
5953 {
5954 if (!TARGET_FLAT)
5955 fputs ("\trestore\n", asm_out_file);
5956
5957 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5958 }
5959
5960 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5961 }
5962 else
5963 {
5964 if (!TARGET_FLAT)
5965 fputs ("\trestore\n", asm_out_file);
5966
5967 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5968 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5969 }
5970 }
5971 else if (sparc_leaf_function_p || TARGET_FLAT)
5972 {
5973 /* This is a leaf or flat function so we don't have to bother restoring
5974 the register window, which frees us from dealing with the convoluted
5975 semantics of restore/return. We simply output the jump to the
5976 return address and the insn in the delay slot (if any). */
5977
5978 return "jmp\t%%o7+%)%#";
5979 }
5980 else
5981 {
5982 /* This is a regular function so we have to restore the register window.
5983 We may have a pending insn for the delay slot, which will be either
5984 combined with the 'restore' instruction or put in the delay slot of
5985 the 'return' instruction. */
5986
5987 if (final_sequence)
5988 {
5989 rtx delay, pat;
5990
5991 delay = NEXT_INSN (insn);
5992 gcc_assert (delay);
5993
5994 pat = PATTERN (delay);
5995
5996 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5997 {
5998 epilogue_renumber (&pat, 0);
5999 return "return\t%%i7+%)%#";
6000 }
6001 else
6002 {
6003 output_asm_insn ("jmp\t%%i7+%)", NULL);
6004 output_restore (pat);
6005 PATTERN (delay) = gen_blockage ();
6006 INSN_CODE (delay) = -1;
6007 }
6008 }
6009 else
6010 {
6011 /* The delay slot is empty. */
6012 if (TARGET_V9)
6013 return "return\t%%i7+%)\n\t nop";
6014 else if (flag_delayed_branch)
6015 return "jmp\t%%i7+%)\n\t restore";
6016 else
6017 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6018 }
6019 }
6020
6021 return "";
6022 }
6023
6024 /* Output a sibling call. */
6025
6026 const char *
6027 output_sibcall (rtx_insn *insn, rtx call_operand)
6028 {
6029 rtx operands[1];
6030
6031 gcc_assert (flag_delayed_branch);
6032
6033 operands[0] = call_operand;
6034
6035 if (sparc_leaf_function_p || TARGET_FLAT)
6036 {
6037 /* This is a leaf or flat function so we don't have to bother restoring
6038 the register window. We simply output the jump to the function and
6039 the insn in the delay slot (if any). */
6040
6041 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6042
6043 if (final_sequence)
6044 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6045 operands);
6046 else
6047 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6048 it into a branch if possible. */
6049 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6050 operands);
6051 }
6052 else
6053 {
6054 /* This is a regular function so we have to restore the register window.
6055 We may have a pending insn for the delay slot, which will be combined
6056 with the 'restore' instruction. */
6057
6058 output_asm_insn ("call\t%a0, 0", operands);
6059
6060 if (final_sequence)
6061 {
6062 rtx_insn *delay = NEXT_INSN (insn);
6063 gcc_assert (delay);
6064
6065 output_restore (PATTERN (delay));
6066
6067 PATTERN (delay) = gen_blockage ();
6068 INSN_CODE (delay) = -1;
6069 }
6070 else
6071 output_restore (NULL_RTX);
6072 }
6073
6074 return "";
6075 }
6076 \f
6077 /* Functions for handling argument passing.
6078
6079 For 32-bit, the first 6 args are normally in registers and the rest are
6080 pushed. Any arg that starts within the first 6 words is at least
6081 partially passed in a register unless its data type forbids it.
6082
6083 For 64-bit, the argument registers are laid out as an array of 16 elements
6084 and arguments are added sequentially. The first 6 int args and up to the
6085 first 16 fp args (depending on size) are passed in regs.
6086
6087 Slot Stack Integral Float Float in structure Double Long Double
6088 ---- ----- -------- ----- ------------------ ------ -----------
6089 15 [SP+248] %f31 %f30,%f31 %d30
6090 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6091 13 [SP+232] %f27 %f26,%f27 %d26
6092 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6093 11 [SP+216] %f23 %f22,%f23 %d22
6094 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6095 9 [SP+200] %f19 %f18,%f19 %d18
6096 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6097 7 [SP+184] %f15 %f14,%f15 %d14
6098 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6099 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6100 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6101 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6102 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6103 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6104 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6105
6106 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6107
6108 Integral arguments are always passed as 64-bit quantities appropriately
6109 extended.
6110
6111 Passing of floating point values is handled as follows.
6112 If a prototype is in scope:
6113 If the value is in a named argument (i.e. not a stdarg function or a
6114 value not part of the `...') then the value is passed in the appropriate
6115 fp reg.
6116 If the value is part of the `...' and is passed in one of the first 6
6117 slots then the value is passed in the appropriate int reg.
6118 If the value is part of the `...' and is not passed in one of the first 6
6119 slots then the value is passed in memory.
6120 If a prototype is not in scope:
6121 If the value is one of the first 6 arguments the value is passed in the
6122 appropriate integer reg and the appropriate fp reg.
6123 If the value is not one of the first 6 arguments the value is passed in
6124 the appropriate fp reg and in memory.
6125
6126
6127 Summary of the calling conventions implemented by GCC on the SPARC:
6128
6129 32-bit ABI:
6130 size argument return value
6131
6132 small integer <4 int. reg. int. reg.
6133 word 4 int. reg. int. reg.
6134 double word 8 int. reg. int. reg.
6135
6136 _Complex small integer <8 int. reg. int. reg.
6137 _Complex word 8 int. reg. int. reg.
6138 _Complex double word 16 memory int. reg.
6139
6140 vector integer <=8 int. reg. FP reg.
6141 vector integer >8 memory memory
6142
6143 float 4 int. reg. FP reg.
6144 double 8 int. reg. FP reg.
6145 long double 16 memory memory
6146
6147 _Complex float 8 memory FP reg.
6148 _Complex double 16 memory FP reg.
6149 _Complex long double 32 memory FP reg.
6150
6151 vector float any memory memory
6152
6153 aggregate any memory memory
6154
6155
6156
6157 64-bit ABI:
6158 size argument return value
6159
6160 small integer <8 int. reg. int. reg.
6161 word 8 int. reg. int. reg.
6162 double word 16 int. reg. int. reg.
6163
6164 _Complex small integer <16 int. reg. int. reg.
6165 _Complex word 16 int. reg. int. reg.
6166 _Complex double word 32 memory int. reg.
6167
6168 vector integer <=16 FP reg. FP reg.
6169 vector integer 16<s<=32 memory FP reg.
6170 vector integer >32 memory memory
6171
6172 float 4 FP reg. FP reg.
6173 double 8 FP reg. FP reg.
6174 long double 16 FP reg. FP reg.
6175
6176 _Complex float 8 FP reg. FP reg.
6177 _Complex double 16 FP reg. FP reg.
6178 _Complex long double 32 memory FP reg.
6179
6180 vector float <=16 FP reg. FP reg.
6181 vector float 16<s<=32 memory FP reg.
6182 vector float >32 memory memory
6183
6184 aggregate <=16 reg. reg.
6185 aggregate 16<s<=32 memory reg.
6186 aggregate >32 memory memory
6187
6188
6189
6190 Note #1: complex floating-point types follow the extended SPARC ABIs as
6191 implemented by the Sun compiler.
6192
6193 Note #2: integral vector types follow the scalar floating-point types
6194 conventions to match what is implemented by the Sun VIS SDK.
6195
6196 Note #3: floating-point vector types follow the aggregate types
6197 conventions. */
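
/* As an informal example of the 64-bit conventions tabulated above (an
   illustration with made-up names, not an excerpt from the ABI document):
   for a prototyped call such as

     struct sf { float x, y; };
     void f (int i, double d, struct sf s, _Complex double c);

   slot 0 carries i in %o0, slot 1 carries d in %d2, slot 2 carries s in
   %f4/%f5 and slots 3-4 carry c in %d6/%d8, the slots' stack homes being
   [SP+128] up to [SP+160] as listed above.  */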
6198
6199
6200 /* Maximum number of int regs for args. */
6201 #define SPARC_INT_ARG_MAX 6
6202 /* Maximum number of fp regs for args. */
6203 #define SPARC_FP_ARG_MAX 16
6204 /* Number of words (partially) occupied for a given size in units. */
6205 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
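/* For instance, CEIL_NWORDS (12) is 3 with 4-byte words and 2 with
   8-byte words.  */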
6206
6207 /* Handle the INIT_CUMULATIVE_ARGS macro.
6208 Initialize a variable CUM of type CUMULATIVE_ARGS
6209 for a call to a function whose data type is FNTYPE.
6210 For a library call, FNTYPE is 0. */
6211
6212 void
6213 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6214 {
6215 cum->words = 0;
6216 cum->prototype_p = fntype && prototype_p (fntype);
6217 cum->libcall_p = !fntype;
6218 }
6219
6220 /* Handle promotion of pointer and integer arguments. */
6221
6222 static machine_mode
6223 sparc_promote_function_mode (const_tree type, machine_mode mode,
6224 int *punsignedp, const_tree, int)
6225 {
6226 if (type && POINTER_TYPE_P (type))
6227 {
6228 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6229 return Pmode;
6230 }
6231
6232 /* Integral arguments are passed as full words, as per the ABI. */
6233 if (GET_MODE_CLASS (mode) == MODE_INT
6234 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6235 return word_mode;
6236
6237 return mode;
6238 }
6239
6240 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6241
6242 static bool
6243 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6244 {
6245 return TARGET_ARCH64 ? true : false;
6246 }
6247
6248 /* Traverse the record TYPE recursively and call FUNC on its fields.
6249 NAMED is true if this is for a named parameter. DATA is passed
6250 to FUNC for each field. OFFSET is the starting position and
6251 PACKED is true if we are inside a packed record. */
6252
6253 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6254 static void
6255 traverse_record_type (const_tree type, bool named, T *data,
6256 HOST_WIDE_INT offset = 0, bool packed = false)
6257 {
6258 /* The ABI obviously doesn't specify how packed structures are passed.
6259 These are passed in integer regs if possible, otherwise memory. */
6260 if (!packed)
6261 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6262 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6263 {
6264 packed = true;
6265 break;
6266 }
6267
6268 /* Walk the real fields, but skip those with no size or a zero size.
6269 ??? Fields with variable offset are handled as having zero offset. */
6270 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6271 if (TREE_CODE (field) == FIELD_DECL)
6272 {
6273 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6274 continue;
6275
6276 HOST_WIDE_INT bitpos = offset;
6277 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6278 bitpos += int_bit_position (field);
6279
6280 tree field_type = TREE_TYPE (field);
6281 if (TREE_CODE (field_type) == RECORD_TYPE)
6282 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6283 packed);
6284 else
6285 {
6286 const bool fp_type
6287 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6288 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6289 data);
6290 }
6291 }
6292 }
6293
6294 /* Handle recursive register classifying for structure layout. */
6295
6296 typedef struct
6297 {
6298 bool fp_regs; /* true if field eligible to FP registers. */
6299 bool fp_regs_in_first_word; /* true if such field in first word. */
6300 } classify_data_t;
6301
6302 /* A subroutine of function_arg_slotno. Classify the field. */
6303
6304 inline void
6305 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6306 classify_data_t *data)
6307 {
6308 if (fp)
6309 {
6310 data->fp_regs = true;
6311 if (bitpos < BITS_PER_WORD)
6312 data->fp_regs_in_first_word = true;
6313 }
6314 }
6315
6316 /* Compute the slot number to pass an argument in.
6317 Return the slot number or -1 if passing on the stack.
6318
6319 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6320 the preceding args and about the function being called.
6321 MODE is the argument's machine mode.
6322 TYPE is the data type of the argument (as a tree).
6323 This is null for libcalls where that information may
6324 not be available.
6325 NAMED is nonzero if this argument is a named parameter
6326 (otherwise it is an extra parameter matching an ellipsis).
6327 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6328 *PREGNO records the register number to use if scalar type.
6329 *PPADDING records the amount of padding needed in words. */
6330
6331 static int
6332 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6333 const_tree type, bool named, bool incoming,
6334 int *pregno, int *ppadding)
6335 {
6336 int regbase = (incoming
6337 ? SPARC_INCOMING_INT_ARG_FIRST
6338 : SPARC_OUTGOING_INT_ARG_FIRST);
6339 int slotno = cum->words;
6340 enum mode_class mclass;
6341 int regno;
6342
6343 *ppadding = 0;
6344
6345 if (type && TREE_ADDRESSABLE (type))
6346 return -1;
6347
6348 if (TARGET_ARCH32
6349 && mode == BLKmode
6350 && type
6351 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6352 return -1;
6353
6354 /* For SPARC64, objects requiring 16-byte alignment get it. */
6355 if (TARGET_ARCH64
6356 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6357 && (slotno & 1) != 0)
6358 slotno++, *ppadding = 1;
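      /* For instance, a 16-byte-aligned quad arriving at the odd slot 3 is
	 moved to slot 4, and the skipped word is reported in *PPADDING so
	 that the caller advances past it.  */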
6359
6360 mclass = GET_MODE_CLASS (mode);
6361 if (type && TREE_CODE (type) == VECTOR_TYPE)
6362 {
6363 /* Vector types deserve special treatment because they are
6364 polymorphic wrt their mode, depending upon whether VIS
6365 instructions are enabled. */
6366 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6367 {
6368 /* The SPARC port defines no floating-point vector modes. */
6369 gcc_assert (mode == BLKmode);
6370 }
6371 else
6372 {
6373 /* Integral vector types should either have a vector
6374 mode or an integral mode, because we are guaranteed
6375 by pass_by_reference that their size is not greater
6376 than 16 bytes and TImode is 16-byte wide. */
6377 gcc_assert (mode != BLKmode);
6378
6379 /* Vector integers are handled like floats according to
6380 the Sun VIS SDK. */
6381 mclass = MODE_FLOAT;
6382 }
6383 }
6384
6385 switch (mclass)
6386 {
6387 case MODE_FLOAT:
6388 case MODE_COMPLEX_FLOAT:
6389 case MODE_VECTOR_INT:
6390 if (TARGET_ARCH64 && TARGET_FPU && named)
6391 {
6392 /* If all arg slots are filled, then must pass on stack. */
6393 if (slotno >= SPARC_FP_ARG_MAX)
6394 return -1;
6395
6396 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6397 /* Arguments filling only a single FP register are
6398 right-justified in the outer double FP register. */
6399 if (GET_MODE_SIZE (mode) <= 4)
6400 regno++;
6401 break;
6402 }
6403 /* fallthrough */
6404
6405 case MODE_INT:
6406 case MODE_COMPLEX_INT:
6407 /* If all arg slots are filled, then must pass on stack. */
6408 if (slotno >= SPARC_INT_ARG_MAX)
6409 return -1;
6410
6411 regno = regbase + slotno;
6412 break;
6413
6414 case MODE_RANDOM:
6415 if (mode == VOIDmode)
6416 /* MODE is VOIDmode when generating the actual call. */
6417 return -1;
6418
6419 gcc_assert (mode == BLKmode);
6420
6421 if (TARGET_ARCH32
6422 || !type
6423 || (TREE_CODE (type) != RECORD_TYPE
6424 && TREE_CODE (type) != VECTOR_TYPE))
6425 {
6426 /* If all arg slots are filled, then must pass on stack. */
6427 if (slotno >= SPARC_INT_ARG_MAX)
6428 return -1;
6429
6430 regno = regbase + slotno;
6431 }
6432 else /* TARGET_ARCH64 && type */
6433 {
6434 /* If all arg slots are filled, then must pass on stack. */
6435 if (slotno >= SPARC_FP_ARG_MAX)
6436 return -1;
6437
6438 if (TREE_CODE (type) == RECORD_TYPE)
6439 {
6440 classify_data_t data = { false, false };
6441 traverse_record_type<classify_data_t, classify_registers>
6442 (type, named, &data);
6443
6444 if (data.fp_regs)
6445 {
6446 /* If all FP slots are filled except for the last one and
6447 there is no FP field in the first word, then must pass
6448 on stack. */
6449 if (slotno >= SPARC_FP_ARG_MAX - 1
6450 && !data.fp_regs_in_first_word)
6451 return -1;
6452 }
6453 else
6454 {
6455 /* If all int slots are filled, then must pass on stack. */
6456 if (slotno >= SPARC_INT_ARG_MAX)
6457 return -1;
6458 }
6459 }
6460
6461 /* PREGNO isn't set since both int and FP regs can be used. */
6462 return slotno;
6463 }
6464 break;
6465
6466 default :
6467 gcc_unreachable ();
6468 }
6469
6470 *pregno = regno;
6471 return slotno;
6472 }
6473
6474 /* Handle recursive register counting/assigning for structure layout. */
6475
6476 typedef struct
6477 {
6478 int slotno; /* slot number of the argument. */
6479 int regbase; /* regno of the base register. */
6480 int intoffset; /* offset of the first pending integer field. */
6481 int nregs; /* number of words passed in registers. */
6482 bool stack; /* true if part of the argument is on the stack. */
6483 rtx ret; /* return expression being built. */
6484 } assign_data_t;
6485
6486 /* A subroutine of function_arg_record_value. Compute the number of integer
6487 registers to be assigned between PARMS->intoffset and BITPOS. Return
6488 true if at least one integer register is assigned or false otherwise. */
6489
6490 static bool
6491 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6492 {
6493 if (data->intoffset < 0)
6494 return false;
6495
6496 const int intoffset = data->intoffset;
6497 data->intoffset = -1;
6498
6499 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6500 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6501 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6502 int nregs = (endbit - startbit) / BITS_PER_WORD;
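  /* For example, with 64-bit words, pending integer bits running from
     intoffset 40 up to bitpos 160 give startbit 0, endbit 192 and thus
     3 registers.  */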
6503
6504 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6505 {
6506 nregs = SPARC_INT_ARG_MAX - this_slotno;
6507
6508 /* We need to pass this field (partly) on the stack. */
6509 data->stack = 1;
6510 }
6511
6512 if (nregs <= 0)
6513 return false;
6514
6515 *pnregs = nregs;
6516 return true;
6517 }
6518
6519 /* A subroutine of function_arg_record_value. Compute the number and the mode
6520 of the FP registers to be assigned for FIELD. Return true if at least one
6521 FP register is assigned or false otherwise. */
6522
6523 static bool
6524 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6525 assign_data_t *data,
6526 int *pnregs, machine_mode *pmode)
6527 {
6528 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6529 machine_mode mode = DECL_MODE (field);
6530 int nregs, nslots;
6531
6532 /* Slots are counted as words while regs are counted as having the size of
6533 the (inner) mode. */
6534 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6535 {
6536 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6537 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6538 }
6539 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6540 {
6541 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6542 nregs = 2;
6543 }
6544 else
6545 nregs = 1;
6546
6547 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6548
6549 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6550 {
6551 nslots = SPARC_FP_ARG_MAX - this_slotno;
6552 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6553
6554 /* We need to pass this field (partly) on the stack. */
6555 data->stack = 1;
6556
6557 if (nregs <= 0)
6558 return false;
6559 }
6560
6561 *pnregs = nregs;
6562 *pmode = mode;
6563 return true;
6564 }
6565
6566 /* A subroutine of function_arg_record_value. Count the number of registers
6567 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6568
6569 inline void
6570 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6571 assign_data_t *data)
6572 {
6573 if (fp)
6574 {
6575 int nregs;
6576 machine_mode mode;
6577
6578 if (compute_int_layout (bitpos, data, &nregs))
6579 data->nregs += nregs;
6580
6581 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6582 data->nregs += nregs;
6583 }
6584 else
6585 {
6586 if (data->intoffset < 0)
6587 data->intoffset = bitpos;
6588 }
6589 }
6590
6591 /* A subroutine of function_arg_record_value. Assign the bits of the
6592 structure between PARMS->intoffset and BITPOS to integer registers. */
6593
6594 static void
6595 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6596 {
6597 int intoffset = data->intoffset;
6598 machine_mode mode;
6599 int nregs;
6600
6601 if (!compute_int_layout (bitpos, data, &nregs))
6602 return;
6603
6604 /* If this is the trailing part of a word, only load that much into
6605 the register. Otherwise load the whole register. Note that in
6606 the latter case we may pick up unwanted bits. It's not a problem
6607 at the moment, but we may wish to revisit this. */
6608 if (intoffset % BITS_PER_WORD != 0)
6609 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6610 MODE_INT);
6611 else
6612 mode = word_mode;
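  /* As an illustration, a pending run starting at bit 32 of a word on a
     64-bit target is first loaded in SImode (its trailing 32 bits), and
     any subsequent words fall back to word_mode in the loop below.  */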
6613
6614 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6615 unsigned int regno = data->regbase + this_slotno;
6616 intoffset /= BITS_PER_UNIT;
6617
6618 do
6619 {
6620 rtx reg = gen_rtx_REG (mode, regno);
6621 XVECEXP (data->ret, 0, data->stack + data->nregs)
6622 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6623 data->nregs += 1;
6624 mode = word_mode;
6625 regno += 1;
6626 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6627 }
6628 while (--nregs > 0);
6629 }
6630
6631 /* A subroutine of function_arg_record_value. Assign FIELD at position
6632 BITPOS to FP registers. */
6633
6634 static void
6635 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6636 assign_data_t *data)
6637 {
6638 int nregs;
6639 machine_mode mode;
6640
6641 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6642 return;
6643
6644 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6645 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6646 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6647 regno++;
6648 int pos = bitpos / BITS_PER_UNIT;
6649
6650 do
6651 {
6652 rtx reg = gen_rtx_REG (mode, regno);
6653 XVECEXP (data->ret, 0, data->stack + data->nregs)
6654 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6655 data->nregs += 1;
6656 regno += GET_MODE_SIZE (mode) / 4;
6657 pos += GET_MODE_SIZE (mode);
6658 }
6659 while (--nregs > 0);
6660 }
6661
6662 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6663 the structure between PARMS->intoffset and BITPOS to registers. */
6664
6665 inline void
6666 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6667 assign_data_t *data)
6668 {
6669 if (fp)
6670 {
6671 assign_int_registers (bitpos, data);
6672
6673 assign_fp_registers (field, bitpos, data);
6674 }
6675 else
6676 {
6677 if (data->intoffset < 0)
6678 data->intoffset = bitpos;
6679 }
6680 }
6681
6682 /* Used by function_arg and sparc_function_value_1 to implement the complex
6683 conventions of the 64-bit ABI for passing and returning structures.
6684 Return an expression valid as a return value for the FUNCTION_ARG
6685 and TARGET_FUNCTION_VALUE.
6686
6687 TYPE is the data type of the argument (as a tree).
6688 This is null for libcalls where that information may
6689 not be available.
6690 MODE is the argument's machine mode.
6691 SLOTNO is the index number of the argument's slot in the parameter array.
6692 NAMED is true if this argument is a named parameter
6693 (otherwise it is an extra parameter matching an ellipsis).
6694 REGBASE is the regno of the base register for the parameter array. */
6695
6696 static rtx
6697 function_arg_record_value (const_tree type, machine_mode mode,
6698 int slotno, bool named, int regbase)
6699 {
6700 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6701 assign_data_t data;
6702 int nregs;
6703
6704 data.slotno = slotno;
6705 data.regbase = regbase;
6706
6707 /* Count how many registers we need. */
6708 data.nregs = 0;
6709 data.intoffset = 0;
6710 data.stack = false;
6711 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6712
6713 /* Take into account pending integer fields. */
6714 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6715 data.nregs += nregs;
6716
6717 /* Allocate the vector and handle some annoying special cases. */
6718 nregs = data.nregs;
6719
6720 if (nregs == 0)
6721 {
6722 /* ??? Empty structure has no value? Duh? */
6723 if (typesize <= 0)
6724 {
6725 /* Though there's nothing really to store, return a word register
6726 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6727 leads to breakage due to the fact that there are zero bytes to
6728 load. */
6729 return gen_rtx_REG (mode, regbase);
6730 }
6731
6732 /* ??? C++ has structures with no fields, and yet a size. Give up
6733 for now and pass everything back in integer registers. */
6734 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6735 if (nregs + slotno > SPARC_INT_ARG_MAX)
6736 nregs = SPARC_INT_ARG_MAX - slotno;
6737 }
6738
6739 gcc_assert (nregs > 0);
6740
6741 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6742
6743 /* If at least one field must be passed on the stack, generate
6744 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6745 also be passed on the stack. We can't do much better because the
6746 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6747 of structures for which the fields passed exclusively in registers
6748 are not at the beginning of the structure. */
6749 if (data.stack)
6750 XVECEXP (data.ret, 0, 0)
6751 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6752
6753 /* Assign the registers. */
6754 data.nregs = 0;
6755 data.intoffset = 0;
6756 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6757
6758 /* Assign pending integer fields. */
6759 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6760
6761 gcc_assert (data.nregs == nregs);
6762
6763 return data.ret;
6764 }
6765
6766 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6767 of the 64-bit ABI for passing and returning unions.
6768 Return an expression valid as a return value for the FUNCTION_ARG
6769 and TARGET_FUNCTION_VALUE.
6770
6771 SIZE is the size in bytes of the union.
6772 MODE is the argument's machine mode.
6773 REGNO is the hard register the union will be passed in. */
6774
6775 static rtx
6776 function_arg_union_value (int size, machine_mode mode, int slotno,
6777 int regno)
6778 {
6779 int nwords = CEIL_NWORDS (size), i;
6780 rtx regs;
6781
6782 /* See comment in previous function for empty structures. */
6783 if (nwords == 0)
6784 return gen_rtx_REG (mode, regno);
6785
6786 if (slotno == SPARC_INT_ARG_MAX - 1)
6787 nwords = 1;
6788
6789 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6790
6791 for (i = 0; i < nwords; i++)
6792 {
6793 /* Unions are passed left-justified. */
6794 XVECEXP (regs, 0, i)
6795 = gen_rtx_EXPR_LIST (VOIDmode,
6796 gen_rtx_REG (word_mode, regno),
6797 GEN_INT (UNITS_PER_WORD * i));
6798 regno++;
6799 }
6800
6801 return regs;
6802 }
6803
6804 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6805 for passing and returning BLKmode vectors.
6806 Return an expression valid as a return value for the FUNCTION_ARG
6807 and TARGET_FUNCTION_VALUE.
6808
6809 SIZE is the size in bytes of the vector.
6810 REGNO is the FP hard register the vector will be passed in. */
6811
6812 static rtx
6813 function_arg_vector_value (int size, int regno)
6814 {
6815 const int nregs = MAX (1, size / 8);
6816 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6817
6818 if (size < 8)
6819 XVECEXP (regs, 0, 0)
6820 = gen_rtx_EXPR_LIST (VOIDmode,
6821 gen_rtx_REG (SImode, regno),
6822 const0_rtx);
6823 else
6824 for (int i = 0; i < nregs; i++)
6825 XVECEXP (regs, 0, i)
6826 = gen_rtx_EXPR_LIST (VOIDmode,
6827 gen_rtx_REG (DImode, regno + 2*i),
6828 GEN_INT (i*8));
6829
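  /* For instance, a 16-byte BLKmode vector handed %f8 as REGNO comes back
     as the pair %d8 at offset 0 and %d10 at offset 8.  */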
6830 return regs;
6831 }
6832
6833 /* Determine where to put an argument to a function.
6834 Value is zero to push the argument on the stack,
6835 or a hard register in which to store the argument.
6836
6837 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6838 the preceding args and about the function being called.
6839 MODE is the argument's machine mode.
6840 TYPE is the data type of the argument (as a tree).
6841 This is null for libcalls where that information may
6842 not be available.
6843 NAMED is true if this argument is a named parameter
6844 (otherwise it is an extra parameter matching an ellipsis).
6845 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6846 TARGET_FUNCTION_INCOMING_ARG. */
6847
6848 static rtx
6849 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6850 const_tree type, bool named, bool incoming)
6851 {
6852 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6853
6854 int regbase = (incoming
6855 ? SPARC_INCOMING_INT_ARG_FIRST
6856 : SPARC_OUTGOING_INT_ARG_FIRST);
6857 int slotno, regno, padding;
6858 enum mode_class mclass = GET_MODE_CLASS (mode);
6859
6860 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6861 &regno, &padding);
6862 if (slotno == -1)
6863 return 0;
6864
6865 /* Vector types deserve special treatment because they are polymorphic wrt
6866 their mode, depending upon whether VIS instructions are enabled. */
6867 if (type && TREE_CODE (type) == VECTOR_TYPE)
6868 {
6869 HOST_WIDE_INT size = int_size_in_bytes (type);
6870 gcc_assert ((TARGET_ARCH32 && size <= 8)
6871 || (TARGET_ARCH64 && size <= 16));
6872
6873 if (mode == BLKmode)
6874 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6875
6876 mclass = MODE_FLOAT;
6877 }
6878
6879 if (TARGET_ARCH32)
6880 return gen_rtx_REG (mode, regno);
6881
6882 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6883 and are promoted to registers if possible. */
6884 if (type && TREE_CODE (type) == RECORD_TYPE)
6885 {
6886 HOST_WIDE_INT size = int_size_in_bytes (type);
6887 gcc_assert (size <= 16);
6888
6889 return function_arg_record_value (type, mode, slotno, named, regbase);
6890 }
6891
6892 /* Unions up to 16 bytes in size are passed in integer registers. */
6893 else if (type && TREE_CODE (type) == UNION_TYPE)
6894 {
6895 HOST_WIDE_INT size = int_size_in_bytes (type);
6896 gcc_assert (size <= 16);
6897
6898 return function_arg_union_value (size, mode, slotno, regno);
6899 }
6900
6901 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6902 but also have the slot allocated for them.
6903 If no prototype is in scope, fp values in register slots get passed
6904 in two places, either fp regs and int regs or fp regs and memory. */
6905 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6906 && SPARC_FP_REG_P (regno))
6907 {
6908 rtx reg = gen_rtx_REG (mode, regno);
6909 if (cum->prototype_p || cum->libcall_p)
6910 return reg;
6911 else
6912 {
6913 rtx v0, v1;
6914
6915 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6916 {
6917 int intreg;
6918
6919 /* On incoming, we don't need to know that the value
6920 is passed in %f0 and %i0, and it confuses other parts
6921 causing needless spillage even in the simplest cases. */
6922 if (incoming)
6923 return reg;
6924
6925 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6926 + (regno - SPARC_FP_ARG_FIRST) / 2);
6927
6928 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6929 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6930 const0_rtx);
6931 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6932 }
6933 else
6934 {
6935 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6936 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6937 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6938 }
6939 }
6940 }
6941
6942 /* All other aggregate types are passed in an integer register in a mode
6943 corresponding to the size of the type. */
6944 else if (type && AGGREGATE_TYPE_P (type))
6945 {
6946 HOST_WIDE_INT size = int_size_in_bytes (type);
6947 gcc_assert (size <= 16);
6948
6949 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6950 }
6951
6952 return gen_rtx_REG (mode, regno);
6953 }
6954
6955 /* Handle the TARGET_FUNCTION_ARG target hook. */
6956
6957 static rtx
6958 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6959 const_tree type, bool named)
6960 {
6961 return sparc_function_arg_1 (cum, mode, type, named, false);
6962 }
6963
6964 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6965
6966 static rtx
6967 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6968 const_tree type, bool named)
6969 {
6970 return sparc_function_arg_1 (cum, mode, type, named, true);
6971 }
6972
6973 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
6974
6975 static unsigned int
6976 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6977 {
6978 return ((TARGET_ARCH64
6979 && (GET_MODE_ALIGNMENT (mode) == 128
6980 || (type && TYPE_ALIGN (type) == 128)))
6981 ? 128
6982 : PARM_BOUNDARY);
6983 }
6984
6985 /* For an arg passed partly in registers and partly in memory,
6986 this is the number of bytes of registers used.
6987 For args passed entirely in registers or entirely in memory, zero.
6988
6989 Any arg that starts in the first 6 regs but won't entirely fit in them
6990 needs partial registers on v8. On v9, structures with integer
6991 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6992 values that begin in the last fp reg [where "last fp reg" varies with the
6993 mode] will be split between that reg and memory. */
6994
6995 static int
6996 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
6997 tree type, bool named)
6998 {
6999 int slotno, regno, padding;
7000
7001 /* We pass false for incoming here; it doesn't matter. */
7002 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7003 false, &regno, &padding);
7004
7005 if (slotno == -1)
7006 return 0;
7007
7008 if (TARGET_ARCH32)
7009 {
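      /* E.g. (informally) a double starting in slot 5 spans %o5 and one
	 stack word, so 4 bytes are reported as passed in registers.  */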
7010 if ((slotno + (mode == BLKmode
7011 ? CEIL_NWORDS (int_size_in_bytes (type))
7012 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7013 > SPARC_INT_ARG_MAX)
7014 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7015 }
7016 else
7017 {
7018 /* We are guaranteed by pass_by_reference that the size of the
7019 argument is not greater than 16 bytes, so we only need to return
7020 one word if the argument is partially passed in registers. */
7021
7022 if (type && AGGREGATE_TYPE_P (type))
7023 {
7024 int size = int_size_in_bytes (type);
7025
7026 if (size > UNITS_PER_WORD
7027 && (slotno == SPARC_INT_ARG_MAX - 1
7028 || slotno == SPARC_FP_ARG_MAX - 1))
7029 return UNITS_PER_WORD;
7030 }
7031 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7032 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7033 && ! (TARGET_FPU && named)))
7034 {
7035 /* The complex types are passed as packed types. */
7036 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7037 && slotno == SPARC_INT_ARG_MAX - 1)
7038 return UNITS_PER_WORD;
7039 }
7040 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7041 {
7042 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7043 > SPARC_FP_ARG_MAX)
7044 return UNITS_PER_WORD;
7045 }
7046 }
7047
7048 return 0;
7049 }
7050
7051 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7052 Specify whether to pass the argument by reference. */
7053
7054 static bool
7055 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7056 machine_mode mode, const_tree type,
7057 bool named ATTRIBUTE_UNUSED)
7058 {
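  /* To make the rules below concrete (informal examples): with -m32 both a
     16-byte long double (TFmode) and a _Complex float (SCmode) are passed
     by reference, whereas with -m64 each is passed by value since neither
     is an aggregate nor larger than 16 bytes.  */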
7059 if (TARGET_ARCH32)
7060 /* Original SPARC 32-bit ABI says that structures and unions,
7061 and quad-precision floats are passed by reference. For Pascal,
7062 also pass arrays by reference. All other base types are passed
7063 in registers.
7064
7065 Extended ABI (as implemented by the Sun compiler) says that all
7066 complex floats are passed by reference. Pass complex integers
7067 in registers up to 8 bytes. More generally, enforce the 2-word
7068 cap for passing arguments in registers.
7069
7070 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7071 integers are passed like floats of the same size, that is in
7072 registers up to 8 bytes. Pass all vector floats by reference
7073 like structure and unions. */
7074 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7075 || mode == SCmode
7076 /* Catch CDImode, TFmode, DCmode and TCmode. */
7077 || GET_MODE_SIZE (mode) > 8
7078 || (type
7079 && TREE_CODE (type) == VECTOR_TYPE
7080 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7081 else
7082 /* Original SPARC 64-bit ABI says that structures and unions
7083 smaller than 16 bytes are passed in registers, as well as
7084 all other base types.
7085
7086 Extended ABI (as implemented by the Sun compiler) says that
7087 complex floats are passed in registers up to 16 bytes. Pass
7088 all complex integers in registers up to 16 bytes. More generally,
7089 enforce the 2-word cap for passing arguments in registers.
7090
7091 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7092 integers are passed like floats of the same size, that is in
7093 registers (up to 16 bytes). Pass all vector floats like structure
7094 and unions. */
7095 return ((type
7096 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7097 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7098 /* Catch CTImode and TCmode. */
7099 || GET_MODE_SIZE (mode) > 16);
7100 }
7101
7102 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7103 Update the data in CUM to advance over an argument
7104 of mode MODE and data type TYPE.
7105 TYPE is null for libcalls where that information may not be available. */
7106
7107 static void
7108 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7109 const_tree type, bool named)
7110 {
7111 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7112 int regno, padding;
7113
7114 /* We pass false for incoming here; it doesn't matter. */
7115 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7116
7117 /* If argument requires leading padding, add it. */
7118 cum->words += padding;
7119
7120 if (TARGET_ARCH32)
7121 cum->words += (mode == BLKmode
7122 ? CEIL_NWORDS (int_size_in_bytes (type))
7123 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7124 else
7125 {
7126 if (type && AGGREGATE_TYPE_P (type))
7127 {
7128 int size = int_size_in_bytes (type);
7129
7130 if (size <= 8)
7131 ++cum->words;
7132 else if (size <= 16)
7133 cum->words += 2;
7134 else /* passed by reference */
7135 ++cum->words;
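	  /* E.g. a 24-byte struct advances CUM by a single word: only its
	     address is passed, per sparc_pass_by_reference above.  */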
7136 }
7137 else
7138 cum->words += (mode == BLKmode
7139 ? CEIL_NWORDS (int_size_in_bytes (type))
7140 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7141 }
7142 }
7143
7144 /* Handle the FUNCTION_ARG_PADDING macro.
7145 For the 64-bit ABI, structs are always stored left-justified in their
7146 argument slot. */
7147
7148 enum direction
7149 function_arg_padding (machine_mode mode, const_tree type)
7150 {
7151 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7152 return upward;
7153
7154 /* Fall back to the default. */
7155 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7156 }
7157
7158 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7159 Specify whether to return the return value in memory. */
7160
7161 static bool
7162 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7163 {
7164 if (TARGET_ARCH32)
7165 /* Original SPARC 32-bit ABI says that structures and unions,
7166 and quad-precision floats are returned in memory. All other
7167 base types are returned in registers.
7168
7169 Extended ABI (as implemented by the Sun compiler) says that
7170 all complex floats are returned in registers (8 FP registers
7171 at most for '_Complex long double'). Return all complex integers
7172 in registers (4 at most for '_Complex long long').
7173
7174 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7175 integers are returned like floats of the same size, that is in
7176 registers up to 8 bytes and in memory otherwise. Return all
7177 vector floats in memory like structure and unions; note that
7178 they always have BLKmode like the latter. */
7179 return (TYPE_MODE (type) == BLKmode
7180 || TYPE_MODE (type) == TFmode
7181 || (TREE_CODE (type) == VECTOR_TYPE
7182 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7183 else
7184 /* Original SPARC 64-bit ABI says that structures and unions
7185 smaller than 32 bytes are returned in registers, as well as
7186 all other base types.
7187
7188 Extended ABI (as implemented by the Sun compiler) says that all
7189 complex floats are returned in registers (8 FP registers at most
7190 for '_Complex long double'). Return all complex integers in
7191 registers (4 at most for '_Complex TItype').
7192
7193 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7194 integers are returned like floats of the same size, that is in
7195 registers. Return all vector floats like structure and unions;
7196 note that they always have BLKmode like the latter. */
7197 return (TYPE_MODE (type) == BLKmode
7198 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7199 }
7200
7201 /* Handle the TARGET_STRUCT_VALUE target hook.
7202 Return where to find the structure return value address. */
7203
7204 static rtx
7205 sparc_struct_value_rtx (tree fndecl, int incoming)
7206 {
7207 if (TARGET_ARCH64)
7208 return 0;
7209 else
7210 {
7211 rtx mem;
7212
7213 if (incoming)
7214 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7215 STRUCT_VALUE_OFFSET));
7216 else
7217 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7218 STRUCT_VALUE_OFFSET));
7219
7220 /* Only follow the SPARC ABI for fixed-size structure returns.
7221 Variable-size structure returns are handled per the normal
7222 procedures in GCC. This is enabled by -mstd-struct-return. */
7223 if (incoming == 2
7224 && sparc_std_struct_return
7225 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7226 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7227 {
7228 /* We must check and adjust the return address, as it is optional
7229 as to whether the return object is really provided. */
7230 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7231 rtx scratch = gen_reg_rtx (SImode);
7232 rtx_code_label *endlab = gen_label_rtx ();
7233
7234 /* Calculate the return object size. */
7235 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7236 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7237 /* Construct a temporary return value. */
7238 rtx temp_val
7239 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7240
7241 /* Implement SPARC 32-bit psABI callee return struct checking:
7242
7243 Fetch the instruction where we will return to and see if
7244 it's an unimp instruction (the most significant 10 bits
7245 will be zero). */
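/* Illustrative sketch of the psABI convention handled here: a conforming
   caller emits

     call foo
      <delay slot insn>
     unimp <size>    ! struct size in the low 12 bits

   so the word at the return address + 8 is the unimp.  If it matches the
   expected size, we keep the +4 adjustment made below and return past it;
   otherwise we undo the adjustment and redirect the return-object pointer
   to a throwaway local slot.  */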
7246 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7247 plus_constant (Pmode,
7248 ret_reg, 8)));
7249 /* Assume the size is valid and pre-adjust. */
7250 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7251 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7252 0, endlab);
7253 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7254 /* Write the address of the memory pointed to by temp_val into
7255 the memory pointed to by mem. */
7256 emit_move_insn (mem, XEXP (temp_val, 0));
7257 emit_label (endlab);
7258 }
7259
7260 return mem;
7261 }
7262 }
7263
7264 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7265 For v9, function return values are subject to the same rules as arguments,
7266 except that up to 32 bytes may be returned in registers. */
7267
7268 static rtx
7269 sparc_function_value_1 (const_tree type, machine_mode mode,
7270 bool outgoing)
7271 {
7272 /* Beware that the two values are swapped here wrt function_arg. */
7273 int regbase = (outgoing
7274 ? SPARC_INCOMING_INT_ARG_FIRST
7275 : SPARC_OUTGOING_INT_ARG_FIRST);
7276 enum mode_class mclass = GET_MODE_CLASS (mode);
7277 int regno;
7278
7279 /* Vector types deserve special treatment because they are polymorphic wrt
7280 their mode, depending upon whether VIS instructions are enabled. */
7281 if (type && TREE_CODE (type) == VECTOR_TYPE)
7282 {
7283 HOST_WIDE_INT size = int_size_in_bytes (type);
7284 gcc_assert ((TARGET_ARCH32 && size <= 8)
7285 || (TARGET_ARCH64 && size <= 32));
7286
7287 if (mode == BLKmode)
7288 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7289
7290 mclass = MODE_FLOAT;
7291 }
7292
7293 if (TARGET_ARCH64 && type)
7294 {
7295 /* Structures up to 32 bytes in size are returned in registers. */
7296 if (TREE_CODE (type) == RECORD_TYPE)
7297 {
7298 HOST_WIDE_INT size = int_size_in_bytes (type);
7299 gcc_assert (size <= 32);
7300
7301 return function_arg_record_value (type, mode, 0, 1, regbase);
7302 }
7303
7304 /* Unions up to 32 bytes in size are returned in integer registers. */
7305 else if (TREE_CODE (type) == UNION_TYPE)
7306 {
7307 HOST_WIDE_INT size = int_size_in_bytes (type);
7308 gcc_assert (size <= 32);
7309
7310 return function_arg_union_value (size, mode, 0, regbase);
7311 }
7312
7313 /* Objects that require it are returned in FP registers. */
7314 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7315 ;
7316
7317 /* All other aggregate types are returned in an integer register in a
7318 mode corresponding to the size of the type. */
7319 else if (AGGREGATE_TYPE_P (type))
7320 {
7321 /* All other aggregate types are passed in an integer register
7322 in a mode corresponding to the size of the type. */
7323 HOST_WIDE_INT size = int_size_in_bytes (type);
7324 gcc_assert (size <= 32);
7325
7326 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7327
7328 /* ??? We probably should have made the same ABI change in
7329 3.4.0 as the one we made for unions. The latter was
7330 required by the SCD though, while the former is not
7331 specified, so we favored compatibility and efficiency.
7332
7333 Now we're stuck for aggregates larger than 16 bytes,
7334 because OImode vanished in the meantime. Let's not
7335 try to be unduly clever, and simply follow the ABI
7336 for unions in that case. */
7337 if (mode == BLKmode)
7338 return function_arg_union_value (size, mode, 0, regbase);
7339 else
7340 mclass = MODE_INT;
7341 }
7342
7343 /* We should only have pointer and integer types at this point. This
7344 must match sparc_promote_function_mode. */
7345 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7346 mode = word_mode;
7347 }
7348
7349 /* We should only have pointer and integer types at this point, except with
7350 -freg-struct-return. This must match sparc_promote_function_mode. */
7351 else if (TARGET_ARCH32
7352 && !(type && AGGREGATE_TYPE_P (type))
7353 && mclass == MODE_INT
7354 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7355 mode = word_mode;
7356
7357 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7358 regno = SPARC_FP_ARG_FIRST;
7359 else
7360 regno = regbase;
7361
7362 return gen_rtx_REG (mode, regno);
7363 }
7364
7365 /* Handle TARGET_FUNCTION_VALUE.
7366 On the SPARC, the value is found in the first "output" register, but the
7367 called function leaves it in the first "input" register. */
7368
7369 static rtx
7370 sparc_function_value (const_tree valtype,
7371 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7372 bool outgoing)
7373 {
7374 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7375 }
7376
7377 /* Handle TARGET_LIBCALL_VALUE. */
7378
7379 static rtx
7380 sparc_libcall_value (machine_mode mode,
7381 const_rtx fun ATTRIBUTE_UNUSED)
7382 {
7383 return sparc_function_value_1 (NULL_TREE, mode, false);
7384 }
7385
7386 /* Handle FUNCTION_VALUE_REGNO_P.
7387 On the SPARC, the first "output" reg is used for integer values, and the
7388 first floating point register is used for floating point values. */
7389
7390 static bool
7391 sparc_function_value_regno_p (const unsigned int regno)
7392 {
7393 return (regno == 8 || (TARGET_FPU && regno == 32));
7394 }
7395
7396 /* Do what is necessary for `va_start'. We look at the current function
7397 to determine if stdarg or varargs is used and return the address of
7398 the first unnamed parameter. */
7399
7400 static rtx
7401 sparc_builtin_saveregs (void)
7402 {
7403 int first_reg = crtl->args.info.words;
7404 rtx address;
7405 int regno;
7406
7407 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7408 emit_move_insn (gen_rtx_MEM (word_mode,
7409 gen_rtx_PLUS (Pmode,
7410 frame_pointer_rtx,
7411 GEN_INT (FIRST_PARM_OFFSET (0)
7412 + (UNITS_PER_WORD
7413 * regno)))),
7414 gen_rtx_REG (word_mode,
7415 SPARC_INCOMING_INT_ARG_FIRST + regno));
7416
7417 address = gen_rtx_PLUS (Pmode,
7418 frame_pointer_rtx,
7419 GEN_INT (FIRST_PARM_OFFSET (0)
7420 + UNITS_PER_WORD * first_reg));
7421
7422 return address;
7423 }
7424
7425 /* Implement `va_start' for stdarg. */
7426
7427 static void
7428 sparc_va_start (tree valist, rtx nextarg)
7429 {
7430 nextarg = expand_builtin_saveregs ();
7431 std_expand_builtin_va_start (valist, nextarg);
7432 }
7433
7434 /* Implement `va_arg' for stdarg. */
7435
7436 static tree
7437 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7438 gimple_seq *post_p)
7439 {
7440 HOST_WIDE_INT size, rsize, align;
7441 tree addr, incr;
7442 bool indirect;
7443 tree ptrtype = build_pointer_type (type);
7444
7445 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7446 {
7447 indirect = true;
7448 size = rsize = UNITS_PER_WORD;
7449 align = 0;
7450 }
7451 else
7452 {
7453 indirect = false;
7454 size = int_size_in_bytes (type);
7455 rsize = ROUND_UP (size, UNITS_PER_WORD);
7456 align = 0;
7457
7458 if (TARGET_ARCH64)
7459 {
7460 /* For SPARC64, objects requiring 16-byte alignment get it. */
7461 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7462 align = 2 * UNITS_PER_WORD;
7463
7464 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7465 are left-justified in their slots. */
7466 if (AGGREGATE_TYPE_P (type))
7467 {
7468 if (size == 0)
7469 size = rsize = UNITS_PER_WORD;
7470 else
7471 size = rsize;
7472 }
7473 }
7474 }
7475
7476 incr = valist;
7477 if (align)
7478 {
7479 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7480 incr = fold_convert (sizetype, incr);
7481 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7482 size_int (-align));
7483 incr = fold_convert (ptr_type_node, incr);
7484 }
7485
7486 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7487 addr = incr;
7488
7489 if (BYTES_BIG_ENDIAN && size < rsize)
7490 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7491
7492 if (indirect)
7493 {
7494 addr = fold_convert (build_pointer_type (ptrtype), addr);
7495 addr = build_va_arg_indirect_ref (addr);
7496 }
7497
7498 /* If the address isn't aligned properly for the type, we need a temporary.
7499 FIXME: This is inefficient, usually we can do this in registers. */
7500 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7501 {
7502 tree tmp = create_tmp_var (type, "va_arg_tmp");
7503 tree dest_addr = build_fold_addr_expr (tmp);
7504 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7505 3, dest_addr, addr, size_int (rsize));
7506 TREE_ADDRESSABLE (tmp) = 1;
7507 gimplify_and_add (copy, pre_p);
7508 addr = dest_addr;
7509 }
7510
7511 else
7512 addr = fold_convert (ptrtype, addr);
7513
7514 incr = fold_build_pointer_plus_hwi (incr, rsize);
7515 gimplify_assign (valist, incr, post_p);
7516
7517 return build_va_arg_indirect_ref (addr);
7518 }
7519 \f
7520 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7521 Specify whether the vector mode is supported by the hardware. */
7522
7523 static bool
7524 sparc_vector_mode_supported_p (machine_mode mode)
7525 {
7526 return TARGET_VIS && VECTOR_MODE_P (mode);
7527 }
7528 \f
7529 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7530
7531 static machine_mode
7532 sparc_preferred_simd_mode (machine_mode mode)
7533 {
7534 if (TARGET_VIS)
7535 switch (mode)
7536 {
7537 case SImode:
7538 return V2SImode;
7539 case HImode:
7540 return V4HImode;
7541 case QImode:
7542 return V8QImode;
7543
7544 default:;
7545 }
7546
7547 return word_mode;
7548 }
7549 \f
7550 /* Return the string to output an unconditional branch to LABEL, which is
7551 the operand number of the label.
7552
7553 DEST is the destination insn (i.e. the label), INSN is the source. */
7554
7555 const char *
7556 output_ubranch (rtx dest, rtx_insn *insn)
7557 {
7558 static char string[64];
7559 bool v9_form = false;
7560 int delta;
7561 char *p;
7562
7563 /* Even if we are trying to use cbcond for this, evaluate
7564 whether we can use V9 branches as our backup plan. */
7565
7566 delta = 5000000;
7567 if (INSN_ADDRESSES_SET_P ())
7568 delta = (INSN_ADDRESSES (INSN_UID (dest))
7569 - INSN_ADDRESSES (INSN_UID (insn)));
7570
7571 /* Leave some instructions for "slop". */
7572 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7573 v9_form = true;
7574
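/* As an illustration: with cbcond, an unconditional branch can be encoded
   as an always-taken compare-and-branch, e.g. "cwbe %g0, %g0, .LC30"
   (%g0 always equals %g0).  The conservative +-500 byte window checked
   below keeps the target within cbcond's short displacement; farther
   targets fall back to a plain ba/b,a form.  */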
7575 if (TARGET_CBCOND)
7576 {
7577 bool emit_nop = emit_cbcond_nop (insn);
7578 bool far = false;
7579 const char *rval;
7580
7581 if (delta < -500 || delta > 500)
7582 far = true;
7583
7584 if (far)
7585 {
7586 if (v9_form)
7587 rval = "ba,a,pt\t%%xcc, %l0";
7588 else
7589 rval = "b,a\t%l0";
7590 }
7591 else
7592 {
7593 if (emit_nop)
7594 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7595 else
7596 rval = "cwbe\t%%g0, %%g0, %l0";
7597 }
7598 return rval;
7599 }
7600
7601 if (v9_form)
7602 strcpy (string, "ba%*,pt\t%%xcc, ");
7603 else
7604 strcpy (string, "b%*\t");
7605
7606 p = strchr (string, '\0');
7607 *p++ = '%';
7608 *p++ = 'l';
7609 *p++ = '0';
7610 *p++ = '%';
7611 *p++ = '(';
7612 *p = '\0';
7613
7614 return string;
7615 }
7616
7617 /* Return the string to output a conditional branch to LABEL, which is
7618 the operand number of the label. OP is the conditional expression.
7619 XEXP (OP, 0) is assumed to be a condition code register (integer or
7620 floating point) and its mode specifies what kind of comparison we made.
7621
7622 DEST is the destination insn (i.e. the label), INSN is the source.
7623
7624 REVERSED is nonzero if we should reverse the sense of the comparison.
7625
7626 ANNUL is nonzero if we should generate an annulling branch. */
7627
7628 const char *
7629 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7630 rtx_insn *insn)
7631 {
7632 static char string[64];
7633 enum rtx_code code = GET_CODE (op);
7634 rtx cc_reg = XEXP (op, 0);
7635 machine_mode mode = GET_MODE (cc_reg);
7636 const char *labelno, *branch;
7637 int spaces = 8, far;
7638 char *p;
7639
7640 /* v9 branches are limited to +-1MB. If it is too far away,
7641 change
7642
7643 bne,pt %xcc, .LC30
7644
7645 to
7646
7647 be,pn %xcc, .+12
7648 nop
7649 ba .LC30
7650
7651 and
7652
7653 fbne,a,pn %fcc2, .LC29
7654
7655 to
7656
7657 fbe,pt %fcc2, .+16
7658 nop
7659 ba .LC29 */
7660
7661 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7662 if (reversed ^ far)
7663 {
7664 /* Reversal of FP compares requires care -- an ordered compare
7665 becomes an unordered compare and vice versa. */
7666 if (mode == CCFPmode || mode == CCFPEmode)
7667 code = reverse_condition_maybe_unordered (code);
7668 else
7669 code = reverse_condition (code);
7670 }
7671
7672 /* Start by writing the branch condition. */
7673 if (mode == CCFPmode || mode == CCFPEmode)
7674 {
7675 switch (code)
7676 {
7677 case NE:
7678 branch = "fbne";
7679 break;
7680 case EQ:
7681 branch = "fbe";
7682 break;
7683 case GE:
7684 branch = "fbge";
7685 break;
7686 case GT:
7687 branch = "fbg";
7688 break;
7689 case LE:
7690 branch = "fble";
7691 break;
7692 case LT:
7693 branch = "fbl";
7694 break;
7695 case UNORDERED:
7696 branch = "fbu";
7697 break;
7698 case ORDERED:
7699 branch = "fbo";
7700 break;
7701 case UNGT:
7702 branch = "fbug";
7703 break;
7704 case UNLT:
7705 branch = "fbul";
7706 break;
7707 case UNEQ:
7708 branch = "fbue";
7709 break;
7710 case UNGE:
7711 branch = "fbuge";
7712 break;
7713 case UNLE:
7714 branch = "fbule";
7715 break;
7716 case LTGT:
7717 branch = "fblg";
7718 break;
7719 default:
7720 gcc_unreachable ();
7721 }
7722
7723 /* ??? !v9: FP branches cannot be preceded by another floating point
7724 insn. Because there is currently no concept of pre-delay slots,
7725 we can fix this only by always emitting a nop before a floating
7726 point branch. */
7727
7728 string[0] = '\0';
7729 if (! TARGET_V9)
7730 strcpy (string, "nop\n\t");
7731 strcat (string, branch);
7732 }
7733 else
7734 {
7735 switch (code)
7736 {
7737 case NE:
7738 branch = "bne";
7739 break;
7740 case EQ:
7741 branch = "be";
7742 break;
7743 case GE:
7744 if (mode == CCNZmode || mode == CCXNZmode)
7745 branch = "bpos";
7746 else
7747 branch = "bge";
7748 break;
7749 case GT:
7750 branch = "bg";
7751 break;
7752 case LE:
7753 branch = "ble";
7754 break;
7755 case LT:
7756 if (mode == CCNZmode || mode == CCXNZmode)
7757 branch = "bneg";
7758 else
7759 branch = "bl";
7760 break;
7761 case GEU:
7762 branch = "bgeu";
7763 break;
7764 case GTU:
7765 branch = "bgu";
7766 break;
7767 case LEU:
7768 branch = "bleu";
7769 break;
7770 case LTU:
7771 branch = "blu";
7772 break;
7773 default:
7774 gcc_unreachable ();
7775 }
7776 strcpy (string, branch);
7777 }
7778 spaces -= strlen (branch);
7779 p = strchr (string, '\0');
7780
7781 /* Now add the annulling, the label, and a possible noop. */
7782 if (annul && ! far)
7783 {
7784 strcpy (p, ",a");
7785 p += 2;
7786 spaces -= 2;
7787 }
7788
7789 if (TARGET_V9)
7790 {
7791 rtx note;
7792 int v8 = 0;
7793
7794 if (! far && insn && INSN_ADDRESSES_SET_P ())
7795 {
7796 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7797 - INSN_ADDRESSES (INSN_UID (insn)));
7798 /* Leave some instructions for "slop". */
7799 if (delta < -260000 || delta >= 260000)
7800 v8 = 1;
7801 }
7802
7803 switch (mode)
7804 {
7805 case CCmode:
7806 case CCNZmode:
7807 case CCCmode:
7808 labelno = "%%icc, ";
7809 if (v8)
7810 labelno = "";
7811 break;
7812 case CCXmode:
7813 case CCXNZmode:
7814 case CCXCmode:
7815 labelno = "%%xcc, ";
7816 gcc_assert (!v8);
7817 break;
7818 case CCFPmode:
7819 case CCFPEmode:
7820 {
7821 static char v9_fcc_labelno[] = "%%fccX, ";
7822 /* Set the char indicating the number of the fcc reg to use. */
7823 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7824 labelno = v9_fcc_labelno;
7825 if (v8)
7826 {
7827 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7828 labelno = "";
7829 }
7830 }
7831 break;
7832 default:
7833 gcc_unreachable ();
7834 }
7835
7836 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7837 {
7838 strcpy (p,
7839 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7840 ? ",pt" : ",pn");
7841 p += 3;
7842 spaces -= 3;
7843 }
7844 }
7845 else
7846 labelno = "";
7847
7848 if (spaces > 0)
7849 *p++ = '\t';
7850 else
7851 *p++ = ' ';
7852 strcpy (p, labelno);
7853 p = strchr (p, '\0');
7854 if (far)
7855 {
7856 strcpy (p, ".+12\n\t nop\n\tb\t");
7857 /* Skip the next insn if requested or
7858 if we know that it will be a nop. */
7859 if (annul || ! final_sequence)
7860 p[3] = '6';
7861 p += 14;
7862 }
7863 *p++ = '%';
7864 *p++ = 'l';
7865 *p++ = label + '0';
7866 *p++ = '%';
7867 *p++ = '#';
7868 *p = '\0';
7869
7870 return string;
7871 }
7872
7873 /* Emit a library call comparison between floating point X and Y.
7874 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7875 Return the new operator to be used in the comparison sequence.
7876
7877 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7878 values as arguments instead of the TFmode registers themselves,
7879 that's why we cannot call emit_float_lib_cmp. */
7880
7881 rtx
7882 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7883 {
7884 const char *qpfunc;
7885 rtx slot0, slot1, result, tem, tem2, libfunc;
7886 machine_mode mode;
7887 enum rtx_code new_comparison;
7888
7889 switch (comparison)
7890 {
7891 case EQ:
7892 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7893 break;
7894
7895 case NE:
7896 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7897 break;
7898
7899 case GT:
7900 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7901 break;
7902
7903 case GE:
7904 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7905 break;
7906
7907 case LT:
7908 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7909 break;
7910
7911 case LE:
7912 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7913 break;
7914
7915 case ORDERED:
7916 case UNORDERED:
7917 case UNGT:
7918 case UNLT:
7919 case UNEQ:
7920 case UNGE:
7921 case UNLE:
7922 case LTGT:
7923 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7924 break;
7925
7926 default:
7927 gcc_unreachable ();
7928 }
7929
7930 if (TARGET_ARCH64)
7931 {
7932 if (MEM_P (x))
7933 {
7934 tree expr = MEM_EXPR (x);
7935 if (expr)
7936 mark_addressable (expr);
7937 slot0 = x;
7938 }
7939 else
7940 {
7941 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7942 emit_move_insn (slot0, x);
7943 }
7944
7945 if (MEM_P (y))
7946 {
7947 tree expr = MEM_EXPR (y);
7948 if (expr)
7949 mark_addressable (expr);
7950 slot1 = y;
7951 }
7952 else
7953 {
7954 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7955 emit_move_insn (slot1, y);
7956 }
7957
7958 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7959 emit_library_call (libfunc, LCT_NORMAL,
7960 DImode, 2,
7961 XEXP (slot0, 0), Pmode,
7962 XEXP (slot1, 0), Pmode);
7963 mode = DImode;
7964 }
7965 else
7966 {
7967 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7968 emit_library_call (libfunc, LCT_NORMAL,
7969 SImode, 2,
7970 x, TFmode, y, TFmode);
7971 mode = SImode;
7972 }
7973
7974
7975 /* Immediately move the result of the libcall into a pseudo
7976 register so reload doesn't clobber the value if it needs
7977 the return register for a spill reg. */
7978 result = gen_reg_rtx (mode);
7979 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7980
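/* The decoding below assumes the usual _Q_cmp/_Qp_cmp result encoding:
   0 = equal, 1 = less than, 2 = greater than, 3 = unordered.  For
   example, UNLT tests the low bit (result 1 or 3) and UNGT tests for
   a result greater than 1 (2 or 3).  */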
7981 switch (comparison)
7982 {
7983 default:
7984 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7985 case ORDERED:
7986 case UNORDERED:
7987 new_comparison = (comparison == UNORDERED ? EQ : NE);
7988 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7989 case UNGT:
7990 case UNGE:
7991 new_comparison = (comparison == UNGT ? GT : NE);
7992 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7993 case UNLE:
7994 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7995 case UNLT:
7996 tem = gen_reg_rtx (mode);
7997 if (TARGET_ARCH32)
7998 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7999 else
8000 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8001 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8002 case UNEQ:
8003 case LTGT:
8004 tem = gen_reg_rtx (mode);
8005 if (TARGET_ARCH32)
8006 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8007 else
8008 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8009 tem2 = gen_reg_rtx (mode);
8010 if (TARGET_ARCH32)
8011 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8012 else
8013 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8014 new_comparison = (comparison == UNEQ ? EQ : NE);
8015 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8016 }
8017
8018 gcc_unreachable ();
8019 }
8020
8021 /* Generate an unsigned DImode to FP conversion. This is the same code
8022 optabs would emit if we didn't have TFmode patterns. */
8023
8024 void
8025 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8026 {
8027 rtx i0, i1, f0, in, out;
8028
8029 out = operands[0];
8030 in = force_reg (DImode, operands[1]);
8031 rtx_code_label *neglab = gen_label_rtx ();
8032 rtx_code_label *donelab = gen_label_rtx ();
8033 i0 = gen_reg_rtx (DImode);
8034 i1 = gen_reg_rtx (DImode);
8035 f0 = gen_reg_rtx (mode);
8036
8037 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8038
8039 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8040 emit_jump_insn (gen_jump (donelab));
8041 emit_barrier ();
8042
8043 emit_label (neglab);
8044
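/* The input has its top bit set, so it is out of signed DImode range.
   Halve it, ORing the shifted-out bit back in so the halving rounds to
   odd and the final doubling is not biased, convert the halved value,
   then double the result.  Roughly: out = (FP)((in >> 1) | (in & 1)) * 2.  */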
8045 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8046 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8047 emit_insn (gen_iordi3 (i0, i0, i1));
8048 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8049 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8050
8051 emit_label (donelab);
8052 }
8053
8054 /* Generate an FP to unsigned DImode conversion. This is the same code
8055 optabs would emit if we didn't have TFmode patterns. */
8056
8057 void
8058 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8059 {
8060 rtx i0, i1, f0, in, out, limit;
8061
8062 out = operands[0];
8063 in = force_reg (mode, operands[1]);
8064 rtx_code_label *neglab = gen_label_rtx ();
8065 rtx_code_label *donelab = gen_label_rtx ();
8066 i0 = gen_reg_rtx (DImode);
8067 i1 = gen_reg_rtx (DImode);
8068 limit = gen_reg_rtx (mode);
8069 f0 = gen_reg_rtx (mode);
8070
8071 emit_move_insn (limit,
8072 const_double_from_real_value (
8073 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8074 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8075
8076 emit_insn (gen_rtx_SET (out,
8077 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8078 emit_jump_insn (gen_jump (donelab));
8079 emit_barrier ();
8080
8081 emit_label (neglab);
8082
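/* The input is at least 2^63, which overflows a signed DImode fix.
   Subtract 2^63, convert the difference, then add 2^63 back by XORing
   the sign bit into the integer result.  */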
8083 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8084 emit_insn (gen_rtx_SET (i0,
8085 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8086 emit_insn (gen_movdi (i1, const1_rtx));
8087 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8088 emit_insn (gen_xordi3 (out, i0, i1));
8089
8090 emit_label (donelab);
8091 }
8092
8093 /* Return the string to output a compare and branch instruction to DEST.
8094 DEST is the destination insn (i.e. the label), INSN is the source,
8095 and OP is the conditional expression. */
8096
8097 const char *
8098 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8099 {
8100 machine_mode mode = GET_MODE (XEXP (op, 0));
8101 enum rtx_code code = GET_CODE (op);
8102 const char *cond_str, *tmpl;
8103 int far, emit_nop, len;
8104 static char string[64];
8105 char size_char;
8106
8107 /* Compare and Branch is limited to +-2KB. If it is too far away,
8108 change
8109
8110 cxbne X, Y, .LC30
8111
8112 to
8113
8114 cxbe X, Y, .+16
8115 nop
8116 ba,pt %xcc, .LC30
8117 nop */
8118
8119 len = get_attr_length (insn);
8120
8121 far = len == 4;
8122 emit_nop = len == 2;
8123
8124 if (far)
8125 code = reverse_condition (code);
8126
8127 size_char = ((mode == SImode) ? 'w' : 'x');
8128
8129 switch (code)
8130 {
8131 case NE:
8132 cond_str = "ne";
8133 break;
8134
8135 case EQ:
8136 cond_str = "e";
8137 break;
8138
8139 case GE:
8140 cond_str = "ge";
8141 break;
8142
8143 case GT:
8144 cond_str = "g";
8145 break;
8146
8147 case LE:
8148 cond_str = "le";
8149 break;
8150
8151 case LT:
8152 cond_str = "l";
8153 break;
8154
8155 case GEU:
8156 cond_str = "cc";
8157 break;
8158
8159 case GTU:
8160 cond_str = "gu";
8161 break;
8162
8163 case LEU:
8164 cond_str = "leu";
8165 break;
8166
8167 case LTU:
8168 cond_str = "cs";
8169 break;
8170
8171 default:
8172 gcc_unreachable ();
8173 }
8174
8175 if (far)
8176 {
8177 int veryfar = 1, delta;
8178
8179 if (INSN_ADDRESSES_SET_P ())
8180 {
8181 delta = (INSN_ADDRESSES (INSN_UID (dest))
8182 - INSN_ADDRESSES (INSN_UID (insn)));
8183 /* Leave some instructions for "slop". */
8184 if (delta >= -260000 && delta < 260000)
8185 veryfar = 0;
8186 }
8187
8188 if (veryfar)
8189 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8190 else
8191 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8192 }
8193 else
8194 {
8195 if (emit_nop)
8196 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8197 else
8198 tmpl = "c%cb%s\t%%1, %%2, %%3";
8199 }
8200
8201 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8202
8203 return string;
8204 }
8205
8206 /* Return the string to output a conditional branch to LABEL, testing
8207 register REG. LABEL is the operand number of the label; REG is the
8208 operand number of the reg. OP is the conditional expression. The mode
8209 of REG says what kind of comparison we made.
8210
8211 DEST is the destination insn (i.e. the label), INSN is the source.
8212
8213 REVERSED is nonzero if we should reverse the sense of the comparison.
8214
8215 ANNUL is nonzero if we should generate an annulling branch. */
8216
8217 const char *
8218 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8219 int annul, rtx_insn *insn)
8220 {
8221 static char string[64];
8222 enum rtx_code code = GET_CODE (op);
8223 machine_mode mode = GET_MODE (XEXP (op, 0));
8224 rtx note;
8225 int far;
8226 char *p;
8227
8228 /* Branches on a register are limited to +-128KB. If it is too far away,
8229 change
8230
8231 brnz,pt %g1, .LC30
8232
8233 to
8234
8235 brz,pn %g1, .+12
8236 nop
8237 ba,pt %xcc, .LC30
8238
8239 and
8240
8241 brgez,a,pn %o1, .LC29
8242
8243 to
8244
8245 brlz,pt %o1, .+16
8246 nop
8247 ba,pt %xcc, .LC29 */
8248
8249 far = get_attr_length (insn) >= 3;
8250
8251 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8252 if (reversed ^ far)
8253 code = reverse_condition (code);
8254
8255 /* Only 64 bit versions of these instructions exist. */
8256 gcc_assert (mode == DImode);
8257
8258 /* Start by writing the branch condition. */
8259
8260 switch (code)
8261 {
8262 case NE:
8263 strcpy (string, "brnz");
8264 break;
8265
8266 case EQ:
8267 strcpy (string, "brz");
8268 break;
8269
8270 case GE:
8271 strcpy (string, "brgez");
8272 break;
8273
8274 case LT:
8275 strcpy (string, "brlz");
8276 break;
8277
8278 case LE:
8279 strcpy (string, "brlez");
8280 break;
8281
8282 case GT:
8283 strcpy (string, "brgz");
8284 break;
8285
8286 default:
8287 gcc_unreachable ();
8288 }
8289
8290 p = strchr (string, '\0');
8291
8292 /* Now add the annulling, reg, label, and nop. */
8293 if (annul && ! far)
8294 {
8295 strcpy (p, ",a");
8296 p += 2;
8297 }
8298
8299 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8300 {
8301 strcpy (p,
8302 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8303 ? ",pt" : ",pn");
8304 p += 3;
8305 }
8306
8307 *p = p < string + 8 ? '\t' : ' ';
8308 p++;
8309 *p++ = '%';
8310 *p++ = '0' + reg;
8311 *p++ = ',';
8312 *p++ = ' ';
8313 if (far)
8314 {
8315 int veryfar = 1, delta;
8316
8317 if (INSN_ADDRESSES_SET_P ())
8318 {
8319 delta = (INSN_ADDRESSES (INSN_UID (dest))
8320 - INSN_ADDRESSES (INSN_UID (insn)));
8321 /* Leave some instructions for "slop". */
8322 if (delta >= -260000 && delta < 260000)
8323 veryfar = 0;
8324 }
8325
8326 strcpy (p, ".+12\n\t nop\n\t");
8327 /* Skip the next insn if requested or
8328 if we know that it will be a nop. */
8329 if (annul || ! final_sequence)
8330 p[3] = '6';
8331 p += 12;
8332 if (veryfar)
8333 {
8334 strcpy (p, "b\t");
8335 p += 2;
8336 }
8337 else
8338 {
8339 strcpy (p, "ba,pt\t%%xcc, ");
8340 p += 13;
8341 }
8342 }
8343 *p++ = '%';
8344 *p++ = 'l';
8345 *p++ = '0' + label;
8346 *p++ = '%';
8347 *p++ = '#';
8348 *p = '\0';
8349
8350 return string;
8351 }
8352
8353 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8354 Such instructions cannot be used in the delay slot of the return insn on V9.
8355 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8356 */
8357
8358 static int
8359 epilogue_renumber (register rtx *where, int test)
8360 {
8361 register const char *fmt;
8362 register int i;
8363 register enum rtx_code code;
8364
8365 if (*where == 0)
8366 return 0;
8367
8368 code = GET_CODE (*where);
8369
8370 switch (code)
8371 {
8372 case REG:
8373 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8374 return 1;
8375 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8376 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8377 /* fallthrough */
8378 case SCRATCH:
8379 case CC0:
8380 case PC:
8381 case CONST_INT:
8382 case CONST_WIDE_INT:
8383 case CONST_DOUBLE:
8384 return 0;
8385
8386 /* Do not replace the frame pointer with the stack pointer because
8387 it can cause the delayed instruction to load below the stack.
8388 This occurs when instructions like:
8389
8390 (set (reg/i:SI 24 %i0)
8391 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8392 (const_int -20 [0xffffffec])) 0))
8393
8394 are in the return delayed slot. */
8395 case PLUS:
8396 if (GET_CODE (XEXP (*where, 0)) == REG
8397 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8398 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8399 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8400 return 1;
8401 break;
8402
8403 case MEM:
8404 if (SPARC_STACK_BIAS
8405 && GET_CODE (XEXP (*where, 0)) == REG
8406 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8407 return 1;
8408 break;
8409
8410 default:
8411 break;
8412 }
8413
8414 fmt = GET_RTX_FORMAT (code);
8415
8416 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8417 {
8418 if (fmt[i] == 'E')
8419 {
8420 register int j;
8421 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8422 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8423 return 1;
8424 }
8425 else if (fmt[i] == 'e'
8426 && epilogue_renumber (&(XEXP (*where, i)), test))
8427 return 1;
8428 }
8429 return 0;
8430 }
8431 \f
8432 /* Leaf functions and non-leaf functions have different needs. */
8433
8434 static const int
8435 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8436
8437 static const int
8438 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8439
8440 static const int *const reg_alloc_orders[] = {
8441 reg_leaf_alloc_order,
8442 reg_nonleaf_alloc_order};
8443
8444 void
8445 order_regs_for_local_alloc (void)
8446 {
8447 static int last_order_nonleaf = 1;
8448
8449 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8450 {
8451 last_order_nonleaf = !last_order_nonleaf;
8452 memcpy ((char *) reg_alloc_order,
8453 (const char *) reg_alloc_orders[last_order_nonleaf],
8454 FIRST_PSEUDO_REGISTER * sizeof (int));
8455 }
8456 }
8457 \f
8458 /* Return 1 if REG and MEM are legitimate enough to allow the various
8459 mem<-->reg splits to be run. */
8460
8461 int
8462 sparc_splitdi_legitimate (rtx reg, rtx mem)
8463 {
8464 /* Punt if we are here by mistake. */
8465 gcc_assert (reload_completed);
8466
8467 /* We must have an offsettable memory reference. */
8468 if (! offsettable_memref_p (mem))
8469 return 0;
8470
8471 /* If we have legitimate args for ldd/std, we do not want
8472 the split to happen. */
8473 if ((REGNO (reg) % 2) == 0
8474 && mem_min_alignment (mem, 8))
8475 return 0;
8476
8477 /* Success. */
8478 return 1;
8479 }
8480
8481 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8482
8483 int
8484 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8485 {
8486 int regno1, regno2;
8487
8488 if (GET_CODE (reg1) == SUBREG)
8489 reg1 = SUBREG_REG (reg1);
8490 if (GET_CODE (reg1) != REG)
8491 return 0;
8492 regno1 = REGNO (reg1);
8493
8494 if (GET_CODE (reg2) == SUBREG)
8495 reg2 = SUBREG_REG (reg2);
8496 if (GET_CODE (reg2) != REG)
8497 return 0;
8498 regno2 = REGNO (reg2);
8499
8500 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8501 return 1;
8502
8503 if (TARGET_VIS3)
8504 {
8505 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8506 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8507 return 1;
8508 }
8509
8510 return 0;
8511 }
8512
8513 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8514 This makes them candidates for using ldd and std insns.
8515
8516 Note reg1 and reg2 *must* be hard registers. */
8517
8518 int
8519 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8520 {
8521 /* We might have been passed a SUBREG. */
8522 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8523 return 0;
8524
8525 if (REGNO (reg1) % 2 != 0)
8526 return 0;
8527
8528 /* Integer ldd is deprecated in SPARC V9. */
8529 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8530 return 0;
8531
8532 return (REGNO (reg1) == REGNO (reg2) - 1);
8533 }
8534
8535 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8536 an ldd or std insn.
8537
8538 This can only happen when addr1 and addr2, the addresses in mem1
8539 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8540 addr1 must also be aligned on a 64-bit boundary.
8541
8542 Also iff dependent_reg_rtx is not null it should not be used to
8543 compute the address for mem1, i.e. we cannot optimize a sequence
8544 like:
8545 ld [%o0], %o0
8546 ld [%o0 + 4], %o1
8547 to
8548 ldd [%o0], %o0
8549 nor:
8550 ld [%g3 + 4], %g3
8551 ld [%g3], %g2
8552 to
8553 ldd [%g3], %g2
8554
8555 But, note that the transformation from:
8556 ld [%g2 + 4], %g3
8557 ld [%g2], %g2
8558 to
8559 ldd [%g2], %g2
8560 is perfectly fine. Thus, the peephole2 patterns always pass us
8561 the destination register of the first load, never the second one.
8562
8563 For stores we don't have a similar problem, so dependent_reg_rtx is
8564 NULL_RTX. */
8565
8566 int
8567 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8568 {
8569 rtx addr1, addr2;
8570 unsigned int reg1;
8571 HOST_WIDE_INT offset1;
8572
8573 /* The mems cannot be volatile. */
8574 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8575 return 0;
8576
8577 /* MEM1 should be aligned on a 64-bit boundary. */
8578 if (MEM_ALIGN (mem1) < 64)
8579 return 0;
8580
8581 addr1 = XEXP (mem1, 0);
8582 addr2 = XEXP (mem2, 0);
8583
8584 /* Extract a register number and offset (if used) from the first addr. */
8585 if (GET_CODE (addr1) == PLUS)
8586 {
8587 /* If not a REG, return zero. */
8588 if (GET_CODE (XEXP (addr1, 0)) != REG)
8589 return 0;
8590 else
8591 {
8592 reg1 = REGNO (XEXP (addr1, 0));
8593 /* The offset must be constant! */
8594 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8595 return 0;
8596 offset1 = INTVAL (XEXP (addr1, 1));
8597 }
8598 }
8599 else if (GET_CODE (addr1) != REG)
8600 return 0;
8601 else
8602 {
8603 reg1 = REGNO (addr1);
8604 /* This was a simple (mem (reg)) expression. Offset is 0. */
8605 offset1 = 0;
8606 }
8607
8608 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8609 if (GET_CODE (addr2) != PLUS)
8610 return 0;
8611
8612 if (GET_CODE (XEXP (addr2, 0)) != REG
8613 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8614 return 0;
8615
8616 if (reg1 != REGNO (XEXP (addr2, 0)))
8617 return 0;
8618
8619 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8620 return 0;
8621
8622 /* The first offset must be evenly divisible by 8 to ensure the
8623 address is 64 bit aligned. */
8624 if (offset1 % 8 != 0)
8625 return 0;
8626
8627 /* The offset for the second addr must be 4 more than the first addr. */
8628 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8629 return 0;
8630
8631 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8632 instructions. */
8633 return 1;
8634 }
8635
8636 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8637
8638 rtx
8639 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8640 {
8641 rtx x = widen_memory_access (mem1, mode, 0);
8642 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8643 return x;
8644 }
8645
8646 /* Return 1 if reg is a pseudo, or is the first register in
8647 a hard register pair. This makes it suitable for use in
8648 ldd and std insns. */
8649
8650 int
8651 register_ok_for_ldd (rtx reg)
8652 {
8653 /* We might have been passed a SUBREG. */
8654 if (!REG_P (reg))
8655 return 0;
8656
8657 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8658 return (REGNO (reg) % 2 == 0);
8659
8660 return 1;
8661 }
8662
8663 /* Return 1 if OP, a MEM, has an address which is known to be
8664 aligned to an 8-byte boundary. */
8665
8666 int
8667 memory_ok_for_ldd (rtx op)
8668 {
8669 /* In 64-bit mode, we assume that the address is word-aligned. */
8670 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8671 return 0;
8672
8673 if (! can_create_pseudo_p ()
8674 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8675 return 0;
8676
8677 return 1;
8678 }
8679 \f
8680 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8681
8682 static bool
8683 sparc_print_operand_punct_valid_p (unsigned char code)
8684 {
8685 if (code == '#'
8686 || code == '*'
8687 || code == '('
8688 || code == ')'
8689 || code == '_'
8690 || code == '&')
8691 return true;
8692
8693 return false;
8694 }
8695
8696 /* Implement TARGET_PRINT_OPERAND.
8697 Print operand X (an rtx) in assembler syntax to file FILE.
8698 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8699 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8700
8701 static void
8702 sparc_print_operand (FILE *file, rtx x, int code)
8703 {
8704 const char *s;
8705
8706 switch (code)
8707 {
8708 case '#':
8709 /* Output an insn in a delay slot. */
8710 if (final_sequence)
8711 sparc_indent_opcode = 1;
8712 else
8713 fputs ("\n\t nop", file);
8714 return;
8715 case '*':
8716 /* Output an annul flag if there's nothing for the delay slot and we
8717 are optimizing. This is always used with '(' below.
8718 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8719 this is a dbx bug. So, we only do this when optimizing.
8720 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8721 Always emit a nop in case the next instruction is a branch. */
8722 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8723 fputs (",a", file);
8724 return;
8725 case '(':
8726 /* Output a 'nop' if there's nothing for the delay slot and we are
8727 not optimizing. This is always used with '*' above. */
8728 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8729 fputs ("\n\t nop", file);
8730 else if (final_sequence)
8731 sparc_indent_opcode = 1;
8732 return;
8733 case ')':
8734 /* Output the right displacement from the saved PC on function return.
8735 The caller may have placed an "unimp" insn immediately after the call
8736 so we have to account for it. This insn is used in the 32-bit ABI
8737 when calling a function that returns a non zero-sized structure. The
8738 64-bit ABI doesn't have it. Be careful to have this test be the same
8739 as that for the call. The exception is when sparc_std_struct_return
8740 is enabled, the psABI is followed exactly and the adjustment is made
8741 by the code in sparc_struct_value_rtx. The call emitted is the same
8742 when sparc_std_struct_return is enabled. */
8743 if (!TARGET_ARCH64
8744 && cfun->returns_struct
8745 && !sparc_std_struct_return
8746 && DECL_SIZE (DECL_RESULT (current_function_decl))
8747 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8748 == INTEGER_CST
8749 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8750 fputs ("12", file);
8751 else
8752 fputc ('8', file);
8753 return;
8754 case '_':
8755 /* Output the Embedded Medium/Anywhere code model base register. */
8756 fputs (EMBMEDANY_BASE_REG, file);
8757 return;
8758 case '&':
8759 /* Print some local dynamic TLS name. */
8760 if (const char *name = get_some_local_dynamic_name ())
8761 assemble_name (file, name);
8762 else
8763 output_operand_lossage ("'%%&' used without any "
8764 "local dynamic TLS references");
8765 return;
8766
8767 case 'Y':
8768 /* Adjust the operand to take into account a RESTORE operation. */
8769 if (GET_CODE (x) == CONST_INT)
8770 break;
8771 else if (GET_CODE (x) != REG)
8772 output_operand_lossage ("invalid %%Y operand");
8773 else if (REGNO (x) < 8)
8774 fputs (reg_names[REGNO (x)], file);
8775 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8776 fputs (reg_names[REGNO (x)-16], file);
8777 else
8778 output_operand_lossage ("invalid %%Y operand");
8779 return;
8780 case 'L':
8781 /* Print out the low order register name of a register pair. */
8782 if (WORDS_BIG_ENDIAN)
8783 fputs (reg_names[REGNO (x)+1], file);
8784 else
8785 fputs (reg_names[REGNO (x)], file);
8786 return;
8787 case 'H':
8788 /* Print out the high order register name of a register pair. */
8789 if (WORDS_BIG_ENDIAN)
8790 fputs (reg_names[REGNO (x)], file);
8791 else
8792 fputs (reg_names[REGNO (x)+1], file);
8793 return;
8794 case 'R':
8795 /* Print out the second register name of a register pair or quad.
8796 I.e., R (%o0) => %o1. */
8797 fputs (reg_names[REGNO (x)+1], file);
8798 return;
8799 case 'S':
8800 /* Print out the third register name of a register quad.
8801 I.e., S (%o0) => %o2. */
8802 fputs (reg_names[REGNO (x)+2], file);
8803 return;
8804 case 'T':
8805 /* Print out the fourth register name of a register quad.
8806 I.e., T (%o0) => %o3. */
8807 fputs (reg_names[REGNO (x)+3], file);
8808 return;
8809 case 'x':
8810 /* Print a condition code register. */
8811 if (REGNO (x) == SPARC_ICC_REG)
8812 {
8813 switch (GET_MODE (x))
8814 {
8815 case CCmode:
8816 case CCNZmode:
8817 case CCCmode:
8818 s = "%icc";
8819 break;
8820 case CCXmode:
8821 case CCXNZmode:
8822 case CCXCmode:
8823 s = "%xcc";
8824 break;
8825 default:
8826 gcc_unreachable ();
8827 }
8828 fputs (s, file);
8829 }
8830 else
8831 /* %fccN register */
8832 fputs (reg_names[REGNO (x)], file);
8833 return;
8834 case 'm':
8835 /* Print the operand's address only. */
8836 output_address (GET_MODE (x), XEXP (x, 0));
8837 return;
8838 case 'r':
8839 /* In this case we need a register. Use %g0 if the
8840 operand is const0_rtx. */
8841 if (x == const0_rtx
8842 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8843 {
8844 fputs ("%g0", file);
8845 return;
8846 }
8847 else
8848 break;
8849
8850 case 'A':
8851 switch (GET_CODE (x))
8852 {
8853 case IOR:
8854 s = "or";
8855 break;
8856 case AND:
8857 s = "and";
8858 break;
8859 case XOR:
8860 s = "xor";
8861 break;
8862 default:
8863 output_operand_lossage ("invalid %%A operand");
8864 s = "";
8865 break;
8866 }
8867 fputs (s, file);
8868 return;
8869
8870 case 'B':
8871 switch (GET_CODE (x))
8872 {
8873 case IOR:
8874 s = "orn";
8875 break;
8876 case AND:
8877 s = "andn";
8878 break;
8879 case XOR:
8880 s = "xnor";
8881 break;
8882 default:
8883 output_operand_lossage ("invalid %%B operand");
8884 s = "";
8885 break;
8886 }
8887 fputs (s, file);
8888 return;
8889
8890 /* This is used by the conditional move instructions. */
8891 case 'C':
8892 {
8893 machine_mode mode = GET_MODE (XEXP (x, 0));
8894 switch (GET_CODE (x))
8895 {
8896 case NE:
8897 s = "ne";
8898 break;
8899 case EQ:
8900 s = "e";
8901 break;
8902 case GE:
8903 if (mode == CCNZmode || mode == CCXNZmode)
8904 s = "pos";
8905 else
8906 s = "ge";
8907 break;
8908 case GT:
8909 s = "g";
8910 break;
8911 case LE:
8912 s = "le";
8913 break;
8914 case LT:
8915 if (mode == CCNZmode || mode == CCXNZmode)
8916 s = "neg";
8917 else
8918 s = "l";
8919 break;
8920 case GEU:
8921 s = "geu";
8922 break;
8923 case GTU:
8924 s = "gu";
8925 break;
8926 case LEU:
8927 s = "leu";
8928 break;
8929 case LTU:
8930 s = "lu";
8931 break;
8932 case LTGT:
8933 s = "lg";
8934 break;
8935 case UNORDERED:
8936 s = "u";
8937 break;
8938 case ORDERED:
8939 s = "o";
8940 break;
8941 case UNLT:
8942 s = "ul";
8943 break;
8944 case UNLE:
8945 s = "ule";
8946 break;
8947 case UNGT:
8948 s = "ug";
8949 break;
8950 case UNGE:
8951 s = "uge";
8952 break;
8953 case UNEQ:
8954 s = "ue";
8955 break;
8956 default:
8957 output_operand_lossage ("invalid %%C operand");
8958 s = "";
8959 break;
8960 }
8961 fputs (s, file);
8962 return;
8963 }
8964
8965 /* These are used by the movr instruction pattern. */
8966 case 'D':
8967 {
8968 switch (GET_CODE (x))
8969 {
8970 case NE:
8971 s = "ne";
8972 break;
8973 case EQ:
8974 s = "e";
8975 break;
8976 case GE:
8977 s = "gez";
8978 break;
8979 case LT:
8980 s = "lz";
8981 break;
8982 case LE:
8983 s = "lez";
8984 break;
8985 case GT:
8986 s = "gz";
8987 break;
8988 default:
8989 output_operand_lossage ("invalid %%D operand");
8990 s = "";
8991 break;
8992 }
8993 fputs (s, file);
8994 return;
8995 }
8996
8997 case 'b':
8998 {
8999 /* Print a sign-extended character. */
9000 int i = trunc_int_for_mode (INTVAL (x), QImode);
9001 fprintf (file, "%d", i);
9002 return;
9003 }
9004
9005 case 'f':
9006 /* Operand must be a MEM; write its address. */
9007 if (GET_CODE (x) != MEM)
9008 output_operand_lossage ("invalid %%f operand");
9009 output_address (GET_MODE (x), XEXP (x, 0));
9010 return;
9011
9012 case 's':
9013 {
9014 /* Print a sign-extended 32-bit value. */
9015 HOST_WIDE_INT i;
9016 if (GET_CODE(x) == CONST_INT)
9017 i = INTVAL (x);
9018 else
9019 {
9020 output_operand_lossage ("invalid %%s operand");
9021 return;
9022 }
9023 i = trunc_int_for_mode (i, SImode);
9024 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9025 return;
9026 }
9027
9028 case 0:
9029 /* Do nothing special. */
9030 break;
9031
9032 default:
9033 /* Undocumented flag. */
9034 output_operand_lossage ("invalid operand output code");
9035 }
9036
9037 if (GET_CODE (x) == REG)
9038 fputs (reg_names[REGNO (x)], file);
9039 else if (GET_CODE (x) == MEM)
9040 {
9041 fputc ('[', file);
9042 /* Poor Sun assembler doesn't understand absolute addressing. */
9043 if (CONSTANT_P (XEXP (x, 0)))
9044 fputs ("%g0+", file);
9045 output_address (GET_MODE (x), XEXP (x, 0));
9046 fputc (']', file);
9047 }
9048 else if (GET_CODE (x) == HIGH)
9049 {
9050 fputs ("%hi(", file);
9051 output_addr_const (file, XEXP (x, 0));
9052 fputc (')', file);
9053 }
9054 else if (GET_CODE (x) == LO_SUM)
9055 {
9056 sparc_print_operand (file, XEXP (x, 0), 0);
9057 if (TARGET_CM_MEDMID)
9058 fputs ("+%l44(", file);
9059 else
9060 fputs ("+%lo(", file);
9061 output_addr_const (file, XEXP (x, 1));
9062 fputc (')', file);
9063 }
9064 else if (GET_CODE (x) == CONST_DOUBLE)
9065 output_operand_lossage ("floating-point constant not a valid immediate operand");
9066 else
9067 output_addr_const (file, x);
9068 }
9069
9070 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9071
9072 static void
9073 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9074 {
9075 register rtx base, index = 0;
9076 int offset = 0;
9077 register rtx addr = x;
9078
9079 if (REG_P (addr))
9080 fputs (reg_names[REGNO (addr)], file);
9081 else if (GET_CODE (addr) == PLUS)
9082 {
9083 if (CONST_INT_P (XEXP (addr, 0)))
9084 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9085 else if (CONST_INT_P (XEXP (addr, 1)))
9086 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9087 else
9088 base = XEXP (addr, 0), index = XEXP (addr, 1);
9089 if (GET_CODE (base) == LO_SUM)
9090 {
9091 gcc_assert (USE_AS_OFFSETABLE_LO10
9092 && TARGET_ARCH64
9093 && ! TARGET_CM_MEDMID);
9094 output_operand (XEXP (base, 0), 0);
9095 fputs ("+%lo(", file);
9096 output_address (VOIDmode, XEXP (base, 1));
9097 fprintf (file, ")+%d", offset);
9098 }
9099 else
9100 {
9101 fputs (reg_names[REGNO (base)], file);
9102 if (index == 0)
9103 fprintf (file, "%+d", offset);
9104 else if (REG_P (index))
9105 fprintf (file, "+%s", reg_names[REGNO (index)]);
9106 else if (GET_CODE (index) == SYMBOL_REF
9107 || GET_CODE (index) == LABEL_REF
9108 || GET_CODE (index) == CONST)
9109 fputc ('+', file), output_addr_const (file, index);
9110 else gcc_unreachable ();
9111 }
9112 }
9113 else if (GET_CODE (addr) == MINUS
9114 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9115 {
9116 output_addr_const (file, XEXP (addr, 0));
9117 fputs ("-(", file);
9118 output_addr_const (file, XEXP (addr, 1));
9119 fputs ("-.)", file);
9120 }
9121 else if (GET_CODE (addr) == LO_SUM)
9122 {
9123 output_operand (XEXP (addr, 0), 0);
9124 if (TARGET_CM_MEDMID)
9125 fputs ("+%l44(", file);
9126 else
9127 fputs ("+%lo(", file);
9128 output_address (VOIDmode, XEXP (addr, 1));
9129 fputc (')', file);
9130 }
9131 else if (flag_pic
9132 && GET_CODE (addr) == CONST
9133 && GET_CODE (XEXP (addr, 0)) == MINUS
9134 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9135 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9136 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9137 {
9138 addr = XEXP (addr, 0);
9139 output_addr_const (file, XEXP (addr, 0));
9140 /* Group the args of the second CONST in parentheses. */
9141 fputs ("-(", file);
9142 /* Skip past the second CONST--it does nothing for us. */
9143 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9144 /* Close the parenthesis. */
9145 fputc (')', file);
9146 }
9147 else
9148 {
9149 output_addr_const (file, addr);
9150 }
9151 }
9152 \f
9153 /* Target hook for assembling integer objects. The sparc version has
9154 special handling for aligned DI-mode objects. */
9155
9156 static bool
9157 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9158 {
9159 /* ??? We only output .xword's for symbols and only then in environments
9160 where the assembler can handle them. */
9161 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9162 {
9163 if (TARGET_V9)
9164 {
9165 assemble_integer_with_op ("\t.xword\t", x);
9166 return true;
9167 }
9168 else
9169 {
9170 assemble_aligned_integer (4, const0_rtx);
9171 assemble_aligned_integer (4, x);
9172 return true;
9173 }
9174 }
9175 return default_assemble_integer (x, size, aligned_p);
9176 }
9177 \f
9178 /* Return the value of a code used in the .proc pseudo-op that says
9179 what kind of result this function returns. For non-C types, we pick
9180 the closest C type. */
9181
9182 #ifndef SHORT_TYPE_SIZE
9183 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9184 #endif
9185
9186 #ifndef INT_TYPE_SIZE
9187 #define INT_TYPE_SIZE BITS_PER_WORD
9188 #endif
9189
9190 #ifndef LONG_TYPE_SIZE
9191 #define LONG_TYPE_SIZE BITS_PER_WORD
9192 #endif
9193
9194 #ifndef LONG_LONG_TYPE_SIZE
9195 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9196 #endif
9197
9198 #ifndef FLOAT_TYPE_SIZE
9199 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9200 #endif
9201
9202 #ifndef DOUBLE_TYPE_SIZE
9203 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9204 #endif
9205
9206 #ifndef LONG_DOUBLE_TYPE_SIZE
9207 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9208 #endif
9209
9210 unsigned long
9211 sparc_type_code (register tree type)
9212 {
9213 register unsigned long qualifiers = 0;
9214 register unsigned shift;
9215
9216 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9217 setting more, since some assemblers will give an error for this. Also,
9218 we must be careful to avoid shifts of 32 bits or more to avoid getting
9219 unpredictable results. */
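/* For example (a sketch, assuming a 32-bit "int"): for a type of "int *",
   the first pass through the loop below ORs in (1 << 6) for the
   POINTER_TYPE, and the next iteration returns qualifiers | 4 for the
   signed int, giving a code of 0x44.  */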
9220
9221 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9222 {
9223 switch (TREE_CODE (type))
9224 {
9225 case ERROR_MARK:
9226 return qualifiers;
9227
9228 case ARRAY_TYPE:
9229 qualifiers |= (3 << shift);
9230 break;
9231
9232 case FUNCTION_TYPE:
9233 case METHOD_TYPE:
9234 qualifiers |= (2 << shift);
9235 break;
9236
9237 case POINTER_TYPE:
9238 case REFERENCE_TYPE:
9239 case OFFSET_TYPE:
9240 qualifiers |= (1 << shift);
9241 break;
9242
9243 case RECORD_TYPE:
9244 return (qualifiers | 8);
9245
9246 case UNION_TYPE:
9247 case QUAL_UNION_TYPE:
9248 return (qualifiers | 9);
9249
9250 case ENUMERAL_TYPE:
9251 return (qualifiers | 10);
9252
9253 case VOID_TYPE:
9254 return (qualifiers | 16);
9255
9256 case INTEGER_TYPE:
9257 /* If this is a range type, consider it to be the underlying
9258 type. */
9259 if (TREE_TYPE (type) != 0)
9260 break;
9261
9262 /* Carefully distinguish all the standard types of C,
9263 without messing up if the language is not C. We do this by
9264 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9265 look at both the names and the above fields, but that's redundant.
9266 Any type whose size is between two C types will be considered
9267 to be the wider of the two types. Also, we do not have a
9268 special code to use for "long long", so anything wider than
9269 long is treated the same. Note that we can't distinguish
9270 between "int" and "long" in this code if they are the same
9271 size, but that's fine, since neither can the assembler. */
9272
9273 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9274 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9275
9276 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9277 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9278
9279 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9280 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9281
9282 else
9283 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9284
9285 case REAL_TYPE:
9286 /* If this is a range type, consider it to be the underlying
9287 type. */
9288 if (TREE_TYPE (type) != 0)
9289 break;
9290
9291 /* Carefully distinguish all the standard types of C,
9292 without messing up if the language is not C. */
9293
9294 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9295 return (qualifiers | 6);
9296
9297 else
9298 return (qualifiers | 7);
9299
9300 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9301 /* ??? We need to distinguish between double and float complex types,
9302 but I don't know how yet because I can't reach this code from
9303 existing front-ends. */
9304 return (qualifiers | 7); /* Who knows? */
9305
9306 case VECTOR_TYPE:
9307 case BOOLEAN_TYPE: /* Boolean truth value type. */
9308 case LANG_TYPE:
9309 case NULLPTR_TYPE:
9310 return qualifiers;
9311
9312 default:
9313 gcc_unreachable (); /* Not a type! */
9314 }
9315 }
9316
9317 return qualifiers;
9318 }
9319 \f
9320 /* Nested function support. */
9321
9322 /* Emit RTL insns to initialize the variable parts of a trampoline.
9323 FNADDR is an RTX for the address of the function's pure code.
9324 CXT is an RTX for the static chain value for the function.
9325
9326 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9327 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9328 (to store insns). This is a bit excessive. Perhaps a different
9329 mechanism would be better here.
9330
9331 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9332
9333 static void
9334 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9335 {
9336 /* SPARC 32-bit trampoline:
9337
9338 sethi %hi(fn), %g1
9339 sethi %hi(static), %g2
9340 jmp %g1+%lo(fn)
9341 or %g2, %lo(static), %g2
9342
9343 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9344 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9345 */
9346
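/* Sketch of the opcode constants used below, derived from the formats
   quoted above: 0x03000000 is "sethi 0, %g1" and 0x05000000 is
   "sethi 0, %g2", each ORed with the top 22 bits of an address;
   0x81c06000 is "jmp %g1 + 0" and 0x8410a000 is "or %g2, 0, %g2",
   each ORed with the low 10 bits.  */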
9347 emit_move_insn
9348 (adjust_address (m_tramp, SImode, 0),
9349 expand_binop (SImode, ior_optab,
9350 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9351 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9352 NULL_RTX, 1, OPTAB_DIRECT));
9353
9354 emit_move_insn
9355 (adjust_address (m_tramp, SImode, 4),
9356 expand_binop (SImode, ior_optab,
9357 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9358 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9359 NULL_RTX, 1, OPTAB_DIRECT));
9360
9361 emit_move_insn
9362 (adjust_address (m_tramp, SImode, 8),
9363 expand_binop (SImode, ior_optab,
9364 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9365 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9366 NULL_RTX, 1, OPTAB_DIRECT));
9367
9368 emit_move_insn
9369 (adjust_address (m_tramp, SImode, 12),
9370 expand_binop (SImode, ior_optab,
9371 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9372 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9373 NULL_RTX, 1, OPTAB_DIRECT));
9374
9375 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9376 aligned on a 16 byte boundary so one flush clears it all. */
9377 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9378 if (sparc_cpu != PROCESSOR_ULTRASPARC
9379 && sparc_cpu != PROCESSOR_ULTRASPARC3
9380 && sparc_cpu != PROCESSOR_NIAGARA
9381 && sparc_cpu != PROCESSOR_NIAGARA2
9382 && sparc_cpu != PROCESSOR_NIAGARA3
9383 && sparc_cpu != PROCESSOR_NIAGARA4
9384 && sparc_cpu != PROCESSOR_NIAGARA7)
9385 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9386
9387 /* Call __enable_execute_stack after writing onto the stack to make sure
9388 the stack address is accessible. */
9389 #ifdef HAVE_ENABLE_EXECUTE_STACK
9390 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9391 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9392 #endif
9393
9394 }
9395
9396 /* The 64-bit version is simpler because it makes more sense to load the
9397 values as "immediate" data out of the trampoline. It's also easier since
9398 we can read the PC without clobbering a register. */
9399
9400 static void
9401 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9402 {
9403 /* SPARC 64-bit trampoline:
9404
9405 rd %pc, %g1
9406 ldx [%g1+24], %g5
9407 jmp %g5
9408 ldx [%g1+16], %g5
9409 +16 bytes data
9410 */
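
  /* Layout of the 32-byte trampoline (a sketch of what the stores below
     produce): bytes 0-15 hold the four instructions above, bytes 16-23 the
     static chain value and bytes 24-31 the function address.  The first ldx
     fetches the target address from offset 24; the ldx in the delay slot of
     the jmp then reloads %g5, the 64-bit static chain register, from offset
     16, after the jump target has already been read.  */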
9411
9412 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9413 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9414 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9415 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9416 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9417 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9418 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9419 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9420 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9421 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9422 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9423
9424 if (sparc_cpu != PROCESSOR_ULTRASPARC
9425 && sparc_cpu != PROCESSOR_ULTRASPARC3
9426 && sparc_cpu != PROCESSOR_NIAGARA
9427 && sparc_cpu != PROCESSOR_NIAGARA2
9428 && sparc_cpu != PROCESSOR_NIAGARA3
9429 && sparc_cpu != PROCESSOR_NIAGARA4
9430 && sparc_cpu != PROCESSOR_NIAGARA7)
9431 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9432
9433 /* Call __enable_execute_stack after writing onto the stack to make sure
9434 the stack address is accessible. */
9435 #ifdef HAVE_ENABLE_EXECUTE_STACK
9436 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9437 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9438 #endif
9439 }
9440
9441 /* Worker for TARGET_TRAMPOLINE_INIT. */
9442
9443 static void
9444 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9445 {
9446 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9447 cxt = force_reg (Pmode, cxt);
9448 if (TARGET_ARCH64)
9449 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9450 else
9451 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9452 }
9453 \f
9454 /* Adjust the cost of a scheduling dependency. Return the new cost of
9455 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9456
9457 static int
9458 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9459 int cost)
9460 {
9461 enum attr_type insn_type;
9462
9463 if (recog_memoized (insn) < 0)
9464 return cost;
9465
9466 insn_type = get_attr_type (insn);
9467
9468 if (dep_type == 0)
9469 {
9470 /* Data dependency; DEP_INSN writes a register that INSN reads some
9471 cycles later. */
9472
9473        /* If a load, then the dependence must be on the memory address;
9474  	 add an extra "cycle".  Note that the cost could be two cycles
9475  	 if the reg was written late in an instruction group; we cannot
9476  	 tell here.  */
9477 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9478 return cost + 3;
9479
9480 /* Get the delay only if the address of the store is the dependence. */
9481 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9482 {
9483  	  rtx pat = PATTERN (insn);
9484 rtx dep_pat = PATTERN (dep_insn);
9485
9486 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9487 return cost; /* This should not happen! */
9488
9489 /* The dependency between the two instructions was on the data that
9490 is being stored. Assume that this implies that the address of the
9491 store is not dependent. */
9492 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9493 return cost;
9494
9495 return cost + 3; /* An approximation. */
9496 }
9497
9498 /* A shift instruction cannot receive its data from an instruction
9499 in the same cycle; add a one cycle penalty. */
9500 if (insn_type == TYPE_SHIFT)
9501 return cost + 3; /* Split before cascade into shift. */
9502 }
9503 else
9504 {
9505      /* Anti- or output-dependency; DEP_INSN reads/writes a register that
9506 INSN writes some cycles later. */
9507
9508      /* These are only significant for the FPU; writing an FP reg before
9509  	 the FPU has finished with it stalls the processor.  */
9510
9511 /* Reusing an integer register causes no problems. */
9512 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9513 return 0;
9514 }
9515
9516 return cost;
9517 }
9518
9519 static int
9520 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9521 int cost)
9522 {
9523 enum attr_type insn_type, dep_type;
9524    rtx pat = PATTERN (insn);
9525 rtx dep_pat = PATTERN (dep_insn);
9526
9527 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9528 return cost;
9529
9530 insn_type = get_attr_type (insn);
9531 dep_type = get_attr_type (dep_insn);
9532
9533 switch (dtype)
9534 {
9535 case 0:
9536 /* Data dependency; DEP_INSN writes a register that INSN reads some
9537 cycles later. */
9538
9539 switch (insn_type)
9540 {
9541 case TYPE_STORE:
9542 case TYPE_FPSTORE:
9543 /* Get the delay iff the address of the store is the dependence. */
9544 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9545 return cost;
9546
9547 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9548 return cost;
9549 return cost + 3;
9550
9551 case TYPE_LOAD:
9552 case TYPE_SLOAD:
9553 case TYPE_FPLOAD:
9554 /* If a load, then the dependence must be on the memory address. If
9555  	     the addresses aren't equal, then it might be a false dependency.  */
9556 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9557 {
9558 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9559 || GET_CODE (SET_DEST (dep_pat)) != MEM
9560 || GET_CODE (SET_SRC (pat)) != MEM
9561 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9562 XEXP (SET_SRC (pat), 0)))
9563 return cost + 2;
9564
9565 return cost + 8;
9566 }
9567 break;
9568
9569 case TYPE_BRANCH:
9570 /* Compare to branch latency is 0. There is no benefit from
9571 separating compare and branch. */
9572 if (dep_type == TYPE_COMPARE)
9573 return 0;
9574 /* Floating point compare to branch latency is less than
9575 compare to conditional move. */
9576 if (dep_type == TYPE_FPCMP)
9577 return cost - 1;
9578 break;
9579 default:
9580 break;
9581 }
9582 break;
9583
9584 case REG_DEP_ANTI:
9585      /* Anti-dependencies only penalize the FPU.  */
9586 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9587 return 0;
9588 break;
9589
9590 default:
9591 break;
9592 }
9593
9594 return cost;
9595 }
9596
9597 static int
9598 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9599 unsigned int)
9600 {
9601 switch (sparc_cpu)
9602 {
9603 case PROCESSOR_SUPERSPARC:
9604 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9605 break;
9606 case PROCESSOR_HYPERSPARC:
9607 case PROCESSOR_SPARCLITE86X:
9608 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9609 break;
9610 default:
9611 break;
9612 }
9613 return cost;
9614 }
9615
9616 static void
9617 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9618 int sched_verbose ATTRIBUTE_UNUSED,
9619 int max_ready ATTRIBUTE_UNUSED)
9620 {}
9621
9622 static int
9623 sparc_use_sched_lookahead (void)
9624 {
9625 if (sparc_cpu == PROCESSOR_NIAGARA
9626 || sparc_cpu == PROCESSOR_NIAGARA2
9627 || sparc_cpu == PROCESSOR_NIAGARA3)
9628 return 0;
9629 if (sparc_cpu == PROCESSOR_NIAGARA4
9630 || sparc_cpu == PROCESSOR_NIAGARA7)
9631 return 2;
9632 if (sparc_cpu == PROCESSOR_ULTRASPARC
9633 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9634 return 4;
9635 if ((1 << sparc_cpu) &
9636 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9637 (1 << PROCESSOR_SPARCLITE86X)))
9638 return 3;
9639 return 0;
9640 }
9641
9642 static int
9643 sparc_issue_rate (void)
9644 {
9645 switch (sparc_cpu)
9646 {
9647 case PROCESSOR_NIAGARA:
9648 case PROCESSOR_NIAGARA2:
9649 case PROCESSOR_NIAGARA3:
9650 default:
9651 return 1;
9652 case PROCESSOR_NIAGARA4:
9653 case PROCESSOR_NIAGARA7:
9654 case PROCESSOR_V9:
9655 /* Assume V9 processors are capable of at least dual-issue. */
9656 return 2;
9657 case PROCESSOR_SUPERSPARC:
9658 return 3;
9659 case PROCESSOR_HYPERSPARC:
9660 case PROCESSOR_SPARCLITE86X:
9661 return 2;
9662 case PROCESSOR_ULTRASPARC:
9663 case PROCESSOR_ULTRASPARC3:
9664 return 4;
9665 }
9666 }
9667
9668 static int
9669 set_extends (rtx_insn *insn)
9670 {
9671 register rtx pat = PATTERN (insn);
9672
9673 switch (GET_CODE (SET_SRC (pat)))
9674 {
9675 /* Load and some shift instructions zero extend. */
9676 case MEM:
9677 case ZERO_EXTEND:
9678      /* sethi clears the high bits.  */
9679      case HIGH:
9680      /* LO_SUM is used with sethi; sethi clears the high bits and the
9681  	 values used with lo_sum are positive.  */
9682      case LO_SUM:
9683      /* A store-flag pattern stores only 0 or 1.  */
9684 case LT: case LTU:
9685 case GT: case GTU:
9686 case LE: case LEU:
9687 case GE: case GEU:
9688 case EQ:
9689 case NE:
9690 return 1;
9691 case AND:
9692 {
9693 rtx op0 = XEXP (SET_SRC (pat), 0);
9694 rtx op1 = XEXP (SET_SRC (pat), 1);
9695 if (GET_CODE (op1) == CONST_INT)
9696 return INTVAL (op1) >= 0;
9697 if (GET_CODE (op0) != REG)
9698 return 0;
9699 if (sparc_check_64 (op0, insn) == 1)
9700 return 1;
9701 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9702 }
9703 case IOR:
9704 case XOR:
9705 {
9706 rtx op0 = XEXP (SET_SRC (pat), 0);
9707 rtx op1 = XEXP (SET_SRC (pat), 1);
9708 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9709 return 0;
9710 if (GET_CODE (op1) == CONST_INT)
9711 return INTVAL (op1) >= 0;
9712 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9713 }
9714 case LSHIFTRT:
9715 return GET_MODE (SET_SRC (pat)) == SImode;
9716 /* Positive integers leave the high bits zero. */
9717 case CONST_INT:
9718 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9719 case ASHIFTRT:
9720 case SIGN_EXTEND:
9721 return - (GET_MODE (SET_SRC (pat)) == SImode);
9722 case REG:
9723 return sparc_check_64 (SET_SRC (pat), insn);
9724 default:
9725 return 0;
9726 }
9727 }
9728
9729 /* We _ought_ to have only one kind per function, but... */
9730 static GTY(()) rtx sparc_addr_diff_list;
9731 static GTY(()) rtx sparc_addr_list;
9732
9733 void
9734 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9735 {
9736 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9737 if (diff)
9738 sparc_addr_diff_list
9739 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9740 else
9741 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9742 }
9743
9744 static void
9745 sparc_output_addr_vec (rtx vec)
9746 {
9747 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9748 int idx, vlen = XVECLEN (body, 0);
9749
9750 #ifdef ASM_OUTPUT_ADDR_VEC_START
9751 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9752 #endif
9753
9754 #ifdef ASM_OUTPUT_CASE_LABEL
9755 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9756 NEXT_INSN (lab));
9757 #else
9758 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9759 #endif
9760
9761 for (idx = 0; idx < vlen; idx++)
9762 {
9763 ASM_OUTPUT_ADDR_VEC_ELT
9764 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9765 }
9766
9767 #ifdef ASM_OUTPUT_ADDR_VEC_END
9768 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9769 #endif
9770 }
9771
9772 static void
9773 sparc_output_addr_diff_vec (rtx vec)
9774 {
9775 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9776 rtx base = XEXP (XEXP (body, 0), 0);
9777 int idx, vlen = XVECLEN (body, 1);
9778
9779 #ifdef ASM_OUTPUT_ADDR_VEC_START
9780 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9781 #endif
9782
9783 #ifdef ASM_OUTPUT_CASE_LABEL
9784 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9785 NEXT_INSN (lab));
9786 #else
9787 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9788 #endif
9789
9790 for (idx = 0; idx < vlen; idx++)
9791 {
9792 ASM_OUTPUT_ADDR_DIFF_ELT
9793 (asm_out_file,
9794 body,
9795 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9796 CODE_LABEL_NUMBER (base));
9797 }
9798
9799 #ifdef ASM_OUTPUT_ADDR_VEC_END
9800 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9801 #endif
9802 }
9803
9804 static void
9805 sparc_output_deferred_case_vectors (void)
9806 {
9807 rtx t;
9808 int align;
9809
9810 if (sparc_addr_list == NULL_RTX
9811 && sparc_addr_diff_list == NULL_RTX)
9812 return;
9813
9814 /* Align to cache line in the function's code section. */
9815 switch_to_section (current_function_section ());
9816
9817 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9818 if (align > 0)
9819 ASM_OUTPUT_ALIGN (asm_out_file, align);
9820
9821 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9822 sparc_output_addr_vec (XEXP (t, 0));
9823 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9824 sparc_output_addr_diff_vec (XEXP (t, 0));
9825
9826 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9827 }
9828
9829 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9830 unknown. Return 1 if the high bits are zero, -1 if the register is
9831 sign extended. */
9832 int
9833 sparc_check_64 (rtx x, rtx_insn *insn)
9834 {
9835 /* If a register is set only once it is safe to ignore insns this
9836 code does not know how to handle. The loop will either recognize
9837 the single set and return the correct value or fail to recognize
9838 it and return 0. */
9839 int set_once = 0;
9840 rtx y = x;
9841
9842 gcc_assert (GET_CODE (x) == REG);
9843
9844 if (GET_MODE (x) == DImode)
9845 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9846
9847 if (flag_expensive_optimizations
9848 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9849 set_once = 1;
9850
9851 if (insn == 0)
9852 {
9853 if (set_once)
9854 insn = get_last_insn_anywhere ();
9855 else
9856 return 0;
9857 }
9858
9859 while ((insn = PREV_INSN (insn)))
9860 {
9861 switch (GET_CODE (insn))
9862 {
9863 case JUMP_INSN:
9864 case NOTE:
9865 break;
9866 case CODE_LABEL:
9867 case CALL_INSN:
9868 default:
9869 if (! set_once)
9870 return 0;
9871 break;
9872 case INSN:
9873 {
9874 rtx pat = PATTERN (insn);
9875 if (GET_CODE (pat) != SET)
9876 return 0;
9877 if (rtx_equal_p (x, SET_DEST (pat)))
9878 return set_extends (insn);
9879 if (y && rtx_equal_p (y, SET_DEST (pat)))
9880 return set_extends (insn);
9881 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9882 return 0;
9883 }
9884 }
9885 }
9886 return 0;
9887 }
9888
9889 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9890 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
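
/* As an illustration (one of the alternatives handled below): when the
   source value is not known to be zero-extended, the routine first glues
   the two 32-bit halves of operand 1 into a single 64-bit register, e.g.
   for a left shift using the scratch register

	sllx	%H1, 32, %3
	srl	%L1, 0, %L1
	or	%L1, %3, %3
	sllx	%3, %2, %3
	srlx	%3, 32, %H0
	mov	%3, %L0

   and then splits the 64-bit result back into the high/low pair of the
   destination.  */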
9891
9892 const char *
9893 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9894 {
9895 static char asm_code[60];
9896
9897 /* The scratch register is only required when the destination
9898 register is not a 64-bit global or out register. */
9899 if (which_alternative != 2)
9900 operands[3] = operands[0];
9901
9902 /* We can only shift by constants <= 63. */
9903 if (GET_CODE (operands[2]) == CONST_INT)
9904 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9905
9906 if (GET_CODE (operands[1]) == CONST_INT)
9907 {
9908 output_asm_insn ("mov\t%1, %3", operands);
9909 }
9910 else
9911 {
9912 output_asm_insn ("sllx\t%H1, 32, %3", operands);
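
/* For example (purely illustrative): with e8 = 3 and e16 = 300,
   (3 * 300 + 128) / 256 = 1028 / 256 = 4, whereas simple truncation of
   900 / 256 would give 3; the +128 bias rounds the scaled product to the
   nearest integer.  */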
9913 if (sparc_check_64 (operands[1], insn) <= 0)
9914 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9915 output_asm_insn ("or\t%L1, %3, %3", operands);
9916 }
9917
9918 strcpy (asm_code, opcode);
9919
9920 if (which_alternative != 2)
9921 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9922 else
9923 return
9924 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9925 }
9926 \f
9927 /* Output rtl to increment the profiler label LABELNO
9928 for profiling a function entry. */
9929
9930 void
9931 sparc_profile_hook (int labelno)
9932 {
9933 char buf[32];
9934 rtx lab, fun;
9935
9936 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9937 if (NO_PROFILE_COUNTERS)
9938 {
9939 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9940 }
9941 else
9942 {
9943 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9944 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9945 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9946 }
9947 }
9948 \f
9949 #ifdef TARGET_SOLARIS
9950 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
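/* For instance, a writable data section named ".mydata" (a hypothetical
   name) is emitted by the code below as

	.section	".mydata",#alloc,#write,#progbits

   using the Solaris assembler syntax.  */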
9951
9952 static void
9953 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9954 tree decl ATTRIBUTE_UNUSED)
9955 {
9956 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9957 {
9958 solaris_elf_asm_comdat_section (name, flags, decl);
9959 return;
9960 }
9961
9962 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9963
9964 if (!(flags & SECTION_DEBUG))
9965 fputs (",#alloc", asm_out_file);
9966 if (flags & SECTION_WRITE)
9967 fputs (",#write", asm_out_file);
9968 if (flags & SECTION_TLS)
9969 fputs (",#tls", asm_out_file);
9970 if (flags & SECTION_CODE)
9971 fputs (",#execinstr", asm_out_file);
9972
9973 if (flags & SECTION_NOTYPE)
9974 ;
9975 else if (flags & SECTION_BSS)
9976 fputs (",#nobits", asm_out_file);
9977 else
9978 fputs (",#progbits", asm_out_file);
9979
9980 fputc ('\n', asm_out_file);
9981 }
9982 #endif /* TARGET_SOLARIS */
9983
9984 /* We do not allow indirect calls to be optimized into sibling calls.
9985
9986 We cannot use sibling calls when delayed branches are disabled
9987 because they will likely require the call delay slot to be filled.
9988
9989 Also, on SPARC 32-bit we cannot emit a sibling call when the
9990 current function returns a structure. This is because the "unimp
9991 after call" convention would cause the callee to return to the
9992 wrong place. The generic code already disallows cases where the
9993 function being called returns a structure.
9994
9995 It may seem strange how this last case could occur. Usually there
9996 is code after the call which jumps to epilogue code which dumps the
9997     is code after the call which jumps to epilogue code that dumps the
9998     return value into the struct return area.  That ought to invalidate
9999     the sibling call, right?  Well, in the C++ case we can end up passing
10000 void) and then nothing else happens. Such a sibling call would look
10001 valid without the added check here.
10002
10003 VxWorks PIC PLT entries require the global pointer to be initialized
10004 on entry. We therefore can't emit sibling calls to them. */
10005 static bool
10006 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10007 {
10008 return (decl
10009 && flag_delayed_branch
10010 && (TARGET_ARCH64 || ! cfun->returns_struct)
10011 && !(TARGET_VXWORKS_RTP
10012 && flag_pic
10013 && !targetm.binds_local_p (decl)));
10014 }
10015 \f
10016 /* libfunc renaming. */
10017
10018 static void
10019 sparc_init_libfuncs (void)
10020 {
10021 if (TARGET_ARCH32)
10022 {
10023 /* Use the subroutines that Sun's library provides for integer
10024 multiply and divide. The `*' prevents an underscore from
10025 being prepended by the compiler. .umul is a little faster
10026 than .mul. */
10027 set_optab_libfunc (smul_optab, SImode, "*.umul");
10028 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10029 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10030 set_optab_libfunc (smod_optab, SImode, "*.rem");
10031 set_optab_libfunc (umod_optab, SImode, "*.urem");
10032
10033        /* TFmode arithmetic.  These names are part of the SPARC 32-bit ABI.  */
10034 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10035 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10036 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10037 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10038 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
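
      /* As an illustration, with -m32 a TFmode (long double) operation such
	 as

	    long double f (long double x, long double y) { return x * y; }

	 is not expanded inline but becomes a call to _Q_mul, one of the
	 support routines registered here.  */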
10039
10040 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10041 is because with soft-float, the SFmode and DFmode sqrt
10042 instructions will be absent, and the compiler will notice and
10043 try to use the TFmode sqrt instruction for calls to the
10044 builtin function sqrt, but this fails. */
10045 if (TARGET_FPU)
10046 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10047
10048 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10049 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10050 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10051 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10052 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10053 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10054
10055 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10056 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10057 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10058 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10059
10060 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10061 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10062 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10063 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10064
10065 if (DITF_CONVERSION_LIBFUNCS)
10066 {
10067 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10068 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10069 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10070 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10071 }
10072
10073 if (SUN_CONVERSION_LIBFUNCS)
10074 {
10075 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10076 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10077 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10078 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10079 }
10080 }
10081 if (TARGET_ARCH64)
10082 {
10083        /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10084 do not exist in the library. Make sure the compiler does not
10085 emit calls to them by accident. (It should always use the
10086 hardware instructions.) */
10087 set_optab_libfunc (smul_optab, SImode, 0);
10088 set_optab_libfunc (sdiv_optab, SImode, 0);
10089 set_optab_libfunc (udiv_optab, SImode, 0);
10090 set_optab_libfunc (smod_optab, SImode, 0);
10091 set_optab_libfunc (umod_optab, SImode, 0);
10092
10093 if (SUN_INTEGER_MULTIPLY_64)
10094 {
10095 set_optab_libfunc (smul_optab, DImode, "__mul64");
10096 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10097 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10098 set_optab_libfunc (smod_optab, DImode, "__rem64");
10099 set_optab_libfunc (umod_optab, DImode, "__urem64");
10100 }
10101
10102 if (SUN_CONVERSION_LIBFUNCS)
10103 {
10104 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10105 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10106 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10107 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10108 }
10109 }
10110 }
10111 \f
10112 /* SPARC builtins. */
10113 enum sparc_builtins
10114 {
10115 /* FPU builtins. */
10116 SPARC_BUILTIN_LDFSR,
10117 SPARC_BUILTIN_STFSR,
10118
10119 /* VIS 1.0 builtins. */
10120 SPARC_BUILTIN_FPACK16,
10121 SPARC_BUILTIN_FPACK32,
10122 SPARC_BUILTIN_FPACKFIX,
10123 SPARC_BUILTIN_FEXPAND,
10124 SPARC_BUILTIN_FPMERGE,
10125 SPARC_BUILTIN_FMUL8X16,
10126 SPARC_BUILTIN_FMUL8X16AU,
10127 SPARC_BUILTIN_FMUL8X16AL,
10128 SPARC_BUILTIN_FMUL8SUX16,
10129 SPARC_BUILTIN_FMUL8ULX16,
10130 SPARC_BUILTIN_FMULD8SUX16,
10131 SPARC_BUILTIN_FMULD8ULX16,
10132 SPARC_BUILTIN_FALIGNDATAV4HI,
10133 SPARC_BUILTIN_FALIGNDATAV8QI,
10134 SPARC_BUILTIN_FALIGNDATAV2SI,
10135 SPARC_BUILTIN_FALIGNDATADI,
10136 SPARC_BUILTIN_WRGSR,
10137 SPARC_BUILTIN_RDGSR,
10138 SPARC_BUILTIN_ALIGNADDR,
10139 SPARC_BUILTIN_ALIGNADDRL,
10140 SPARC_BUILTIN_PDIST,
10141 SPARC_BUILTIN_EDGE8,
10142 SPARC_BUILTIN_EDGE8L,
10143 SPARC_BUILTIN_EDGE16,
10144 SPARC_BUILTIN_EDGE16L,
10145 SPARC_BUILTIN_EDGE32,
10146 SPARC_BUILTIN_EDGE32L,
10147 SPARC_BUILTIN_FCMPLE16,
10148 SPARC_BUILTIN_FCMPLE32,
10149 SPARC_BUILTIN_FCMPNE16,
10150 SPARC_BUILTIN_FCMPNE32,
10151 SPARC_BUILTIN_FCMPGT16,
10152 SPARC_BUILTIN_FCMPGT32,
10153 SPARC_BUILTIN_FCMPEQ16,
10154 SPARC_BUILTIN_FCMPEQ32,
10155 SPARC_BUILTIN_FPADD16,
10156 SPARC_BUILTIN_FPADD16S,
10157 SPARC_BUILTIN_FPADD32,
10158 SPARC_BUILTIN_FPADD32S,
10159 SPARC_BUILTIN_FPSUB16,
10160 SPARC_BUILTIN_FPSUB16S,
10161 SPARC_BUILTIN_FPSUB32,
10162 SPARC_BUILTIN_FPSUB32S,
10163 SPARC_BUILTIN_ARRAY8,
10164 SPARC_BUILTIN_ARRAY16,
10165 SPARC_BUILTIN_ARRAY32,
10166
10167 /* VIS 2.0 builtins. */
10168 SPARC_BUILTIN_EDGE8N,
10169 SPARC_BUILTIN_EDGE8LN,
10170 SPARC_BUILTIN_EDGE16N,
10171 SPARC_BUILTIN_EDGE16LN,
10172 SPARC_BUILTIN_EDGE32N,
10173 SPARC_BUILTIN_EDGE32LN,
10174 SPARC_BUILTIN_BMASK,
10175 SPARC_BUILTIN_BSHUFFLEV4HI,
10176 SPARC_BUILTIN_BSHUFFLEV8QI,
10177 SPARC_BUILTIN_BSHUFFLEV2SI,
10178 SPARC_BUILTIN_BSHUFFLEDI,
10179
10180 /* VIS 3.0 builtins. */
10181 SPARC_BUILTIN_CMASK8,
10182 SPARC_BUILTIN_CMASK16,
10183 SPARC_BUILTIN_CMASK32,
10184 SPARC_BUILTIN_FCHKSM16,
10185 SPARC_BUILTIN_FSLL16,
10186 SPARC_BUILTIN_FSLAS16,
10187 SPARC_BUILTIN_FSRL16,
10188 SPARC_BUILTIN_FSRA16,
10189 SPARC_BUILTIN_FSLL32,
10190 SPARC_BUILTIN_FSLAS32,
10191 SPARC_BUILTIN_FSRL32,
10192 SPARC_BUILTIN_FSRA32,
10193 SPARC_BUILTIN_PDISTN,
10194 SPARC_BUILTIN_FMEAN16,
10195 SPARC_BUILTIN_FPADD64,
10196 SPARC_BUILTIN_FPSUB64,
10197 SPARC_BUILTIN_FPADDS16,
10198 SPARC_BUILTIN_FPADDS16S,
10199 SPARC_BUILTIN_FPSUBS16,
10200 SPARC_BUILTIN_FPSUBS16S,
10201 SPARC_BUILTIN_FPADDS32,
10202 SPARC_BUILTIN_FPADDS32S,
10203 SPARC_BUILTIN_FPSUBS32,
10204 SPARC_BUILTIN_FPSUBS32S,
10205 SPARC_BUILTIN_FUCMPLE8,
10206 SPARC_BUILTIN_FUCMPNE8,
10207 SPARC_BUILTIN_FUCMPGT8,
10208 SPARC_BUILTIN_FUCMPEQ8,
10209 SPARC_BUILTIN_FHADDS,
10210 SPARC_BUILTIN_FHADDD,
10211 SPARC_BUILTIN_FHSUBS,
10212 SPARC_BUILTIN_FHSUBD,
10213 SPARC_BUILTIN_FNHADDS,
10214 SPARC_BUILTIN_FNHADDD,
10215 SPARC_BUILTIN_UMULXHI,
10216 SPARC_BUILTIN_XMULX,
10217 SPARC_BUILTIN_XMULXHI,
10218
10219 /* VIS 4.0 builtins. */
10220 SPARC_BUILTIN_FPADD8,
10221 SPARC_BUILTIN_FPADDS8,
10222 SPARC_BUILTIN_FPADDUS8,
10223 SPARC_BUILTIN_FPADDUS16,
10224 SPARC_BUILTIN_FPCMPLE8,
10225 SPARC_BUILTIN_FPCMPGT8,
10226 SPARC_BUILTIN_FPCMPULE16,
10227 SPARC_BUILTIN_FPCMPUGT16,
10228 SPARC_BUILTIN_FPCMPULE32,
10229 SPARC_BUILTIN_FPCMPUGT32,
10230 SPARC_BUILTIN_FPMAX8,
10231 SPARC_BUILTIN_FPMAX16,
10232 SPARC_BUILTIN_FPMAX32,
10233 SPARC_BUILTIN_FPMAXU8,
10234 SPARC_BUILTIN_FPMAXU16,
10235 SPARC_BUILTIN_FPMAXU32,
10236 SPARC_BUILTIN_FPMIN8,
10237 SPARC_BUILTIN_FPMIN16,
10238 SPARC_BUILTIN_FPMIN32,
10239 SPARC_BUILTIN_FPMINU8,
10240 SPARC_BUILTIN_FPMINU16,
10241 SPARC_BUILTIN_FPMINU32,
10242 SPARC_BUILTIN_FPSUB8,
10243 SPARC_BUILTIN_FPSUBS8,
10244 SPARC_BUILTIN_FPSUBUS8,
10245 SPARC_BUILTIN_FPSUBUS16,
10246
10247 SPARC_BUILTIN_MAX
10248 };
10249
10250 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10251 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10252
10253 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10254 function decl or NULL_TREE if the builtin was not added. */
10255
10256 static tree
10257 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10258 tree type)
10259 {
10260 tree t
10261 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10262
10263 if (t)
10264 {
10265 sparc_builtins[code] = t;
10266 sparc_builtins_icode[code] = icode;
10267 }
10268
10269 return t;
10270 }
10271
10272 /* Likewise, but also marks the function as "const". */
10273
10274 static tree
10275 def_builtin_const (const char *name, enum insn_code icode,
10276 enum sparc_builtins code, tree type)
10277 {
10278 tree t = def_builtin (name, icode, code, type);
10279
10280 if (t)
10281 TREE_READONLY (t) = 1;
10282
10283 return t;
10284 }
10285
10286 /* Implement the TARGET_INIT_BUILTINS target hook.
10287 Create builtin functions for special SPARC instructions. */
10288
10289 static void
10290 sparc_init_builtins (void)
10291 {
10292 if (TARGET_FPU)
10293 sparc_fpu_init_builtins ();
10294
10295 if (TARGET_VIS)
10296 sparc_vis_init_builtins ();
10297 }
10298
10299 /* Create builtin functions for FPU instructions. */
10300
10301 static void
10302 sparc_fpu_init_builtins (void)
10303 {
10304 tree ftype
10305 = build_function_type_list (void_type_node,
10306 build_pointer_type (unsigned_type_node), 0);
10307 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10308 SPARC_BUILTIN_LDFSR, ftype);
10309 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10310 SPARC_BUILTIN_STFSR, ftype);
10311 }
10312
10313 /* Create builtin functions for VIS instructions. */
10314
10315 static void
10316 sparc_vis_init_builtins (void)
10317 {
10318 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10319 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10320 tree v4hi = build_vector_type (intHI_type_node, 4);
10321 tree v2hi = build_vector_type (intHI_type_node, 2);
10322 tree v2si = build_vector_type (intSI_type_node, 2);
10323 tree v1si = build_vector_type (intSI_type_node, 1);
10324
10325 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10326 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10327 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10328 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10329 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10330 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10331 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10332 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10333 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10334 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10335 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10336 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10337 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10338 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10339 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10340 v8qi, v8qi,
10341 intDI_type_node, 0);
10342 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10343 v8qi, v8qi, 0);
10344 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10345 v8qi, v8qi, 0);
10346 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10347 intDI_type_node,
10348 intDI_type_node, 0);
10349 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10350 intSI_type_node,
10351 intSI_type_node, 0);
10352 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10353 ptr_type_node,
10354 intSI_type_node, 0);
10355 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10356 ptr_type_node,
10357 intDI_type_node, 0);
10358 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10359 ptr_type_node,
10360 ptr_type_node, 0);
10361 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10362 ptr_type_node,
10363 ptr_type_node, 0);
10364 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10365 v4hi, v4hi, 0);
10366 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10367 v2si, v2si, 0);
10368 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10369 v4hi, v4hi, 0);
10370 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10371 v2si, v2si, 0);
10372 tree void_ftype_di = build_function_type_list (void_type_node,
10373 intDI_type_node, 0);
10374 tree di_ftype_void = build_function_type_list (intDI_type_node,
10375 void_type_node, 0);
10376 tree void_ftype_si = build_function_type_list (void_type_node,
10377 intSI_type_node, 0);
10378 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10379 float_type_node,
10380 float_type_node, 0);
10381 tree df_ftype_df_df = build_function_type_list (double_type_node,
10382 double_type_node,
10383 double_type_node, 0);
10384
10385 /* Packing and expanding vectors. */
10386 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10387 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10388 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10389 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10390 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10391 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10392 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10393 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10394 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10395 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10396
10397 /* Multiplications. */
10398 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10399 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10400 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10401 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10402 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10403 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10404 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10405 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10406 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10407 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10408 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10409 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10410 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10411 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10412
10413 /* Data aligning. */
10414 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10415 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10416 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10417 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10418 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10419 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10420 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10421 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10422
10423 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10424 SPARC_BUILTIN_WRGSR, void_ftype_di);
10425 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10426 SPARC_BUILTIN_RDGSR, di_ftype_void);
10427
10428 if (TARGET_ARCH64)
10429 {
10430 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10431 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10432 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10433 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10434 }
10435 else
10436 {
10437 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10438 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10439 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10440 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10441 }
10442
10443 /* Pixel distance. */
10444 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10445 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10446
10447 /* Edge handling. */
10448 if (TARGET_ARCH64)
10449 {
10450 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10451 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10452 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10453 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10454 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10455 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10456 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10457 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10458 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10459 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10460 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10461 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10462 }
10463 else
10464 {
10465 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10466 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10467 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10468 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10469 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10470 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10471 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10472 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10473 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10474 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10475 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10476 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10477 }
10478
10479 /* Pixel compare. */
10480 if (TARGET_ARCH64)
10481 {
10482 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10483 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10484 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10485 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10486 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10487 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10488 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10489 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10490 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10491 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10492 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10493 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10494 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10495 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10496 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10497 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10498 }
10499 else
10500 {
10501 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10502 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10503 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10504 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10505 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10506 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10507 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10508 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10509 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10510 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10511 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10512 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10513 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10514 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10515 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10516 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10517 }
10518
10519 /* Addition and subtraction. */
10520 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10521 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10522 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10523 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10524 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10525 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10526 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10527 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10528 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10529 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10530 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10531 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10532 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10533 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10534 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10535 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10536
10537 /* Three-dimensional array addressing. */
10538 if (TARGET_ARCH64)
10539 {
10540 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10541 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10542 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10543 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10544 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10545 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10546 }
10547 else
10548 {
10549 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10550 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10551 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10552 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10553 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10554 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10555 }
10556
10557 if (TARGET_VIS2)
10558 {
10559 /* Edge handling. */
10560 if (TARGET_ARCH64)
10561 {
10562 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10563 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10564 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10565 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10566 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10567 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10568 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10569 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10570 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10571 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10572 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10573 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10574 }
10575 else
10576 {
10577 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10578 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10579 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10580 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10581 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10582 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10583 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10584 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10585 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10586 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10587 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10588 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10589 }
10590
10591 /* Byte mask and shuffle. */
10592 if (TARGET_ARCH64)
10593 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10594 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10595 else
10596 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10597 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10598 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10599 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10600 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10601 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10602 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10603 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10604 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10605 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10606 }
10607
10608 if (TARGET_VIS3)
10609 {
10610 if (TARGET_ARCH64)
10611 {
10612 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10613 SPARC_BUILTIN_CMASK8, void_ftype_di);
10614 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10615 SPARC_BUILTIN_CMASK16, void_ftype_di);
10616 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10617 SPARC_BUILTIN_CMASK32, void_ftype_di);
10618 }
10619 else
10620 {
10621 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10622 SPARC_BUILTIN_CMASK8, void_ftype_si);
10623 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10624 SPARC_BUILTIN_CMASK16, void_ftype_si);
10625 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10626 SPARC_BUILTIN_CMASK32, void_ftype_si);
10627 }
10628
10629 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10630 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10631
10632 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10633 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10634 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10635 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10636 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10637 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10638 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10639 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10640 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10641 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10642 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10643 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10644 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10645 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10646 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10647 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10648
10649 if (TARGET_ARCH64)
10650 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10651 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10652 else
10653 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10654 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10655
10656 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10657 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10658 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10659 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10660 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10661 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10662
10663 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10664 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10665 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10666 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10667 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10668 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10669 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10670 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10671 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10672 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10673 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10674 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10675 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10676 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10677 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10678 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10679
10680 if (TARGET_ARCH64)
10681 {
10682 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10683 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10684 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10685 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10686 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10687 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10688 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10689 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10690 }
10691 else
10692 {
10693 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10694 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10695 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10696 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10697 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10698 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10699 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10700 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10701 }
10702
10703 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10704 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10705 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10706 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10707 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10708 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10709 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10710 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10711 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10712 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10713 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10714 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10715
10716 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10717 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10718 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10719 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10720 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10721 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10722 }
10723
10724 if (TARGET_VIS4)
10725 {
10726 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10727 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10728 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10729 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10730 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10731 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10732 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10733 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10734
10735
10736 if (TARGET_ARCH64)
10737 {
10738 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10739 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10740 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10741 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10742 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10743 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10744 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10745 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10746 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10747 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10748 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10749 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10750 }
10751 else
10752 {
10753 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10754 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10755 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10756 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10757 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10758 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10759 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10760 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10761  	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10762  			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10763  	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10764  			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10765 }
10766
10767 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10768 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10769 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10770 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10771 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10772 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10773 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10774 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10775 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10776 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10777 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10778 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10779 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10780 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10781 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10782 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10783 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10784 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10785 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10786 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10787 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10788 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10789 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10790 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10791 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10792 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10793 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10794 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10795 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10796 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10797 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10798 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10799 }
10800 }
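
/* A minimal user-level sketch of how these builtins are reached (the
   typedef belongs to the caller, not to GCC): with -mvis in effect,

	typedef short vec16 __attribute__ ((vector_size (8)));

	vec16
	add4 (vec16 a, vec16 b)
	{
	  return __builtin_vis_fpadd16 (a, b);
	}

   should compile down to an fpadd16 instruction on the two 4x16-bit
   vectors.  */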
10801
10802 /* Implement TARGET_BUILTIN_DECL hook. */
10803
10804 static tree
10805 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10806 {
10807 if (code >= SPARC_BUILTIN_MAX)
10808 return error_mark_node;
10809
10810 return sparc_builtins[code];
10811 }
10812
10813  /* Implement TARGET_EXPAND_BUILTIN hook.  */
10814
10815 static rtx
10816 sparc_expand_builtin (tree exp, rtx target,
10817 rtx subtarget ATTRIBUTE_UNUSED,
10818 machine_mode tmode ATTRIBUTE_UNUSED,
10819 int ignore ATTRIBUTE_UNUSED)
10820 {
10821 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10822 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10823 enum insn_code icode = sparc_builtins_icode[code];
10824 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10825 call_expr_arg_iterator iter;
10826 int arg_count = 0;
10827 rtx pat, op[4];
10828 tree arg;
10829
10830 if (nonvoid)
10831 {
10832 machine_mode tmode = insn_data[icode].operand[0].mode;
10833 if (!target
10834 || GET_MODE (target) != tmode
10835 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10836 op[0] = gen_reg_rtx (tmode);
10837 else
10838 op[0] = target;
10839 }
10840
10841 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10842 {
10843 const struct insn_operand_data *insn_op;
10844 int idx;
10845
10846 if (arg == error_mark_node)
10847 return NULL_RTX;
10848
10849 arg_count++;
10850 idx = arg_count - !nonvoid;
10851 insn_op = &insn_data[icode].operand[idx];
10852 op[arg_count] = expand_normal (arg);
10853
10854 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10855 {
10856 if (!address_operand (op[arg_count], SImode))
10857 {
10858 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10859 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10860 }
10861 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10862 }
10863
10864 else if (insn_op->mode == V1DImode
10865 && GET_MODE (op[arg_count]) == DImode)
10866 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10867
10868 else if (insn_op->mode == V1SImode
10869 && GET_MODE (op[arg_count]) == SImode)
10870 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10871
10872 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10873 insn_op->mode))
10874 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10875 }
10876
10877 switch (arg_count)
10878 {
10879 case 0:
10880 pat = GEN_FCN (icode) (op[0]);
10881 break;
10882 case 1:
10883 if (nonvoid)
10884 pat = GEN_FCN (icode) (op[0], op[1]);
10885 else
10886 pat = GEN_FCN (icode) (op[1]);
10887 break;
10888 case 2:
10889 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10890 break;
10891 case 3:
10892 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10893 break;
10894 default:
10895 gcc_unreachable ();
10896 }
10897
10898 if (!pat)
10899 return NULL_RTX;
10900
10901 emit_insn (pat);
10902
10903 return (nonvoid ? op[0] : const0_rtx);
10904 }
10905
10906 /* Return the upper 16 bits of the 8x16 multiplication. */
10907
10908 static int
10909 sparc_vis_mul8x16 (int e8, int e16)
10910 {
10911 return (e8 * e16 + 128) / 256;
10912 }
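/* Illustrative arithmetic for the helper above (hypothetical inputs):
   with e8 = 100 and e16 = 1000,

     (100 * 1000 + 128) / 256 = 100128 / 256 = 391,

   i.e. the 24-bit product rounded to its upper 16 bits.  */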
10913
10914 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10915 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10916
10917 static void
10918 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10919 tree inner_type, tree cst0, tree cst1)
10920 {
10921 unsigned i, num = VECTOR_CST_NELTS (cst0);
10922 int scale;
10923
10924 switch (fncode)
10925 {
10926 case SPARC_BUILTIN_FMUL8X16:
10927 for (i = 0; i < num; ++i)
10928 {
10929 int val
10930 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10931 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10932 n_elts[i] = build_int_cst (inner_type, val);
10933 }
10934 break;
10935
10936 case SPARC_BUILTIN_FMUL8X16AU:
10937 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10938
10939 for (i = 0; i < num; ++i)
10940 {
10941 int val
10942 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10943 scale);
10944 n_elts[i] = build_int_cst (inner_type, val);
10945 }
10946 break;
10947
10948 case SPARC_BUILTIN_FMUL8X16AL:
10949 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10950
10951 for (i = 0; i < num; ++i)
10952 {
10953 int val
10954 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10955 scale);
10956 n_elts[i] = build_int_cst (inner_type, val);
10957 }
10958 break;
10959
10960 default:
10961 gcc_unreachable ();
10962 }
10963 }
10964
10965 /* Implement TARGET_FOLD_BUILTIN hook.
10966
10967 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10968 result of the function call is ignored. NULL_TREE is returned if the
10969 function could not be folded. */
10970
10971 static tree
10972 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10973 tree *args, bool ignore)
10974 {
10975 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10976 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10977 tree arg0, arg1, arg2;
10978
10979 if (ignore)
10980 switch (code)
10981 {
10982 case SPARC_BUILTIN_LDFSR:
10983 case SPARC_BUILTIN_STFSR:
10984 case SPARC_BUILTIN_ALIGNADDR:
10985 case SPARC_BUILTIN_WRGSR:
10986 case SPARC_BUILTIN_BMASK:
10987 case SPARC_BUILTIN_CMASK8:
10988 case SPARC_BUILTIN_CMASK16:
10989 case SPARC_BUILTIN_CMASK32:
10990 break;
10991
10992 default:
10993 return build_zero_cst (rtype);
10994 }
10995
10996 switch (code)
10997 {
10998 case SPARC_BUILTIN_FEXPAND:
10999 arg0 = args[0];
11000 STRIP_NOPS (arg0);
11001
11002 if (TREE_CODE (arg0) == VECTOR_CST)
11003 {
11004 tree inner_type = TREE_TYPE (rtype);
11005 tree *n_elts;
11006 unsigned i;
11007
11008 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11009 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11010 n_elts[i] = build_int_cst (inner_type,
11011 TREE_INT_CST_LOW
11012 (VECTOR_CST_ELT (arg0, i)) << 4);
11013 return build_vector (rtype, n_elts);
11014 }
11015 break;
11016
11017 case SPARC_BUILTIN_FMUL8X16:
11018 case SPARC_BUILTIN_FMUL8X16AU:
11019 case SPARC_BUILTIN_FMUL8X16AL:
11020 arg0 = args[0];
11021 arg1 = args[1];
11022 STRIP_NOPS (arg0);
11023 STRIP_NOPS (arg1);
11024
11025 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11026 {
11027 tree inner_type = TREE_TYPE (rtype);
11028 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11029 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11030 return build_vector (rtype, n_elts);
11031 }
11032 break;
11033
11034 case SPARC_BUILTIN_FPMERGE:
11035 arg0 = args[0];
11036 arg1 = args[1];
11037 STRIP_NOPS (arg0);
11038 STRIP_NOPS (arg1);
11039
11040 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11041 {
11042 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11043 unsigned i;
11044 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11045 {
11046 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11047 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11048 }
11049
11050 return build_vector (rtype, n_elts);
11051 }
11052 break;
11053
11054 case SPARC_BUILTIN_PDIST:
11055 case SPARC_BUILTIN_PDISTN:
11056 arg0 = args[0];
11057 arg1 = args[1];
11058 STRIP_NOPS (arg0);
11059 STRIP_NOPS (arg1);
11060 if (code == SPARC_BUILTIN_PDIST)
11061 {
11062 arg2 = args[2];
11063 STRIP_NOPS (arg2);
11064 }
11065 else
11066 arg2 = integer_zero_node;
11067
11068 if (TREE_CODE (arg0) == VECTOR_CST
11069 && TREE_CODE (arg1) == VECTOR_CST
11070 && TREE_CODE (arg2) == INTEGER_CST)
11071 {
11072 bool overflow = false;
11073 widest_int result = wi::to_widest (arg2);
11074 widest_int tmp;
11075 unsigned i;
11076
11077 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11078 {
11079 tree e0 = VECTOR_CST_ELT (arg0, i);
11080 tree e1 = VECTOR_CST_ELT (arg1, i);
11081
11082 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11083
11084 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11085 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11086 if (wi::neg_p (tmp))
11087 tmp = wi::neg (tmp, &neg2_ovf);
11088 else
11089 neg2_ovf = false;
11090 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11091 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11092 }
11093
11094 gcc_assert (!overflow);
11095
11096 return wide_int_to_tree (rtype, result);
11097 }
11098
11099 default:
11100 break;
11101 }
11102
11103 return NULL_TREE;
11104 }
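/* Illustrative sketch of the folding above (hypothetical constants): a call
   to __builtin_vis_fexpand on the constant vector { 1, 2, 3, 4 } shifts each
   element left by 4 and therefore folds to the constant vector
   { 16, 32, 48, 64 } without emitting any insn.  */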
11105 \f
11106 /* ??? This duplicates information provided to the compiler by the
11107 ??? scheduler description. Some day, teach genautomata to output
11108 ??? the latencies and then CSE will just use that. */
11109
11110 static bool
11111 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11112 int opno ATTRIBUTE_UNUSED,
11113 int *total, bool speed ATTRIBUTE_UNUSED)
11114 {
11115 int code = GET_CODE (x);
11116 bool float_mode_p = FLOAT_MODE_P (mode);
11117
11118 switch (code)
11119 {
11120 case CONST_INT:
11121 if (SMALL_INT (x))
11122 *total = 0;
11123 else
11124 *total = 2;
11125 return true;
11126
11127 case CONST_WIDE_INT:
11128 *total = 0;
11129 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11130 *total += 2;
11131 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11132 *total += 2;
11133 return true;
11134
11135 case HIGH:
11136 *total = 2;
11137 return true;
11138
11139 case CONST:
11140 case LABEL_REF:
11141 case SYMBOL_REF:
11142 *total = 4;
11143 return true;
11144
11145 case CONST_DOUBLE:
11146 *total = 8;
11147 return true;
11148
11149 case MEM:
11150 /* If outer-code was a sign or zero extension, a cost
11151 of COSTS_N_INSNS (1) was already added in. This is
11152 why we are subtracting it back out. */
11153 if (outer_code == ZERO_EXTEND)
11154 {
11155 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11156 }
11157 else if (outer_code == SIGN_EXTEND)
11158 {
11159 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11160 }
11161 else if (float_mode_p)
11162 {
11163 *total = sparc_costs->float_load;
11164 }
11165 else
11166 {
11167 *total = sparc_costs->int_load;
11168 }
11169
11170 return true;
11171
11172 case PLUS:
11173 case MINUS:
11174 if (float_mode_p)
11175 *total = sparc_costs->float_plusminus;
11176 else
11177 *total = COSTS_N_INSNS (1);
11178 return false;
11179
11180 case FMA:
11181 {
11182 rtx sub;
11183
11184 gcc_assert (float_mode_p);
11185 *total = sparc_costs->float_mul;
11186
11187 sub = XEXP (x, 0);
11188 if (GET_CODE (sub) == NEG)
11189 sub = XEXP (sub, 0);
11190 *total += rtx_cost (sub, mode, FMA, 0, speed);
11191
11192 sub = XEXP (x, 2);
11193 if (GET_CODE (sub) == NEG)
11194 sub = XEXP (sub, 0);
11195 *total += rtx_cost (sub, mode, FMA, 2, speed);
11196 return true;
11197 }
11198
11199 case MULT:
11200 if (float_mode_p)
11201 *total = sparc_costs->float_mul;
11202 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11203 *total = COSTS_N_INSNS (25);
11204 else
11205 {
11206 int bit_cost;
11207
11208 bit_cost = 0;
11209 if (sparc_costs->int_mul_bit_factor)
11210 {
11211 int nbits;
11212
11213 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11214 {
11215 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11216 for (nbits = 0; value != 0; value &= value - 1)
11217 nbits++;
11218 }
11219 else
11220 nbits = 7;
11221
11222 if (nbits < 3)
11223 nbits = 3;
11224 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11225 bit_cost = COSTS_N_INSNS (bit_cost);
11226 }
11227
11228 if (mode == DImode || !TARGET_HARD_MUL)
11229 *total = sparc_costs->int_mulX + bit_cost;
11230 else
11231 *total = sparc_costs->int_mul + bit_cost;
11232 }
11233 return false;
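      /* Example for the MULT case above (hypothetical cost table): a multiply
	 by the constant 0x0f0f sets nbits to 8 (eight one bits), so with
	 int_mul_bit_factor == 2 the extra cost is
	 COSTS_N_INSNS ((8 - 3) / 2) = COSTS_N_INSNS (2) on top of int_mul
	 or int_mulX.  */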
11234
11235 case ASHIFT:
11236 case ASHIFTRT:
11237 case LSHIFTRT:
11238 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11239 return false;
11240
11241 case DIV:
11242 case UDIV:
11243 case MOD:
11244 case UMOD:
11245 if (float_mode_p)
11246 {
11247 if (mode == DFmode)
11248 *total = sparc_costs->float_div_df;
11249 else
11250 *total = sparc_costs->float_div_sf;
11251 }
11252 else
11253 {
11254 if (mode == DImode)
11255 *total = sparc_costs->int_divX;
11256 else
11257 *total = sparc_costs->int_div;
11258 }
11259 return false;
11260
11261 case NEG:
11262 if (! float_mode_p)
11263 {
11264 *total = COSTS_N_INSNS (1);
11265 return false;
11266 }
11267 /* FALLTHRU */
11268
11269 case ABS:
11270 case FLOAT:
11271 case UNSIGNED_FLOAT:
11272 case FIX:
11273 case UNSIGNED_FIX:
11274 case FLOAT_EXTEND:
11275 case FLOAT_TRUNCATE:
11276 *total = sparc_costs->float_move;
11277 return false;
11278
11279 case SQRT:
11280 if (mode == DFmode)
11281 *total = sparc_costs->float_sqrt_df;
11282 else
11283 *total = sparc_costs->float_sqrt_sf;
11284 return false;
11285
11286 case COMPARE:
11287 if (float_mode_p)
11288 *total = sparc_costs->float_cmp;
11289 else
11290 *total = COSTS_N_INSNS (1);
11291 return false;
11292
11293 case IF_THEN_ELSE:
11294 if (float_mode_p)
11295 *total = sparc_costs->float_cmove;
11296 else
11297 *total = sparc_costs->int_cmove;
11298 return false;
11299
11300 case IOR:
11301 /* Handle the NAND vector patterns. */
11302 if (sparc_vector_mode_supported_p (mode)
11303 && GET_CODE (XEXP (x, 0)) == NOT
11304 && GET_CODE (XEXP (x, 1)) == NOT)
11305 {
11306 *total = COSTS_N_INSNS (1);
11307 return true;
11308 }
11309 else
11310 return false;
11311
11312 default:
11313 return false;
11314 }
11315 }
11316
11317 /* Return true if RCLASS is either GENERAL_REGS or I64_REGS. */
11318
11319 static inline bool
11320 general_or_i64_p (reg_class_t rclass)
11321 {
11322 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11323 }
11324
11325 /* Implement TARGET_REGISTER_MOVE_COST. */
11326
11327 static int
11328 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11329 reg_class_t from, reg_class_t to)
11330 {
11331 bool need_memory = false;
11332
11333 /* This helps postreload CSE to eliminate redundant comparisons. */
11334 if (from == NO_REGS || to == NO_REGS)
11335 return 100;
11336
11337 if (from == FPCC_REGS || to == FPCC_REGS)
11338 need_memory = true;
11339 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11340 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11341 {
11342 if (TARGET_VIS3)
11343 {
11344 int size = GET_MODE_SIZE (mode);
11345 if (size == 8 || size == 4)
11346 {
11347 if (! TARGET_ARCH32 || size == 4)
11348 return 4;
11349 else
11350 return 6;
11351 }
11352 }
11353 need_memory = true;
11354 }
11355
11356 if (need_memory)
11357 {
11358 if (sparc_cpu == PROCESSOR_ULTRASPARC
11359 || sparc_cpu == PROCESSOR_ULTRASPARC3
11360 || sparc_cpu == PROCESSOR_NIAGARA
11361 || sparc_cpu == PROCESSOR_NIAGARA2
11362 || sparc_cpu == PROCESSOR_NIAGARA3
11363 || sparc_cpu == PROCESSOR_NIAGARA4
11364 || sparc_cpu == PROCESSOR_NIAGARA7)
11365 return 12;
11366
11367 return 6;
11368 }
11369
11370 return 2;
11371 }
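/* Illustrative values of the cost function above (hypothetical CPUs): with
   VIS3, a 4- or 8-byte move between the integer and FP register files costs
   4 on 64-bit (6 for 8-byte values on 32-bit); without VIS3 such a move must
   go through memory, which is priced at 12 on the UltraSPARC and Niagara
   families and 6 elsewhere.  */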
11372
11373 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11374 This is achieved by means of a manual dynamic stack space allocation in
11375 the current frame. We make the assumption that SEQ doesn't contain any
11376 function calls, with the possible exception of calls to the GOT helper. */
11377
11378 static void
11379 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11380 {
11381 /* We must preserve the lowest 16 words for the register save area. */
11382 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11383 /* We really need only 2 words of fresh stack space. */
11384 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11385
11386 rtx slot
11387 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11388 SPARC_STACK_BIAS + offset));
11389
11390 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11391 emit_insn (gen_rtx_SET (slot, reg));
11392 if (reg2)
11393 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11394 reg2));
11395 emit_insn (seq);
11396 if (reg2)
11397 emit_insn (gen_rtx_SET (reg2,
11398 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11399 emit_insn (gen_rtx_SET (reg, slot));
11400 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11401 }
11402
11403 /* Output the assembler code for a thunk function. THUNK_DECL is the
11404 declaration for the thunk function itself, FUNCTION is the decl for
11405 the target function. DELTA is an immediate constant offset to be
11406 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11407 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11408
11409 static void
11410 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11411 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11412 tree function)
11413 {
11414 rtx this_rtx, funexp;
11415 rtx_insn *insn;
11416 unsigned int int_arg_first;
11417
11418 reload_completed = 1;
11419 epilogue_completed = 1;
11420
11421 emit_note (NOTE_INSN_PROLOGUE_END);
11422
11423 if (TARGET_FLAT)
11424 {
11425 sparc_leaf_function_p = 1;
11426
11427 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11428 }
11429 else if (flag_delayed_branch)
11430 {
11431 /* We will emit a regular sibcall below, so we need to instruct
11432 output_sibcall that we are in a leaf function. */
11433 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11434
11435 /* This will cause final.c to invoke leaf_renumber_regs so we
11436 must behave as if we were in a not-yet-leafified function. */
11437 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11438 }
11439 else
11440 {
11441 /* We will emit the sibcall manually below, so we will need to
11442 manually spill non-leaf registers. */
11443 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11444
11445 /* We really are in a leaf function. */
11446 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11447 }
11448
11449 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11450 returns a structure, the structure return pointer is there instead. */
11451 if (TARGET_ARCH64
11452 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11453 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11454 else
11455 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11456
11457 /* Add DELTA. When possible use a plain add, otherwise load it into
11458 a register first. */
11459 if (delta)
11460 {
11461 rtx delta_rtx = GEN_INT (delta);
11462
11463 if (! SPARC_SIMM13_P (delta))
11464 {
11465 rtx scratch = gen_rtx_REG (Pmode, 1);
11466 emit_move_insn (scratch, delta_rtx);
11467 delta_rtx = scratch;
11468 }
11469
11470 /* THIS_RTX += DELTA. */
11471 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11472 }
11473
11474 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11475 if (vcall_offset)
11476 {
11477 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11478 rtx scratch = gen_rtx_REG (Pmode, 1);
11479
11480 gcc_assert (vcall_offset < 0);
11481
11482 /* SCRATCH = *THIS_RTX. */
11483 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11484
11485 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11486 may not have any available scratch register at this point. */
11487 if (SPARC_SIMM13_P (vcall_offset))
11488 ;
11489 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11490 else if (! fixed_regs[5]
11491 /* The below sequence is made up of at least 2 insns,
11492 while the default method may need only one. */
11493 && vcall_offset < -8192)
11494 {
11495 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11496 emit_move_insn (scratch2, vcall_offset_rtx);
11497 vcall_offset_rtx = scratch2;
11498 }
11499 else
11500 {
11501 rtx increment = GEN_INT (-4096);
11502
11503 /* VCALL_OFFSET is a negative number whose typical range can be
11504 estimated as -32768..0 in 32-bit mode. In almost all cases
11505 it is therefore cheaper to emit multiple add insns than
11506 spilling and loading the constant into a register (at least
11507 6 insns). */
11508 while (! SPARC_SIMM13_P (vcall_offset))
11509 {
11510 emit_insn (gen_add2_insn (scratch, increment));
11511 vcall_offset += 4096;
11512 }
11513 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11514 }
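	  /* Worked example (hypothetical offset): for VCALL_OFFSET == -6000
	     the loop above emits a single "add scratch, -4096" and leaves
	     -1904, which satisfies SPARC_SIMM13_P and is folded directly
	     into the load below.  */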
11515
11516 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11517 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11518 gen_rtx_PLUS (Pmode,
11519 scratch,
11520 vcall_offset_rtx)));
11521
11522 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11523 emit_insn (gen_add2_insn (this_rtx, scratch));
11524 }
11525
11526 /* Generate a tail call to the target function. */
11527 if (! TREE_USED (function))
11528 {
11529 assemble_external (function);
11530 TREE_USED (function) = 1;
11531 }
11532 funexp = XEXP (DECL_RTL (function), 0);
11533
11534 if (flag_delayed_branch)
11535 {
11536 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11537 insn = emit_call_insn (gen_sibcall (funexp));
11538 SIBLING_CALL_P (insn) = 1;
11539 }
11540 else
11541 {
11542 /* The hoops we have to jump through in order to generate a sibcall
11543 without using delay slots... */
11544 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11545
11546 if (flag_pic)
11547 {
11548 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11549 start_sequence ();
11550 load_got_register (); /* clobbers %o7 */
11551 scratch = sparc_legitimize_pic_address (funexp, scratch);
11552 seq = get_insns ();
11553 end_sequence ();
11554 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11555 }
11556 else if (TARGET_ARCH32)
11557 {
11558 emit_insn (gen_rtx_SET (scratch,
11559 gen_rtx_HIGH (SImode, funexp)));
11560 emit_insn (gen_rtx_SET (scratch,
11561 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11562 }
11563 else /* TARGET_ARCH64 */
11564 {
11565 switch (sparc_cmodel)
11566 {
11567 case CM_MEDLOW:
11568 case CM_MEDMID:
11569 /* The destination can serve as a temporary. */
11570 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11571 break;
11572
11573 case CM_MEDANY:
11574 case CM_EMBMEDANY:
11575 /* The destination cannot serve as a temporary. */
11576 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11577 start_sequence ();
11578 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11579 seq = get_insns ();
11580 end_sequence ();
11581 emit_and_preserve (seq, spill_reg, 0);
11582 break;
11583
11584 default:
11585 gcc_unreachable ();
11586 }
11587 }
11588
11589 emit_jump_insn (gen_indirect_jump (scratch));
11590 }
11591
11592 emit_barrier ();
11593
11594 /* Run just enough of rest_of_compilation to get the insns emitted.
11595 There's not really enough bulk here to make other passes such as
11596 instruction scheduling worthwhile. Note that use_thunk calls
11597 assemble_start_function and assemble_end_function. */
11598 insn = get_insns ();
11599 shorten_branches (insn);
11600 final_start_function (insn, file, 1);
11601 final (insn, file, 1);
11602 final_end_function ();
11603
11604 reload_completed = 0;
11605 epilogue_completed = 0;
11606 }
11607
11608 /* Return true if sparc_output_mi_thunk would be able to output the
11609 assembler code for the thunk function specified by the arguments
11610 it is passed, and false otherwise. */
11611 static bool
11612 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11613 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11614 HOST_WIDE_INT vcall_offset,
11615 const_tree function ATTRIBUTE_UNUSED)
11616 {
11617 /* Bound the loop used in the default method above. */
11618 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11619 }
11620
11621 /* How to allocate a 'struct machine_function'. */
11622
11623 static struct machine_function *
11624 sparc_init_machine_status (void)
11625 {
11626 return ggc_cleared_alloc<machine_function> ();
11627 }
11628
11629 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11630 We need to emit DTP-relative relocations. */
11631
11632 static void
11633 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11634 {
11635 switch (size)
11636 {
11637 case 4:
11638 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11639 break;
11640 case 8:
11641 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11642 break;
11643 default:
11644 gcc_unreachable ();
11645 }
11646 output_addr_const (file, x);
11647 fputs (")", file);
11648 }
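/* Illustrative output of the hook above: for a 4-byte relocation against a
   hypothetical TLS symbol "foo" it prints

     .word %r_tls_dtpoff32(foo)

   while the 8-byte case uses .xword and %r_tls_dtpoff64 instead.  */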
11649
11650 /* Do whatever processing is required at the end of a file. */
11651
11652 static void
11653 sparc_file_end (void)
11654 {
11655 /* If we need to emit the special GOT helper function, do so now. */
11656 if (got_helper_rtx)
11657 {
11658 const char *name = XSTR (got_helper_rtx, 0);
11659 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11660 #ifdef DWARF2_UNWIND_INFO
11661 bool do_cfi;
11662 #endif
11663
11664 if (USE_HIDDEN_LINKONCE)
11665 {
11666 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11667 get_identifier (name),
11668 build_function_type_list (void_type_node,
11669 NULL_TREE));
11670 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11671 NULL_TREE, void_type_node);
11672 TREE_PUBLIC (decl) = 1;
11673 TREE_STATIC (decl) = 1;
11674 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11675 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11676 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11677 resolve_unique_section (decl, 0, flag_function_sections);
11678 allocate_struct_function (decl, true);
11679 cfun->is_thunk = 1;
11680 current_function_decl = decl;
11681 init_varasm_status ();
11682 assemble_start_function (decl, name);
11683 }
11684 else
11685 {
11686 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11687 switch_to_section (text_section);
11688 if (align > 0)
11689 ASM_OUTPUT_ALIGN (asm_out_file, align);
11690 ASM_OUTPUT_LABEL (asm_out_file, name);
11691 }
11692
11693 #ifdef DWARF2_UNWIND_INFO
11694 do_cfi = dwarf2out_do_cfi_asm ();
11695 if (do_cfi)
11696 fprintf (asm_out_file, "\t.cfi_startproc\n");
11697 #endif
11698 if (flag_delayed_branch)
11699 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11700 reg_name, reg_name);
11701 else
11702 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11703 reg_name, reg_name);
11704 #ifdef DWARF2_UNWIND_INFO
11705 if (do_cfi)
11706 fprintf (asm_out_file, "\t.cfi_endproc\n");
11707 #endif
11708 }
11709
11710 if (NEED_INDICATE_EXEC_STACK)
11711 file_end_indicate_exec_stack ();
11712
11713 #ifdef TARGET_SOLARIS
11714 solaris_file_end ();
11715 #endif
11716 }
11717
11718 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11719 /* Implement TARGET_MANGLE_TYPE. */
11720
11721 static const char *
11722 sparc_mangle_type (const_tree type)
11723 {
11724 if (!TARGET_64BIT
11725 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11726 && TARGET_LONG_DOUBLE_128)
11727 return "g";
11728
11729 /* For all other types, use normal C++ mangling. */
11730 return NULL;
11731 }
11732 #endif
11733
11734 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11735 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11736 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11737
11738 void
11739 sparc_emit_membar_for_model (enum memmodel model,
11740 int load_store, int before_after)
11741 {
11742 /* Bits for the MEMBAR mmask field. */
11743 const int LoadLoad = 1;
11744 const int StoreLoad = 2;
11745 const int LoadStore = 4;
11746 const int StoreStore = 8;
11747
11748 int mm = 0, implied = 0;
11749
11750 switch (sparc_memory_model)
11751 {
11752 case SMM_SC:
11753 /* Sequential Consistency. All memory transactions are immediately
11754 visible in sequential execution order. No barriers needed. */
11755 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11756 break;
11757
11758 case SMM_TSO:
11759 /* Total Store Ordering: all memory transactions with store semantics
11760 are followed by an implied StoreStore. */
11761 implied |= StoreStore;
11762
11763 /* If we're not looking for a raw barrier (before+after), then atomic
11764 operations get the benefit of being both load and store. */
11765 if (load_store == 3 && before_after == 1)
11766 implied |= StoreLoad;
11767 /* FALLTHRU */
11768
11769 case SMM_PSO:
11770 /* Partial Store Ordering: all memory transactions with load semantics
11771 are followed by an implied LoadLoad | LoadStore. */
11772 implied |= LoadLoad | LoadStore;
11773
11774 /* If we're not looking for a raw barrier (before+after), then atomic
11775 operations get the benefit of being both load and store. */
11776 if (load_store == 3 && before_after == 2)
11777 implied |= StoreLoad | StoreStore;
11778 /* FALLTHRU */
11779
11780 case SMM_RMO:
11781 /* Relaxed Memory Ordering: no implicit bits. */
11782 break;
11783
11784 default:
11785 gcc_unreachable ();
11786 }
11787
11788 if (before_after & 1)
11789 {
11790 if (is_mm_release (model) || is_mm_acq_rel (model)
11791 || is_mm_seq_cst (model))
11792 {
11793 if (load_store & 1)
11794 mm |= LoadLoad | StoreLoad;
11795 if (load_store & 2)
11796 mm |= LoadStore | StoreStore;
11797 }
11798 }
11799 if (before_after & 2)
11800 {
11801 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11802 || is_mm_seq_cst (model))
11803 {
11804 if (load_store & 1)
11805 mm |= LoadLoad | LoadStore;
11806 if (load_store & 2)
11807 mm |= StoreLoad | StoreStore;
11808 }
11809 }
11810
11811 /* Remove the bits implied by the system memory model. */
11812 mm &= ~implied;
11813
11814 /* For raw barriers (before+after), always emit a barrier.
11815 This will become a compile-time barrier if needed. */
11816 if (mm || before_after == 3)
11817 emit_insn (gen_membar (GEN_INT (mm)));
11818 }
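/* Illustrative trace of the function above (hypothetical call): a barrier
   before a seq_cst store is requested with LOAD_STORE == 2 and
   BEFORE_AFTER == 1.  Under -mmemory-model=rmo nothing is implied, so MM
   becomes LoadStore | StoreStore and "membar #LoadStore | #StoreStore" is
   emitted.  Under -mmemory-model=tso both of these bits are already implied,
   MM masks down to zero and no membar is emitted at all.  */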
11819
11820 /* Expand code to perform an 8- or 16-bit compare and swap by doing a 32-bit
11821 compare and swap on the word containing the byte or half-word. */
11822
11823 static void
11824 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11825 rtx oldval, rtx newval)
11826 {
11827 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11828 rtx addr = gen_reg_rtx (Pmode);
11829 rtx off = gen_reg_rtx (SImode);
11830 rtx oldv = gen_reg_rtx (SImode);
11831 rtx newv = gen_reg_rtx (SImode);
11832 rtx oldvalue = gen_reg_rtx (SImode);
11833 rtx newvalue = gen_reg_rtx (SImode);
11834 rtx res = gen_reg_rtx (SImode);
11835 rtx resv = gen_reg_rtx (SImode);
11836 rtx memsi, val, mask, cc;
11837
11838 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11839
11840 if (Pmode != SImode)
11841 addr1 = gen_lowpart (SImode, addr1);
11842 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11843
11844 memsi = gen_rtx_MEM (SImode, addr);
11845 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11846 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11847
11848 val = copy_to_reg (memsi);
11849
11850 emit_insn (gen_rtx_SET (off,
11851 gen_rtx_XOR (SImode, off,
11852 GEN_INT (GET_MODE (mem) == QImode
11853 ? 3 : 2))));
11854
11855 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11856
11857 if (GET_MODE (mem) == QImode)
11858 mask = force_reg (SImode, GEN_INT (0xff));
11859 else
11860 mask = force_reg (SImode, GEN_INT (0xffff));
11861
11862 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11863
11864 emit_insn (gen_rtx_SET (val,
11865 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11866 val)));
11867
11868 oldval = gen_lowpart (SImode, oldval);
11869 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11870
11871 newval = gen_lowpart_common (SImode, newval);
11872 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11873
11874 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11875
11876 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11877
11878 rtx_code_label *end_label = gen_label_rtx ();
11879 rtx_code_label *loop_label = gen_label_rtx ();
11880 emit_label (loop_label);
11881
11882 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11883
11884 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11885
11886 emit_move_insn (bool_result, const1_rtx);
11887
11888 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11889
11890 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11891
11892 emit_insn (gen_rtx_SET (resv,
11893 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11894 res)));
11895
11896 emit_move_insn (bool_result, const0_rtx);
11897
11898 cc = gen_compare_reg_1 (NE, resv, val);
11899 emit_insn (gen_rtx_SET (val, resv));
11900
11901 /* Use cbranchcc4 to separate the compare and branch! */
11902 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11903 cc, const0_rtx, loop_label));
11904
11905 emit_label (end_label);
11906
11907 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11908
11909 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11910
11911 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11912 }
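/* Worked example of the address arithmetic above (hypothetical address):
   a QImode access at 0x1001 gives addr = 0x1000 and off = (0x1001 & 3) ^ 3
   = 2, i.e. a shift of 16 bits, so mask = 0xff << 16 = 0x00ff0000, which
   selects byte 1 of the big-endian 32-bit word used by the CAS loop.  */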
11913
11914 /* Expand code to perform a compare-and-swap. */
11915
11916 void
11917 sparc_expand_compare_and_swap (rtx operands[])
11918 {
11919 rtx bval, retval, mem, oldval, newval;
11920 machine_mode mode;
11921 enum memmodel model;
11922
11923 bval = operands[0];
11924 retval = operands[1];
11925 mem = operands[2];
11926 oldval = operands[3];
11927 newval = operands[4];
11928 model = (enum memmodel) INTVAL (operands[6]);
11929 mode = GET_MODE (mem);
11930
11931 sparc_emit_membar_for_model (model, 3, 1);
11932
11933 if (reg_overlap_mentioned_p (retval, oldval))
11934 oldval = copy_to_reg (oldval);
11935
11936 if (mode == QImode || mode == HImode)
11937 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11938 else
11939 {
11940 rtx (*gen) (rtx, rtx, rtx, rtx);
11941 rtx x;
11942
11943 if (mode == SImode)
11944 gen = gen_atomic_compare_and_swapsi_1;
11945 else
11946 gen = gen_atomic_compare_and_swapdi_1;
11947 emit_insn (gen (retval, mem, oldval, newval));
11948
11949 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11950 if (x != bval)
11951 convert_move (bval, x, 1);
11952 }
11953
11954 sparc_emit_membar_for_model (model, 3, 2);
11955 }
11956
11957 void
11958 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11959 {
11960 rtx t_1, t_2, t_3;
11961
11962 sel = gen_lowpart (DImode, sel);
11963 switch (vmode)
11964 {
11965 case V2SImode:
11966 /* inp = xxxxxxxAxxxxxxxB */
11967 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11968 NULL_RTX, 1, OPTAB_DIRECT);
11969 /* t_1 = ....xxxxxxxAxxx. */
11970 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11971 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11972 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11973 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11974 /* sel = .......B */
11975 /* t_1 = ...A.... */
11976 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11977 /* sel = ...A...B */
11978 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11979 /* sel = AAAABBBB * 4 */
11980 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11981 /* sel = { A*4, A*4+1, A*4+2, ... } */
11982 break;
11983
11984 case V4HImode:
11985 /* inp = xxxAxxxBxxxCxxxD */
11986 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11987 NULL_RTX, 1, OPTAB_DIRECT);
11988 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11989 NULL_RTX, 1, OPTAB_DIRECT);
11990 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11991 NULL_RTX, 1, OPTAB_DIRECT);
11992 /* t_1 = ..xxxAxxxBxxxCxx */
11993 /* t_2 = ....xxxAxxxBxxxC */
11994 /* t_3 = ......xxxAxxxBxx */
11995 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11996 GEN_INT (0x07),
11997 NULL_RTX, 1, OPTAB_DIRECT);
11998 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11999 GEN_INT (0x0700),
12000 NULL_RTX, 1, OPTAB_DIRECT);
12001 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12002 GEN_INT (0x070000),
12003 NULL_RTX, 1, OPTAB_DIRECT);
12004 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12005 GEN_INT (0x07000000),
12006 NULL_RTX, 1, OPTAB_DIRECT);
12007 /* sel = .......D */
12008 /* t_1 = .....C.. */
12009 /* t_2 = ...B.... */
12010 /* t_3 = .A...... */
12011 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12012 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12013 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12014 /* sel = .A.B.C.D */
12015 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12016 /* sel = AABBCCDD * 2 */
12017 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12018 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12019 break;
12020
12021 case V8QImode:
12022 /* input = xAxBxCxDxExFxGxH */
12023 sel = expand_simple_binop (DImode, AND, sel,
12024 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12025 | 0x0f0f0f0f),
12026 NULL_RTX, 1, OPTAB_DIRECT);
12027 /* sel = .A.B.C.D.E.F.G.H */
12028 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12029 NULL_RTX, 1, OPTAB_DIRECT);
12030 /* t_1 = ..A.B.C.D.E.F.G. */
12031 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12032 NULL_RTX, 1, OPTAB_DIRECT);
12033 /* sel = .AABBCCDDEEFFGGH */
12034 sel = expand_simple_binop (DImode, AND, sel,
12035 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12036 | 0xff00ff),
12037 NULL_RTX, 1, OPTAB_DIRECT);
12038 /* sel = ..AB..CD..EF..GH */
12039 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12040 NULL_RTX, 1, OPTAB_DIRECT);
12041 /* t_1 = ....AB..CD..EF.. */
12042 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12043 NULL_RTX, 1, OPTAB_DIRECT);
12044 /* sel = ..ABABCDCDEFEFGH */
12045 sel = expand_simple_binop (DImode, AND, sel,
12046 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12047 NULL_RTX, 1, OPTAB_DIRECT);
12048 /* sel = ....ABCD....EFGH */
12049 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12050 NULL_RTX, 1, OPTAB_DIRECT);
12051 /* t_1 = ........ABCD.... */
12052 sel = gen_lowpart (SImode, sel);
12053 t_1 = gen_lowpart (SImode, t_1);
12054 break;
12055
12056 default:
12057 gcc_unreachable ();
12058 }
12059
12060 /* Always perform the final addition/merge within the bmask insn. */
12061 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
12062 }
12063
12064 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12065
12066 static bool
12067 sparc_frame_pointer_required (void)
12068 {
12069 /* If the stack pointer is dynamically modified in the function, it cannot
12070 serve as the frame pointer. */
12071 if (cfun->calls_alloca)
12072 return true;
12073
12074 /* If the function receives nonlocal gotos, it needs to save the frame
12075 pointer in the nonlocal_goto_save_area object. */
12076 if (cfun->has_nonlocal_label)
12077 return true;
12078
12079 /* In flat mode, that's it. */
12080 if (TARGET_FLAT)
12081 return false;
12082
12083 /* Otherwise, the frame pointer is required if the function isn't leaf. */
12084 return !(crtl->is_leaf && only_leaf_regs_used ());
12085 }
12086
12087 /* The way this is structured, we can't eliminate SFP in favor of SP
12088 if the frame pointer is required: we want to use the SFP->HFP elimination
12089 in that case. But the test in update_eliminables doesn't know we are
12090 assuming below that we only do the former elimination. */
12091
12092 static bool
12093 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12094 {
12095 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12096 }
12097
12098 /* Return the hard frame pointer directly to bypass the stack bias. */
12099
12100 static rtx
12101 sparc_builtin_setjmp_frame_value (void)
12102 {
12103 return hard_frame_pointer_rtx;
12104 }
12105
12106 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12107 they won't be allocated. */
12108
12109 static void
12110 sparc_conditional_register_usage (void)
12111 {
12112 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12113 {
12114 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12115 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12116 }
12117 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12118 then honor it. */
12119 if (TARGET_ARCH32 && fixed_regs[5])
12120 fixed_regs[5] = 1;
12121 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12122 fixed_regs[5] = 0;
12123 if (! TARGET_V9)
12124 {
12125 int regno;
12126 for (regno = SPARC_FIRST_V9_FP_REG;
12127 regno <= SPARC_LAST_V9_FP_REG;
12128 regno++)
12129 fixed_regs[regno] = 1;
12130 /* %fcc0 is used by v8 and v9. */
12131 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12132 regno <= SPARC_LAST_V9_FCC_REG;
12133 regno++)
12134 fixed_regs[regno] = 1;
12135 }
12136 if (! TARGET_FPU)
12137 {
12138 int regno;
12139 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12140 fixed_regs[regno] = 1;
12141 }
12142 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12143 then honor it. Likewise with g3 and g4. */
12144 if (fixed_regs[2] == 2)
12145 fixed_regs[2] = ! TARGET_APP_REGS;
12146 if (fixed_regs[3] == 2)
12147 fixed_regs[3] = ! TARGET_APP_REGS;
12148 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12149 fixed_regs[4] = ! TARGET_APP_REGS;
12150 else if (TARGET_CM_EMBMEDANY)
12151 fixed_regs[4] = 1;
12152 else if (fixed_regs[4] == 2)
12153 fixed_regs[4] = 0;
12154 if (TARGET_FLAT)
12155 {
12156 int regno;
12157 /* Disable leaf functions. */
12158 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12159 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12160 leaf_reg_remap [regno] = regno;
12161 }
12162 if (TARGET_VIS)
12163 global_regs[SPARC_GSR_REG] = 1;
12164 }
12165
12166 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12167
12168 - We can't load constants into FP registers.
12169 - We can't load FP constants into integer registers when soft-float,
12170 because there is no soft-float pattern with a r/F constraint.
12171 - We can't load FP constants into integer registers for TFmode unless
12172 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12173 - Try and reload integer constants (symbolic or otherwise) back into
12174 registers directly, rather than having them dumped to memory. */
12175
12176 static reg_class_t
12177 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12178 {
12179 machine_mode mode = GET_MODE (x);
12180 if (CONSTANT_P (x))
12181 {
12182 if (FP_REG_CLASS_P (rclass)
12183 || rclass == GENERAL_OR_FP_REGS
12184 || rclass == GENERAL_OR_EXTRA_FP_REGS
12185 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12186 || (mode == TFmode && ! const_zero_operand (x, mode)))
12187 return NO_REGS;
12188
12189 if (GET_MODE_CLASS (mode) == MODE_INT)
12190 return GENERAL_REGS;
12191
12192 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12193 {
12194 if (! FP_REG_CLASS_P (rclass)
12195 || !(const_zero_operand (x, mode)
12196 || const_all_ones_operand (x, mode)))
12197 return NO_REGS;
12198 }
12199 }
12200
12201 if (TARGET_VIS3
12202 && ! TARGET_ARCH64
12203 && (rclass == EXTRA_FP_REGS
12204 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12205 {
12206 int regno = true_regnum (x);
12207
12208 if (SPARC_INT_REG_P (regno))
12209 return (rclass == EXTRA_FP_REGS
12210 ? FP_REGS : GENERAL_OR_FP_REGS);
12211 }
12212
12213 return rclass;
12214 }
12215
12216 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12217 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12218
12219 const char *
12220 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12221 {
12222 char mulstr[32];
12223
12224 gcc_assert (! TARGET_ARCH64);
12225
12226 if (sparc_check_64 (operands[1], insn) <= 0)
12227 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12228 if (which_alternative == 1)
12229 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12230 if (GET_CODE (operands[2]) == CONST_INT)
12231 {
12232 if (which_alternative == 1)
12233 {
12234 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12235 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12236 output_asm_insn (mulstr, operands);
12237 return "srlx\t%L0, 32, %H0";
12238 }
12239 else
12240 {
12241 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12242 output_asm_insn ("or\t%L1, %3, %3", operands);
12243 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12244 output_asm_insn (mulstr, operands);
12245 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12246 return "mov\t%3, %L0";
12247 }
12248 }
12249 else if (rtx_equal_p (operands[1], operands[2]))
12250 {
12251 if (which_alternative == 1)
12252 {
12253 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12254 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12255 output_asm_insn (mulstr, operands);
12256 return "srlx\t%L0, 32, %H0";
12257 }
12258 else
12259 {
12260 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12261 output_asm_insn ("or\t%L1, %3, %3", operands);
12262 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12263 output_asm_insn (mulstr, operands);
12264 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12265 return "mov\t%3, %L0";
12266 }
12267 }
12268 if (sparc_check_64 (operands[2], insn) <= 0)
12269 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12270 if (which_alternative == 1)
12271 {
12272 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12273 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12274 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12275 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12276 output_asm_insn (mulstr, operands);
12277 return "srlx\t%L0, 32, %H0";
12278 }
12279 else
12280 {
12281 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12282 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12283 output_asm_insn ("or\t%L1, %3, %3", operands);
12284 output_asm_insn ("or\t%L2, %4, %4", operands);
12285 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12286 output_asm_insn (mulstr, operands);
12287 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12288 return "mov\t%3, %L0";
12289 }
12290 }
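/* Illustrative output of the routine above, assuming OPCODE is "mulx", a
   constant multiplier and the register alternative that uses the %3
   temporary (a sketch, not a verbatim compiler dump):

     srl   %L1, 0, %L1     ! zero-extend the low word if needed
     sllx  %H1, 32, %3     ! rebuild the 64-bit value in %3
     or    %L1, %3, %3
     mulx  %3, %2, %3      ! the wide multiply itself
     srlx  %3, 32, %H0     ! split the 64-bit result back into two words
     mov   %3, %L0  */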
12291
12292 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12293 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12294 and INNER_MODE are the modes describing TARGET. */
12295
12296 static void
12297 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12298 machine_mode inner_mode)
12299 {
12300 rtx t1, final_insn, sel;
12301 int bmask;
12302
12303 t1 = gen_reg_rtx (mode);
12304
12305 elt = convert_modes (SImode, inner_mode, elt, true);
12306 emit_move_insn (gen_lowpart(SImode, t1), elt);
12307
12308 switch (mode)
12309 {
12310 case V2SImode:
12311 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12312 bmask = 0x45674567;
12313 break;
12314 case V4HImode:
12315 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12316 bmask = 0x67676767;
12317 break;
12318 case V8QImode:
12319 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12320 bmask = 0x77777777;
12321 break;
12322 default:
12323 gcc_unreachable ();
12324 }
12325
12326 sel = force_reg (SImode, GEN_INT (bmask));
12327 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12328 emit_insn (final_insn);
12329 }
12330
12331 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12332 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12333
12334 static void
12335 vector_init_fpmerge (rtx target, rtx elt)
12336 {
12337 rtx t1, t2, t2_low, t3, t3_low;
12338
12339 t1 = gen_reg_rtx (V4QImode);
12340 elt = convert_modes (SImode, QImode, elt, true);
12341 emit_move_insn (gen_lowpart (SImode, t1), elt);
12342
12343 t2 = gen_reg_rtx (V8QImode);
12344 t2_low = gen_lowpart (V4QImode, t2);
12345 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12346
12347 t3 = gen_reg_rtx (V8QImode);
12348 t3_low = gen_lowpart (V4QImode, t3);
12349 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12350
12351 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12352 }
12353
12354 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12355 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12356
12357 static void
12358 vector_init_faligndata (rtx target, rtx elt)
12359 {
12360 rtx t1 = gen_reg_rtx (V4HImode);
12361 int i;
12362
12363 elt = convert_modes (SImode, HImode, elt, true);
12364 emit_move_insn (gen_lowpart (SImode, t1), elt);
12365
12366 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12367 force_reg (SImode, GEN_INT (6)),
12368 const0_rtx));
12369
12370 for (i = 0; i < 4; i++)
12371 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12372 }
12373
12374 /* Emit code to initialize TARGET to values for individual fields VALS. */
12375
12376 void
12377 sparc_expand_vector_init (rtx target, rtx vals)
12378 {
12379 const machine_mode mode = GET_MODE (target);
12380 const machine_mode inner_mode = GET_MODE_INNER (mode);
12381 const int n_elts = GET_MODE_NUNITS (mode);
12382 int i, n_var = 0;
12383 bool all_same;
12384 rtx mem;
12385
12386 all_same = true;
12387 for (i = 0; i < n_elts; i++)
12388 {
12389 rtx x = XVECEXP (vals, 0, i);
12390 if (!CONSTANT_P (x))
12391 n_var++;
12392
12393 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12394 all_same = false;
12395 }
12396
12397 if (n_var == 0)
12398 {
12399 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12400 return;
12401 }
12402
12403 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12404 {
12405 if (GET_MODE_SIZE (inner_mode) == 4)
12406 {
12407 emit_move_insn (gen_lowpart (SImode, target),
12408 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12409 return;
12410 }
12411 else if (GET_MODE_SIZE (inner_mode) == 8)
12412 {
12413 emit_move_insn (gen_lowpart (DImode, target),
12414 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12415 return;
12416 }
12417 }
12418 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12419 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12420 {
12421 emit_move_insn (gen_highpart (word_mode, target),
12422 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12423 emit_move_insn (gen_lowpart (word_mode, target),
12424 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12425 return;
12426 }
12427
12428 if (all_same && GET_MODE_SIZE (mode) == 8)
12429 {
12430 if (TARGET_VIS2)
12431 {
12432 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12433 return;
12434 }
12435 if (mode == V8QImode)
12436 {
12437 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12438 return;
12439 }
12440 if (mode == V4HImode)
12441 {
12442 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12443 return;
12444 }
12445 }
12446
12447 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12448 for (i = 0; i < n_elts; i++)
12449 emit_move_insn (adjust_address_nv (mem, inner_mode,
12450 i * GET_MODE_SIZE (inner_mode)),
12451 XVECEXP (vals, 0, i));
12452 emit_move_insn (target, mem);
12453 }
12454
12455 /* Implement TARGET_SECONDARY_RELOAD. */
12456
12457 static reg_class_t
12458 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12459 machine_mode mode, secondary_reload_info *sri)
12460 {
12461 enum reg_class rclass = (enum reg_class) rclass_i;
12462
12463 sri->icode = CODE_FOR_nothing;
12464 sri->extra_cost = 0;
12465
12466 /* We need a temporary when loading/storing a HImode/QImode value
12467 between memory and the FPU registers. This can happen when combine puts
12468 a paradoxical subreg in a float/fix conversion insn. */
12469 if (FP_REG_CLASS_P (rclass)
12470 && (mode == HImode || mode == QImode)
12471 && (GET_CODE (x) == MEM
12472 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12473 && true_regnum (x) == -1)))
12474 return GENERAL_REGS;
12475
12476 /* On 32-bit we need a temporary when loading/storing a DFmode value
12477 between unaligned memory and the upper FPU registers. */
12478 if (TARGET_ARCH32
12479 && rclass == EXTRA_FP_REGS
12480 && mode == DFmode
12481 && GET_CODE (x) == MEM
12482 && ! mem_min_alignment (x, 8))
12483 return FP_REGS;
12484
12485 if (((TARGET_CM_MEDANY
12486 && symbolic_operand (x, mode))
12487 || (TARGET_CM_EMBMEDANY
12488 && text_segment_operand (x, mode)))
12489 && ! flag_pic)
12490 {
12491 if (in_p)
12492 sri->icode = direct_optab_handler (reload_in_optab, mode);
12493 else
12494 sri->icode = direct_optab_handler (reload_out_optab, mode);
12495 return NO_REGS;
12496 }
12497
12498 if (TARGET_VIS3 && TARGET_ARCH32)
12499 {
12500 int regno = true_regnum (x);
12501
12502 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12503 to move 8-byte values in 4-byte pieces. This only works via
12504 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12505 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12506 an FP_REGS intermediate move. */
12507 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12508 || ((general_or_i64_p (rclass)
12509 || rclass == GENERAL_OR_FP_REGS)
12510 && SPARC_FP_REG_P (regno)))
12511 {
12512 sri->extra_cost = 2;
12513 return FP_REGS;
12514 }
12515 }
12516
12517 return NO_REGS;
12518 }
12519
12520 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12521 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12522
12523 bool
12524 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12525 {
12526 enum rtx_code rc = GET_CODE (operands[1]);
12527 machine_mode cmp_mode;
12528 rtx cc_reg, dst, cmp;
12529
12530 cmp = operands[1];
12531 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12532 return false;
12533
12534 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12535 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12536
12537 cmp_mode = GET_MODE (XEXP (cmp, 0));
12538 rc = GET_CODE (cmp);
12539
12540 dst = operands[0];
12541 if (! rtx_equal_p (operands[2], dst)
12542 && ! rtx_equal_p (operands[3], dst))
12543 {
12544 if (reg_overlap_mentioned_p (dst, cmp))
12545 dst = gen_reg_rtx (mode);
12546
12547 emit_move_insn (dst, operands[3]);
12548 }
12549 else if (operands[2] == dst)
12550 {
12551 operands[2] = operands[3];
12552
12553 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12554 rc = reverse_condition_maybe_unordered (rc);
12555 else
12556 rc = reverse_condition (rc);
12557 }
12558
12559 if (XEXP (cmp, 1) == const0_rtx
12560 && GET_CODE (XEXP (cmp, 0)) == REG
12561 && cmp_mode == DImode
12562 && v9_regcmp_p (rc))
12563 cc_reg = XEXP (cmp, 0);
12564 else
12565 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12566
12567 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12568
12569 emit_insn (gen_rtx_SET (dst,
12570 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12571
12572 if (dst != operands[0])
12573 emit_move_insn (operands[0], dst);
12574
12575 return true;
12576 }
12577
12578 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12579 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12580 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12581 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12582 code to be used for the condition mask. */
12583
12584 void
12585 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12586 {
12587 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12588 enum rtx_code code = GET_CODE (operands[3]);
12589
12590 mask = gen_reg_rtx (Pmode);
12591 cop0 = operands[4];
12592 cop1 = operands[5];
12593 if (code == LT || code == GE)
12594 {
12595 rtx t;
12596
12597 code = swap_condition (code);
12598 t = cop0; cop0 = cop1; cop1 = t;
12599 }
12600
12601 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12602
12603 fcmp = gen_rtx_UNSPEC (Pmode,
12604 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12605 fcode);
12606
12607 cmask = gen_rtx_UNSPEC (DImode,
12608 gen_rtvec (2, mask, gsr),
12609 ccode);
12610
12611 bshuf = gen_rtx_UNSPEC (mode,
12612 gen_rtvec (3, operands[1], operands[2], gsr),
12613 UNSPEC_BSHUFFLE);
12614
12615 emit_insn (gen_rtx_SET (mask, fcmp));
12616 emit_insn (gen_rtx_SET (gsr, cmask));
12617
12618 emit_insn (gen_rtx_SET (operands[0], bshuf));
12619 }
12620
12621 /* On SPARC, return 4 for any mode which naturally allocates into the float
12622 registers. */
12623
12624 unsigned int
12625 sparc_regmode_natural_size (machine_mode mode)
12626 {
12627 int size = UNITS_PER_WORD;
12628
12629 if (TARGET_ARCH64)
12630 {
12631 enum mode_class mclass = GET_MODE_CLASS (mode);
12632
12633 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12634 size = 4;
12635 }
12636
12637 return size;
12638 }
12639
12640 /* Return TRUE if it is a good idea to tie two pseudo registers
12641 when one has mode MODE1 and one has mode MODE2.
12642 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12643 for any hard reg, then this must be FALSE for correct output.
12644
12645 For V9 we have to deal with the fact that only the lower 32 floating
12646 point registers are 32-bit addressable. */
12647
12648 bool
12649 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12650 {
12651 enum mode_class mclass1, mclass2;
12652 unsigned short size1, size2;
12653
12654 if (mode1 == mode2)
12655 return true;
12656
12657 mclass1 = GET_MODE_CLASS (mode1);
12658 mclass2 = GET_MODE_CLASS (mode2);
12659 if (mclass1 != mclass2)
12660 return false;
12661
12662 if (! TARGET_V9)
12663 return true;
12664
12665 /* Classes are the same and we are V9 so we have to deal with upper
12666 vs. lower floating point registers. If one of the modes is a
12667 4-byte mode, and the other is not, we have to mark them as not
12668 tieable because only the lower 32 floating point registers are
12669 addressable 32 bits at a time.
12670
12671 We can't just test explicitly for SFmode, otherwise we won't
12672 cover the vector mode cases properly. */
12673
12674 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12675 return true;
12676
12677 size1 = GET_MODE_SIZE (mode1);
12678 size2 = GET_MODE_SIZE (mode2);
12679 if ((size1 > 4 && size2 == 4)
12680 || (size2 > 4 && size1 == 4))
12681 return false;
12682
12683 return true;
12684 }
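/* Illustrative examples of the predicate above: on V9, SFmode (4 bytes) and
   DFmode (8 bytes) are in the same class but mix a 4-byte mode with a larger
   one, so they are not tieable; V8QImode and V4HImode are both 8-byte vector
   modes and therefore remain tieable.  */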
12685
12686 /* Implement TARGET_CSTORE_MODE. */
12687
12688 static machine_mode
12689 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12690 {
12691 return (TARGET_ARCH64 ? DImode : SImode);
12692 }
12693
12694 /* Return the compound expression made of T1 and T2. */
12695
12696 static inline tree
12697 compound_expr (tree t1, tree t2)
12698 {
12699 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12700 }
12701
12702 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12703
12704 static void
12705 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12706 {
12707 if (!TARGET_FPU)
12708 return;
12709
12710 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12711 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
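  /* With the shifts above, accrued_exception_mask is 0x3e0 (the aexc field,
     FSR bits 5-9) and trap_enable_mask is 0x0f800000 (the TEM field, FSR
     bits 23-27).  */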
12712
12713 /* We generate the equivalent of feholdexcept (&fenv_var):
12714
12715 unsigned int fenv_var;
12716 __builtin_store_fsr (&fenv_var);
12717
12718 unsigned int tmp1_var;
12719 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12720
12721 __builtin_load_fsr (&tmp1_var); */
12722
12723 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12724 TREE_ADDRESSABLE (fenv_var) = 1;
12725 tree fenv_addr = build_fold_addr_expr (fenv_var);
12726 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12727 tree hold_stfsr
12728 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12729 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12730
12731 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12732 TREE_ADDRESSABLE (tmp1_var) = 1;
12733 tree masked_fenv_var
12734 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12735 build_int_cst (unsigned_type_node,
12736 ~(accrued_exception_mask | trap_enable_mask)));
12737 tree hold_mask
12738 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12739 NULL_TREE, NULL_TREE);
12740
12741 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12742 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12743 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12744
12745 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12746
12747 /* We reload the value of tmp1_var to clear the exceptions:
12748
12749 __builtin_load_fsr (&tmp1_var); */
12750
12751 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12752
12753 /* We generate the equivalent of feupdateenv (&fenv_var):
12754
12755 unsigned int tmp2_var;
12756 __builtin_store_fsr (&tmp2_var);
12757
12758 __builtin_load_fsr (&fenv_var);
12759
12760 if (SPARC_LOW_FE_EXCEPT_VALUES)
12761 tmp2_var >>= 5;
12762 __atomic_feraiseexcept ((int) tmp2_var); */
12763
12764 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12765 TREE_ADDRESSABLE (tmp2_var) = 1;
12766 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12767 tree update_stfsr
12768 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12769 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12770
12771 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12772
12773 tree atomic_feraiseexcept
12774 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12775 tree update_call
12776 = build_call_expr (atomic_feraiseexcept, 1,
12777 fold_convert (integer_type_node, tmp2_var));
12778
12779 if (SPARC_LOW_FE_EXCEPT_VALUES)
12780 {
12781 tree shifted_tmp2_var
12782 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12783 build_int_cst (unsigned_type_node, 5));
12784 tree update_shift
12785 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12786 update_call = compound_expr (update_shift, update_call);
12787 }
12788
12789 *update
12790 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12791 }
12792
12793 #include "gt-sparc.h"