1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2016 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "debug.h"
51 #include "common/common-target.h"
52 #include "gimplify.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "context.h"
58 #include "builtins.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Processor costs */
64
65 struct processor_costs {
66 /* Integer load */
67 const int int_load;
68
69 /* Integer signed load */
70 const int int_sload;
71
72 /* Integer zeroed load */
73 const int int_zload;
74
75 /* Float load */
76 const int float_load;
77
78 /* fmov, fneg, fabs */
79 const int float_move;
80
81 /* fadd, fsub */
82 const int float_plusminus;
83
84 /* fcmp */
85 const int float_cmp;
86
87 /* fmov, fmovr */
88 const int float_cmove;
89
90 /* fmul */
91 const int float_mul;
92
93 /* fdivs */
94 const int float_div_sf;
95
96 /* fdivd */
97 const int float_div_df;
98
99 /* fsqrts */
100 const int float_sqrt_sf;
101
102 /* fsqrtd */
103 const int float_sqrt_df;
104
105 /* umul/smul */
106 const int int_mul;
107
108 /* mulX */
109 const int int_mulX;
110
111 /* integer multiply cost for each bit set past the most
112 significant 3, so the formula for multiply cost becomes:
113
114 if (rs1 < 0)
115 highest_bit = highest_clear_bit(rs1);
116 else
117 highest_bit = highest_set_bit(rs1);
118 if (highest_bit < 3)
119 highest_bit = 3;
120 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
121
122 	     A value of zero indicates that the multiply cost is fixed and
123 	     not variable (an illustrative sketch follows this struct).  */
124 const int int_mul_bit_factor;
125
126 /* udiv/sdiv */
127 const int int_div;
128
129 /* divX */
130 const int int_divX;
131
132 /* movcc, movr */
133 const int int_cmove;
134
135 /* penalty for shifts, due to scheduling rules etc. */
136 const int shift_penalty;
137 };
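
/* Editor's note -- illustrative sketch only, not part of the GCC sources:
   one way a cost routine could apply the variable multiply-cost formula
   described in the struct comment above.  It assumes the floor_log2 helper
   from hwint.h; the function name itself is hypothetical.  */
static inline int
example_variable_mul_cost (const struct processor_costs *costs,
                           HOST_WIDE_INT rs1)
{
  int highest_bit;

  /* A bit factor of zero means the multiply cost is fixed.  */
  if (costs->int_mul_bit_factor == 0)
    return costs->int_mul;

  /* For a negative operand the formula tracks the highest clear bit,
     i.e. the highest set bit of the complement.  */
  if (rs1 < 0)
    rs1 = ~rs1;

  highest_bit = floor_log2 (rs1 | 1);
  if (highest_bit < 3)
    highest_bit = 3;

  return costs->int_mul + (highest_bit - 3) / costs->int_mul_bit_factor;
}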
138
139 static const
140 struct processor_costs cypress_costs = {
141 COSTS_N_INSNS (2), /* int load */
142 COSTS_N_INSNS (2), /* int signed load */
143 COSTS_N_INSNS (2), /* int zeroed load */
144 COSTS_N_INSNS (2), /* float load */
145 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
146 COSTS_N_INSNS (5), /* fadd, fsub */
147 COSTS_N_INSNS (1), /* fcmp */
148 COSTS_N_INSNS (1), /* fmov, fmovr */
149 COSTS_N_INSNS (7), /* fmul */
150 COSTS_N_INSNS (37), /* fdivs */
151 COSTS_N_INSNS (37), /* fdivd */
152 COSTS_N_INSNS (63), /* fsqrts */
153 COSTS_N_INSNS (63), /* fsqrtd */
154 COSTS_N_INSNS (1), /* imul */
155 COSTS_N_INSNS (1), /* imulX */
156 0, /* imul bit factor */
157 COSTS_N_INSNS (1), /* idiv */
158 COSTS_N_INSNS (1), /* idivX */
159 COSTS_N_INSNS (1), /* movcc/movr */
160 0, /* shift penalty */
161 };
162
163 static const
164 struct processor_costs supersparc_costs = {
165 COSTS_N_INSNS (1), /* int load */
166 COSTS_N_INSNS (1), /* int signed load */
167 COSTS_N_INSNS (1), /* int zeroed load */
168 COSTS_N_INSNS (0), /* float load */
169 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
170 COSTS_N_INSNS (3), /* fadd, fsub */
171 COSTS_N_INSNS (3), /* fcmp */
172 COSTS_N_INSNS (1), /* fmov, fmovr */
173 COSTS_N_INSNS (3), /* fmul */
174 COSTS_N_INSNS (6), /* fdivs */
175 COSTS_N_INSNS (9), /* fdivd */
176 COSTS_N_INSNS (12), /* fsqrts */
177 COSTS_N_INSNS (12), /* fsqrtd */
178 COSTS_N_INSNS (4), /* imul */
179 COSTS_N_INSNS (4), /* imulX */
180 0, /* imul bit factor */
181 COSTS_N_INSNS (4), /* idiv */
182 COSTS_N_INSNS (4), /* idivX */
183 COSTS_N_INSNS (1), /* movcc/movr */
184 1, /* shift penalty */
185 };
186
187 static const
188 struct processor_costs hypersparc_costs = {
189 COSTS_N_INSNS (1), /* int load */
190 COSTS_N_INSNS (1), /* int signed load */
191 COSTS_N_INSNS (1), /* int zeroed load */
192 COSTS_N_INSNS (1), /* float load */
193 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
194 COSTS_N_INSNS (1), /* fadd, fsub */
195 COSTS_N_INSNS (1), /* fcmp */
196 COSTS_N_INSNS (1), /* fmov, fmovr */
197 COSTS_N_INSNS (1), /* fmul */
198 COSTS_N_INSNS (8), /* fdivs */
199 COSTS_N_INSNS (12), /* fdivd */
200 COSTS_N_INSNS (17), /* fsqrts */
201 COSTS_N_INSNS (17), /* fsqrtd */
202 COSTS_N_INSNS (17), /* imul */
203 COSTS_N_INSNS (17), /* imulX */
204 0, /* imul bit factor */
205 COSTS_N_INSNS (17), /* idiv */
206 COSTS_N_INSNS (17), /* idivX */
207 COSTS_N_INSNS (1), /* movcc/movr */
208 0, /* shift penalty */
209 };
210
211 static const
212 struct processor_costs leon_costs = {
213 COSTS_N_INSNS (1), /* int load */
214 COSTS_N_INSNS (1), /* int signed load */
215 COSTS_N_INSNS (1), /* int zeroed load */
216 COSTS_N_INSNS (1), /* float load */
217 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
218 COSTS_N_INSNS (1), /* fadd, fsub */
219 COSTS_N_INSNS (1), /* fcmp */
220 COSTS_N_INSNS (1), /* fmov, fmovr */
221 COSTS_N_INSNS (1), /* fmul */
222 COSTS_N_INSNS (15), /* fdivs */
223 COSTS_N_INSNS (15), /* fdivd */
224 COSTS_N_INSNS (23), /* fsqrts */
225 COSTS_N_INSNS (23), /* fsqrtd */
226 COSTS_N_INSNS (5), /* imul */
227 COSTS_N_INSNS (5), /* imulX */
228 0, /* imul bit factor */
229 COSTS_N_INSNS (5), /* idiv */
230 COSTS_N_INSNS (5), /* idivX */
231 COSTS_N_INSNS (1), /* movcc/movr */
232 0, /* shift penalty */
233 };
234
235 static const
236 struct processor_costs leon3_costs = {
237 COSTS_N_INSNS (1), /* int load */
238 COSTS_N_INSNS (1), /* int signed load */
239 COSTS_N_INSNS (1), /* int zeroed load */
240 COSTS_N_INSNS (1), /* float load */
241 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
242 COSTS_N_INSNS (1), /* fadd, fsub */
243 COSTS_N_INSNS (1), /* fcmp */
244 COSTS_N_INSNS (1), /* fmov, fmovr */
245 COSTS_N_INSNS (1), /* fmul */
246 COSTS_N_INSNS (14), /* fdivs */
247 COSTS_N_INSNS (15), /* fdivd */
248 COSTS_N_INSNS (22), /* fsqrts */
249 COSTS_N_INSNS (23), /* fsqrtd */
250 COSTS_N_INSNS (5), /* imul */
251 COSTS_N_INSNS (5), /* imulX */
252 0, /* imul bit factor */
253 COSTS_N_INSNS (35), /* idiv */
254 COSTS_N_INSNS (35), /* idivX */
255 COSTS_N_INSNS (1), /* movcc/movr */
256 0, /* shift penalty */
257 };
258
259 static const
260 struct processor_costs sparclet_costs = {
261 COSTS_N_INSNS (3), /* int load */
262 COSTS_N_INSNS (3), /* int signed load */
263 COSTS_N_INSNS (1), /* int zeroed load */
264 COSTS_N_INSNS (1), /* float load */
265 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
266 COSTS_N_INSNS (1), /* fadd, fsub */
267 COSTS_N_INSNS (1), /* fcmp */
268 COSTS_N_INSNS (1), /* fmov, fmovr */
269 COSTS_N_INSNS (1), /* fmul */
270 COSTS_N_INSNS (1), /* fdivs */
271 COSTS_N_INSNS (1), /* fdivd */
272 COSTS_N_INSNS (1), /* fsqrts */
273 COSTS_N_INSNS (1), /* fsqrtd */
274 COSTS_N_INSNS (5), /* imul */
275 COSTS_N_INSNS (5), /* imulX */
276 0, /* imul bit factor */
277 COSTS_N_INSNS (5), /* idiv */
278 COSTS_N_INSNS (5), /* idivX */
279 COSTS_N_INSNS (1), /* movcc/movr */
280 0, /* shift penalty */
281 };
282
283 static const
284 struct processor_costs ultrasparc_costs = {
285 COSTS_N_INSNS (2), /* int load */
286 COSTS_N_INSNS (3), /* int signed load */
287 COSTS_N_INSNS (2), /* int zeroed load */
288 COSTS_N_INSNS (2), /* float load */
289 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
290 COSTS_N_INSNS (4), /* fadd, fsub */
291 COSTS_N_INSNS (1), /* fcmp */
292 COSTS_N_INSNS (2), /* fmov, fmovr */
293 COSTS_N_INSNS (4), /* fmul */
294 COSTS_N_INSNS (13), /* fdivs */
295 COSTS_N_INSNS (23), /* fdivd */
296 COSTS_N_INSNS (13), /* fsqrts */
297 COSTS_N_INSNS (23), /* fsqrtd */
298 COSTS_N_INSNS (4), /* imul */
299 COSTS_N_INSNS (4), /* imulX */
300 2, /* imul bit factor */
301 COSTS_N_INSNS (37), /* idiv */
302 COSTS_N_INSNS (68), /* idivX */
303 COSTS_N_INSNS (2), /* movcc/movr */
304 2, /* shift penalty */
305 };
306
307 static const
308 struct processor_costs ultrasparc3_costs = {
309 COSTS_N_INSNS (2), /* int load */
310 COSTS_N_INSNS (3), /* int signed load */
311 COSTS_N_INSNS (3), /* int zeroed load */
312 COSTS_N_INSNS (2), /* float load */
313 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
314 COSTS_N_INSNS (4), /* fadd, fsub */
315 COSTS_N_INSNS (5), /* fcmp */
316 COSTS_N_INSNS (3), /* fmov, fmovr */
317 COSTS_N_INSNS (4), /* fmul */
318 COSTS_N_INSNS (17), /* fdivs */
319 COSTS_N_INSNS (20), /* fdivd */
320 COSTS_N_INSNS (20), /* fsqrts */
321 COSTS_N_INSNS (29), /* fsqrtd */
322 COSTS_N_INSNS (6), /* imul */
323 COSTS_N_INSNS (6), /* imulX */
324 0, /* imul bit factor */
325 COSTS_N_INSNS (40), /* idiv */
326 COSTS_N_INSNS (71), /* idivX */
327 COSTS_N_INSNS (2), /* movcc/movr */
328 0, /* shift penalty */
329 };
330
331 static const
332 struct processor_costs niagara_costs = {
333 COSTS_N_INSNS (3), /* int load */
334 COSTS_N_INSNS (3), /* int signed load */
335 COSTS_N_INSNS (3), /* int zeroed load */
336 COSTS_N_INSNS (9), /* float load */
337 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
338 COSTS_N_INSNS (8), /* fadd, fsub */
339 COSTS_N_INSNS (26), /* fcmp */
340 COSTS_N_INSNS (8), /* fmov, fmovr */
341 COSTS_N_INSNS (29), /* fmul */
342 COSTS_N_INSNS (54), /* fdivs */
343 COSTS_N_INSNS (83), /* fdivd */
344 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
345 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
346 COSTS_N_INSNS (11), /* imul */
347 COSTS_N_INSNS (11), /* imulX */
348 0, /* imul bit factor */
349 COSTS_N_INSNS (72), /* idiv */
350 COSTS_N_INSNS (72), /* idivX */
351 COSTS_N_INSNS (1), /* movcc/movr */
352 0, /* shift penalty */
353 };
354
355 static const
356 struct processor_costs niagara2_costs = {
357 COSTS_N_INSNS (3), /* int load */
358 COSTS_N_INSNS (3), /* int signed load */
359 COSTS_N_INSNS (3), /* int zeroed load */
360 COSTS_N_INSNS (3), /* float load */
361 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
362 COSTS_N_INSNS (6), /* fadd, fsub */
363 COSTS_N_INSNS (6), /* fcmp */
364 COSTS_N_INSNS (6), /* fmov, fmovr */
365 COSTS_N_INSNS (6), /* fmul */
366 COSTS_N_INSNS (19), /* fdivs */
367 COSTS_N_INSNS (33), /* fdivd */
368 COSTS_N_INSNS (19), /* fsqrts */
369 COSTS_N_INSNS (33), /* fsqrtd */
370 COSTS_N_INSNS (5), /* imul */
371 COSTS_N_INSNS (5), /* imulX */
372 0, /* imul bit factor */
373 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
374 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (1), /* movcc/movr */
376 0, /* shift penalty */
377 };
378
379 static const
380 struct processor_costs niagara3_costs = {
381 COSTS_N_INSNS (3), /* int load */
382 COSTS_N_INSNS (3), /* int signed load */
383 COSTS_N_INSNS (3), /* int zeroed load */
384 COSTS_N_INSNS (3), /* float load */
385 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
386 COSTS_N_INSNS (9), /* fadd, fsub */
387 COSTS_N_INSNS (9), /* fcmp */
388 COSTS_N_INSNS (9), /* fmov, fmovr */
389 COSTS_N_INSNS (9), /* fmul */
390 COSTS_N_INSNS (23), /* fdivs */
391 COSTS_N_INSNS (37), /* fdivd */
392 COSTS_N_INSNS (23), /* fsqrts */
393 COSTS_N_INSNS (37), /* fsqrtd */
394 COSTS_N_INSNS (9), /* imul */
395 COSTS_N_INSNS (9), /* imulX */
396 0, /* imul bit factor */
397 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
398 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
399 COSTS_N_INSNS (1), /* movcc/movr */
400 0, /* shift penalty */
401 };
402
403 static const
404 struct processor_costs niagara4_costs = {
405 COSTS_N_INSNS (5), /* int load */
406 COSTS_N_INSNS (5), /* int signed load */
407 COSTS_N_INSNS (5), /* int zeroed load */
408 COSTS_N_INSNS (5), /* float load */
409 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
410 COSTS_N_INSNS (11), /* fadd, fsub */
411 COSTS_N_INSNS (11), /* fcmp */
412 COSTS_N_INSNS (11), /* fmov, fmovr */
413 COSTS_N_INSNS (11), /* fmul */
414 COSTS_N_INSNS (24), /* fdivs */
415 COSTS_N_INSNS (37), /* fdivd */
416 COSTS_N_INSNS (24), /* fsqrts */
417 COSTS_N_INSNS (37), /* fsqrtd */
418 COSTS_N_INSNS (12), /* imul */
419 COSTS_N_INSNS (12), /* imulX */
420 0, /* imul bit factor */
421 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
422 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
423 COSTS_N_INSNS (1), /* movcc/movr */
424 0, /* shift penalty */
425 };
426
427 static const
428 struct processor_costs niagara7_costs = {
429 COSTS_N_INSNS (5), /* int load */
430 COSTS_N_INSNS (5), /* int signed load */
431 COSTS_N_INSNS (5), /* int zeroed load */
432 COSTS_N_INSNS (5), /* float load */
433 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
434 COSTS_N_INSNS (11), /* fadd, fsub */
435 COSTS_N_INSNS (11), /* fcmp */
436 COSTS_N_INSNS (11), /* fmov, fmovr */
437 COSTS_N_INSNS (11), /* fmul */
438 COSTS_N_INSNS (24), /* fdivs */
439 COSTS_N_INSNS (37), /* fdivd */
440 COSTS_N_INSNS (24), /* fsqrts */
441 COSTS_N_INSNS (37), /* fsqrtd */
442 COSTS_N_INSNS (12), /* imul */
443 COSTS_N_INSNS (12), /* imulX */
444 0, /* imul bit factor */
445 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
446 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
447 COSTS_N_INSNS (1), /* movcc/movr */
448 0, /* shift penalty */
449 };
450
451 static const struct processor_costs *sparc_costs = &cypress_costs;
452
453 #ifdef HAVE_AS_RELAX_OPTION
454 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
455 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
456    With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
457    anything branches to a point between the sethi and the jmp.  */
458 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
459 #else
460 #define LEAF_SIBCALL_SLOT_RESERVED_P \
461 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
462 #endif
463
464 /* Vector to say how input registers are mapped to output registers.
465 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
466 eliminate it. You must use -fomit-frame-pointer to get that. */
467 char leaf_reg_remap[] =
468 { 0, 1, 2, 3, 4, 5, 6, 7,
469 -1, -1, -1, -1, -1, -1, 14, -1,
470 -1, -1, -1, -1, -1, -1, -1, -1,
471 8, 9, 10, 11, 12, 13, -1, 15,
472
473 32, 33, 34, 35, 36, 37, 38, 39,
474 40, 41, 42, 43, 44, 45, 46, 47,
475 48, 49, 50, 51, 52, 53, 54, 55,
476 56, 57, 58, 59, 60, 61, 62, 63,
477 64, 65, 66, 67, 68, 69, 70, 71,
478 72, 73, 74, 75, 76, 77, 78, 79,
479 80, 81, 82, 83, 84, 85, 86, 87,
480 88, 89, 90, 91, 92, 93, 94, 95,
481 96, 97, 98, 99, 100, 101, 102};
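
/* Editor's note -- an illustrative reading of the table above, not part of
   the GCC sources: a leaf function executes no SAVE, so the register window
   is never shifted and the incoming values are still in the caller's out
   registers.  The table encodes that renaming: hard register 24 (%i0) maps
   to 8 (%o0), 31 (%i7) maps to 15 (%o7), the globals and %sp keep their
   numbers, the floating-point registers are unchanged, and everything that
   cannot be renamed (the locals, %fp and the out registers themselves) is
   marked -1.  */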
482
483 /* Vector, indexed by hard register number, which contains 1
484 for a register that is allowable in a candidate for leaf
485 function treatment. */
486 char sparc_leaf_regs[] =
487 { 1, 1, 1, 1, 1, 1, 1, 1,
488 0, 0, 0, 0, 0, 0, 1, 0,
489 0, 0, 0, 0, 0, 0, 0, 0,
490 1, 1, 1, 1, 1, 1, 0, 1,
491 1, 1, 1, 1, 1, 1, 1, 1,
492 1, 1, 1, 1, 1, 1, 1, 1,
493 1, 1, 1, 1, 1, 1, 1, 1,
494 1, 1, 1, 1, 1, 1, 1, 1,
495 1, 1, 1, 1, 1, 1, 1, 1,
496 1, 1, 1, 1, 1, 1, 1, 1,
497 1, 1, 1, 1, 1, 1, 1, 1,
498 1, 1, 1, 1, 1, 1, 1, 1,
499 1, 1, 1, 1, 1, 1, 1};
500
501 struct GTY(()) machine_function
502 {
503 /* Size of the frame of the function. */
504 HOST_WIDE_INT frame_size;
505
506 /* Size of the frame of the function minus the register window save area
507 and the outgoing argument area. */
508 HOST_WIDE_INT apparent_frame_size;
509
510 /* Register we pretend the frame pointer is allocated to. Normally, this
511 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
512 record "offset" separately as it may be too big for (reg + disp). */
513 rtx frame_base_reg;
514 HOST_WIDE_INT frame_base_offset;
515
516 /* Number of global or FP registers to be saved (as 4-byte quantities). */
517 int n_global_fp_regs;
518
519 /* True if the current function is leaf and uses only leaf regs,
520 so that the SPARC leaf function optimization can be applied.
521 Private version of crtl->uses_only_leaf_regs, see
522 sparc_expand_prologue for the rationale. */
523 int leaf_function_p;
524
525 /* True if the prologue saves local or in registers. */
526 bool save_local_in_regs_p;
527
528 /* True if the data calculated by sparc_expand_prologue are valid. */
529 bool prologue_data_valid_p;
530 };
531
532 #define sparc_frame_size cfun->machine->frame_size
533 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
534 #define sparc_frame_base_reg cfun->machine->frame_base_reg
535 #define sparc_frame_base_offset cfun->machine->frame_base_offset
536 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
537 #define sparc_leaf_function_p cfun->machine->leaf_function_p
538 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
539 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
540
541 /* 1 if the next opcode is to be specially indented. */
542 int sparc_indent_opcode = 0;
543
544 static void sparc_option_override (void);
545 static void sparc_init_modes (void);
546 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
547 const_tree, bool, bool, int *, int *);
548
549 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
550 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
551
552 static void sparc_emit_set_const32 (rtx, rtx);
553 static void sparc_emit_set_const64 (rtx, rtx);
554 static void sparc_output_addr_vec (rtx);
555 static void sparc_output_addr_diff_vec (rtx);
556 static void sparc_output_deferred_case_vectors (void);
557 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
558 static bool sparc_legitimate_constant_p (machine_mode, rtx);
559 static rtx sparc_builtin_saveregs (void);
560 static int epilogue_renumber (rtx *, int);
561 static bool sparc_assemble_integer (rtx, unsigned int, int);
562 static int set_extends (rtx_insn *);
563 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
564 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
565 #ifdef TARGET_SOLARIS
566 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
567 tree) ATTRIBUTE_UNUSED;
568 #endif
569 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
570 static int sparc_issue_rate (void);
571 static void sparc_sched_init (FILE *, int, int);
572 static int sparc_use_sched_lookahead (void);
573
574 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
575 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
576 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
577 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
578 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
579
580 static bool sparc_function_ok_for_sibcall (tree, tree);
581 static void sparc_init_libfuncs (void);
582 static void sparc_init_builtins (void);
583 static void sparc_fpu_init_builtins (void);
584 static void sparc_vis_init_builtins (void);
585 static tree sparc_builtin_decl (unsigned, bool);
586 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
587 static tree sparc_fold_builtin (tree, int, tree *, bool);
588 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
589 HOST_WIDE_INT, tree);
590 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
591 HOST_WIDE_INT, const_tree);
592 static struct machine_function * sparc_init_machine_status (void);
593 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
594 static rtx sparc_tls_get_addr (void);
595 static rtx sparc_tls_got (void);
596 static int sparc_register_move_cost (machine_mode,
597 reg_class_t, reg_class_t);
598 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
599 static rtx sparc_function_value (const_tree, const_tree, bool);
600 static rtx sparc_libcall_value (machine_mode, const_rtx);
601 static bool sparc_function_value_regno_p (const unsigned int);
602 static rtx sparc_struct_value_rtx (tree, int);
603 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
604 int *, const_tree, int);
605 static bool sparc_return_in_memory (const_tree, const_tree);
606 static bool sparc_strict_argument_naming (cumulative_args_t);
607 static void sparc_va_start (tree, rtx);
608 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
609 static bool sparc_vector_mode_supported_p (machine_mode);
610 static bool sparc_tls_referenced_p (rtx);
611 static rtx sparc_legitimize_tls_address (rtx);
612 static rtx sparc_legitimize_pic_address (rtx, rtx);
613 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
614 static rtx sparc_delegitimize_address (rtx);
615 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
616 static bool sparc_pass_by_reference (cumulative_args_t,
617 machine_mode, const_tree, bool);
618 static void sparc_function_arg_advance (cumulative_args_t,
619 machine_mode, const_tree, bool);
620 static rtx sparc_function_arg_1 (cumulative_args_t,
621 machine_mode, const_tree, bool, bool);
622 static rtx sparc_function_arg (cumulative_args_t,
623 machine_mode, const_tree, bool);
624 static rtx sparc_function_incoming_arg (cumulative_args_t,
625 machine_mode, const_tree, bool);
626 static unsigned int sparc_function_arg_boundary (machine_mode,
627 const_tree);
628 static int sparc_arg_partial_bytes (cumulative_args_t,
629 machine_mode, tree, bool);
630 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
631 static void sparc_file_end (void);
632 static bool sparc_frame_pointer_required (void);
633 static bool sparc_can_eliminate (const int, const int);
634 static rtx sparc_builtin_setjmp_frame_value (void);
635 static void sparc_conditional_register_usage (void);
636 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
637 static const char *sparc_mangle_type (const_tree);
638 #endif
639 static void sparc_trampoline_init (rtx, tree, rtx);
640 static machine_mode sparc_preferred_simd_mode (machine_mode);
641 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
642 static bool sparc_print_operand_punct_valid_p (unsigned char);
643 static void sparc_print_operand (FILE *, rtx, int);
644 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
645 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
646 machine_mode,
647 secondary_reload_info *);
648 static machine_mode sparc_cstore_mode (enum insn_code icode);
649 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
650 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
651 \f
652 #ifdef SUBTARGET_ATTRIBUTE_TABLE
653 /* Table of valid machine attributes. */
654 static const struct attribute_spec sparc_attribute_table[] =
655 {
656 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
657 do_diagnostic } */
658 SUBTARGET_ATTRIBUTE_TABLE,
659 { NULL, 0, 0, false, false, false, NULL, false }
660 };
661 #endif
662 \f
663 /* Option handling. */
664
665 /* Parsed value. */
666 enum cmodel sparc_cmodel;
667
668 char sparc_hard_reg_printed[8];
669
670 /* Initialize the GCC target structure. */
671
672 /* The default is to use .half rather than .short for aligned HI objects. */
673 #undef TARGET_ASM_ALIGNED_HI_OP
674 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
675
676 #undef TARGET_ASM_UNALIGNED_HI_OP
677 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
678 #undef TARGET_ASM_UNALIGNED_SI_OP
679 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
680 #undef TARGET_ASM_UNALIGNED_DI_OP
681 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
682
683 /* The target hook has to handle DI-mode values. */
684 #undef TARGET_ASM_INTEGER
685 #define TARGET_ASM_INTEGER sparc_assemble_integer
686
687 #undef TARGET_ASM_FUNCTION_PROLOGUE
688 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
689 #undef TARGET_ASM_FUNCTION_EPILOGUE
690 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
691
692 #undef TARGET_SCHED_ADJUST_COST
693 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
694 #undef TARGET_SCHED_ISSUE_RATE
695 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
696 #undef TARGET_SCHED_INIT
697 #define TARGET_SCHED_INIT sparc_sched_init
698 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
699 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
700
701 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
702 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
703
704 #undef TARGET_INIT_LIBFUNCS
705 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
706
707 #undef TARGET_LEGITIMIZE_ADDRESS
708 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
709 #undef TARGET_DELEGITIMIZE_ADDRESS
710 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
711 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
712 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
713
714 #undef TARGET_INIT_BUILTINS
715 #define TARGET_INIT_BUILTINS sparc_init_builtins
716 #undef TARGET_BUILTIN_DECL
717 #define TARGET_BUILTIN_DECL sparc_builtin_decl
718 #undef TARGET_EXPAND_BUILTIN
719 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
720 #undef TARGET_FOLD_BUILTIN
721 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
722
723 #if TARGET_TLS
724 #undef TARGET_HAVE_TLS
725 #define TARGET_HAVE_TLS true
726 #endif
727
728 #undef TARGET_CANNOT_FORCE_CONST_MEM
729 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
730
731 #undef TARGET_ASM_OUTPUT_MI_THUNK
732 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
733 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
734 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
735
736 #undef TARGET_RTX_COSTS
737 #define TARGET_RTX_COSTS sparc_rtx_costs
738 #undef TARGET_ADDRESS_COST
739 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
740 #undef TARGET_REGISTER_MOVE_COST
741 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
742
743 #undef TARGET_PROMOTE_FUNCTION_MODE
744 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
745
746 #undef TARGET_FUNCTION_VALUE
747 #define TARGET_FUNCTION_VALUE sparc_function_value
748 #undef TARGET_LIBCALL_VALUE
749 #define TARGET_LIBCALL_VALUE sparc_libcall_value
750 #undef TARGET_FUNCTION_VALUE_REGNO_P
751 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
752
753 #undef TARGET_STRUCT_VALUE_RTX
754 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
755 #undef TARGET_RETURN_IN_MEMORY
756 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
757 #undef TARGET_MUST_PASS_IN_STACK
758 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
759 #undef TARGET_PASS_BY_REFERENCE
760 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
761 #undef TARGET_ARG_PARTIAL_BYTES
762 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
763 #undef TARGET_FUNCTION_ARG_ADVANCE
764 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
765 #undef TARGET_FUNCTION_ARG
766 #define TARGET_FUNCTION_ARG sparc_function_arg
767 #undef TARGET_FUNCTION_INCOMING_ARG
768 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
769 #undef TARGET_FUNCTION_ARG_BOUNDARY
770 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
771
772 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
773 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
774 #undef TARGET_STRICT_ARGUMENT_NAMING
775 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
776
777 #undef TARGET_EXPAND_BUILTIN_VA_START
778 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
779 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
780 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
781
782 #undef TARGET_VECTOR_MODE_SUPPORTED_P
783 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
784
785 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
786 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
787
788 #ifdef SUBTARGET_INSERT_ATTRIBUTES
789 #undef TARGET_INSERT_ATTRIBUTES
790 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
791 #endif
792
793 #ifdef SUBTARGET_ATTRIBUTE_TABLE
794 #undef TARGET_ATTRIBUTE_TABLE
795 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
796 #endif
797
798 #undef TARGET_OPTION_OVERRIDE
799 #define TARGET_OPTION_OVERRIDE sparc_option_override
800
801 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
802 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
803 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
804 #endif
805
806 #undef TARGET_ASM_FILE_END
807 #define TARGET_ASM_FILE_END sparc_file_end
808
809 #undef TARGET_FRAME_POINTER_REQUIRED
810 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
811
812 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
813 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
814
815 #undef TARGET_CAN_ELIMINATE
816 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
817
818 #undef TARGET_PREFERRED_RELOAD_CLASS
819 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
820
821 #undef TARGET_SECONDARY_RELOAD
822 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
823
824 #undef TARGET_CONDITIONAL_REGISTER_USAGE
825 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
826
827 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
828 #undef TARGET_MANGLE_TYPE
829 #define TARGET_MANGLE_TYPE sparc_mangle_type
830 #endif
831
832 #undef TARGET_LRA_P
833 #define TARGET_LRA_P hook_bool_void_false
834
835 #undef TARGET_LEGITIMATE_ADDRESS_P
836 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
837
838 #undef TARGET_LEGITIMATE_CONSTANT_P
839 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
840
841 #undef TARGET_TRAMPOLINE_INIT
842 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
843
844 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
845 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
846 #undef TARGET_PRINT_OPERAND
847 #define TARGET_PRINT_OPERAND sparc_print_operand
848 #undef TARGET_PRINT_OPERAND_ADDRESS
849 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
850
851 /* The value stored by LDSTUB. */
852 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
853 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
854
855 #undef TARGET_CSTORE_MODE
856 #define TARGET_CSTORE_MODE sparc_cstore_mode
857
858 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
859 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
860
861 #undef TARGET_FIXED_CONDITION_CODE_REGS
862 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
863
864 struct gcc_target targetm = TARGET_INITIALIZER;
865
866 /* Return the memory reference contained in X if any, zero otherwise. */
867
868 static rtx
869 mem_ref (rtx x)
870 {
871 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
872 x = XEXP (x, 0);
873
874 if (MEM_P (x))
875 return x;
876
877 return NULL_RTX;
878 }
879
880 /* We use a machine specific pass to enable workarounds for errata.
881 We need to have the (essentially) final form of the insn stream in order
882 to properly detect the various hazards. Therefore, this machine specific
883 pass runs as late as possible. The pass is inserted in the pass pipeline
884 at the end of sparc_option_override. */
885
886 static unsigned int
887 sparc_do_work_around_errata (void)
888 {
889 rtx_insn *insn, *next;
890
891 /* Force all instructions to be split into their final form. */
892 split_all_insns_noflow ();
893
894 /* Now look for specific patterns in the insn stream. */
895 for (insn = get_insns (); insn; insn = next)
896 {
897 bool insert_nop = false;
898 rtx set;
899
900 /* Look into the instruction in a delay slot. */
901 if (NONJUMP_INSN_P (insn))
902 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
903 insn = seq->insn (1);
904
905 /* Look for a single-word load into an odd-numbered FP register. */
906 if (sparc_fix_at697f
907 && NONJUMP_INSN_P (insn)
908 && (set = single_set (insn)) != NULL_RTX
909 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
910 && MEM_P (SET_SRC (set))
911 && REG_P (SET_DEST (set))
912 && REGNO (SET_DEST (set)) > 31
913 && REGNO (SET_DEST (set)) % 2 != 0)
914 {
915 /* The wrong dependency is on the enclosing double register. */
916 const unsigned int x = REGNO (SET_DEST (set)) - 1;
917 unsigned int src1, src2, dest;
918 int code;
919
920 next = next_active_insn (insn);
921 if (!next)
922 break;
923 /* If the insn is a branch, then it cannot be problematic. */
924 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
925 continue;
926
927 extract_insn (next);
928 code = INSN_CODE (next);
929
930 switch (code)
931 {
932 case CODE_FOR_adddf3:
933 case CODE_FOR_subdf3:
934 case CODE_FOR_muldf3:
935 case CODE_FOR_divdf3:
936 dest = REGNO (recog_data.operand[0]);
937 src1 = REGNO (recog_data.operand[1]);
938 src2 = REGNO (recog_data.operand[2]);
939 if (src1 != src2)
940 {
941 /* Case [1-4]:
942 ld [address], %fx+1
943 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
944 if ((src1 == x || src2 == x)
945 && (dest == src1 || dest == src2))
946 insert_nop = true;
947 }
948 else
949 {
950 /* Case 5:
951 ld [address], %fx+1
952 FPOPd %fx, %fx, %fx */
953 if (src1 == x
954 && dest == src1
955 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
956 insert_nop = true;
957 }
958 break;
959
960 case CODE_FOR_sqrtdf2:
961 dest = REGNO (recog_data.operand[0]);
962 src1 = REGNO (recog_data.operand[1]);
963 /* Case 6:
964 ld [address], %fx+1
965 fsqrtd %fx, %fx */
966 if (src1 == x && dest == src1)
967 insert_nop = true;
968 break;
969
970 default:
971 break;
972 }
973 }
974
975 /* Look for a single-word load into an integer register. */
976 else if (sparc_fix_ut699
977 && NONJUMP_INSN_P (insn)
978 && (set = single_set (insn)) != NULL_RTX
979 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
980 && mem_ref (SET_SRC (set)) != NULL_RTX
981 && REG_P (SET_DEST (set))
982 && REGNO (SET_DEST (set)) < 32)
983 {
984 /* There is no problem if the second memory access has a data
985 dependency on the first single-cycle load. */
986 rtx x = SET_DEST (set);
987
988 next = next_active_insn (insn);
989 if (!next)
990 break;
991 /* If the insn is a branch, then it cannot be problematic. */
992 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
993 continue;
994
995 /* Look for a second memory access to/from an integer register. */
996 if ((set = single_set (next)) != NULL_RTX)
997 {
998 rtx src = SET_SRC (set);
999 rtx dest = SET_DEST (set);
1000 rtx mem;
1001
1002 /* LDD is affected. */
1003 if ((mem = mem_ref (src)) != NULL_RTX
1004 && REG_P (dest)
1005 && REGNO (dest) < 32
1006 && !reg_mentioned_p (x, XEXP (mem, 0)))
1007 insert_nop = true;
1008
1009 /* STD is *not* affected. */
1010 else if (MEM_P (dest)
1011 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1012 && (src == CONST0_RTX (GET_MODE (dest))
1013 || (REG_P (src)
1014 && REGNO (src) < 32
1015 && REGNO (src) != REGNO (x)))
1016 && !reg_mentioned_p (x, XEXP (dest, 0)))
1017 insert_nop = true;
1018 }
1019 }
1020
1021 /* Look for a single-word load/operation into an FP register. */
1022 else if (sparc_fix_ut699
1023 && NONJUMP_INSN_P (insn)
1024 && (set = single_set (insn)) != NULL_RTX
1025 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1026 && REG_P (SET_DEST (set))
1027 && REGNO (SET_DEST (set)) > 31)
1028 {
1029 /* Number of instructions in the problematic window. */
1030 const int n_insns = 4;
1031 /* The problematic combination is with the sibling FP register. */
1032 const unsigned int x = REGNO (SET_DEST (set));
1033 const unsigned int y = x ^ 1;
1034 rtx_insn *after;
1035 int i;
1036
1037 next = next_active_insn (insn);
1038 if (!next)
1039 break;
1040 /* If the insn is a branch, then it cannot be problematic. */
1041 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1042 continue;
1043
1044 /* Look for a second load/operation into the sibling FP register. */
1045 if (!((set = single_set (next)) != NULL_RTX
1046 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1047 && REG_P (SET_DEST (set))
1048 && REGNO (SET_DEST (set)) == y))
1049 continue;
1050
1051 /* Look for a (possible) store from the FP register in the next N
1052 instructions, but bail out if it is again modified or if there
1053 is a store from the sibling FP register before this store. */
1054 for (after = next, i = 0; i < n_insns; i++)
1055 {
1056 bool branch_p;
1057
1058 after = next_active_insn (after);
1059 if (!after)
1060 break;
1061
1062 /* This is a branch with an empty delay slot. */
1063 if (!NONJUMP_INSN_P (after))
1064 {
1065 if (++i == n_insns)
1066 break;
1067 branch_p = true;
1068 after = NULL;
1069 }
1070 /* This is a branch with a filled delay slot. */
1071 else if (rtx_sequence *seq =
1072 dyn_cast <rtx_sequence *> (PATTERN (after)))
1073 {
1074 if (++i == n_insns)
1075 break;
1076 branch_p = true;
1077 after = seq->insn (1);
1078 }
1079 /* This is a regular instruction. */
1080 else
1081 branch_p = false;
1082
1083 if (after && (set = single_set (after)) != NULL_RTX)
1084 {
1085 const rtx src = SET_SRC (set);
1086 const rtx dest = SET_DEST (set);
1087 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1088
1089 /* If the FP register is again modified before the store,
1090 then the store isn't affected. */
1091 if (REG_P (dest)
1092 && (REGNO (dest) == x
1093 || (REGNO (dest) == y && size == 8)))
1094 break;
1095
1096 if (MEM_P (dest) && REG_P (src))
1097 {
1098 /* If there is a store from the sibling FP register
1099 before the store, then the store is not affected. */
1100 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1101 break;
1102
1103 /* Otherwise, the store is affected. */
1104 if (REGNO (src) == x && size == 4)
1105 {
1106 insert_nop = true;
1107 break;
1108 }
1109 }
1110 }
1111
1112 /* If we have a branch in the first M instructions, then we
1113 cannot see the (M+2)th instruction so we play safe. */
1114 if (branch_p && i <= (n_insns - 2))
1115 {
1116 insert_nop = true;
1117 break;
1118 }
1119 }
1120 }
1121
1122 else
1123 next = NEXT_INSN (insn);
1124
1125 if (insert_nop)
1126 emit_insn_before (gen_nop (), next);
1127 }
1128
1129 return 0;
1130 }
1131
1132 namespace {
1133
1134 const pass_data pass_data_work_around_errata =
1135 {
1136 RTL_PASS, /* type */
1137 "errata", /* name */
1138 OPTGROUP_NONE, /* optinfo_flags */
1139 TV_MACH_DEP, /* tv_id */
1140 0, /* properties_required */
1141 0, /* properties_provided */
1142 0, /* properties_destroyed */
1143 0, /* todo_flags_start */
1144 0, /* todo_flags_finish */
1145 };
1146
1147 class pass_work_around_errata : public rtl_opt_pass
1148 {
1149 public:
1150 pass_work_around_errata(gcc::context *ctxt)
1151 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1152 {}
1153
1154 /* opt_pass methods: */
1155 virtual bool gate (function *)
1156 {
1157 /* The only errata we handle are those of the AT697F and UT699. */
1158 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1159 }
1160
1161 virtual unsigned int execute (function *)
1162 {
1163 return sparc_do_work_around_errata ();
1164 }
1165
1166 }; // class pass_work_around_errata
1167
1168 } // anon namespace
1169
1170 rtl_opt_pass *
1171 make_pass_work_around_errata (gcc::context *ctxt)
1172 {
1173 return new pass_work_around_errata (ctxt);
1174 }
1175
1176 /* Helpers for TARGET_DEBUG_OPTIONS. */
1177 static void
1178 dump_target_flag_bits (const int flags)
1179 {
1180 if (flags & MASK_64BIT)
1181 fprintf (stderr, "64BIT ");
1182 if (flags & MASK_APP_REGS)
1183 fprintf (stderr, "APP_REGS ");
1184 if (flags & MASK_FASTER_STRUCTS)
1185 fprintf (stderr, "FASTER_STRUCTS ");
1186 if (flags & MASK_FLAT)
1187 fprintf (stderr, "FLAT ");
1188 if (flags & MASK_FMAF)
1189 fprintf (stderr, "FMAF ");
1190 if (flags & MASK_FPU)
1191 fprintf (stderr, "FPU ");
1192 if (flags & MASK_HARD_QUAD)
1193 fprintf (stderr, "HARD_QUAD ");
1194 if (flags & MASK_POPC)
1195 fprintf (stderr, "POPC ");
1196 if (flags & MASK_PTR64)
1197 fprintf (stderr, "PTR64 ");
1198 if (flags & MASK_STACK_BIAS)
1199 fprintf (stderr, "STACK_BIAS ");
1200 if (flags & MASK_UNALIGNED_DOUBLES)
1201 fprintf (stderr, "UNALIGNED_DOUBLES ");
1202 if (flags & MASK_V8PLUS)
1203 fprintf (stderr, "V8PLUS ");
1204 if (flags & MASK_VIS)
1205 fprintf (stderr, "VIS ");
1206 if (flags & MASK_VIS2)
1207 fprintf (stderr, "VIS2 ");
1208 if (flags & MASK_VIS3)
1209 fprintf (stderr, "VIS3 ");
1210 if (flags & MASK_VIS4)
1211 fprintf (stderr, "VIS4 ");
1212 if (flags & MASK_CBCOND)
1213 fprintf (stderr, "CBCOND ");
1214 if (flags & MASK_DEPRECATED_V8_INSNS)
1215 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1216 if (flags & MASK_SPARCLET)
1217 fprintf (stderr, "SPARCLET ");
1218 if (flags & MASK_SPARCLITE)
1219 fprintf (stderr, "SPARCLITE ");
1220 if (flags & MASK_V8)
1221 fprintf (stderr, "V8 ");
1222 if (flags & MASK_V9)
1223 fprintf (stderr, "V9 ");
1224 }
1225
1226 static void
1227 dump_target_flags (const char *prefix, const int flags)
1228 {
1229 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1230 dump_target_flag_bits (flags);
1231   fprintf (stderr, "]\n");
1232 }
1233
1234 /* Validate and override various options, and do some machine dependent
1235 initialization. */
1236
1237 static void
1238 sparc_option_override (void)
1239 {
1240 static struct code_model {
1241 const char *const name;
1242 const enum cmodel value;
1243 } const cmodels[] = {
1244 { "32", CM_32 },
1245 { "medlow", CM_MEDLOW },
1246 { "medmid", CM_MEDMID },
1247 { "medany", CM_MEDANY },
1248 { "embmedany", CM_EMBMEDANY },
1249 { NULL, (enum cmodel) 0 }
1250 };
1251 const struct code_model *cmodel;
1252 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1253 static struct cpu_default {
1254 const int cpu;
1255 const enum processor_type processor;
1256 } const cpu_default[] = {
1257 /* There must be one entry here for each TARGET_CPU value. */
1258 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1259 { TARGET_CPU_v8, PROCESSOR_V8 },
1260 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1261 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1262 { TARGET_CPU_leon, PROCESSOR_LEON },
1263 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1264 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1265 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1266 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1267 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1268 { TARGET_CPU_v9, PROCESSOR_V9 },
1269 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1270 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1271 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1272 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1273 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1274 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1275 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1276 { -1, PROCESSOR_V7 }
1277 };
1278 const struct cpu_default *def;
1279 /* Table of values for -m{cpu,tune}=. This must match the order of
1280 the enum processor_type in sparc-opts.h. */
1281 static struct cpu_table {
1282 const char *const name;
1283 const int disable;
1284 const int enable;
1285 } const cpu_table[] = {
1286 { "v7", MASK_ISA, 0 },
1287 { "cypress", MASK_ISA, 0 },
1288 { "v8", MASK_ISA, MASK_V8 },
1289 /* TI TMS390Z55 supersparc */
1290 { "supersparc", MASK_ISA, MASK_V8 },
1291 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1292 { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1293 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1294 { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
1295 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1296 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1297 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
299     /* The Fujitsu MB86934 is the more recent sparclite chip, with an FPU.  */
1299 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1300 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1301 { "sparclet", MASK_ISA, MASK_SPARCLET },
1302 /* TEMIC sparclet */
1303 { "tsc701", MASK_ISA, MASK_SPARCLET },
1304 { "v9", MASK_ISA, MASK_V9 },
1305 /* UltraSPARC I, II, IIi */
1306 { "ultrasparc", MASK_ISA,
1307 /* Although insns using %y are deprecated, it is a clear win. */
1308 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1309 /* UltraSPARC III */
1310 /* ??? Check if %y issue still holds true. */
1311 { "ultrasparc3", MASK_ISA,
1312 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1313 /* UltraSPARC T1 */
1314 { "niagara", MASK_ISA,
1315 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1316 /* UltraSPARC T2 */
1317 { "niagara2", MASK_ISA,
1318 MASK_V9|MASK_POPC|MASK_VIS2 },
1319 /* UltraSPARC T3 */
1320 { "niagara3", MASK_ISA,
1321 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1322 /* UltraSPARC T4 */
1323 { "niagara4", MASK_ISA,
1324 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1325 /* UltraSPARC M7 */
1326 { "niagara7", MASK_ISA,
1327 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1328 };
1329 const struct cpu_table *cpu;
1330 unsigned int i;
1331 int fpu;
1332
1333 if (sparc_debug_string != NULL)
1334 {
1335 const char *q;
1336 char *p;
1337
1338 p = ASTRDUP (sparc_debug_string);
1339 while ((q = strtok (p, ",")) != NULL)
1340 {
1341 bool invert;
1342 int mask;
1343
1344 p = NULL;
1345 if (*q == '!')
1346 {
1347 invert = true;
1348 q++;
1349 }
1350 else
1351 invert = false;
1352
1353 if (! strcmp (q, "all"))
1354 mask = MASK_DEBUG_ALL;
1355 else if (! strcmp (q, "options"))
1356 mask = MASK_DEBUG_OPTIONS;
1357 else
1358 error ("unknown -mdebug-%s switch", q);
1359
1360 if (invert)
1361 sparc_debug &= ~mask;
1362 else
1363 sparc_debug |= mask;
1364 }
1365 }
1366
1367 if (TARGET_DEBUG_OPTIONS)
1368 {
1369       dump_target_flags ("Initial target_flags", target_flags);
1370       dump_target_flags ("target_flags_explicit", target_flags_explicit);
1371 }
1372
1373 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1374 SUBTARGET_OVERRIDE_OPTIONS;
1375 #endif
1376
1377 #ifndef SPARC_BI_ARCH
1378 /* Check for unsupported architecture size. */
1379 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1380 error ("%s is not supported by this configuration",
1381 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1382 #endif
1383
1384   /* We force all 64-bit architectures to use a 128-bit long double.  */
1385 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1386 {
1387 error ("-mlong-double-64 not allowed with -m64");
1388 target_flags |= MASK_LONG_DOUBLE_128;
1389 }
1390
1391 /* Code model selection. */
1392 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1393
1394 #ifdef SPARC_BI_ARCH
1395 if (TARGET_ARCH32)
1396 sparc_cmodel = CM_32;
1397 #endif
1398
1399 if (sparc_cmodel_string != NULL)
1400 {
1401 if (TARGET_ARCH64)
1402 {
1403 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1404 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1405 break;
1406 if (cmodel->name == NULL)
1407 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1408 else
1409 sparc_cmodel = cmodel->value;
1410 }
1411 else
1412 error ("-mcmodel= is not supported on 32 bit systems");
1413 }
1414
1415 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1416 for (i = 8; i < 16; i++)
1417 if (!call_used_regs [i])
1418 {
1419 error ("-fcall-saved-REG is not supported for out registers");
1420 call_used_regs [i] = 1;
1421 }
1422
1423 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1424
1425 /* Set the default CPU. */
1426 if (!global_options_set.x_sparc_cpu_and_features)
1427 {
1428 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1429 if (def->cpu == TARGET_CPU_DEFAULT)
1430 break;
1431 gcc_assert (def->cpu != -1);
1432 sparc_cpu_and_features = def->processor;
1433 }
1434
1435 if (!global_options_set.x_sparc_cpu)
1436 sparc_cpu = sparc_cpu_and_features;
1437
1438 cpu = &cpu_table[(int) sparc_cpu_and_features];
1439
1440 if (TARGET_DEBUG_OPTIONS)
1441 {
1442 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1443 fprintf (stderr, "sparc_cpu: %s\n",
1444 cpu_table[(int) sparc_cpu].name);
1445 dump_target_flags ("cpu->disable", cpu->disable);
1446 dump_target_flags ("cpu->enable", cpu->enable);
1447 }
1448
1449 target_flags &= ~cpu->disable;
1450 target_flags |= (cpu->enable
1451 #ifndef HAVE_AS_FMAF_HPC_VIS3
1452 & ~(MASK_FMAF | MASK_VIS3)
1453 #endif
1454 #ifndef HAVE_AS_SPARC4
1455 & ~MASK_CBCOND
1456 #endif
1457 #ifndef HAVE_AS_SPARC5_VIS4
1458 & ~(MASK_VIS4 | MASK_SUBXC)
1459 #endif
1460 #ifndef HAVE_AS_LEON
1461 & ~(MASK_LEON | MASK_LEON3)
1462 #endif
1463 );
1464
1465 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1466 the processor default. */
1467 if (target_flags_explicit & MASK_FPU)
1468 target_flags = (target_flags & ~MASK_FPU) | fpu;
1469
1470 /* -mvis2 implies -mvis */
1471 if (TARGET_VIS2)
1472 target_flags |= MASK_VIS;
1473
1474 /* -mvis3 implies -mvis2 and -mvis */
1475 if (TARGET_VIS3)
1476 target_flags |= MASK_VIS2 | MASK_VIS;
1477
1478 /* -mvis4 implies -mvis3, -mvis2 and -mvis */
1479 if (TARGET_VIS4)
1480 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1481
1482 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
1483 disabled. */
1484 if (! TARGET_FPU)
1485 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1486 | MASK_FMAF);
1487
1488 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1489 are available.
1490 -m64 also implies v9. */
1491 if (TARGET_VIS || TARGET_ARCH64)
1492 {
1493 target_flags |= MASK_V9;
1494 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1495 }
1496
1497 /* -mvis also implies -mv8plus on 32-bit */
1498 if (TARGET_VIS && ! TARGET_ARCH64)
1499 target_flags |= MASK_V8PLUS;
1500
1501 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1502 if (TARGET_V9 && TARGET_ARCH32)
1503 target_flags |= MASK_DEPRECATED_V8_INSNS;
1504
1505 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1506 if (! TARGET_V9 || TARGET_ARCH64)
1507 target_flags &= ~MASK_V8PLUS;
1508
1509 /* Don't use stack biasing in 32 bit mode. */
1510 if (TARGET_ARCH32)
1511 target_flags &= ~MASK_STACK_BIAS;
1512
1513 /* Supply a default value for align_functions. */
1514 if (align_functions == 0
1515 && (sparc_cpu == PROCESSOR_ULTRASPARC
1516 || sparc_cpu == PROCESSOR_ULTRASPARC3
1517 || sparc_cpu == PROCESSOR_NIAGARA
1518 || sparc_cpu == PROCESSOR_NIAGARA2
1519 || sparc_cpu == PROCESSOR_NIAGARA3
1520 || sparc_cpu == PROCESSOR_NIAGARA4
1521 || sparc_cpu == PROCESSOR_NIAGARA7))
1522 align_functions = 32;
1523
1524 /* Validate PCC_STRUCT_RETURN. */
1525 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1526 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1527
1528 /* Only use .uaxword when compiling for a 64-bit target. */
1529 if (!TARGET_ARCH64)
1530 targetm.asm_out.unaligned_op.di = NULL;
1531
1532 /* Do various machine dependent initializations. */
1533 sparc_init_modes ();
1534
1535 /* Set up function hooks. */
1536 init_machine_status = sparc_init_machine_status;
1537
1538 switch (sparc_cpu)
1539 {
1540 case PROCESSOR_V7:
1541 case PROCESSOR_CYPRESS:
1542 sparc_costs = &cypress_costs;
1543 break;
1544 case PROCESSOR_V8:
1545 case PROCESSOR_SPARCLITE:
1546 case PROCESSOR_SUPERSPARC:
1547 sparc_costs = &supersparc_costs;
1548 break;
1549 case PROCESSOR_F930:
1550 case PROCESSOR_F934:
1551 case PROCESSOR_HYPERSPARC:
1552 case PROCESSOR_SPARCLITE86X:
1553 sparc_costs = &hypersparc_costs;
1554 break;
1555 case PROCESSOR_LEON:
1556 sparc_costs = &leon_costs;
1557 break;
1558 case PROCESSOR_LEON3:
1559 case PROCESSOR_LEON3V7:
1560 sparc_costs = &leon3_costs;
1561 break;
1562 case PROCESSOR_SPARCLET:
1563 case PROCESSOR_TSC701:
1564 sparc_costs = &sparclet_costs;
1565 break;
1566 case PROCESSOR_V9:
1567 case PROCESSOR_ULTRASPARC:
1568 sparc_costs = &ultrasparc_costs;
1569 break;
1570 case PROCESSOR_ULTRASPARC3:
1571 sparc_costs = &ultrasparc3_costs;
1572 break;
1573 case PROCESSOR_NIAGARA:
1574 sparc_costs = &niagara_costs;
1575 break;
1576 case PROCESSOR_NIAGARA2:
1577 sparc_costs = &niagara2_costs;
1578 break;
1579 case PROCESSOR_NIAGARA3:
1580 sparc_costs = &niagara3_costs;
1581 break;
1582 case PROCESSOR_NIAGARA4:
1583 sparc_costs = &niagara4_costs;
1584 break;
1585 case PROCESSOR_NIAGARA7:
1586 sparc_costs = &niagara7_costs;
1587 break;
1588 case PROCESSOR_NATIVE:
1589 gcc_unreachable ();
1590 };
1591
1592 if (sparc_memory_model == SMM_DEFAULT)
1593 {
1594 /* Choose the memory model for the operating system. */
1595 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1596 if (os_default != SMM_DEFAULT)
1597 sparc_memory_model = os_default;
1598 /* Choose the most relaxed model for the processor. */
1599 else if (TARGET_V9)
1600 sparc_memory_model = SMM_RMO;
1601 else if (TARGET_LEON3)
1602 sparc_memory_model = SMM_TSO;
1603 else if (TARGET_LEON)
1604 sparc_memory_model = SMM_SC;
1605 else if (TARGET_V8)
1606 sparc_memory_model = SMM_PSO;
1607 else
1608 sparc_memory_model = SMM_SC;
1609 }
1610
1611 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1612 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1613 target_flags |= MASK_LONG_DOUBLE_128;
1614 #endif
1615
1616 if (TARGET_DEBUG_OPTIONS)
1617 dump_target_flags ("Final target_flags", target_flags);
1618
1619 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1620 can run at the same time. More important, it is the threshold
1621 defining when additional prefetches will be dropped by the
1622 hardware.
1623
1624 The UltraSPARC-III features a documented prefetch queue with a
1625 size of 8. Additional prefetches issued in the cpu are
1626 dropped.
1627
1628 Niagara processors are different. In these processors prefetches
1629 are handled much like regular loads. The L1 miss buffer is 32
1630 entries, but prefetches start getting affected when 30 entries
1631 become occupied. That occupation could be a mix of regular loads
1632 and prefetches though. And that buffer is shared by all threads.
1633 Once the threshold is reached, if the core is running a single
1634 thread the prefetch will retry. If more than one thread is
1635 running, the prefetch will be dropped.
1636
1637      All this makes it very difficult to determine how many prefetches
1638      can be issued simultaneously, even in a
1639 single-threaded program. Experimental results show that setting
1640 this parameter to 32 works well when the number of threads is not
1641 high. */
1642 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1643 ((sparc_cpu == PROCESSOR_ULTRASPARC
1644 || sparc_cpu == PROCESSOR_NIAGARA
1645 || sparc_cpu == PROCESSOR_NIAGARA2
1646 || sparc_cpu == PROCESSOR_NIAGARA3
1647 || sparc_cpu == PROCESSOR_NIAGARA4)
1648 ? 2
1649 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1650 ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
1651 ? 32 : 3))),
1652 global_options.x_param_values,
1653 global_options_set.x_param_values);
1654
1655 /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
1656 params.def), so no maybe_set_param_value is needed.
1657
1658 The Oracle SPARC Architecture (previously the UltraSPARC
1659 Architecture) specification states that when a PREFETCH[A]
1660 instruction is executed an implementation-specific amount of data
1661 is prefetched, and that it is at least 64 bytes long (aligned to
1662 at least 64 bytes).
1663
1664 However, this is not correct. The M7 (and implementations prior
1665 to that) does not guarantee a 64B prefetch into a cache if the
1666 line size is smaller. A single cache line is all that is ever
1667 prefetched. So for the M7, where the L1D$ has 32B lines and the
1668 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1669 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1670 is a read_n prefetch, which is the only type which allocates to
1671 the L1.) */
1672
1673 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1674      Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
1675 Niagara processors feature a L1D$ of 16KB. */
1676 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1677 ((sparc_cpu == PROCESSOR_ULTRASPARC
1678 || sparc_cpu == PROCESSOR_ULTRASPARC3
1679 || sparc_cpu == PROCESSOR_NIAGARA
1680 || sparc_cpu == PROCESSOR_NIAGARA2
1681 || sparc_cpu == PROCESSOR_NIAGARA3
1682 || sparc_cpu == PROCESSOR_NIAGARA4
1683 || sparc_cpu == PROCESSOR_NIAGARA7)
1684 ? 16 : 64),
1685 global_options.x_param_values,
1686 global_options_set.x_param_values);
1687
1688
1689 /* PARAM_L2_CACHE_SIZE is the size of the L2 cache in kilobytes.  Note
1690 that 512 is the default in params.def. */
1691 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1692 (sparc_cpu == PROCESSOR_NIAGARA4
1693 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1694 ? 256 : 512)),
1695 global_options.x_param_values,
1696 global_options_set.x_param_values);
1697
1698
1699 /* Disable save slot sharing for call-clobbered registers by default.
1700 The IRA sharing algorithm works on single registers only and this
1701 pessimizes for double floating-point registers. */
1702 if (!global_options_set.x_flag_ira_share_save_slots)
1703 flag_ira_share_save_slots = 0;
1704
1705 /* We register a machine specific pass to work around errata, if any.
1706 The pass must be scheduled as late as possible so that we have the
1707 (essentially) final form of the insn stream to work on.
1708 Registering the pass must be done at startup.  It's convenient to
1709 do it here. */
1710 opt_pass *errata_pass = make_pass_work_around_errata (g);
1711 struct register_pass_info insert_pass_work_around_errata =
1712 {
1713 errata_pass, /* pass */
1714 "dbr", /* reference_pass_name */
1715 1, /* ref_pass_instance_number */
1716 PASS_POS_INSERT_AFTER /* pos_op */
1717 };
1718 register_pass (&insert_pass_work_around_errata);
1719 }
1720 \f
1721 /* Miscellaneous utilities. */
1722
1723 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1724 or branch on register contents instructions. */
1725
1726 int
1727 v9_regcmp_p (enum rtx_code code)
1728 {
1729 return (code == EQ || code == NE || code == GE || code == LT
1730 || code == LE || code == GT);
1731 }
1732
1733 /* Nonzero if OP is a floating point constant which can
1734 be loaded into an integer register using a single
1735 sethi instruction. */
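/* For example (illustrative): the SFmode constant 0.5f has the bit image
   0x3f000000, which is too big for a 13-bit signed immediate but has its
   low 10 bits clear, so fp_sethi_p accepts it and a single sethi can load
   it into an integer register.  */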
1736
1737 int
1738 fp_sethi_p (rtx op)
1739 {
1740 if (GET_CODE (op) == CONST_DOUBLE)
1741 {
1742 long i;
1743
1744 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1745 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1746 }
1747
1748 return 0;
1749 }
1750
1751 /* Nonzero if OP is a floating point constant which can
1752 be loaded into an integer register using a single
1753 mov instruction. */
1754
1755 int
1756 fp_mov_p (rtx op)
1757 {
1758 if (GET_CODE (op) == CONST_DOUBLE)
1759 {
1760 long i;
1761
1762 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1763 return SPARC_SIMM13_P (i);
1764 }
1765
1766 return 0;
1767 }
1768
1769 /* Nonzero if OP is a floating point constant which can
1770 be loaded into an integer register using a high/losum
1771 instruction sequence. */
1772
1773 int
1774 fp_high_losum_p (rtx op)
1775 {
1776 /* The constraints calling this should only be in
1777 SFmode move insns, so any constant which cannot
1778 be moved using a single insn will do. */
1779 if (GET_CODE (op) == CONST_DOUBLE)
1780 {
1781 long i;
1782
1783 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1784 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1785 }
1786
1787 return 0;
1788 }
1789
1790 /* Return true if the address of LABEL can be loaded by means of the
1791 mov{si,di}_pic_label_ref patterns in PIC mode. */
1792
1793 static bool
1794 can_use_mov_pic_label_ref (rtx label)
1795 {
1796 /* VxWorks does not impose a fixed gap between segments; the run-time
1797 gap can be different from the object-file gap. We therefore can't
1798 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1799 are absolutely sure that X is in the same segment as the GOT.
1800 Unfortunately, the flexibility of linker scripts means that we
1801 can't be sure of that in general, so assume that GOT-relative
1802 accesses are never valid on VxWorks. */
1803 if (TARGET_VXWORKS_RTP)
1804 return false;
1805
1806 /* Similarly, if the label is non-local, it might end up being placed
1807 in a different section than the current one; now mov_pic_label_ref
1808 requires the label and the code to be in the same section. */
1809 if (LABEL_REF_NONLOCAL_P (label))
1810 return false;
1811
1812 /* Finally, if we are reordering basic blocks and partitioning them into
1813 hot and cold sections, this might happen for any label.  */
1814 if (flag_reorder_blocks_and_partition)
1815 return false;
1816
1817 return true;
1818 }
1819
1820 /* Expand a move instruction. Return true if all work is done. */
1821
1822 bool
1823 sparc_expand_move (machine_mode mode, rtx *operands)
1824 {
1825 /* Handle sets of MEM first. */
1826 if (GET_CODE (operands[0]) == MEM)
1827 {
1828 /* 0 is a register (or a pair of registers) on SPARC. */
1829 if (register_or_zero_operand (operands[1], mode))
1830 return false;
1831
1832 if (!reload_in_progress)
1833 {
1834 operands[0] = validize_mem (operands[0]);
1835 operands[1] = force_reg (mode, operands[1]);
1836 }
1837 }
1838
1839 /* Fixup TLS cases. */
1840 if (TARGET_HAVE_TLS
1841 && CONSTANT_P (operands[1])
1842 && sparc_tls_referenced_p (operands [1]))
1843 {
1844 operands[1] = sparc_legitimize_tls_address (operands[1]);
1845 return false;
1846 }
1847
1848 /* Fixup PIC cases. */
1849 if (flag_pic && CONSTANT_P (operands[1]))
1850 {
1851 if (pic_address_needs_scratch (operands[1]))
1852 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1853
1854 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1855 if (GET_CODE (operands[1]) == LABEL_REF
1856 && can_use_mov_pic_label_ref (operands[1]))
1857 {
1858 if (mode == SImode)
1859 {
1860 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1861 return true;
1862 }
1863
1864 if (mode == DImode)
1865 {
1866 gcc_assert (TARGET_ARCH64);
1867 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1868 return true;
1869 }
1870 }
1871
1872 if (symbolic_operand (operands[1], mode))
1873 {
1874 operands[1]
1875 = sparc_legitimize_pic_address (operands[1],
1876 reload_in_progress
1877 ? operands[0] : NULL_RTX);
1878 return false;
1879 }
1880 }
1881
1882 /* If we are trying to toss an integer constant into FP registers,
1883 or loading a FP or vector constant, force it into memory. */
1884 if (CONSTANT_P (operands[1])
1885 && REG_P (operands[0])
1886 && (SPARC_FP_REG_P (REGNO (operands[0]))
1887 || SCALAR_FLOAT_MODE_P (mode)
1888 || VECTOR_MODE_P (mode)))
1889 {
1890 /* emit_group_store will send such bogosity to us when it is
1891 not storing directly into memory. So fix this up to avoid
1892 crashes in output_constant_pool. */
1893 if (operands [1] == const0_rtx)
1894 operands[1] = CONST0_RTX (mode);
1895
1896 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1897 we can always do so for the other registers.  */
1898 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1899 && (const_zero_operand (operands[1], mode)
1900 || const_all_ones_operand (operands[1], mode)))
1901 return false;
1902
1903 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1904 /* We are able to build any SF constant in integer registers
1905 with at most 2 instructions. */
1906 && (mode == SFmode
1907 /* And any DF constant in integer registers. */
1908 || (mode == DFmode
1909 && ! can_create_pseudo_p ())))
1910 return false;
1911
1912 operands[1] = force_const_mem (mode, operands[1]);
1913 if (!reload_in_progress)
1914 operands[1] = validize_mem (operands[1]);
1915 return false;
1916 }
1917
1918 /* Accept non-constants and valid constants unmodified. */
1919 if (!CONSTANT_P (operands[1])
1920 || GET_CODE (operands[1]) == HIGH
1921 || input_operand (operands[1], mode))
1922 return false;
1923
1924 switch (mode)
1925 {
1926 case QImode:
1927 /* All QImode constants require only one insn, so proceed. */
1928 break;
1929
1930 case HImode:
1931 case SImode:
1932 sparc_emit_set_const32 (operands[0], operands[1]);
1933 return true;
1934
1935 case DImode:
1936 /* input_operand should have filtered out 32-bit mode. */
1937 sparc_emit_set_const64 (operands[0], operands[1]);
1938 return true;
1939
1940 case TImode:
1941 {
1942 rtx high, low;
1943 /* TImode isn't available in 32-bit mode. */
1944 split_double (operands[1], &high, &low);
1945 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1946 high));
1947 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1948 low));
1949 }
1950 return true;
1951
1952 default:
1953 gcc_unreachable ();
1954 }
1955
1956 return false;
1957 }
1958
1959 /* Load OP1, a 32-bit constant, into OP0, a register.
1960 We know it can't be done in one insn when we get
1961 here; the move expander guarantees this.  */
1962
1963 static void
1964 sparc_emit_set_const32 (rtx op0, rtx op1)
1965 {
1966 machine_mode mode = GET_MODE (op0);
1967 rtx temp = op0;
1968
1969 if (can_create_pseudo_p ())
1970 temp = gen_reg_rtx (mode);
1971
1972 if (GET_CODE (op1) == CONST_INT)
1973 {
1974 gcc_assert (!small_int_operand (op1, mode)
1975 && !const_high_operand (op1, mode));
1976
1977 /* Emit them as real moves instead of a HIGH/LO_SUM,
1978 this way CSE can see everything and reuse intermediate
1979 values if it wants. */
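/* Illustrative example: for op1 == 0x12345 this emits
   (set temp (const_int 0x12000)) and then
   (set op0 (ior temp (const_int 0x345))), i.e. the usual sethi/or
   pair, but written as plain arithmetic so CSE can see it.  */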
1980 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1981 & ~(HOST_WIDE_INT) 0x3ff)));
1982
1983 emit_insn (gen_rtx_SET (op0,
1984 gen_rtx_IOR (mode, temp,
1985 GEN_INT (INTVAL (op1) & 0x3ff))));
1986 }
1987 else
1988 {
1989 /* A symbol, emit in the traditional way. */
1990 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1991 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1992 }
1993 }
1994
1995 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1996 If TEMP is nonzero, we are forbidden to use any other scratch
1997 registers. Otherwise, we are allowed to generate them as needed.
1998
1999 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2000 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2001
2002 void
2003 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2004 {
2005 rtx temp1, temp2, temp3, temp4, temp5;
2006 rtx ti_temp = 0;
2007
2008 if (temp && GET_MODE (temp) == TImode)
2009 {
2010 ti_temp = temp;
2011 temp = gen_rtx_REG (DImode, REGNO (temp));
2012 }
2013
2014 /* SPARC-V9 code-model support. */
2015 switch (sparc_cmodel)
2016 {
2017 case CM_MEDLOW:
2018 /* The range spanned by all instructions in the object is less
2019 than 2^31 bytes (2GB) and the distance from any instruction
2020 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2021 than 2^31 bytes (2GB).
2022
2023 The executable must be in the low 4TB of the virtual address
2024 space.
2025
2026 sethi %hi(symbol), %temp1
2027 or %temp1, %lo(symbol), %reg */
2028 if (temp)
2029 temp1 = temp; /* op0 is allowed. */
2030 else
2031 temp1 = gen_reg_rtx (DImode);
2032
2033 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2034 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2035 break;
2036
2037 case CM_MEDMID:
2038 /* The range spanned by all instructions in the object is less
2039 than 2^31 bytes (2GB) and the distance from any instruction
2040 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2041 than 2^31 bytes (2GB).
2042
2043 The executable must be in the low 16TB of the virtual address
2044 space.
2045
2046 sethi %h44(symbol), %temp1
2047 or %temp1, %m44(symbol), %temp2
2048 sllx %temp2, 12, %temp3
2049 or %temp3, %l44(symbol), %reg */
2050 if (temp)
2051 {
2052 temp1 = op0;
2053 temp2 = op0;
2054 temp3 = temp; /* op0 is allowed. */
2055 }
2056 else
2057 {
2058 temp1 = gen_reg_rtx (DImode);
2059 temp2 = gen_reg_rtx (DImode);
2060 temp3 = gen_reg_rtx (DImode);
2061 }
2062
2063 emit_insn (gen_seth44 (temp1, op1));
2064 emit_insn (gen_setm44 (temp2, temp1, op1));
2065 emit_insn (gen_rtx_SET (temp3,
2066 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2067 emit_insn (gen_setl44 (op0, temp3, op1));
2068 break;
2069
2070 case CM_MEDANY:
2071 /* The range spanned by all instructions in the object is less
2072 than 2^31 bytes (2GB) and the distance from any instruction
2073 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2074 than 2^31 bytes (2GB).
2075
2076 The executable can be placed anywhere in the virtual address
2077 space.
2078
2079 sethi %hh(symbol), %temp1
2080 sethi %lm(symbol), %temp2
2081 or %temp1, %hm(symbol), %temp3
2082 sllx %temp3, 32, %temp4
2083 or %temp4, %temp2, %temp5
2084 or %temp5, %lo(symbol), %reg */
2085 if (temp)
2086 {
2087 /* It is possible that one of the registers we got for operands[2]
2088 might coincide with that of operands[0] (which is why we made
2089 it TImode). Pick the other one to use as our scratch. */
2090 if (rtx_equal_p (temp, op0))
2091 {
2092 gcc_assert (ti_temp);
2093 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2094 }
2095 temp1 = op0;
2096 temp2 = temp; /* op0 is _not_ allowed, see above. */
2097 temp3 = op0;
2098 temp4 = op0;
2099 temp5 = op0;
2100 }
2101 else
2102 {
2103 temp1 = gen_reg_rtx (DImode);
2104 temp2 = gen_reg_rtx (DImode);
2105 temp3 = gen_reg_rtx (DImode);
2106 temp4 = gen_reg_rtx (DImode);
2107 temp5 = gen_reg_rtx (DImode);
2108 }
2109
2110 emit_insn (gen_sethh (temp1, op1));
2111 emit_insn (gen_setlm (temp2, op1));
2112 emit_insn (gen_sethm (temp3, temp1, op1));
2113 emit_insn (gen_rtx_SET (temp4,
2114 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2115 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2116 emit_insn (gen_setlo (op0, temp5, op1));
2117 break;
2118
2119 case CM_EMBMEDANY:
2120 /* Old old old backwards compatibility cruft here.
2121 Essentially it is MEDLOW with a fixed 64-bit
2122 virtual base added to all data segment addresses.
2123 Text-segment stuff is computed like MEDANY; we can't
2124 reuse the code above because the relocation knobs
2125 look different.
2126
2127 Data segment: sethi %hi(symbol), %temp1
2128 add %temp1, EMBMEDANY_BASE_REG, %temp2
2129 or %temp2, %lo(symbol), %reg */
2130 if (data_segment_operand (op1, GET_MODE (op1)))
2131 {
2132 if (temp)
2133 {
2134 temp1 = temp; /* op0 is allowed. */
2135 temp2 = op0;
2136 }
2137 else
2138 {
2139 temp1 = gen_reg_rtx (DImode);
2140 temp2 = gen_reg_rtx (DImode);
2141 }
2142
2143 emit_insn (gen_embmedany_sethi (temp1, op1));
2144 emit_insn (gen_embmedany_brsum (temp2, temp1));
2145 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2146 }
2147
2148 /* Text segment: sethi %uhi(symbol), %temp1
2149 sethi %hi(symbol), %temp2
2150 or %temp1, %ulo(symbol), %temp3
2151 sllx %temp3, 32, %temp4
2152 or %temp4, %temp2, %temp5
2153 or %temp5, %lo(symbol), %reg */
2154 else
2155 {
2156 if (temp)
2157 {
2158 /* It is possible that one of the registers we got for operands[2]
2159 might coincide with that of operands[0] (which is why we made
2160 it TImode). Pick the other one to use as our scratch. */
2161 if (rtx_equal_p (temp, op0))
2162 {
2163 gcc_assert (ti_temp);
2164 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2165 }
2166 temp1 = op0;
2167 temp2 = temp; /* op0 is _not_ allowed, see above. */
2168 temp3 = op0;
2169 temp4 = op0;
2170 temp5 = op0;
2171 }
2172 else
2173 {
2174 temp1 = gen_reg_rtx (DImode);
2175 temp2 = gen_reg_rtx (DImode);
2176 temp3 = gen_reg_rtx (DImode);
2177 temp4 = gen_reg_rtx (DImode);
2178 temp5 = gen_reg_rtx (DImode);
2179 }
2180
2181 emit_insn (gen_embmedany_textuhi (temp1, op1));
2182 emit_insn (gen_embmedany_texthi (temp2, op1));
2183 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2184 emit_insn (gen_rtx_SET (temp4,
2185 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2186 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2187 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2188 }
2189 break;
2190
2191 default:
2192 gcc_unreachable ();
2193 }
2194 }
2195
2196 /* These avoid problems when cross compiling. If we do not
2197 go through all this hair then the optimizer will see
2198 invalid REG_EQUAL notes or in some cases none at all. */
2199 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2200 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2201 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2202 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2203
2204 /* The optimizer is not to assume anything about exactly
2205 which bits are set for a HIGH; they are unspecified.
2206 Unfortunately this leads to many missed optimizations
2207 during CSE.  We mask out the non-HIGH bits so that the result
2208 matches a plain movdi, to alleviate this problem.  */
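/* For instance (illustrative), gen_safe_HIGH64 (dest, 0x12345678) yields
   (set dest (const_int 0x12345400)), i.e. the value with its low 10 bits
   cleared, which is exactly what the corresponding sethi leaves in DEST.  */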
2209 static rtx
2210 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2211 {
2212 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2213 }
2214
2215 static rtx
2216 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2217 {
2218 return gen_rtx_SET (dest, GEN_INT (val));
2219 }
2220
2221 static rtx
2222 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2223 {
2224 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2225 }
2226
2227 static rtx
2228 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2229 {
2230 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2231 }
2232
2233 /* Worker routines for 64-bit constant formation on arch64.
2234 One of the key things to do in these emissions is
2235 to create as many temp REGs as possible.  This makes it
2236 possible for half-built constants to be reused later when
2237 similar values are required again.
2238 Without doing this, the optimizer cannot see such
2239 opportunities. */
2240
2241 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2242 unsigned HOST_WIDE_INT, int);
2243
2244 static void
2245 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2246 unsigned HOST_WIDE_INT low_bits, int is_neg)
2247 {
2248 unsigned HOST_WIDE_INT high_bits;
2249
2250 if (is_neg)
2251 high_bits = (~low_bits) & 0xffffffff;
2252 else
2253 high_bits = low_bits;
2254
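/* A rough sketch of why the negative case works: the value here is
   0xffffffff_xxxxxxxx.  The sethi below materializes bits 31..10 of the
   complement; XOR-ing that with the sign-extended immediate
   (-0x400 | low 10 bits) flips bits 63..10 back to the original value and
   supplies bits 9..0 directly.  */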
2255 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2256 if (!is_neg)
2257 {
2258 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2259 }
2260 else
2261 {
2262 /* If we are XOR'ing with -1, then we should emit a one's complement
2263 instead. This way the combiner will notice logical operations
2264 such as ANDN later on and substitute. */
2265 if ((low_bits & 0x3ff) == 0x3ff)
2266 {
2267 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2268 }
2269 else
2270 {
2271 emit_insn (gen_rtx_SET (op0,
2272 gen_safe_XOR64 (temp,
2273 (-(HOST_WIDE_INT)0x400
2274 | (low_bits & 0x3ff)))));
2275 }
2276 }
2277 }
2278
2279 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2280 unsigned HOST_WIDE_INT, int);
2281
2282 static void
2283 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2284 unsigned HOST_WIDE_INT high_bits,
2285 unsigned HOST_WIDE_INT low_immediate,
2286 int shift_count)
2287 {
2288 rtx temp2 = op0;
2289
2290 if ((high_bits & 0xfffffc00) != 0)
2291 {
2292 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2293 if ((high_bits & ~0xfffffc00) != 0)
2294 emit_insn (gen_rtx_SET (op0,
2295 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2296 else
2297 temp2 = temp;
2298 }
2299 else
2300 {
2301 emit_insn (gen_safe_SET64 (temp, high_bits));
2302 temp2 = temp;
2303 }
2304
2305 /* Now shift it up into place. */
2306 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2307 GEN_INT (shift_count))));
2308
2309 /* If there is a low immediate piece, finish up by
2310 putting that in as well. */
2311 if (low_immediate != 0)
2312 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2313 }
2314
2315 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2316 unsigned HOST_WIDE_INT);
2317
2318 /* Full 64-bit constant decomposition. Even though this is the
2319 'worst' case, we still optimize a few things away. */
2320 static void
2321 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2322 unsigned HOST_WIDE_INT high_bits,
2323 unsigned HOST_WIDE_INT low_bits)
2324 {
2325 rtx sub_temp = op0;
2326
2327 if (can_create_pseudo_p ())
2328 sub_temp = gen_reg_rtx (DImode);
2329
2330 if ((high_bits & 0xfffffc00) != 0)
2331 {
2332 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2333 if ((high_bits & ~0xfffffc00) != 0)
2334 emit_insn (gen_rtx_SET (sub_temp,
2335 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2336 else
2337 sub_temp = temp;
2338 }
2339 else
2340 {
2341 emit_insn (gen_safe_SET64 (temp, high_bits));
2342 sub_temp = temp;
2343 }
2344
2345 if (can_create_pseudo_p ())
2346 {
2347 rtx temp2 = gen_reg_rtx (DImode);
2348 rtx temp3 = gen_reg_rtx (DImode);
2349 rtx temp4 = gen_reg_rtx (DImode);
2350
2351 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2352 GEN_INT (32))));
2353
2354 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2355 if ((low_bits & ~0xfffffc00) != 0)
2356 {
2357 emit_insn (gen_rtx_SET (temp3,
2358 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2359 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2360 }
2361 else
2362 {
2363 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2364 }
2365 }
2366 else
2367 {
2368 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2369 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2370 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2371 int to_shift = 12;
2372
2373 /* We are in the middle of reload, so this is really
2374 painful. However we do still make an attempt to
2375 avoid emitting truly stupid code. */
2376 if (low1 != const0_rtx)
2377 {
2378 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2379 GEN_INT (to_shift))));
2380 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2381 sub_temp = op0;
2382 to_shift = 12;
2383 }
2384 else
2385 {
2386 to_shift += 12;
2387 }
2388 if (low2 != const0_rtx)
2389 {
2390 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2391 GEN_INT (to_shift))));
2392 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2393 sub_temp = op0;
2394 to_shift = 8;
2395 }
2396 else
2397 {
2398 to_shift += 8;
2399 }
2400 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2401 GEN_INT (to_shift))));
2402 if (low3 != const0_rtx)
2403 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2404 /* phew... */
2405 }
2406 }
2407
2408 /* Analyze a 64-bit constant for certain properties. */
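/* E.g. (illustrative) for the constant 0x00000ff000000000, i.e.
   high_bits == 0x00000ff0 and low_bits == 0, the results are
   lowest_bit_set == 36, highest_bit_set == 43 and
   all_bits_between_are_set == 1.  */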
2409 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2410 unsigned HOST_WIDE_INT,
2411 int *, int *, int *);
2412
2413 static void
2414 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2415 unsigned HOST_WIDE_INT low_bits,
2416 int *hbsp, int *lbsp, int *abbasp)
2417 {
2418 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2419 int i;
2420
2421 lowest_bit_set = highest_bit_set = -1;
2422 i = 0;
2423 do
2424 {
2425 if ((lowest_bit_set == -1)
2426 && ((low_bits >> i) & 1))
2427 lowest_bit_set = i;
2428 if ((highest_bit_set == -1)
2429 && ((high_bits >> (32 - i - 1)) & 1))
2430 highest_bit_set = (64 - i - 1);
2431 }
2432 while (++i < 32
2433 && ((highest_bit_set == -1)
2434 || (lowest_bit_set == -1)));
2435 if (i == 32)
2436 {
2437 i = 0;
2438 do
2439 {
2440 if ((lowest_bit_set == -1)
2441 && ((high_bits >> i) & 1))
2442 lowest_bit_set = i + 32;
2443 if ((highest_bit_set == -1)
2444 && ((low_bits >> (32 - i - 1)) & 1))
2445 highest_bit_set = 32 - i - 1;
2446 }
2447 while (++i < 32
2448 && ((highest_bit_set == -1)
2449 || (lowest_bit_set == -1)));
2450 }
2451 /* If there are no bits set this should have gone out
2452 as one instruction! */
2453 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2454 all_bits_between_are_set = 1;
2455 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2456 {
2457 if (i < 32)
2458 {
2459 if ((low_bits & (1 << i)) != 0)
2460 continue;
2461 }
2462 else
2463 {
2464 if ((high_bits & (1 << (i - 32))) != 0)
2465 continue;
2466 }
2467 all_bits_between_are_set = 0;
2468 break;
2469 }
2470 *hbsp = highest_bit_set;
2471 *lbsp = lowest_bit_set;
2472 *abbasp = all_bits_between_are_set;
2473 }
2474
2475 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2476
2477 static int
2478 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2479 unsigned HOST_WIDE_INT low_bits)
2480 {
2481 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2482
2483 if (high_bits == 0
2484 || high_bits == 0xffffffff)
2485 return 1;
2486
2487 analyze_64bit_constant (high_bits, low_bits,
2488 &highest_bit_set, &lowest_bit_set,
2489 &all_bits_between_are_set);
2490
2491 if ((highest_bit_set == 63
2492 || lowest_bit_set == 0)
2493 && all_bits_between_are_set != 0)
2494 return 1;
2495
2496 if ((highest_bit_set - lowest_bit_set) < 21)
2497 return 1;
2498
2499 return 0;
2500 }
2501
2502 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2503 unsigned HOST_WIDE_INT,
2504 int, int);
2505
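/* Extract the run of interesting bits, starting at bit LOWEST_BIT_SET, from
   the 64-bit value HIGH_BITS:LOW_BITS and return it shifted left by SHIFT.
   E.g. (illustrative) for 0x00000ff000000000 with lowest_bit_set == 36 and
   shift == 0 the result is 0xff.  */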
2506 static unsigned HOST_WIDE_INT
2507 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2508 unsigned HOST_WIDE_INT low_bits,
2509 int lowest_bit_set, int shift)
2510 {
2511 HOST_WIDE_INT hi, lo;
2512
2513 if (lowest_bit_set < 32)
2514 {
2515 lo = (low_bits >> lowest_bit_set) << shift;
2516 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2517 }
2518 else
2519 {
2520 lo = 0;
2521 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2522 }
2523 gcc_assert (! (hi & lo));
2524 return (hi | lo);
2525 }
2526
2527 /* Here we are sure to be arch64 and this is an integer constant
2528 being loaded into a register. Emit the most efficient
2529 insn sequence possible. Detection of all the 1-insn cases
2530 has been done already. */
2531 static void
2532 sparc_emit_set_const64 (rtx op0, rtx op1)
2533 {
2534 unsigned HOST_WIDE_INT high_bits, low_bits;
2535 int lowest_bit_set, highest_bit_set;
2536 int all_bits_between_are_set;
2537 rtx temp = 0;
2538
2539 /* Sanity check that we know what we are working with. */
2540 gcc_assert (TARGET_ARCH64
2541 && (GET_CODE (op0) == SUBREG
2542 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2543
2544 if (! can_create_pseudo_p ())
2545 temp = op0;
2546
2547 if (GET_CODE (op1) != CONST_INT)
2548 {
2549 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2550 return;
2551 }
2552
2553 if (! temp)
2554 temp = gen_reg_rtx (DImode);
2555
2556 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2557 low_bits = (INTVAL (op1) & 0xffffffff);
2558
2559 /* low_bits bits 0 --> 31
2560 high_bits bits 32 --> 63 */
2561
2562 analyze_64bit_constant (high_bits, low_bits,
2563 &highest_bit_set, &lowest_bit_set,
2564 &all_bits_between_are_set);
2565
2566 /* First try for a 2-insn sequence. */
2567
2568 /* These situations are preferred because the optimizer can
2569 * do more things with them:
2570 * 1) mov -1, %reg
2571 * sllx %reg, shift, %reg
2572 * 2) mov -1, %reg
2573 * srlx %reg, shift, %reg
2574 * 3) mov some_small_const, %reg
2575 * sllx %reg, shift, %reg
2576 */
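/* Illustrative example: 0x00000ff000000000 has lowest_bit_set == 36 and
   highest_bit_set == 43, a span of less than 12 bits, so it is emitted as
   case 3 above: 'mov 0xff, %reg ; sllx %reg, 36, %reg'.  */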
2577 if (((highest_bit_set == 63
2578 || lowest_bit_set == 0)
2579 && all_bits_between_are_set != 0)
2580 || ((highest_bit_set - lowest_bit_set) < 12))
2581 {
2582 HOST_WIDE_INT the_const = -1;
2583 int shift = lowest_bit_set;
2584
2585 if ((highest_bit_set != 63
2586 && lowest_bit_set != 0)
2587 || all_bits_between_are_set == 0)
2588 {
2589 the_const =
2590 create_simple_focus_bits (high_bits, low_bits,
2591 lowest_bit_set, 0);
2592 }
2593 else if (lowest_bit_set == 0)
2594 shift = -(63 - highest_bit_set);
2595
2596 gcc_assert (SPARC_SIMM13_P (the_const));
2597 gcc_assert (shift != 0);
2598
2599 emit_insn (gen_safe_SET64 (temp, the_const));
2600 if (shift > 0)
2601 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2602 GEN_INT (shift))));
2603 else if (shift < 0)
2604 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2605 GEN_INT (-shift))));
2606 return;
2607 }
2608
2609 /* Now a range of 22 or fewer bits set somewhere.
2610 * 1) sethi %hi(focus_bits), %reg
2611 * sllx %reg, shift, %reg
2612 * 2) sethi %hi(focus_bits), %reg
2613 * srlx %reg, shift, %reg
2614 */
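/* Illustrative example: 0x001ffff000000000 (bits 36..52 set) is too wide
   for the case above, but its focus bits 0x7fffc00 fit a sethi, so it is
   emitted as 'sethi %hi(0x7fffc00), %reg ; sllx %reg, 26, %reg'
   (a left shift by lowest_bit_set - 10 == 26).  */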
2615 if ((highest_bit_set - lowest_bit_set) < 21)
2616 {
2617 unsigned HOST_WIDE_INT focus_bits =
2618 create_simple_focus_bits (high_bits, low_bits,
2619 lowest_bit_set, 10);
2620
2621 gcc_assert (SPARC_SETHI_P (focus_bits));
2622 gcc_assert (lowest_bit_set != 10);
2623
2624 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2625
2626 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2627 if (lowest_bit_set < 10)
2628 emit_insn (gen_rtx_SET (op0,
2629 gen_rtx_LSHIFTRT (DImode, temp,
2630 GEN_INT (10 - lowest_bit_set))));
2631 else if (lowest_bit_set > 10)
2632 emit_insn (gen_rtx_SET (op0,
2633 gen_rtx_ASHIFT (DImode, temp,
2634 GEN_INT (lowest_bit_set - 10))));
2635 return;
2636 }
2637
2638 /* 1) sethi %hi(low_bits), %reg
2639 * or %reg, %lo(low_bits), %reg
2640 * 2) sethi %hi(~low_bits), %reg
2641 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2642 */
2643 if (high_bits == 0
2644 || high_bits == 0xffffffff)
2645 {
2646 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2647 (high_bits == 0xffffffff));
2648 return;
2649 }
2650
2651 /* Now, try 3-insn sequences. */
2652
2653 /* 1) sethi %hi(high_bits), %reg
2654 * or %reg, %lo(high_bits), %reg
2655 * sllx %reg, 32, %reg
2656 */
2657 if (low_bits == 0)
2658 {
2659 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2660 return;
2661 }
2662
2663 /* We may be able to do something quick
2664 when the constant is negated, so try that. */
2665 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2666 (~low_bits) & 0xfffffc00))
2667 {
2668 /* NOTE: The trailing bits get XOR'd so we need the
2669 non-negated bits, not the negated ones. */
2670 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2671
2672 if ((((~high_bits) & 0xffffffff) == 0
2673 && ((~low_bits) & 0x80000000) == 0)
2674 || (((~high_bits) & 0xffffffff) == 0xffffffff
2675 && ((~low_bits) & 0x80000000) != 0))
2676 {
2677 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2678
2679 if ((SPARC_SETHI_P (fast_int)
2680 && (~high_bits & 0xffffffff) == 0)
2681 || SPARC_SIMM13_P (fast_int))
2682 emit_insn (gen_safe_SET64 (temp, fast_int));
2683 else
2684 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2685 }
2686 else
2687 {
2688 rtx negated_const;
2689 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2690 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2691 sparc_emit_set_const64 (temp, negated_const);
2692 }
2693
2694 /* If we are XOR'ing with -1, then we should emit a one's complement
2695 instead. This way the combiner will notice logical operations
2696 such as ANDN later on and substitute. */
2697 if (trailing_bits == 0x3ff)
2698 {
2699 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2700 }
2701 else
2702 {
2703 emit_insn (gen_rtx_SET (op0,
2704 gen_safe_XOR64 (temp,
2705 (-0x400 | trailing_bits))));
2706 }
2707 return;
2708 }
2709
2710 /* 1) sethi %hi(xxx), %reg
2711 * or %reg, %lo(xxx), %reg
2712 * sllx %reg, yyy, %reg
2713 *
2714 * ??? This is just a generalized version of the low_bits==0
2715 * thing above, FIXME...
2716 */
2717 if ((highest_bit_set - lowest_bit_set) < 32)
2718 {
2719 unsigned HOST_WIDE_INT focus_bits =
2720 create_simple_focus_bits (high_bits, low_bits,
2721 lowest_bit_set, 0);
2722
2723 /* We can't get here in this state. */
2724 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2725
2726 /* So what we know is that the set bits straddle the
2727 middle of the 64-bit word. */
2728 sparc_emit_set_const64_quick2 (op0, temp,
2729 focus_bits, 0,
2730 lowest_bit_set);
2731 return;
2732 }
2733
2734 /* 1) sethi %hi(high_bits), %reg
2735 * or %reg, %lo(high_bits), %reg
2736 * sllx %reg, 32, %reg
2737 * or %reg, low_bits, %reg
2738 */
2739 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2740 {
2741 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2742 return;
2743 }
2744
2745 /* The easiest way, when all else fails, is full decomposition.  */
2746 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2747 }
2748
2749 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2750
2751 static bool
2752 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2753 {
2754 *p1 = SPARC_ICC_REG;
2755 *p2 = SPARC_FCC_REG;
2756 return true;
2757 }
2758
2759 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2760 return the mode to be used for the comparison. For floating-point,
2761 CCFP[E]mode is used. CCNZmode should be used when the first operand
2762 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2763 processing is needed. */
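/* For example (illustrative), comparing (plus:DI a b) against zero under
   TARGET_ARCH64 selects CCXNZmode: only the N and Z condition bits produced
   by the addition itself are meaningful, so the compare can reuse the flags
   of the arithmetic instruction.  */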
2764
2765 machine_mode
2766 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2767 {
2768 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2769 {
2770 switch (op)
2771 {
2772 case EQ:
2773 case NE:
2774 case UNORDERED:
2775 case ORDERED:
2776 case UNLT:
2777 case UNLE:
2778 case UNGT:
2779 case UNGE:
2780 case UNEQ:
2781 case LTGT:
2782 return CCFPmode;
2783
2784 case LT:
2785 case LE:
2786 case GT:
2787 case GE:
2788 return CCFPEmode;
2789
2790 default:
2791 gcc_unreachable ();
2792 }
2793 }
2794 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2795 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2796 {
2797 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2798 return CCXNZmode;
2799 else
2800 return CCNZmode;
2801 }
2802 else
2803 {
2804 /* This is for the cmp<mode>_sne pattern. */
2805 if (GET_CODE (x) == NOT && y == constm1_rtx)
2806 {
2807 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2808 return CCXCmode;
2809 else
2810 return CCCmode;
2811 }
2812
2813 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2814 return CCXmode;
2815 else
2816 return CCmode;
2817 }
2818 }
2819
2820 /* Emit the compare insn and return the CC reg for a CODE comparison
2821 with operands X and Y. */
2822
2823 static rtx
2824 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2825 {
2826 machine_mode mode;
2827 rtx cc_reg;
2828
2829 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2830 return x;
2831
2832 mode = SELECT_CC_MODE (code, x, y);
2833
2834 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2835 fcc regs (cse can't tell they're really call clobbered regs and will
2836 remove a duplicate comparison even if there is an intervening function
2837 call - it will then try to reload the cc reg via an int reg which is why
2838 we need the movcc patterns). It is possible to provide the movcc
2839 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2840 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2841 to tell cse that CCFPE mode registers (even pseudos) are call
2842 clobbered. */
2843
2844 /* ??? This is an experiment. Rather than making changes to cse which may
2845 or may not be easy/clean, we do our own cse. This is possible because
2846 we will generate hard registers. Cse knows they're call clobbered (it
2847 doesn't know the same thing about pseudos). If we guess wrong, no big
2848 deal, but if we win, great! */
2849
2850 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2851 #if 1 /* experiment */
2852 {
2853 int reg;
2854 /* We cycle through the registers to ensure they're all exercised. */
2855 static int next_fcc_reg = 0;
2856 /* Previous x,y for each fcc reg. */
2857 static rtx prev_args[4][2];
2858
2859 /* Scan prev_args for x,y. */
2860 for (reg = 0; reg < 4; reg++)
2861 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2862 break;
2863 if (reg == 4)
2864 {
2865 reg = next_fcc_reg;
2866 prev_args[reg][0] = x;
2867 prev_args[reg][1] = y;
2868 next_fcc_reg = (next_fcc_reg + 1) & 3;
2869 }
2870 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2871 }
2872 #else
2873 cc_reg = gen_reg_rtx (mode);
2874 #endif /* ! experiment */
2875 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2876 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2877 else
2878 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2879
2880 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
2881 will only result in an unrecognizable insn so no point in asserting. */
2882 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2883
2884 return cc_reg;
2885 }
2886
2887
2888 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2889
2890 rtx
2891 gen_compare_reg (rtx cmp)
2892 {
2893 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2894 }
2895
2896 /* This function is used for v9 only.
2897 DEST is the target of the Scc insn.
2898 CODE is the code for an Scc's comparison.
2899 X and Y are the values we compare.
2900
2901 This function is needed to turn
2902
2903 (set (reg:SI 110)
2904 (gt (reg:CCX 100 %icc)
2905 (const_int 0)))
2906 into
2907 (set (reg:SI 110)
2908 (gt:DI (reg:CCX 100 %icc)
2909 (const_int 0)))
2910
2911 I.e. the instruction recognizer needs to see the mode of the comparison to
2912 find the right instruction. We could use "gt:DI" right in the
2913 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2914
2915 static int
2916 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2917 {
2918 if (! TARGET_ARCH64
2919 && (GET_MODE (x) == DImode
2920 || GET_MODE (dest) == DImode))
2921 return 0;
2922
2923 /* Try to use the movrCC insns. */
2924 if (TARGET_ARCH64
2925 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2926 && y == const0_rtx
2927 && v9_regcmp_p (compare_code))
2928 {
2929 rtx op0 = x;
2930 rtx temp;
2931
2932 /* Special case for op0 != 0. This can be done with one instruction if
2933 dest == x. */
2934
2935 if (compare_code == NE
2936 && GET_MODE (dest) == DImode
2937 && rtx_equal_p (op0, dest))
2938 {
2939 emit_insn (gen_rtx_SET (dest,
2940 gen_rtx_IF_THEN_ELSE (DImode,
2941 gen_rtx_fmt_ee (compare_code, DImode,
2942 op0, const0_rtx),
2943 const1_rtx,
2944 dest)));
2945 return 1;
2946 }
2947
2948 if (reg_overlap_mentioned_p (dest, op0))
2949 {
2950 /* Handle the case where dest == x.
2951 We "early clobber" the result. */
2952 op0 = gen_reg_rtx (GET_MODE (x));
2953 emit_move_insn (op0, x);
2954 }
2955
2956 emit_insn (gen_rtx_SET (dest, const0_rtx));
2957 if (GET_MODE (op0) != DImode)
2958 {
2959 temp = gen_reg_rtx (DImode);
2960 convert_move (temp, op0, 0);
2961 }
2962 else
2963 temp = op0;
2964 emit_insn (gen_rtx_SET (dest,
2965 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2966 gen_rtx_fmt_ee (compare_code, DImode,
2967 temp, const0_rtx),
2968 const1_rtx,
2969 dest)));
2970 return 1;
2971 }
2972 else
2973 {
2974 x = gen_compare_reg_1 (compare_code, x, y);
2975 y = const0_rtx;
2976
2977 emit_insn (gen_rtx_SET (dest, const0_rtx));
2978 emit_insn (gen_rtx_SET (dest,
2979 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2980 gen_rtx_fmt_ee (compare_code,
2981 GET_MODE (x), x, y),
2982 const1_rtx, dest)));
2983 return 1;
2984 }
2985 }
2986
2987
2988 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2989 without jumps using the addx/subx instructions. */
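/* Rough sketch of the idiom: unsigned 'dest = (a < b)' is just the carry
   out of a subtract, e.g. 'cmp a, b ; addx %g0, 0, dest', and 'a != b'
   becomes the same trick applied to 'a ^ b' compared against zero, so no
   branch is needed.  */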
2990
2991 bool
2992 emit_scc_insn (rtx operands[])
2993 {
2994 rtx tem, x, y;
2995 enum rtx_code code;
2996 machine_mode mode;
2997
2998 /* The quad-word fp compare library routines all return nonzero to indicate
2999 true, which is different from the equivalent libgcc routines, so we must
3000 handle them specially here. */
3001 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3002 {
3003 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3004 GET_CODE (operands[1]));
3005 operands[2] = XEXP (operands[1], 0);
3006 operands[3] = XEXP (operands[1], 1);
3007 }
3008
3009 code = GET_CODE (operands[1]);
3010 x = operands[2];
3011 y = operands[3];
3012 mode = GET_MODE (x);
3013
3014 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3015 more applications). The exception to this is "reg != 0" which can
3016 be done in one instruction on v9 (so we do it). */
3017 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3018 {
3019 if (y != const0_rtx)
3020 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3021
3022 rtx pat = gen_rtx_SET (operands[0],
3023 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3024 x, const0_rtx));
3025
3026 /* If we can use addx/subx or addxc, add a clobber for CC. */
3027 if (mode == SImode || (code == NE && TARGET_VIS3))
3028 {
3029 rtx clobber
3030 = gen_rtx_CLOBBER (VOIDmode,
3031 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3032 SPARC_ICC_REG));
3033 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3034 }
3035
3036 emit_insn (pat);
3037 return true;
3038 }
3039
3040 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3041 if (TARGET_ARCH64
3042 && mode == DImode
3043 && !((code == LTU || code == GTU) && TARGET_VIS3)
3044 && gen_v9_scc (operands[0], code, x, y))
3045 return true;
3046
3047 /* We can do LTU and GEU using the addx/subx instructions too. And
3048 for GTU/LEU, if both operands are registers, swap them and fall
3049 back to the easy case. */
3050 if (code == GTU || code == LEU)
3051 {
3052 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3053 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3054 {
3055 tem = x;
3056 x = y;
3057 y = tem;
3058 code = swap_condition (code);
3059 }
3060 }
3061
3062 if (code == LTU || code == GEU)
3063 {
3064 emit_insn (gen_rtx_SET (operands[0],
3065 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3066 gen_compare_reg_1 (code, x, y),
3067 const0_rtx)));
3068 return true;
3069 }
3070
3071 /* All the possibilities to use addx/subx-based sequences have been
3072 exhausted, so try for a 3-instruction sequence using v9 conditional
3073 moves. */
3074 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3075 return true;
3076
3077 /* Nope, do branches. */
3078 return false;
3079 }
3080
3081 /* Emit a conditional jump insn for the v9 architecture using comparison code
3082 CODE and jump target LABEL.
3083 This function exists to take advantage of the v9 brxx insns. */
3084
3085 static void
3086 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3087 {
3088 emit_jump_insn (gen_rtx_SET (pc_rtx,
3089 gen_rtx_IF_THEN_ELSE (VOIDmode,
3090 gen_rtx_fmt_ee (code, GET_MODE (op0),
3091 op0, const0_rtx),
3092 gen_rtx_LABEL_REF (VOIDmode, label),
3093 pc_rtx)));
3094 }
3095
3096 /* Emit a conditional jump insn for the UA2011 architecture using
3097 comparison code CODE and jump target LABEL. This function exists
3098 to take advantage of the UA2011 Compare and Branch insns. */
3099
3100 static void
3101 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3102 {
3103 rtx if_then_else;
3104
3105 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3106 gen_rtx_fmt_ee(code, GET_MODE(op0),
3107 op0, op1),
3108 gen_rtx_LABEL_REF (VOIDmode, label),
3109 pc_rtx);
3110
3111 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3112 }
3113
3114 void
3115 emit_conditional_branch_insn (rtx operands[])
3116 {
3117 /* The quad-word fp compare library routines all return nonzero to indicate
3118 true, which is different from the equivalent libgcc routines, so we must
3119 handle them specially here. */
3120 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3121 {
3122 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3123 GET_CODE (operands[0]));
3124 operands[1] = XEXP (operands[0], 0);
3125 operands[2] = XEXP (operands[0], 1);
3126 }
3127
3128 /* If we can tell early on that the comparison is against a constant
3129 that won't fit in the 5-bit signed immediate field of a cbcond,
3130 use one of the other v9 conditional branch sequences. */
3131 if (TARGET_CBCOND
3132 && GET_CODE (operands[1]) == REG
3133 && (GET_MODE (operands[1]) == SImode
3134 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3135 && (GET_CODE (operands[2]) != CONST_INT
3136 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3137 {
3138 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3139 return;
3140 }
3141
3142 if (TARGET_ARCH64 && operands[2] == const0_rtx
3143 && GET_CODE (operands[1]) == REG
3144 && GET_MODE (operands[1]) == DImode)
3145 {
3146 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3147 return;
3148 }
3149
3150 operands[1] = gen_compare_reg (operands[0]);
3151 operands[2] = const0_rtx;
3152 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3153 operands[1], operands[2]);
3154 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3155 operands[3]));
3156 }
3157
3158
3159 /* Generate a DFmode part of a hard TFmode register.
3160 REG is the TFmode hard register, LOW is 1 for the
3161 low 64 bits of the register and 0 otherwise.
3162 */
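/* E.g. (illustrative) for a TFmode value in %f0..%f3 on big-endian SPARC,
   gen_df_reg (reg, 0) returns %f0 (the high-order half) and
   gen_df_reg (reg, 1) returns %f2.  */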
3163 rtx
3164 gen_df_reg (rtx reg, int low)
3165 {
3166 int regno = REGNO (reg);
3167
3168 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3169 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3170 return gen_rtx_REG (DFmode, regno);
3171 }
3172 \f
3173 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3174 Unlike normal calls, TFmode operands are passed by reference. It is
3175 assumed that no more than 3 operands are required. */
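/* For example (illustrative), a TFmode addition in this scheme ends up as a
   call along the lines of '_Qp_add (&dest, &x, &y)', with the result slot
   and both operands all passed by address.  */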
3176
3177 static void
3178 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3179 {
3180 rtx ret_slot = NULL, arg[3], func_sym;
3181 int i;
3182
3183 /* We only expect to be called for conversions, unary, and binary ops. */
3184 gcc_assert (nargs == 2 || nargs == 3);
3185
3186 for (i = 0; i < nargs; ++i)
3187 {
3188 rtx this_arg = operands[i];
3189 rtx this_slot;
3190
3191 /* TFmode arguments and return values are passed by reference. */
3192 if (GET_MODE (this_arg) == TFmode)
3193 {
3194 int force_stack_temp;
3195
3196 force_stack_temp = 0;
3197 if (TARGET_BUGGY_QP_LIB && i == 0)
3198 force_stack_temp = 1;
3199
3200 if (GET_CODE (this_arg) == MEM
3201 && ! force_stack_temp)
3202 {
3203 tree expr = MEM_EXPR (this_arg);
3204 if (expr)
3205 mark_addressable (expr);
3206 this_arg = XEXP (this_arg, 0);
3207 }
3208 else if (CONSTANT_P (this_arg)
3209 && ! force_stack_temp)
3210 {
3211 this_slot = force_const_mem (TFmode, this_arg);
3212 this_arg = XEXP (this_slot, 0);
3213 }
3214 else
3215 {
3216 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3217
3218 /* Operand 0 is the return value. We'll copy it out later. */
3219 if (i > 0)
3220 emit_move_insn (this_slot, this_arg);
3221 else
3222 ret_slot = this_slot;
3223
3224 this_arg = XEXP (this_slot, 0);
3225 }
3226 }
3227
3228 arg[i] = this_arg;
3229 }
3230
3231 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3232
3233 if (GET_MODE (operands[0]) == TFmode)
3234 {
3235 if (nargs == 2)
3236 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3237 arg[0], GET_MODE (arg[0]),
3238 arg[1], GET_MODE (arg[1]));
3239 else
3240 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3241 arg[0], GET_MODE (arg[0]),
3242 arg[1], GET_MODE (arg[1]),
3243 arg[2], GET_MODE (arg[2]));
3244
3245 if (ret_slot)
3246 emit_move_insn (operands[0], ret_slot);
3247 }
3248 else
3249 {
3250 rtx ret;
3251
3252 gcc_assert (nargs == 2);
3253
3254 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3255 GET_MODE (operands[0]), 1,
3256 arg[1], GET_MODE (arg[1]));
3257
3258 if (ret != operands[0])
3259 emit_move_insn (operands[0], ret);
3260 }
3261 }
3262
3263 /* Expand soft-float TFmode calls to sparc abi routines. */
3264
3265 static void
3266 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3267 {
3268 const char *func;
3269
3270 switch (code)
3271 {
3272 case PLUS:
3273 func = "_Qp_add";
3274 break;
3275 case MINUS:
3276 func = "_Qp_sub";
3277 break;
3278 case MULT:
3279 func = "_Qp_mul";
3280 break;
3281 case DIV:
3282 func = "_Qp_div";
3283 break;
3284 default:
3285 gcc_unreachable ();
3286 }
3287
3288 emit_soft_tfmode_libcall (func, 3, operands);
3289 }
3290
3291 static void
3292 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3293 {
3294 const char *func;
3295
3296 gcc_assert (code == SQRT);
3297 func = "_Qp_sqrt";
3298
3299 emit_soft_tfmode_libcall (func, 2, operands);
3300 }
3301
3302 static void
3303 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3304 {
3305 const char *func;
3306
3307 switch (code)
3308 {
3309 case FLOAT_EXTEND:
3310 switch (GET_MODE (operands[1]))
3311 {
3312 case SFmode:
3313 func = "_Qp_stoq";
3314 break;
3315 case DFmode:
3316 func = "_Qp_dtoq";
3317 break;
3318 default:
3319 gcc_unreachable ();
3320 }
3321 break;
3322
3323 case FLOAT_TRUNCATE:
3324 switch (GET_MODE (operands[0]))
3325 {
3326 case SFmode:
3327 func = "_Qp_qtos";
3328 break;
3329 case DFmode:
3330 func = "_Qp_qtod";
3331 break;
3332 default:
3333 gcc_unreachable ();
3334 }
3335 break;
3336
3337 case FLOAT:
3338 switch (GET_MODE (operands[1]))
3339 {
3340 case SImode:
3341 func = "_Qp_itoq";
3342 if (TARGET_ARCH64)
3343 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3344 break;
3345 case DImode:
3346 func = "_Qp_xtoq";
3347 break;
3348 default:
3349 gcc_unreachable ();
3350 }
3351 break;
3352
3353 case UNSIGNED_FLOAT:
3354 switch (GET_MODE (operands[1]))
3355 {
3356 case SImode:
3357 func = "_Qp_uitoq";
3358 if (TARGET_ARCH64)
3359 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3360 break;
3361 case DImode:
3362 func = "_Qp_uxtoq";
3363 break;
3364 default:
3365 gcc_unreachable ();
3366 }
3367 break;
3368
3369 case FIX:
3370 switch (GET_MODE (operands[0]))
3371 {
3372 case SImode:
3373 func = "_Qp_qtoi";
3374 break;
3375 case DImode:
3376 func = "_Qp_qtox";
3377 break;
3378 default:
3379 gcc_unreachable ();
3380 }
3381 break;
3382
3383 case UNSIGNED_FIX:
3384 switch (GET_MODE (operands[0]))
3385 {
3386 case SImode:
3387 func = "_Qp_qtoui";
3388 break;
3389 case DImode:
3390 func = "_Qp_qtoux";
3391 break;
3392 default:
3393 gcc_unreachable ();
3394 }
3395 break;
3396
3397 default:
3398 gcc_unreachable ();
3399 }
3400
3401 emit_soft_tfmode_libcall (func, 2, operands);
3402 }
3403
3404 /* Expand a hard-float tfmode operation. All arguments must be in
3405 registers. */
3406
3407 static void
3408 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3409 {
3410 rtx op, dest;
3411
3412 if (GET_RTX_CLASS (code) == RTX_UNARY)
3413 {
3414 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3415 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3416 }
3417 else
3418 {
3419 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3420 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3421 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3422 operands[1], operands[2]);
3423 }
3424
3425 if (register_operand (operands[0], VOIDmode))
3426 dest = operands[0];
3427 else
3428 dest = gen_reg_rtx (GET_MODE (operands[0]));
3429
3430 emit_insn (gen_rtx_SET (dest, op));
3431
3432 if (dest != operands[0])
3433 emit_move_insn (operands[0], dest);
3434 }
3435
3436 void
3437 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3438 {
3439 if (TARGET_HARD_QUAD)
3440 emit_hard_tfmode_operation (code, operands);
3441 else
3442 emit_soft_tfmode_binop (code, operands);
3443 }
3444
3445 void
3446 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3447 {
3448 if (TARGET_HARD_QUAD)
3449 emit_hard_tfmode_operation (code, operands);
3450 else
3451 emit_soft_tfmode_unop (code, operands);
3452 }
3453
3454 void
3455 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3456 {
3457 if (TARGET_HARD_QUAD)
3458 emit_hard_tfmode_operation (code, operands);
3459 else
3460 emit_soft_tfmode_cvt (code, operands);
3461 }
3462 \f
3463 /* Return nonzero if a branch/jump/call instruction will be emitting
3464 a nop into its delay slot.  */
3465
3466 int
3467 empty_delay_slot (rtx_insn *insn)
3468 {
3469 rtx seq;
3470
3471 /* If no previous instruction (should not happen), return true. */
3472 if (PREV_INSN (insn) == NULL)
3473 return 1;
3474
3475 seq = NEXT_INSN (PREV_INSN (insn));
3476 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3477 return 0;
3478
3479 return 1;
3480 }
3481
3482 /* Return nonzero if we should emit a nop after a cbcond instruction.
3483 The cbcond instruction does not have a delay slot; however, there is
3484 a severe performance penalty if a control transfer appears right
3485 after a cbcond. Therefore we emit a nop when we detect this
3486 situation. */
3487
3488 int
3489 emit_cbcond_nop (rtx_insn *insn)
3490 {
3491 rtx next = next_active_insn (insn);
3492
3493 if (!next)
3494 return 1;
3495
3496 if (NONJUMP_INSN_P (next)
3497 && GET_CODE (PATTERN (next)) == SEQUENCE)
3498 next = XVECEXP (PATTERN (next), 0, 0);
3499 else if (CALL_P (next)
3500 && GET_CODE (PATTERN (next)) == PARALLEL)
3501 {
3502 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3503
3504 if (GET_CODE (delay) == RETURN)
3505 {
3506 /* It's a sibling call. Do not emit the nop if we're going
3507 to emit something other than the jump itself as the first
3508 instruction of the sibcall sequence. */
3509 if (sparc_leaf_function_p || TARGET_FLAT)
3510 return 0;
3511 }
3512 }
3513
3514 if (NONJUMP_INSN_P (next))
3515 return 0;
3516
3517 return 1;
3518 }
3519
3520 /* Return nonzero if TRIAL can go into the call delay slot. */
3521
3522 int
3523 eligible_for_call_delay (rtx_insn *trial)
3524 {
3525 rtx pat;
3526
3527 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3528 return 0;
3529
3530 /* Binutils allows
3531 call __tls_get_addr, %tgd_call (foo)
3532 add %l7, %o0, %o0, %tgd_add (foo)
3533 while Sun as/ld does not. */
3534 if (TARGET_GNU_TLS || !TARGET_TLS)
3535 return 1;
3536
3537 pat = PATTERN (trial);
3538
3539 /* We must reject tgd_add{32|64}, i.e.
3540 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3541 and tldm_add{32|64}, i.e.
3542 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3543 for Sun as/ld. */
3544 if (GET_CODE (pat) == SET
3545 && GET_CODE (SET_SRC (pat)) == PLUS)
3546 {
3547 rtx unspec = XEXP (SET_SRC (pat), 1);
3548
3549 if (GET_CODE (unspec) == UNSPEC
3550 && (XINT (unspec, 1) == UNSPEC_TLSGD
3551 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3552 return 0;
3553 }
3554
3555 return 1;
3556 }
3557
3558 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3559 instruction. RETURN_P is true if the v9 variant 'return' is to be
3560 considered in the test too.
3561
3562 TRIAL must be a SET whose destination is a REG appropriate for the
3563 'restore' instruction or, if RETURN_P is true, for the 'return'
3564 instruction. */
3565
3566 static int
3567 eligible_for_restore_insn (rtx trial, bool return_p)
3568 {
3569 rtx pat = PATTERN (trial);
3570 rtx src = SET_SRC (pat);
3571 bool src_is_freg = false;
3572 rtx src_reg;
3573
3574 /* Since we now can do moves between float and integer registers when
3575 VIS3 is enabled, we have to catch this case. We can allow such
3576 moves when doing a 'return' however. */
3577 src_reg = src;
3578 if (GET_CODE (src_reg) == SUBREG)
3579 src_reg = SUBREG_REG (src_reg);
3580 if (GET_CODE (src_reg) == REG
3581 && SPARC_FP_REG_P (REGNO (src_reg)))
3582 src_is_freg = true;
3583
3584 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3585 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3586 && arith_operand (src, GET_MODE (src))
3587 && ! src_is_freg)
3588 {
3589 if (TARGET_ARCH64)
3590 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3591 else
3592 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3593 }
3594
3595 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3596 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3597 && arith_double_operand (src, GET_MODE (src))
3598 && ! src_is_freg)
3599 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3600
3601 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3602 else if (! TARGET_FPU && register_operand (src, SFmode))
3603 return 1;
3604
3605 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3606 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3607 return 1;
3608
3609 /* If we have the 'return' instruction, anything that does not use
3610 local or output registers and can go into a delay slot wins. */
3611 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3612 return 1;
3613
3614 /* The 'restore src1,src2,dest' pattern for SImode. */
3615 else if (GET_CODE (src) == PLUS
3616 && register_operand (XEXP (src, 0), SImode)
3617 && arith_operand (XEXP (src, 1), SImode))
3618 return 1;
3619
3620 /* The 'restore src1,src2,dest' pattern for DImode. */
3621 else if (GET_CODE (src) == PLUS
3622 && register_operand (XEXP (src, 0), DImode)
3623 && arith_double_operand (XEXP (src, 1), DImode))
3624 return 1;
3625
3626 /* The 'restore src1,%lo(src2),dest' pattern. */
3627 else if (GET_CODE (src) == LO_SUM
3628 && ! TARGET_CM_MEDMID
3629 && ((register_operand (XEXP (src, 0), SImode)
3630 && immediate_operand (XEXP (src, 1), SImode))
3631 || (TARGET_ARCH64
3632 && register_operand (XEXP (src, 0), DImode)
3633 && immediate_operand (XEXP (src, 1), DImode))))
3634 return 1;
3635
3636 /* The 'restore src,src,dest' pattern (src + src == src << 1). */
3637 else if (GET_CODE (src) == ASHIFT
3638 && (register_operand (XEXP (src, 0), SImode)
3639 || register_operand (XEXP (src, 0), DImode))
3640 && XEXP (src, 1) == const1_rtx)
3641 return 1;
3642
3643 return 0;
3644 }
3645
3646 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3647
3648 int
3649 eligible_for_return_delay (rtx_insn *trial)
3650 {
3651 int regno;
3652 rtx pat;
3653
3654 /* If the function uses __builtin_eh_return, the eh_return machinery
3655 occupies the delay slot. */
3656 if (crtl->calls_eh_return)
3657 return 0;
3658
3659 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3660 return 0;
3661
3662 /* In the case of a leaf or flat function, anything can go into the slot. */
3663 if (sparc_leaf_function_p || TARGET_FLAT)
3664 return 1;
3665
3666 if (!NONJUMP_INSN_P (trial))
3667 return 0;
3668
3669 pat = PATTERN (trial);
3670 if (GET_CODE (pat) == PARALLEL)
3671 {
3672 int i;
3673
3674 if (! TARGET_V9)
3675 return 0;
3676 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3677 {
3678 rtx expr = XVECEXP (pat, 0, i);
3679 if (GET_CODE (expr) != SET)
3680 return 0;
3681 if (GET_CODE (SET_DEST (expr)) != REG)
3682 return 0;
3683 regno = REGNO (SET_DEST (expr));
3684 if (regno >= 8 && regno < 24)
3685 return 0;
3686 }
3687 return !epilogue_renumber (&pat, 1);
3688 }
3689
3690 if (GET_CODE (pat) != SET)
3691 return 0;
3692
3693 if (GET_CODE (SET_DEST (pat)) != REG)
3694 return 0;
3695
3696 regno = REGNO (SET_DEST (pat));
3697
3698 /* Otherwise, only operations which can be done in tandem with
3699 a `restore' or `return' insn can go into the delay slot. */
3700 if (regno >= 8 && regno < 24)
3701 return 0;
3702
3703 /* If this instruction sets up a floating-point register and we have a
3704 'return' instruction, it can probably go in. But a 'restore' will not
3705 work with FP_REGS. */
3706 if (! SPARC_INT_REG_P (regno))
3707 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3708
3709 return eligible_for_restore_insn (trial, true);
3710 }
3711
3712 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3713
3714 int
3715 eligible_for_sibcall_delay (rtx_insn *trial)
3716 {
3717 rtx pat;
3718
3719 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3720 return 0;
3721
3722 if (!NONJUMP_INSN_P (trial))
3723 return 0;
3724
3725 pat = PATTERN (trial);
3726
3727 if (sparc_leaf_function_p || TARGET_FLAT)
3728 {
3729 /* If the tail call is done using the call instruction,
3730 we have to restore %o7 in the delay slot. */
3731 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3732 return 0;
3733
3734 /* %g1 is used to build the function address. */
3735 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3736 return 0;
3737
3738 return 1;
3739 }
3740
3741 if (GET_CODE (pat) != SET)
3742 return 0;
3743
3744 /* Otherwise, only operations which can be done in tandem with
3745 a `restore' insn can go into the delay slot. */
3746 if (GET_CODE (SET_DEST (pat)) != REG
3747 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3748 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3749 return 0;
3750
3751 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3752 in most cases. */
3753 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3754 return 0;
3755
3756 return eligible_for_restore_insn (trial, false);
3757 }
3758 \f
3759 /* Determine if it's legal to put X into the constant pool. This
3760 is not possible if X contains the address of a symbol that is
3761 not constant (TLS) or not known at final link time (PIC). */
3762
3763 static bool
3764 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3765 {
3766 switch (GET_CODE (x))
3767 {
3768 case CONST_INT:
3769 case CONST_WIDE_INT:
3770 case CONST_DOUBLE:
3771 case CONST_VECTOR:
3772 /* Accept all non-symbolic constants. */
3773 return false;
3774
3775 case LABEL_REF:
3776 /* Labels are OK iff we are non-PIC. */
3777 return flag_pic != 0;
3778
3779 case SYMBOL_REF:
3780 /* 'Naked' TLS symbol references are never OK,
3781 non-TLS symbols are OK iff we are non-PIC. */
3782 if (SYMBOL_REF_TLS_MODEL (x))
3783 return true;
3784 else
3785 return flag_pic != 0;
3786
3787 case CONST:
3788 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3789 case PLUS:
3790 case MINUS:
3791 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3792 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3793 case UNSPEC:
3794 return true;
3795 default:
3796 gcc_unreachable ();
3797 }
3798 }
3799 \f
3800 /* Global Offset Table support. */
3801 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3802 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3803
3804 /* Return the SYMBOL_REF for the Global Offset Table. */
3805
3806 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3807
3808 static rtx
3809 sparc_got (void)
3810 {
3811 if (!sparc_got_symbol)
3812 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3813
3814 return sparc_got_symbol;
3815 }
3816
3817 /* Ensure that we are not using patterns that are not OK with PIC. */
3818
3819 int
3820 check_pic (int i)
3821 {
3822 rtx op;
3823
3824 switch (flag_pic)
3825 {
3826 case 1:
3827 op = recog_data.operand[i];
3828 gcc_assert (GET_CODE (op) != SYMBOL_REF
3829 && (GET_CODE (op) != CONST
3830 || (GET_CODE (XEXP (op, 0)) == MINUS
3831 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3832 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3833 /* fallthrough */
3834 case 2:
3835 default:
3836 return 1;
3837 }
3838 }
3839
3840 /* Return true if X is an address which needs a temporary register when
3841 reloaded while generating PIC code. */
3842
3843 int
3844 pic_address_needs_scratch (rtx x)
3845 {
3846 /* A symbolic operand plus a non-SMALL_INT constant needs a temp register. */
3847 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3848 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3849 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3850 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3851 return 1;
3852
3853 return 0;
3854 }
3855
3856 /* Determine if a given RTX is a valid constant. We already know this
3857 satisfies CONSTANT_P. */
3858
3859 static bool
3860 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3861 {
3862 switch (GET_CODE (x))
3863 {
3864 case CONST:
3865 case SYMBOL_REF:
3866 if (sparc_tls_referenced_p (x))
3867 return false;
3868 break;
3869
3870 case CONST_DOUBLE:
3871 /* Floating point constants are generally not ok.
3872 The only exceptions are 0.0 and all-ones in VIS. */
3873 if (TARGET_VIS
3874 && SCALAR_FLOAT_MODE_P (mode)
3875 && (const_zero_operand (x, mode)
3876 || const_all_ones_operand (x, mode)))
3877 return true;
3878
3879 return false;
3880
3881 case CONST_VECTOR:
3882 /* Vector constants are generally not ok.
3883 The only exception is 0 or -1 in VIS. */
3884 if (TARGET_VIS
3885 && (const_zero_operand (x, mode)
3886 || const_all_ones_operand (x, mode)))
3887 return true;
3888
3889 return false;
3890
3891 default:
3892 break;
3893 }
3894
3895 return true;
3896 }
3897
3898 /* Determine if a given RTX is a valid constant address. */
3899
3900 bool
3901 constant_address_p (rtx x)
3902 {
3903 switch (GET_CODE (x))
3904 {
3905 case LABEL_REF:
3906 case CONST_INT:
3907 case HIGH:
3908 return true;
3909
3910 case CONST:
3911 if (flag_pic && pic_address_needs_scratch (x))
3912 return false;
3913 return sparc_legitimate_constant_p (Pmode, x);
3914
3915 case SYMBOL_REF:
3916 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3917
3918 default:
3919 return false;
3920 }
3921 }
3922
3923 /* Nonzero if the constant value X is a legitimate general operand
3924 when generating PIC code. It is given that flag_pic is on and
3925 that X satisfies CONSTANT_P. */
3926
3927 bool
3928 legitimate_pic_operand_p (rtx x)
3929 {
3930 if (pic_address_needs_scratch (x))
3931 return false;
3932 if (sparc_tls_referenced_p (x))
3933 return false;
3934 return true;
3935 }
3936
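/* The load/store instructions accept a 13-bit signed immediate offset,
   i.e. the range [-0x1000, 0xfff]; subtracting the mode size keeps the
   whole access within that range. The OLO10 variant additionally leaves
   room for the up-to-0x3ff contribution of a %lo() relocation, hence the
   smaller 0xc00 upper bound. */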
3937 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3938 (CONST_INT_P (X) \
3939 && INTVAL (X) >= -0x1000 \
3940 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3941
3942 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3943 (CONST_INT_P (X) \
3944 && INTVAL (X) >= -0x1000 \
3945 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
3946
3947 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3948
3949 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3950 ordinarily. This changes a bit when generating PIC. */
3951
3952 static bool
3953 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3954 {
3955 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3956
3957 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3958 rs1 = addr;
3959 else if (GET_CODE (addr) == PLUS)
3960 {
3961 rs1 = XEXP (addr, 0);
3962 rs2 = XEXP (addr, 1);
3963
3964 /* Canonicalize: REG comes first; if there are no regs,
3965 LO_SUM comes first. */
3966 if (!REG_P (rs1)
3967 && GET_CODE (rs1) != SUBREG
3968 && (REG_P (rs2)
3969 || GET_CODE (rs2) == SUBREG
3970 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3971 {
3972 rs1 = XEXP (addr, 1);
3973 rs2 = XEXP (addr, 0);
3974 }
3975
3976 if ((flag_pic == 1
3977 && rs1 == pic_offset_table_rtx
3978 && !REG_P (rs2)
3979 && GET_CODE (rs2) != SUBREG
3980 && GET_CODE (rs2) != LO_SUM
3981 && GET_CODE (rs2) != MEM
3982 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3983 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3984 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3985 || ((REG_P (rs1)
3986 || GET_CODE (rs1) == SUBREG)
3987 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3988 {
3989 imm1 = rs2;
3990 rs2 = NULL;
3991 }
3992 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3993 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3994 {
3995 /* We prohibit REG + REG for TFmode when there are no quad move insns
3996 and we consequently need to split. We do this because REG+REG
3997 is not an offsettable address. If we get the situation in reload
3998 where source and destination of a movtf pattern are both MEMs with
3999 REG+REG address, then only one of them gets converted to an
4000 offsettable address. */
4001 if (mode == TFmode
4002 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4003 return 0;
4004
4005 /* Likewise for TImode, but in all cases. */
4006 if (mode == TImode)
4007 return 0;
4008
4009 /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
4010 optimizing, because then mem_min_alignment is likely to be zero
4011 after reload and the forced split would lack a matching splitter
4012 pattern. */
4013 if (TARGET_ARCH32 && !optimize
4014 && (mode == DFmode || mode == DImode))
4015 return 0;
4016 }
4017 else if (USE_AS_OFFSETABLE_LO10
4018 && GET_CODE (rs1) == LO_SUM
4019 && TARGET_ARCH64
4020 && ! TARGET_CM_MEDMID
4021 && RTX_OK_FOR_OLO10_P (rs2, mode))
4022 {
4023 rs2 = NULL;
4024 imm1 = XEXP (rs1, 1);
4025 rs1 = XEXP (rs1, 0);
4026 if (!CONSTANT_P (imm1)
4027 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4028 return 0;
4029 }
4030 }
4031 else if (GET_CODE (addr) == LO_SUM)
4032 {
4033 rs1 = XEXP (addr, 0);
4034 imm1 = XEXP (addr, 1);
4035
4036 if (!CONSTANT_P (imm1)
4037 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4038 return 0;
4039
4040 /* We can't allow TFmode in 32-bit mode, because an offset greater
4041 than the alignment (8) may cause the LO_SUM to overflow. */
4042 if (mode == TFmode && TARGET_ARCH32)
4043 return 0;
4044
4045 /* During reload, accept the HIGH+LO_SUM construct generated by
4046 sparc_legitimize_reload_address. */
4047 if (reload_in_progress
4048 && GET_CODE (rs1) == HIGH
4049 && XEXP (rs1, 0) == imm1)
4050 return 1;
4051 }
4052 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4053 return 1;
4054 else
4055 return 0;
4056
4057 if (GET_CODE (rs1) == SUBREG)
4058 rs1 = SUBREG_REG (rs1);
4059 if (!REG_P (rs1))
4060 return 0;
4061
4062 if (rs2)
4063 {
4064 if (GET_CODE (rs2) == SUBREG)
4065 rs2 = SUBREG_REG (rs2);
4066 if (!REG_P (rs2))
4067 return 0;
4068 }
4069
4070 if (strict)
4071 {
4072 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4073 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4074 return 0;
4075 }
4076 else
4077 {
4078 if ((! SPARC_INT_REG_P (REGNO (rs1))
4079 && REGNO (rs1) != FRAME_POINTER_REGNUM
4080 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4081 || (rs2
4082 && (! SPARC_INT_REG_P (REGNO (rs2))
4083 && REGNO (rs2) != FRAME_POINTER_REGNUM
4084 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4085 return 0;
4086 }
4087 return 1;
4088 }
4089
4090 /* Return the SYMBOL_REF for the tls_get_addr function. */
4091
4092 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4093
4094 static rtx
4095 sparc_tls_get_addr (void)
4096 {
4097 if (!sparc_tls_symbol)
4098 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4099
4100 return sparc_tls_symbol;
4101 }
4102
4103 /* Return the Global Offset Table to be used in TLS mode. */
4104
4105 static rtx
4106 sparc_tls_got (void)
4107 {
4108 /* In PIC mode, this is just the PIC offset table. */
4109 if (flag_pic)
4110 {
4111 crtl->uses_pic_offset_table = 1;
4112 return pic_offset_table_rtx;
4113 }
4114
4115 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4116 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4117 if (TARGET_SUN_TLS && TARGET_ARCH32)
4118 {
4119 load_got_register ();
4120 return global_offset_table_rtx;
4121 }
4122
4123 /* In all other cases, we load a new pseudo with the GOT symbol. */
4124 return copy_to_reg (sparc_got ());
4125 }
4126
4127 /* Return true if X contains a thread-local symbol. */
4128
4129 static bool
4130 sparc_tls_referenced_p (rtx x)
4131 {
4132 if (!TARGET_HAVE_TLS)
4133 return false;
4134
4135 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4136 x = XEXP (XEXP (x, 0), 0);
4137
4138 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4139 return true;
4140
4141 /* That's all we handle in sparc_legitimize_tls_address for now. */
4142 return false;
4143 }
4144
4145 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4146 this (thread-local) address. */
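
/* For instance, for the global-dynamic model on 32-bit this emits the
   equivalent of:

     sethi  %tgd_hi22(sym), %tmp1
     add    %tmp1, %tgd_lo10(sym), %tmp2
     add    %l7, %tmp2, %o0, %tgd_add(sym)
     call   __tls_get_addr, %tgd_call(sym)

   with the result returned in %o0. */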
4147
4148 static rtx
4149 sparc_legitimize_tls_address (rtx addr)
4150 {
4151 rtx temp1, temp2, temp3, ret, o0, got;
4152 rtx_insn *insn;
4153
4154 gcc_assert (can_create_pseudo_p ());
4155
4156 if (GET_CODE (addr) == SYMBOL_REF)
4157 switch (SYMBOL_REF_TLS_MODEL (addr))
4158 {
4159 case TLS_MODEL_GLOBAL_DYNAMIC:
4160 start_sequence ();
4161 temp1 = gen_reg_rtx (SImode);
4162 temp2 = gen_reg_rtx (SImode);
4163 ret = gen_reg_rtx (Pmode);
4164 o0 = gen_rtx_REG (Pmode, 8);
4165 got = sparc_tls_got ();
4166 emit_insn (gen_tgd_hi22 (temp1, addr));
4167 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4168 if (TARGET_ARCH32)
4169 {
4170 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4171 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4172 addr, const1_rtx));
4173 }
4174 else
4175 {
4176 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4177 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4178 addr, const1_rtx));
4179 }
4180 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4181 insn = get_insns ();
4182 end_sequence ();
4183 emit_libcall_block (insn, ret, o0, addr);
4184 break;
4185
4186 case TLS_MODEL_LOCAL_DYNAMIC:
4187 start_sequence ();
4188 temp1 = gen_reg_rtx (SImode);
4189 temp2 = gen_reg_rtx (SImode);
4190 temp3 = gen_reg_rtx (Pmode);
4191 ret = gen_reg_rtx (Pmode);
4192 o0 = gen_rtx_REG (Pmode, 8);
4193 got = sparc_tls_got ();
4194 emit_insn (gen_tldm_hi22 (temp1));
4195 emit_insn (gen_tldm_lo10 (temp2, temp1));
4196 if (TARGET_ARCH32)
4197 {
4198 emit_insn (gen_tldm_add32 (o0, got, temp2));
4199 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4200 const1_rtx));
4201 }
4202 else
4203 {
4204 emit_insn (gen_tldm_add64 (o0, got, temp2));
4205 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4206 const1_rtx));
4207 }
4208 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4209 insn = get_insns ();
4210 end_sequence ();
4211 emit_libcall_block (insn, temp3, o0,
4212 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4213 UNSPEC_TLSLD_BASE));
4214 temp1 = gen_reg_rtx (SImode);
4215 temp2 = gen_reg_rtx (SImode);
4216 emit_insn (gen_tldo_hix22 (temp1, addr));
4217 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4218 if (TARGET_ARCH32)
4219 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4220 else
4221 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4222 break;
4223
4224 case TLS_MODEL_INITIAL_EXEC:
4225 temp1 = gen_reg_rtx (SImode);
4226 temp2 = gen_reg_rtx (SImode);
4227 temp3 = gen_reg_rtx (Pmode);
4228 got = sparc_tls_got ();
4229 emit_insn (gen_tie_hi22 (temp1, addr));
4230 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4231 if (TARGET_ARCH32)
4232 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4233 else
4234 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4235 if (TARGET_SUN_TLS)
4236 {
4237 ret = gen_reg_rtx (Pmode);
4238 if (TARGET_ARCH32)
4239 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4240 temp3, addr));
4241 else
4242 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4243 temp3, addr));
4244 }
4245 else
4246 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4247 break;
4248
4249 case TLS_MODEL_LOCAL_EXEC:
4250 temp1 = gen_reg_rtx (Pmode);
4251 temp2 = gen_reg_rtx (Pmode);
4252 if (TARGET_ARCH32)
4253 {
4254 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4255 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4256 }
4257 else
4258 {
4259 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4260 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4261 }
4262 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4263 break;
4264
4265 default:
4266 gcc_unreachable ();
4267 }
4268
4269 else if (GET_CODE (addr) == CONST)
4270 {
4271 rtx base, offset;
4272
4273 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4274
4275 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4276 offset = XEXP (XEXP (addr, 0), 1);
4277
4278 base = force_operand (base, NULL_RTX);
4279 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4280 offset = force_reg (Pmode, offset);
4281 ret = gen_rtx_PLUS (Pmode, base, offset);
4282 }
4283
4284 else
4285 gcc_unreachable (); /* for now ... */
4286
4287 return ret;
4288 }
4289
4290 /* Legitimize PIC addresses. If the address is already position-independent,
4291 we return ORIG. Newly generated position-independent addresses go into a
4292 reg. This is REG if nonzero, otherwise we allocate register(s) as
4293 necessary. */
4294
4295 static rtx
4296 sparc_legitimize_pic_address (rtx orig, rtx reg)
4297 {
4298 bool gotdata_op = false;
4299
4300 if (GET_CODE (orig) == SYMBOL_REF
4301 /* See the comment in sparc_expand_move. */
4302 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4303 {
4304 rtx pic_ref, address;
4305 rtx_insn *insn;
4306
4307 if (reg == 0)
4308 {
4309 gcc_assert (can_create_pseudo_p ());
4310 reg = gen_reg_rtx (Pmode);
4311 }
4312
4313 if (flag_pic == 2)
4314 {
4315 /* If not during reload, allocate another temp reg here for loading
4316 in the address, so that these instructions can be optimized
4317 properly. */
4318 rtx temp_reg = (! can_create_pseudo_p ()
4319 ? reg : gen_reg_rtx (Pmode));
4320
4321 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4322 won't get confused into thinking that these two instructions
4323 are loading the true address of the symbol. If in the
4324 future a PIC rtx exists, that should be used instead. */
4325 if (TARGET_ARCH64)
4326 {
4327 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4328 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4329 }
4330 else
4331 {
4332 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4333 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4334 }
4335 address = temp_reg;
4336 gotdata_op = true;
4337 }
4338 else
4339 address = orig;
4340
4341 crtl->uses_pic_offset_table = 1;
4342 if (gotdata_op)
4343 {
4344 if (TARGET_ARCH64)
4345 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4346 pic_offset_table_rtx,
4347 address, orig));
4348 else
4349 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4350 pic_offset_table_rtx,
4351 address, orig));
4352 }
4353 else
4354 {
4355 pic_ref
4356 = gen_const_mem (Pmode,
4357 gen_rtx_PLUS (Pmode,
4358 pic_offset_table_rtx, address));
4359 insn = emit_move_insn (reg, pic_ref);
4360 }
4361
4362 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4363 by the loop optimizer. */
4364 set_unique_reg_note (insn, REG_EQUAL, orig);
4365 return reg;
4366 }
4367 else if (GET_CODE (orig) == CONST)
4368 {
4369 rtx base, offset;
4370
4371 if (GET_CODE (XEXP (orig, 0)) == PLUS
4372 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4373 return orig;
4374
4375 if (reg == 0)
4376 {
4377 gcc_assert (can_create_pseudo_p ());
4378 reg = gen_reg_rtx (Pmode);
4379 }
4380
4381 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4382 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4383 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4384 base == reg ? NULL_RTX : reg);
4385
4386 if (GET_CODE (offset) == CONST_INT)
4387 {
4388 if (SMALL_INT (offset))
4389 return plus_constant (Pmode, base, INTVAL (offset));
4390 else if (can_create_pseudo_p ())
4391 offset = force_reg (Pmode, offset);
4392 else
4393 /* If we reach here, then something is seriously wrong. */
4394 gcc_unreachable ();
4395 }
4396 return gen_rtx_PLUS (Pmode, base, offset);
4397 }
4398 else if (GET_CODE (orig) == LABEL_REF)
4399 /* ??? We ought to be checking that the register is live instead, in case
4400 it is eliminated. */
4401 crtl->uses_pic_offset_table = 1;
4402
4403 return orig;
4404 }
4405
4406 /* Try machine-dependent ways of modifying an illegitimate address X
4407 to be legitimate. If we find one, return the new, valid address.
4408
4409 OLDX is the address as it was before break_out_memory_refs was called.
4410 In some cases it is useful to look at this to decide what needs to be done.
4411
4412 MODE is the mode of the operand pointed to by X.
4413
4414 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4415
4416 static rtx
4417 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4418 machine_mode mode)
4419 {
4420 rtx orig_x = x;
4421
4422 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4423 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4424 force_operand (XEXP (x, 0), NULL_RTX));
4425 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4426 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4427 force_operand (XEXP (x, 1), NULL_RTX));
4428 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4429 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4430 XEXP (x, 1));
4431 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4432 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4433 force_operand (XEXP (x, 1), NULL_RTX));
4434
4435 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4436 return x;
4437
4438 if (sparc_tls_referenced_p (x))
4439 x = sparc_legitimize_tls_address (x);
4440 else if (flag_pic)
4441 x = sparc_legitimize_pic_address (x, NULL_RTX);
4442 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4443 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4444 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4445 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4446 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4447 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4448 else if (GET_CODE (x) == SYMBOL_REF
4449 || GET_CODE (x) == CONST
4450 || GET_CODE (x) == LABEL_REF)
4451 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4452
4453 return x;
4454 }
4455
4456 /* Delegitimize an address that was legitimized by the above function. */
4457
4458 static rtx
4459 sparc_delegitimize_address (rtx x)
4460 {
4461 x = delegitimize_mem_from_attrs (x);
4462
4463 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4464 switch (XINT (XEXP (x, 1), 1))
4465 {
4466 case UNSPEC_MOVE_PIC:
4467 case UNSPEC_TLSLE:
4468 x = XVECEXP (XEXP (x, 1), 0, 0);
4469 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4470 break;
4471 default:
4472 break;
4473 }
4474
4475 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4476 if (GET_CODE (x) == MINUS
4477 && REG_P (XEXP (x, 0))
4478 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4479 && GET_CODE (XEXP (x, 1)) == LO_SUM
4480 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4481 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4482 {
4483 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4484 gcc_assert (GET_CODE (x) == LABEL_REF);
4485 }
4486
4487 return x;
4488 }
4489
4490 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4491 replace the input X, or the original X if no replacement is called for.
4492 The output parameter *WIN is 1 if the calling macro should goto WIN,
4493 0 if it should not.
4494
4495 For SPARC, we wish to handle addresses by splitting them into
4496 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4497 This cuts the number of extra insns by one.
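
   For instance, a legitimate symbolic address 'x' is decomposed so that
   the memory reference can use the usual sethi/lo pair:

     sethi  %hi(x), %reg1
     ld     [%reg1 + %lo(x)], %reg2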
4498
4499 Do nothing when generating PIC code and the address is a symbolic
4500 operand or requires a scratch register. */
4501
4502 rtx
4503 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4504 int opnum, int type,
4505 int ind_levels ATTRIBUTE_UNUSED, int *win)
4506 {
4507 /* Decompose SImode constants into HIGH+LO_SUM. */
4508 if (CONSTANT_P (x)
4509 && (mode != TFmode || TARGET_ARCH64)
4510 && GET_MODE (x) == SImode
4511 && GET_CODE (x) != LO_SUM
4512 && GET_CODE (x) != HIGH
4513 && sparc_cmodel <= CM_MEDLOW
4514 && !(flag_pic
4515 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4516 {
4517 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4518 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4519 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4520 opnum, (enum reload_type)type);
4521 *win = 1;
4522 return x;
4523 }
4524
4525 /* We have to recognize what we have already generated above. */
4526 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4527 {
4528 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4529 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4530 opnum, (enum reload_type)type);
4531 *win = 1;
4532 return x;
4533 }
4534
4535 *win = 0;
4536 return x;
4537 }
4538
4539 /* Return true if ADDR (a legitimate address expression)
4540 has an effect that depends on the machine mode it is used for.
4541
4542 In PIC mode,
4543
4544 (mem:HI [%l7+a])
4545
4546 is not equivalent to
4547
4548 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4549
4550 because [%l7+a+1] is interpreted as the address of (a+1). */
4551
4552
4553 static bool
4554 sparc_mode_dependent_address_p (const_rtx addr,
4555 addr_space_t as ATTRIBUTE_UNUSED)
4556 {
4557 if (flag_pic && GET_CODE (addr) == PLUS)
4558 {
4559 rtx op0 = XEXP (addr, 0);
4560 rtx op1 = XEXP (addr, 1);
4561 if (op0 == pic_offset_table_rtx
4562 && symbolic_operand (op1, VOIDmode))
4563 return true;
4564 }
4565
4566 return false;
4567 }
4568
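/* USE_HIDDEN_LINKONCE selects between a single, globally named PC thunk
   (__sparc_get_pc_thunk.<reg>, see get_pc_thunk_name below) and a per-unit
   internal label; the former requires assembler support for .hidden. */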
4569 #ifdef HAVE_GAS_HIDDEN
4570 # define USE_HIDDEN_LINKONCE 1
4571 #else
4572 # define USE_HIDDEN_LINKONCE 0
4573 #endif
4574
4575 static void
4576 get_pc_thunk_name (char name[32], unsigned int regno)
4577 {
4578 const char *reg_name = reg_names[regno];
4579
4580 /* Skip the leading '%' as that cannot be used in a
4581 symbol name. */
4582 reg_name += 1;
4583
4584 if (USE_HIDDEN_LINKONCE)
4585 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4586 else
4587 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4588 }
4589
4590 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4591
4592 static rtx
4593 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4594 {
4595 int orig_flag_pic = flag_pic;
4596 rtx insn;
4597
4598 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4599 flag_pic = 0;
4600 if (TARGET_ARCH64)
4601 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4602 else
4603 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4604 flag_pic = orig_flag_pic;
4605
4606 return insn;
4607 }
4608
4609 /* Emit code to load the GOT register. */
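/* With 32-bit PIC and GNU as, the emitted sequence typically looks like:

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the address of the call instruction to %l7. */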
4610
4611 void
4612 load_got_register (void)
4613 {
4614 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4615 if (!global_offset_table_rtx)
4616 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4617
4618 if (TARGET_VXWORKS_RTP)
4619 emit_insn (gen_vxworks_load_got ());
4620 else
4621 {
4622 /* The GOT symbol is subject to a PC-relative relocation so we need a
4623 helper function to add the PC value and thus get the final value. */
4624 if (!got_helper_rtx)
4625 {
4626 char name[32];
4627 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4628 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4629 }
4630
4631 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4632 got_helper_rtx,
4633 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4634 }
4635
4636 /* Need to emit this whether or not we obey regdecls,
4637 since setjmp/longjmp can cause life info to screw up.
4638 ??? In the case where we don't obey regdecls, this is not sufficient
4639 since we may not fall out the bottom. */
4640 emit_use (global_offset_table_rtx);
4641 }
4642
4643 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4644 address of the call target. */
4645
4646 void
4647 sparc_emit_call_insn (rtx pat, rtx addr)
4648 {
4649 rtx_insn *insn;
4650
4651 insn = emit_call_insn (pat);
4652
4653 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4654 if (TARGET_VXWORKS_RTP
4655 && flag_pic
4656 && GET_CODE (addr) == SYMBOL_REF
4657 && (SYMBOL_REF_DECL (addr)
4658 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4659 : !SYMBOL_REF_LOCAL_P (addr)))
4660 {
4661 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4662 crtl->uses_pic_offset_table = 1;
4663 }
4664 }
4665 \f
4666 /* Return 1 if RTX is a MEM which is known to be aligned to at
4667 least a DESIRED byte boundary. */
4668
4669 int
4670 mem_min_alignment (rtx mem, int desired)
4671 {
4672 rtx addr, base, offset;
4673
4674 /* If it's not a MEM we can't accept it. */
4675 if (GET_CODE (mem) != MEM)
4676 return 0;
4677
4678 /* Obviously... */
4679 if (!TARGET_UNALIGNED_DOUBLES
4680 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4681 return 1;
4682
4683 /* ??? The rest of the function predates MEM_ALIGN so
4684 there is probably a bit of redundancy. */
4685 addr = XEXP (mem, 0);
4686 base = offset = NULL_RTX;
4687 if (GET_CODE (addr) == PLUS)
4688 {
4689 if (GET_CODE (XEXP (addr, 0)) == REG)
4690 {
4691 base = XEXP (addr, 0);
4692
4693 /* What we are saying here is that if the base
4694 REG is aligned properly, the compiler will make
4695 sure any REG based index upon it will be so
4696 as well. */
4697 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4698 offset = XEXP (addr, 1);
4699 else
4700 offset = const0_rtx;
4701 }
4702 }
4703 else if (GET_CODE (addr) == REG)
4704 {
4705 base = addr;
4706 offset = const0_rtx;
4707 }
4708
4709 if (base != NULL_RTX)
4710 {
4711 int regno = REGNO (base);
4712
4713 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4714 {
4715 /* Check if the compiler has recorded some information
4716 about the alignment of the base REG. If reload has
4717 completed, we already matched with proper alignments.
4718 If not running global_alloc, reload might give us an
4719 unaligned pointer to the local stack though. */
4720 if (((cfun != 0
4721 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4722 || (optimize && reload_completed))
4723 && (INTVAL (offset) & (desired - 1)) == 0)
4724 return 1;
4725 }
4726 else
4727 {
4728 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4729 return 1;
4730 }
4731 }
4732 else if (! TARGET_UNALIGNED_DOUBLES
4733 || CONSTANT_P (addr)
4734 || GET_CODE (addr) == LO_SUM)
4735 {
4736 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4737 is true, in which case we can only assume that an access is aligned if
4738 it is to a constant address, or the address involves a LO_SUM. */
4739 return 1;
4740 }
4741
4742 /* An obviously unaligned address. */
4743 return 0;
4744 }
4745
4746 \f
4747 /* Vectors to keep interesting information about registers where it can easily
4748 be got. We used to use the actual mode value as the bit number, but there
4749 are more than 32 modes now. Instead we use two tables: one indexed by
4750 hard register number, and one indexed by mode. */
4751
4752 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4753 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4754 mapped into one sparc_mode_class mode. */
4755
4756 enum sparc_mode_class {
4757 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4758 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4759 CC_MODE, CCFP_MODE
4760 };
4761
4762 /* Modes for single-word and smaller quantities. */
4763 #define S_MODES \
4764 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4765
4766 /* Modes for double-word and smaller quantities. */
4767 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4768
4769 /* Modes for quad-word and smaller quantities. */
4770 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4771
4772 /* Modes for 8-word and smaller quantities. */
4773 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4774
4775 /* Modes for single-float quantities. */
4776 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4777
4778 /* Modes for double-float and smaller quantities. */
4779 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4780
4781 /* Modes for quad-float and smaller quantities. */
4782 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4783
4784 /* Modes for quad-float pairs and smaller quantities. */
4785 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4786
4787 /* Modes for double-float only quantities. */
4788 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4789
4790 /* Modes for quad-float and double-float only quantities. */
4791 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4792
4793 /* Modes for quad-float pairs and double-float only quantities. */
4794 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4795
4796 /* Modes for condition codes. */
4797 #define CC_MODES (1 << (int) CC_MODE)
4798 #define CCFP_MODES (1 << (int) CCFP_MODE)
4799
4800 /* Value is 1 if register/mode pair is acceptable on sparc.
4801
4802 The funny mixture of D and T modes is because integer operations
4803 do not specially operate on tetra quantities, so non-quad-aligned
4804 registers can hold quadword quantities (except %o4 and %i4 because
4805 they cross fixed registers).
4806
4807 ??? Note that, despite the settings, non-double-aligned parameter
4808 registers can hold double-word quantities in 32-bit mode. */
4809
4810 /* This points to either the 32 bit or the 64 bit version. */
4811 const int *hard_regno_mode_classes;
4812
4813 static const int hard_32bit_mode_classes[] = {
4814 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4815 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4816 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4817 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4818
4819 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4820 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4821 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4822 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4823
4824 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4825 and none can hold SFmode/SImode values. */
4826 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4827 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4828 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4829 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4830
4831 /* %fcc[0123] */
4832 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4833
4834 /* %icc, %sfp, %gsr */
4835 CC_MODES, 0, D_MODES
4836 };
4837
4838 static const int hard_64bit_mode_classes[] = {
4839 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4840 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4841 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4842 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4843
4844 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4845 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4846 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4847 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4848
4849 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4850 and none can hold SFmode/SImode values. */
4851 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4852 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4853 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4854 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4855
4856 /* %fcc[0123] */
4857 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4858
4859 /* %icc, %sfp, %gsr */
4860 CC_MODES, 0, D_MODES
4861 };
4862
4863 int sparc_mode_class [NUM_MACHINE_MODES];
4864
4865 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4866
4867 static void
4868 sparc_init_modes (void)
4869 {
4870 int i;
4871
4872 for (i = 0; i < NUM_MACHINE_MODES; i++)
4873 {
4874 machine_mode m = (machine_mode) i;
4875 unsigned int size = GET_MODE_SIZE (m);
4876
4877 switch (GET_MODE_CLASS (m))
4878 {
4879 case MODE_INT:
4880 case MODE_PARTIAL_INT:
4881 case MODE_COMPLEX_INT:
4882 if (size < 4)
4883 sparc_mode_class[i] = 1 << (int) H_MODE;
4884 else if (size == 4)
4885 sparc_mode_class[i] = 1 << (int) S_MODE;
4886 else if (size == 8)
4887 sparc_mode_class[i] = 1 << (int) D_MODE;
4888 else if (size == 16)
4889 sparc_mode_class[i] = 1 << (int) T_MODE;
4890 else if (size == 32)
4891 sparc_mode_class[i] = 1 << (int) O_MODE;
4892 else
4893 sparc_mode_class[i] = 0;
4894 break;
4895 case MODE_VECTOR_INT:
4896 if (size == 4)
4897 sparc_mode_class[i] = 1 << (int) SF_MODE;
4898 else if (size == 8)
4899 sparc_mode_class[i] = 1 << (int) DF_MODE;
4900 else
4901 sparc_mode_class[i] = 0;
4902 break;
4903 case MODE_FLOAT:
4904 case MODE_COMPLEX_FLOAT:
4905 if (size == 4)
4906 sparc_mode_class[i] = 1 << (int) SF_MODE;
4907 else if (size == 8)
4908 sparc_mode_class[i] = 1 << (int) DF_MODE;
4909 else if (size == 16)
4910 sparc_mode_class[i] = 1 << (int) TF_MODE;
4911 else if (size == 32)
4912 sparc_mode_class[i] = 1 << (int) OF_MODE;
4913 else
4914 sparc_mode_class[i] = 0;
4915 break;
4916 case MODE_CC:
4917 if (m == CCFPmode || m == CCFPEmode)
4918 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4919 else
4920 sparc_mode_class[i] = 1 << (int) CC_MODE;
4921 break;
4922 default:
4923 sparc_mode_class[i] = 0;
4924 break;
4925 }
4926 }
4927
4928 if (TARGET_ARCH64)
4929 hard_regno_mode_classes = hard_64bit_mode_classes;
4930 else
4931 hard_regno_mode_classes = hard_32bit_mode_classes;
4932
4933 /* Initialize the array used by REGNO_REG_CLASS. */
4934 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4935 {
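      /* In V8+ mode only the %g and %o registers (0-15) can hold 64-bit
	 values; the upper halves of %l and %i are not preserved across
	 register window spills in the 32-bit ABI. */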
4936 if (i < 16 && TARGET_V8PLUS)
4937 sparc_regno_reg_class[i] = I64_REGS;
4938 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4939 sparc_regno_reg_class[i] = GENERAL_REGS;
4940 else if (i < 64)
4941 sparc_regno_reg_class[i] = FP_REGS;
4942 else if (i < 96)
4943 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4944 else if (i < 100)
4945 sparc_regno_reg_class[i] = FPCC_REGS;
4946 else
4947 sparc_regno_reg_class[i] = NO_REGS;
4948 }
4949 }
4950 \f
4951 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4952
4953 static inline bool
4954 save_global_or_fp_reg_p (unsigned int regno,
4955 int leaf_function ATTRIBUTE_UNUSED)
4956 {
4957 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4958 }
4959
4960 /* Return whether the return address register (%i7) is needed. */
4961
4962 static inline bool
4963 return_addr_reg_needed_p (int leaf_function)
4964 {
4965 /* If it is live, for example because of __builtin_return_address (0). */
4966 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4967 return true;
4968
4969 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4970 if (!leaf_function
4971 /* Loading the GOT register clobbers %o7. */
4972 || crtl->uses_pic_offset_table
4973 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4974 return true;
4975
4976 return false;
4977 }
4978
4979 /* Return whether REGNO, a local or in register, must be saved/restored. */
4980
4981 static bool
4982 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4983 {
4984 /* General case: call-saved registers live at some point. */
4985 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4986 return true;
4987
4988 /* Frame pointer register (%fp) if needed. */
4989 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4990 return true;
4991
4992 /* Return address register (%i7) if needed. */
4993 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4994 return true;
4995
4996 /* GOT register (%l7) if needed. */
4997 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4998 return true;
4999
5000 /* If the function accesses prior frames, the frame pointer and the return
5001 address of the previous frame must be saved on the stack. */
5002 if (crtl->accesses_prior_frames
5003 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5004 return true;
5005
5006 return false;
5007 }
5008
5009 /* Compute the frame size required by the function. This function is called
5010 during the reload pass and also by sparc_expand_prologue. */
5011
5012 HOST_WIDE_INT
5013 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5014 {
5015 HOST_WIDE_INT frame_size, apparent_frame_size;
5016 int args_size, n_global_fp_regs = 0;
5017 bool save_local_in_regs_p = false;
5018 unsigned int i;
5019
5020 /* If the function allocates dynamic stack space, the dynamic offset is
5021 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5022 if (leaf_function && !cfun->calls_alloca)
5023 args_size = 0;
5024 else
5025 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5026
5027 /* Calculate space needed for global registers. */
5028 if (TARGET_ARCH64)
5029 {
5030 for (i = 0; i < 8; i++)
5031 if (save_global_or_fp_reg_p (i, 0))
5032 n_global_fp_regs += 2;
5033 }
5034 else
5035 {
5036 for (i = 0; i < 8; i += 2)
5037 if (save_global_or_fp_reg_p (i, 0)
5038 || save_global_or_fp_reg_p (i + 1, 0))
5039 n_global_fp_regs += 2;
5040 }
5041
5042 /* In the flat window model, find out which local and in registers need to
5043 be saved. We don't reserve space in the current frame for them as they
5044 will be spilled into the register window save area of the caller's frame.
5045 However, as soon as we use this register window save area, we must create
5046 that of the current frame to make it the live one. */
5047 if (TARGET_FLAT)
5048 for (i = 16; i < 32; i++)
5049 if (save_local_or_in_reg_p (i, leaf_function))
5050 {
5051 save_local_in_regs_p = true;
5052 break;
5053 }
5054
5055 /* Calculate space needed for FP registers. */
5056 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5057 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5058 n_global_fp_regs += 2;
5059
5060 if (size == 0
5061 && n_global_fp_regs == 0
5062 && args_size == 0
5063 && !save_local_in_regs_p)
5064 frame_size = apparent_frame_size = 0;
5065 else
5066 {
5067 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5068 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5069 apparent_frame_size += n_global_fp_regs * 4;
5070
5071 /* We need to add the size of the outgoing argument area. */
5072 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5073
5074 /* And that of the register window save area. */
5075 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5076
5077 /* Finally, bump to the appropriate alignment. */
5078 frame_size = SPARC_STACK_ALIGN (frame_size);
5079 }
5080
5081 /* Set up values for use in prologue and epilogue. */
5082 sparc_frame_size = frame_size;
5083 sparc_apparent_frame_size = apparent_frame_size;
5084 sparc_n_global_fp_regs = n_global_fp_regs;
5085 sparc_save_local_in_regs_p = save_local_in_regs_p;
5086
5087 return frame_size;
5088 }
5089
5090 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5091
5092 int
5093 sparc_initial_elimination_offset (int to)
5094 {
5095 int offset;
5096
5097 if (to == STACK_POINTER_REGNUM)
5098 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5099 else
5100 offset = 0;
5101
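  /* Account for the stack bias: %sp and %fp are offset by 2047 from the
     actual frame in 64-bit mode, SPARC_STACK_BIAS being 0 in 32-bit mode. */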
5102 offset += SPARC_STACK_BIAS;
5103 return offset;
5104 }
5105
5106 /* Output any necessary .register pseudo-ops. */
5107
5108 void
5109 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5110 {
5111 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5112 int i;
5113
5114 if (TARGET_ARCH32)
5115 return;
5116
5117 /* Check if %g[2367] were used without
5118 .register being printed for them already. */
5119 for (i = 2; i < 8; i++)
5120 {
5121 if (df_regs_ever_live_p (i)
5122 && ! sparc_hard_reg_printed [i])
5123 {
5124 sparc_hard_reg_printed [i] = 1;
5125 /* %g7 is used as TLS base register, use #ignore
5126 for it instead of #scratch. */
5127 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5128 i == 7 ? "ignore" : "scratch");
5129 }
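      /* Only %g2, %g3, %g6 and %g7 are of interest here, so jump from 3
	 to 6 and skip %g4 and %g5. */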
5130 if (i == 3) i = 5;
5131 }
5132 #endif
5133 }
5134
5135 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5136
5137 #if PROBE_INTERVAL > 4096
5138 #error Cannot use indexed addressing mode for stack probing
5139 #endif
5140
5141 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5142 inclusive. These are offsets from the current stack pointer.
5143
5144 Note that we don't use the REG+REG addressing mode for the probes because
5145 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5146 so the advantage of having a single code path wins here. */
5147
5148 static void
5149 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5150 {
5151 rtx g1 = gen_rtx_REG (Pmode, 1);
5152
5153 /* See if we have a constant small number of probes to generate. If so,
5154 that's the easy case. */
5155 if (size <= PROBE_INTERVAL)
5156 {
5157 emit_move_insn (g1, GEN_INT (first));
5158 emit_insn (gen_rtx_SET (g1,
5159 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5160 emit_stack_probe (plus_constant (Pmode, g1, -size));
5161 }
5162
5163 /* The run-time loop is made up of 9 insns in the generic case while the
5164 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5165 else if (size <= 4 * PROBE_INTERVAL)
5166 {
5167 HOST_WIDE_INT i;
5168
5169 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5170 emit_insn (gen_rtx_SET (g1,
5171 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5172 emit_stack_probe (g1);
5173
5174 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5175 it exceeds SIZE. If only two probes are needed, this will not
5176 generate any code. Then probe at FIRST + SIZE. */
5177 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5178 {
5179 emit_insn (gen_rtx_SET (g1,
5180 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5181 emit_stack_probe (g1);
5182 }
5183
5184 emit_stack_probe (plus_constant (Pmode, g1,
5185 (i - PROBE_INTERVAL) - size));
5186 }
5187
5188 /* Otherwise, do the same as above, but in a loop. Note that we must be
5189 extra careful with variables wrapping around because we might be at
5190 the very top (or the very bottom) of the address space and we have
5191 to be able to handle this case properly; in particular, we use an
5192 equality test for the loop condition. */
5193 else
5194 {
5195 HOST_WIDE_INT rounded_size;
5196 rtx g4 = gen_rtx_REG (Pmode, 4);
5197
5198 emit_move_insn (g1, GEN_INT (first));
5199
5200
5201 /* Step 1: round SIZE to the previous multiple of the interval. */
5202
5203 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5204 emit_move_insn (g4, GEN_INT (rounded_size));
5205
5206
5207 /* Step 2: compute initial and final value of the loop counter. */
5208
5209 /* TEST_ADDR = SP + FIRST. */
5210 emit_insn (gen_rtx_SET (g1,
5211 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5212
5213 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5214 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5215
5216
5217 /* Step 3: the loop
5218
5219 while (TEST_ADDR != LAST_ADDR)
5220 {
5221 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5222 probe at TEST_ADDR
5223 }
5224
5225 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5226 until it is equal to ROUNDED_SIZE. */
5227
5228 if (TARGET_ARCH64)
5229 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5230 else
5231 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5232
5233
5234 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5235 that SIZE is equal to ROUNDED_SIZE. */
5236
5237 if (size != rounded_size)
5238 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5239 }
5240
5241 /* Make sure nothing is scheduled before we are done. */
5242 emit_insn (gen_blockage ());
5243 }
5244
5245 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5246 absolute addresses. */
5247
5248 const char *
5249 output_probe_stack_range (rtx reg1, rtx reg2)
5250 {
5251 static int labelno = 0;
5252 char loop_lab[32];
5253 rtx xops[2];
5254
5255 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5256
5257 /* Loop. */
5258 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5259
5260 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5261 xops[0] = reg1;
5262 xops[1] = GEN_INT (-PROBE_INTERVAL);
5263 output_asm_insn ("add\t%0, %1, %0", xops);
5264
5265 /* Test if TEST_ADDR == LAST_ADDR. */
5266 xops[1] = reg2;
5267 output_asm_insn ("cmp\t%0, %1", xops);
5268
5269 /* Probe at TEST_ADDR and branch. */
5270 if (TARGET_ARCH64)
5271 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5272 else
5273 fputs ("\tbne\t", asm_out_file);
5274 assemble_name_raw (asm_out_file, loop_lab);
5275 fputc ('\n', asm_out_file);
5276 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5277 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5278
5279 return "";
5280 }
5281
5282 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5283 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5284 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5285 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5286 the action to be performed if it returns false. Return the new offset. */
5287
5288 typedef bool (*sorr_pred_t) (unsigned int, int);
5289 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5290
5291 static int
5292 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5293 int offset, int leaf_function, sorr_pred_t save_p,
5294 sorr_act_t action_true, sorr_act_t action_false)
5295 {
5296 unsigned int i;
5297 rtx mem;
5298 rtx_insn *insn;
5299
5300 if (TARGET_ARCH64 && high <= 32)
5301 {
5302 int fp_offset = -1;
5303
5304 for (i = low; i < high; i++)
5305 {
5306 if (save_p (i, leaf_function))
5307 {
5308 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5309 base, offset));
5310 if (action_true == SORR_SAVE)
5311 {
5312 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5313 RTX_FRAME_RELATED_P (insn) = 1;
5314 }
5315 else /* action_true == SORR_RESTORE */
5316 {
5317 /* The frame pointer must be restored last since its old
5318 value may be used as base address for the frame. This
5319 is problematic in 64-bit mode only because of the lack
5320 of double-word load instruction. */
5321 if (i == HARD_FRAME_POINTER_REGNUM)
5322 fp_offset = offset;
5323 else
5324 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5325 }
5326 offset += 8;
5327 }
5328 else if (action_false == SORR_ADVANCE)
5329 offset += 8;
5330 }
5331
5332 if (fp_offset >= 0)
5333 {
5334 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5335 emit_move_insn (hard_frame_pointer_rtx, mem);
5336 }
5337 }
5338 else
5339 {
5340 for (i = low; i < high; i += 2)
5341 {
5342 bool reg0 = save_p (i, leaf_function);
5343 bool reg1 = save_p (i + 1, leaf_function);
5344 machine_mode mode;
5345 int regno;
5346
5347 if (reg0 && reg1)
5348 {
5349 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5350 regno = i;
5351 }
5352 else if (reg0)
5353 {
5354 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5355 regno = i;
5356 }
5357 else if (reg1)
5358 {
5359 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5360 regno = i + 1;
5361 offset += 4;
5362 }
5363 else
5364 {
5365 if (action_false == SORR_ADVANCE)
5366 offset += 8;
5367 continue;
5368 }
5369
5370 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5371 if (action_true == SORR_SAVE)
5372 {
5373 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5374 RTX_FRAME_RELATED_P (insn) = 1;
5375 if (mode == DImode)
5376 {
5377 rtx set1, set2;
5378 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5379 offset));
5380 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5381 RTX_FRAME_RELATED_P (set1) = 1;
5382 mem
5383 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5384 offset + 4));
5385 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5386 RTX_FRAME_RELATED_P (set2) = 1;
5387 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5388 gen_rtx_PARALLEL (VOIDmode,
5389 gen_rtvec (2, set1, set2)));
5390 }
5391 }
5392 else /* action_true == SORR_RESTORE */
5393 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5394
5395 /* Bump and round down to a double-word boundary
5396 in case we already bumped by 4. */
5397 offset = ROUND_DOWN (offset + 8, 8);
5398 }
5399 }
5400
5401 return offset;
5402 }
5403
5404 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5405
5406 static rtx
5407 emit_adjust_base_to_offset (rtx base, int offset)
5408 {
5409 /* ??? This might be optimized a little as %g1 might already have a
5410 value close enough that a single add insn will do. */
5411 /* ??? Although, all of this is probably only a temporary fix because
5412 if %g1 can hold a function result, then sparc_expand_epilogue will
5413 lose (the result will be clobbered). */
5414 rtx new_base = gen_rtx_REG (Pmode, 1);
5415 emit_move_insn (new_base, GEN_INT (offset));
5416 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5417 return new_base;
5418 }
5419
5420 /* Emit code to save/restore call-saved global and FP registers. */
5421
5422 static void
5423 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5424 {
5425 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5426 {
5427 base = emit_adjust_base_to_offset (base, offset);
5428 offset = 0;
5429 }
5430
5431 offset
5432 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5433 save_global_or_fp_reg_p, action, SORR_NONE);
5434 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5435 save_global_or_fp_reg_p, action, SORR_NONE);
5436 }
5437
5438 /* Emit code to save/restore call-saved local and in registers. */
5439
5440 static void
5441 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5442 {
5443 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5444 {
5445 base = emit_adjust_base_to_offset (base, offset);
5446 offset = 0;
5447 }
5448
5449 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5450 save_local_or_in_reg_p, action, SORR_ADVANCE);
5451 }
5452
5453 /* Emit a window_save insn. */
5454
5455 static rtx_insn *
5456 emit_window_save (rtx increment)
5457 {
5458 rtx_insn *insn = emit_insn (gen_window_save (increment));
5459 RTX_FRAME_RELATED_P (insn) = 1;
5460
5461 /* The incoming return address (%o7) is saved in %i7. */
5462 add_reg_note (insn, REG_CFA_REGISTER,
5463 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5464 gen_rtx_REG (Pmode,
5465 INCOMING_RETURN_ADDR_REGNUM)));
5466
5467 /* The window save event. */
5468 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5469
5470 /* The CFA is %fp, the hard frame pointer. */
5471 add_reg_note (insn, REG_CFA_DEF_CFA,
5472 plus_constant (Pmode, hard_frame_pointer_rtx,
5473 INCOMING_FRAME_SP_OFFSET));
5474
5475 return insn;
5476 }
5477
5478 /* Generate an increment for the stack pointer. */
5479
5480 static rtx
5481 gen_stack_pointer_inc (rtx increment)
5482 {
5483 return gen_rtx_SET (stack_pointer_rtx,
5484 gen_rtx_PLUS (Pmode,
5485 stack_pointer_rtx,
5486 increment));
5487 }
5488
5489 /* Expand the function prologue. The prologue is responsible for reserving
5490 storage for the frame, saving the call-saved registers and loading the
5491 GOT register if needed. */
5492
5493 void
5494 sparc_expand_prologue (void)
5495 {
5496 HOST_WIDE_INT size;
5497 rtx_insn *insn;
5498
5499 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5500 on the final value of the flag means deferring the prologue/epilogue
5501 expansion until just before the second scheduling pass, which is too
5502 late to emit multiple epilogues or return insns.
5503
5504 Of course we are making the assumption that the value of the flag
5505 will not change between now and its final value. Of the three parts
5506 of the formula, only the last one can reasonably vary. Let's take a
5507 closer look, assuming that the first two are true (otherwise the last
5508 value is effectively silenced).
5509
5510 If only_leaf_regs_used returns false, the global predicate will also
5511 be false so the actual frame size calculated below will be positive.
5512 As a consequence, the save_register_window insn will be emitted in
5513 the instruction stream; now this insn explicitly references %fp
5514 which is not a leaf register so only_leaf_regs_used will always
5515 return false subsequently.
5516
5517 If only_leaf_regs_used returns true, we hope that the subsequent
5518 optimization passes won't cause non-leaf registers to pop up. For
5519 example, the regrename pass has special provisions to not rename to
5520 non-leaf registers in a leaf function. */
5521 sparc_leaf_function_p
5522 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5523
5524 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5525
5526 if (flag_stack_usage_info)
5527 current_function_static_stack_size = size;
5528
5529 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5530 {
5531 if (crtl->is_leaf && !cfun->calls_alloca)
5532 {
5533 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5534 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5535 size - STACK_CHECK_PROTECT);
5536 }
5537 else if (size > 0)
5538 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5539 }
5540
5541 if (size == 0)
5542 ; /* do nothing. */
5543 else if (sparc_leaf_function_p)
5544 {
5545 rtx size_int_rtx = GEN_INT (-size);
5546
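      /* A decrement of up to 4096 fits in the signed 13-bit immediate of a
         single add; up to 8192 it is split into two such adds; anything
         larger is first loaded into the %g1 scratch register.  */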
5547 if (size <= 4096)
5548 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5549 else if (size <= 8192)
5550 {
5551 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5552 RTX_FRAME_RELATED_P (insn) = 1;
5553
5554 /* %sp is still the CFA register. */
5555 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5556 }
5557 else
5558 {
5559 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5560 emit_move_insn (size_rtx, size_int_rtx);
5561 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5562 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5563 gen_stack_pointer_inc (size_int_rtx));
5564 }
5565
5566 RTX_FRAME_RELATED_P (insn) = 1;
5567 }
5568 else
5569 {
5570 rtx size_int_rtx = GEN_INT (-size);
5571
5572 if (size <= 4096)
5573 emit_window_save (size_int_rtx);
5574 else if (size <= 8192)
5575 {
5576 emit_window_save (GEN_INT (-4096));
5577
5578 /* %sp is not the CFA register anymore. */
5579 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5580
5581 /* Make sure no %fp-based store is issued until after the frame is
5582 established. The offset between the frame pointer and the stack
5583 pointer is calculated relative to the value of the stack pointer
5584 at the end of the function prologue, and moving instructions that
5585 access the stack via the frame pointer between the instructions
5586 that decrement the stack pointer could result in accessing the
5587 register window save area, which is volatile. */
5588 emit_insn (gen_frame_blockage ());
5589 }
5590 else
5591 {
5592 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5593 emit_move_insn (size_rtx, size_int_rtx);
5594 emit_window_save (size_rtx);
5595 }
5596 }
5597
5598 if (sparc_leaf_function_p)
5599 {
5600 sparc_frame_base_reg = stack_pointer_rtx;
5601 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5602 }
5603 else
5604 {
5605 sparc_frame_base_reg = hard_frame_pointer_rtx;
5606 sparc_frame_base_offset = SPARC_STACK_BIAS;
5607 }
5608
5609 if (sparc_n_global_fp_regs > 0)
5610 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5611 sparc_frame_base_offset
5612 - sparc_apparent_frame_size,
5613 SORR_SAVE);
5614
5615 /* Load the GOT register if needed. */
5616 if (crtl->uses_pic_offset_table)
5617 load_got_register ();
5618
5619 /* Advertise that the data calculated just above are now valid. */
5620 sparc_prologue_data_valid_p = true;
5621 }
5622
5623 /* Expand the function prologue for the flat window model. The prologue is
5624 responsible for reserving storage for the frame, saving the call-saved
5625 registers and loading the GOT register if needed. */
5626
5627 void
5628 sparc_flat_expand_prologue (void)
5629 {
5630 HOST_WIDE_INT size;
5631 rtx_insn *insn;
5632
5633 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5634
5635 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5636
5637 if (flag_stack_usage_info)
5638 current_function_static_stack_size = size;
5639
5640 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5641 {
5642 if (crtl->is_leaf && !cfun->calls_alloca)
5643 {
5644 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5645 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5646 size - STACK_CHECK_PROTECT);
5647 }
5648 else if (size > 0)
5649 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5650 }
5651
5652 if (sparc_save_local_in_regs_p)
5653 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5654 SORR_SAVE);
5655
5656 if (size == 0)
5657 ; /* do nothing. */
5658 else
5659 {
5660 rtx size_int_rtx, size_rtx;
5661
5662 size_rtx = size_int_rtx = GEN_INT (-size);
5663
5664 /* We establish the frame (i.e. decrement the stack pointer) first, even
5665 if we use a frame pointer, because we cannot clobber any call-saved
5666 registers, including the frame pointer, if we haven't created a new
5667 register save area, for the sake of compatibility with the ABI. */
5668 if (size <= 4096)
5669 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5670 else if (size <= 8192 && !frame_pointer_needed)
5671 {
5672 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5673 RTX_FRAME_RELATED_P (insn) = 1;
5674 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5675 }
5676 else
5677 {
5678 size_rtx = gen_rtx_REG (Pmode, 1);
5679 emit_move_insn (size_rtx, size_int_rtx);
5680 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5681 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5682 gen_stack_pointer_inc (size_int_rtx));
5683 }
5684 RTX_FRAME_RELATED_P (insn) = 1;
5685
5686 /* Ensure nothing is scheduled until after the frame is established. */
5687 emit_insn (gen_blockage ());
5688
5689 if (frame_pointer_needed)
5690 {
5691 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5692 gen_rtx_MINUS (Pmode,
5693 stack_pointer_rtx,
5694 size_rtx)));
5695 RTX_FRAME_RELATED_P (insn) = 1;
5696
5697 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5698 gen_rtx_SET (hard_frame_pointer_rtx,
5699 plus_constant (Pmode, stack_pointer_rtx,
5700 size)));
5701 }
5702
5703 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5704 {
5705 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5706 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5707
5708 insn = emit_move_insn (i7, o7);
5709 RTX_FRAME_RELATED_P (insn) = 1;
5710
5711 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5712
5713 /* Prevent this instruction from ever being considered dead,
5714 even if this function has no epilogue. */
5715 emit_use (i7);
5716 }
5717 }
5718
5719 if (frame_pointer_needed)
5720 {
5721 sparc_frame_base_reg = hard_frame_pointer_rtx;
5722 sparc_frame_base_offset = SPARC_STACK_BIAS;
5723 }
5724 else
5725 {
5726 sparc_frame_base_reg = stack_pointer_rtx;
5727 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5728 }
5729
5730 if (sparc_n_global_fp_regs > 0)
5731 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5732 sparc_frame_base_offset
5733 - sparc_apparent_frame_size,
5734 SORR_SAVE);
5735
5736 /* Load the GOT register if needed. */
5737 if (crtl->uses_pic_offset_table)
5738 load_got_register ();
5739
5740 /* Advertise that the data calculated just above are now valid. */
5741 sparc_prologue_data_valid_p = true;
5742 }
5743
5744 /* This function generates the assembly code for function entry, which boils
5745 down to emitting the necessary .register directives. */
5746
5747 static void
5748 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5749 {
5750 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5751 if (!TARGET_FLAT)
5752 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5753
5754 sparc_output_scratch_registers (file);
5755 }
5756
5757 /* Expand the function epilogue, either normal or part of a sibcall.
5758 We emit all the instructions except the return or the call. */
5759
5760 void
5761 sparc_expand_epilogue (bool for_eh)
5762 {
5763 HOST_WIDE_INT size = sparc_frame_size;
5764
5765 if (sparc_n_global_fp_regs > 0)
5766 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5767 sparc_frame_base_offset
5768 - sparc_apparent_frame_size,
5769 SORR_RESTORE);
5770
5771 if (size == 0 || for_eh)
5772 ; /* do nothing. */
5773 else if (sparc_leaf_function_p)
5774 {
5775 if (size <= 4096)
5776 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5777 else if (size <= 8192)
5778 {
5779 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5780 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5781 }
5782 else
5783 {
5784 rtx reg = gen_rtx_REG (Pmode, 1);
5785 emit_move_insn (reg, GEN_INT (size));
5786 emit_insn (gen_stack_pointer_inc (reg));
5787 }
5788 }
5789 }
5790
5791 /* Expand the function epilogue for the flat window model, either normal or
5792 part of a sibcall. We emit all the instructions except the return or the call. */
5793
5794 void
5795 sparc_flat_expand_epilogue (bool for_eh)
5796 {
5797 HOST_WIDE_INT size = sparc_frame_size;
5798
5799 if (sparc_n_global_fp_regs > 0)
5800 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5801 sparc_frame_base_offset
5802 - sparc_apparent_frame_size,
5803 SORR_RESTORE);
5804
5805 /* If we have a frame pointer, we'll need both to restore it before the
5806 frame is destroyed and to use its current value in destroying the frame.
5807 Since we don't have an atomic way to do that in the flat window model,
5808 we save the current value into a temporary register (%g1). */
5809 if (frame_pointer_needed && !for_eh)
5810 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5811
5812 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5813 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5814 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5815
5816 if (sparc_save_local_in_regs_p)
5817 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5818 sparc_frame_base_offset,
5819 SORR_RESTORE);
5820
5821 if (size == 0 || for_eh)
5822 ; /* do nothing. */
5823 else if (frame_pointer_needed)
5824 {
5825 /* Make sure the frame is destroyed after everything else is done. */
5826 emit_insn (gen_blockage ());
5827
5828 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5829 }
5830 else
5831 {
5832 /* Likewise. */
5833 emit_insn (gen_blockage ());
5834
5835 if (size <= 4096)
5836 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5837 else if (size <= 8192)
5838 {
5839 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5840 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5841 }
5842 else
5843 {
5844 rtx reg = gen_rtx_REG (Pmode, 1);
5845 emit_move_insn (reg, GEN_INT (size));
5846 emit_insn (gen_stack_pointer_inc (reg));
5847 }
5848 }
5849 }
5850
5851 /* Return true if it is appropriate to emit `return' instructions in the
5852 body of a function. */
5853
5854 bool
5855 sparc_can_use_return_insn_p (void)
5856 {
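  /* Note that the && chain binds more tightly than ?:, so the selector of
     the conditional below is the whole conjunction, not just TARGET_FLAT.  */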
5857 return sparc_prologue_data_valid_p
5858 && sparc_n_global_fp_regs == 0
5859 && TARGET_FLAT
5860 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5861 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5862 }
5863
5864 /* This function generates the assembly code for function exit. */
5865
5866 static void
5867 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5868 {
5869 /* If the last two instructions of a function are "call foo; dslot;"
5870 the return address might point to the first instruction in the next
5871 function and we have to output a dummy nop for the sake of sane
5872 backtraces in such cases. This is pointless for sibling calls since
5873 the return address is explicitly adjusted. */
5874
5875 rtx_insn *insn = get_last_insn ();
5876
5877 rtx last_real_insn = prev_real_insn (insn);
5878 if (last_real_insn
5879 && NONJUMP_INSN_P (last_real_insn)
5880 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5881 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5882
5883 if (last_real_insn
5884 && CALL_P (last_real_insn)
5885 && !SIBLING_CALL_P (last_real_insn))
5886 fputs("\tnop\n", file);
5887
5888 sparc_output_deferred_case_vectors ();
5889 }
5890
5891 /* Output a 'restore' instruction. */
5892
5893 static void
5894 output_restore (rtx pat)
5895 {
5896 rtx operands[3];
5897
5898 if (! pat)
5899 {
5900 fputs ("\t restore\n", asm_out_file);
5901 return;
5902 }
5903
5904 gcc_assert (GET_CODE (pat) == SET);
5905
5906 operands[0] = SET_DEST (pat);
5907 pat = SET_SRC (pat);
5908
5909 switch (GET_CODE (pat))
5910 {
5911 case PLUS:
5912 operands[1] = XEXP (pat, 0);
5913 operands[2] = XEXP (pat, 1);
5914 output_asm_insn (" restore %r1, %2, %Y0", operands);
5915 break;
5916 case LO_SUM:
5917 operands[1] = XEXP (pat, 0);
5918 operands[2] = XEXP (pat, 1);
5919 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5920 break;
5921 case ASHIFT:
5922 operands[1] = XEXP (pat, 0);
5923 gcc_assert (XEXP (pat, 1) == const1_rtx);
5924 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5925 break;
5926 default:
5927 operands[1] = pat;
5928 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5929 break;
5930 }
5931 }
5932
5933 /* Output a return. */
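/* The templates returned below use the SPARC punctuation operators '%)' and
   '%#', handled by sparc_print_operand: '%)' prints the offset to add to the
   return address and '%#' prints a trailing nop when the delay slot is empty.  */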
5934
5935 const char *
5936 output_return (rtx_insn *insn)
5937 {
5938 if (crtl->calls_eh_return)
5939 {
5940 /* If the function uses __builtin_eh_return, the eh_return
5941 machinery occupies the delay slot. */
5942 gcc_assert (!final_sequence);
5943
5944 if (flag_delayed_branch)
5945 {
5946 if (!TARGET_FLAT && TARGET_V9)
5947 fputs ("\treturn\t%i7+8\n", asm_out_file);
5948 else
5949 {
5950 if (!TARGET_FLAT)
5951 fputs ("\trestore\n", asm_out_file);
5952
5953 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5954 }
5955
5956 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5957 }
5958 else
5959 {
5960 if (!TARGET_FLAT)
5961 fputs ("\trestore\n", asm_out_file);
5962
5963 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5964 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5965 }
5966 }
5967 else if (sparc_leaf_function_p || TARGET_FLAT)
5968 {
5969 /* This is a leaf or flat function so we don't have to bother restoring
5970 the register window, which frees us from dealing with the convoluted
5971 semantics of restore/return. We simply output the jump to the
5972 return address and the insn in the delay slot (if any). */
5973
5974 return "jmp\t%%o7+%)%#";
5975 }
5976 else
5977 {
5978 /* This is a regular function so we have to restore the register window.
5979 We may have a pending insn for the delay slot, which will be either
5980 combined with the 'restore' instruction or put in the delay slot of
5981 the 'return' instruction. */
5982
5983 if (final_sequence)
5984 {
5985 rtx delay, pat;
5986
5987 delay = NEXT_INSN (insn);
5988 gcc_assert (delay);
5989
5990 pat = PATTERN (delay);
5991
5992 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5993 {
5994 epilogue_renumber (&pat, 0);
5995 return "return\t%%i7+%)%#";
5996 }
5997 else
5998 {
5999 output_asm_insn ("jmp\t%%i7+%)", NULL);
6000 output_restore (pat);
6001 PATTERN (delay) = gen_blockage ();
6002 INSN_CODE (delay) = -1;
6003 }
6004 }
6005 else
6006 {
6007 /* The delay slot is empty. */
6008 if (TARGET_V9)
6009 return "return\t%%i7+%)\n\t nop";
6010 else if (flag_delayed_branch)
6011 return "jmp\t%%i7+%)\n\t restore";
6012 else
6013 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6014 }
6015 }
6016
6017 return "";
6018 }
6019
6020 /* Output a sibling call. */
6021
6022 const char *
6023 output_sibcall (rtx_insn *insn, rtx call_operand)
6024 {
6025 rtx operands[1];
6026
6027 gcc_assert (flag_delayed_branch);
6028
6029 operands[0] = call_operand;
6030
6031 if (sparc_leaf_function_p || TARGET_FLAT)
6032 {
6033 /* This is a leaf or flat function so we don't have to bother restoring
6034 the register window. We simply output the jump to the function and
6035 the insn in the delay slot (if any). */
6036
6037 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6038
6039 if (final_sequence)
6040 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6041 operands);
6042 else
6043 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6044 it into a branch if possible. */
6045 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6046 operands);
6047 }
6048 else
6049 {
6050 /* This is a regular function so we have to restore the register window.
6051 We may have a pending insn for the delay slot, which will be combined
6052 with the 'restore' instruction. */
6053
6054 output_asm_insn ("call\t%a0, 0", operands);
6055
6056 if (final_sequence)
6057 {
6058 rtx_insn *delay = NEXT_INSN (insn);
6059 gcc_assert (delay);
6060
6061 output_restore (PATTERN (delay));
6062
6063 PATTERN (delay) = gen_blockage ();
6064 INSN_CODE (delay) = -1;
6065 }
6066 else
6067 output_restore (NULL_RTX);
6068 }
6069
6070 return "";
6071 }
6072 \f
6073 /* Functions for handling argument passing.
6074
6075 For 32-bit, the first 6 args are normally in registers and the rest are
6076 pushed. Any arg that starts within the first 6 words is at least
6077 partially passed in a register unless its data type forbids it.
6078
6079 For 64-bit, the argument registers are laid out as an array of 16 elements
6080 and arguments are added sequentially. The first 6 int args and up to the
6081 first 16 fp args (depending on size) are passed in regs.
6082
6083 Slot Stack Integral Float Float in structure Double Long Double
6084 ---- ----- -------- ----- ------------------ ------ -----------
6085 15 [SP+248] %f31 %f30,%f31 %d30
6086 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6087 13 [SP+232] %f27 %f26,%f27 %d26
6088 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6089 11 [SP+216] %f23 %f22,%f23 %d22
6090 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6091 9 [SP+200] %f19 %f18,%f19 %d18
6092 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6093 7 [SP+184] %f15 %f14,%f15 %d14
6094 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6095 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6096 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6097 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6098 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6099 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6100 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6101
6102 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6103
6104 Integral arguments are always passed as 64-bit quantities appropriately
6105 extended.
6106
6107 Passing of floating point values is handled as follows.
6108 If a prototype is in scope:
6109 If the value is in a named argument (i.e. not a stdarg function or a
6110 value not part of the `...') then the value is passed in the appropriate
6111 fp reg.
6112 If the value is part of the `...' and is passed in one of the first 6
6113 slots then the value is passed in the appropriate int reg.
6114 If the value is part of the `...' and is not passed in one of the first 6
6115 slots then the value is passed in memory.
6116 If a prototype is not in scope:
6117 If the value is one of the first 6 arguments the value is passed in the
6118 appropriate integer reg and the appropriate fp reg.
6119 If the value is not one of the first 6 arguments the value is passed in
6120 the appropriate fp reg and in memory.
6121
6122
6123 Summary of the calling conventions implemented by GCC on the SPARC:
6124
6125 32-bit ABI:
6126 size argument return value
6127
6128 small integer <4 int. reg. int. reg.
6129 word 4 int. reg. int. reg.
6130 double word 8 int. reg. int. reg.
6131
6132 _Complex small integer <8 int. reg. int. reg.
6133 _Complex word 8 int. reg. int. reg.
6134 _Complex double word 16 memory int. reg.
6135
6136 vector integer <=8 int. reg. FP reg.
6137 vector integer >8 memory memory
6138
6139 float 4 int. reg. FP reg.
6140 double 8 int. reg. FP reg.
6141 long double 16 memory memory
6142
6143 _Complex float 8 memory FP reg.
6144 _Complex double 16 memory FP reg.
6145 _Complex long double 32 memory FP reg.
6146
6147 vector float any memory memory
6148
6149 aggregate any memory memory
6150
6151
6152
6153 64-bit ABI:
6154 size argument return value
6155
6156 small integer <8 int. reg. int. reg.
6157 word 8 int. reg. int. reg.
6158 double word 16 int. reg. int. reg.
6159
6160 _Complex small integer <16 int. reg. int. reg.
6161 _Complex word 16 int. reg. int. reg.
6162 _Complex double word 32 memory int. reg.
6163
6164 vector integer <=16 FP reg. FP reg.
6165 vector integer 16<s<=32 memory FP reg.
6166 vector integer >32 memory memory
6167
6168 float 4 FP reg. FP reg.
6169 double 8 FP reg. FP reg.
6170 long double 16 FP reg. FP reg.
6171
6172 _Complex float 8 FP reg. FP reg.
6173 _Complex double 16 FP reg. FP reg.
6174 _Complex long double 32 memory FP reg.
6175
6176 vector float <=16 FP reg. FP reg.
6177 vector float 16<s<=32 memory FP reg.
6178 vector float >32 memory memory
6179
6180 aggregate <=16 reg. reg.
6181 aggregate 16<s<=32 memory reg.
6182 aggregate >32 memory memory
6183
6184
6185
6186 Note #1: complex floating-point types follow the extended SPARC ABIs as
6187 implemented by the Sun compiler.
6188
6189 Note #2: integral vector types follow the scalar floating-point types
6190 conventions to match what is implemented by the Sun VIS SDK.
6191
6192 Note #3: floating-point vector types follow the aggregate types
6193 conventions. */
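/* As a reading aid for the 64-bit tables above (not additional ABI text),
   consider a prototyped call

     struct s { double d; int i; };     (16 bytes, occupying slots 1 and 2)
     extern void f (int a, struct s b);

   A is expected to be passed in %o0 (slot 0), B.D in the FP register for
   slot 1 (%d2) and B.I in the integer register for slot 2 (%o2), as
   computed by function_arg_record_value below.  */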
6194
6195
6196 /* Maximum number of int regs for args. */
6197 #define SPARC_INT_ARG_MAX 6
6198 /* Maximum number of fp regs for args. */
6199 #define SPARC_FP_ARG_MAX 16
6200 /* Number of words (partially) occupied for a given size in units. */
6201 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
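/* E.g. with 8-byte words, CEIL_NWORDS (12) is 2: a 12-byte object occupies
   two argument slots.  */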
6202
6203 /* Handle the INIT_CUMULATIVE_ARGS macro.
6204 Initialize a variable CUM of type CUMULATIVE_ARGS
6205 for a call to a function whose data type is FNTYPE.
6206 For a library call, FNTYPE is 0. */
6207
6208 void
6209 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6210 {
6211 cum->words = 0;
6212 cum->prototype_p = fntype && prototype_p (fntype);
6213 cum->libcall_p = !fntype;
6214 }
6215
6216 /* Handle promotion of pointer and integer arguments. */
6217
6218 static machine_mode
6219 sparc_promote_function_mode (const_tree type, machine_mode mode,
6220 int *punsignedp, const_tree, int)
6221 {
6222 if (type && POINTER_TYPE_P (type))
6223 {
6224 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6225 return Pmode;
6226 }
6227
6228 /* Integral arguments are passed as full words, as per the ABI. */
6229 if (GET_MODE_CLASS (mode) == MODE_INT
6230 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6231 return word_mode;
6232
6233 return mode;
6234 }
6235
6236 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6237
6238 static bool
6239 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6240 {
6241 return TARGET_ARCH64 ? true : false;
6242 }
6243
6244 /* Traverse the record TYPE recursively and call FUNC on its fields.
6245 NAMED is true if this is for a named parameter. DATA is passed
6246 to FUNC for each field. OFFSET is the starting position and
6247 PACKED is true if we are inside a packed record. */
6248
6249 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6250 static void
6251 traverse_record_type (const_tree type, bool named, T *data,
6252 HOST_WIDE_INT offset = 0, bool packed = false)
6253 {
6254 /* The ABI obviously doesn't specify how packed structures are passed.
6255 These are passed in integer regs if possible, otherwise memory. */
6256 if (!packed)
6257 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6258 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6259 {
6260 packed = true;
6261 break;
6262 }
6263
6264 /* Walk the real fields, but skip those with no size or a zero size.
6265 ??? Fields with variable offset are handled as having zero offset. */
6266 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6267 if (TREE_CODE (field) == FIELD_DECL)
6268 {
6269 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6270 continue;
6271
6272 HOST_WIDE_INT bitpos = offset;
6273 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6274 bitpos += int_bit_position (field);
6275
6276 tree field_type = TREE_TYPE (field);
6277 if (TREE_CODE (field_type) == RECORD_TYPE)
6278 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6279 packed);
6280 else
6281 {
6282 const bool fp_type
6283 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6284 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6285 data);
6286 }
6287 }
6288 }
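/* For instance, function_arg_slotno below instantiates this template as
   traverse_record_type<classify_data_t, classify_registers> to scan a record
   type for fields eligible for FP registers.  */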
6289
6290 /* Handle recursive register classifying for structure layout. */
6291
6292 typedef struct
6293 {
6294 bool fp_regs; /* true if field eligible to FP registers. */
6295 bool fp_regs_in_first_word; /* true if such field in first word. */
6296 } classify_data_t;
6297
6298 /* A subroutine of function_arg_slotno. Classify the field. */
6299
6300 inline void
6301 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6302 classify_data_t *data)
6303 {
6304 if (fp)
6305 {
6306 data->fp_regs = true;
6307 if (bitpos < BITS_PER_WORD)
6308 data->fp_regs_in_first_word = true;
6309 }
6310 }
6311
6312 /* Compute the slot number to pass an argument in.
6313 Return the slot number or -1 if passing on the stack.
6314
6315 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6316 the preceding args and about the function being called.
6317 MODE is the argument's machine mode.
6318 TYPE is the data type of the argument (as a tree).
6319 This is null for libcalls where that information may
6320 not be available.
6321 NAMED is nonzero if this argument is a named parameter
6322 (otherwise it is an extra parameter matching an ellipsis).
6323 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6324 *PREGNO records the register number to use if scalar type.
6325 *PPADDING records the amount of padding needed in words. */
6326
6327 static int
6328 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6329 const_tree type, bool named, bool incoming,
6330 int *pregno, int *ppadding)
6331 {
6332 int regbase = (incoming
6333 ? SPARC_INCOMING_INT_ARG_FIRST
6334 : SPARC_OUTGOING_INT_ARG_FIRST);
6335 int slotno = cum->words;
6336 enum mode_class mclass;
6337 int regno;
6338
6339 *ppadding = 0;
6340
6341 if (type && TREE_ADDRESSABLE (type))
6342 return -1;
6343
6344 if (TARGET_ARCH32
6345 && mode == BLKmode
6346 && type
6347 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6348 return -1;
6349
6350 /* For SPARC64, objects requiring 16-byte alignment get it. */
6351 if (TARGET_ARCH64
6352 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6353 && (slotno & 1) != 0)
6354 slotno++, *ppadding = 1;
6355
6356 mclass = GET_MODE_CLASS (mode);
6357 if (type && TREE_CODE (type) == VECTOR_TYPE)
6358 {
6359 /* Vector types deserve special treatment because they are
6360 polymorphic wrt their mode, depending upon whether VIS
6361 instructions are enabled. */
6362 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6363 {
6364 /* The SPARC port defines no floating-point vector modes. */
6365 gcc_assert (mode == BLKmode);
6366 }
6367 else
6368 {
6369 /* Integral vector types should either have a vector
6370 mode or an integral mode, because we are guaranteed
6371 by pass_by_reference that their size is not greater
6372 than 16 bytes and TImode is 16-byte wide. */
6373 gcc_assert (mode != BLKmode);
6374
6375 /* Vector integers are handled like floats according to
6376 the Sun VIS SDK. */
6377 mclass = MODE_FLOAT;
6378 }
6379 }
6380
6381 switch (mclass)
6382 {
6383 case MODE_FLOAT:
6384 case MODE_COMPLEX_FLOAT:
6385 case MODE_VECTOR_INT:
6386 if (TARGET_ARCH64 && TARGET_FPU && named)
6387 {
6388 /* If all arg slots are filled, then must pass on stack. */
6389 if (slotno >= SPARC_FP_ARG_MAX)
6390 return -1;
6391
6392 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6393 /* Arguments filling only one single FP register are
6394 right-justified in the outer double FP register. */
6395 if (GET_MODE_SIZE (mode) <= 4)
6396 regno++;
6397 break;
6398 }
6399 /* fallthrough */
6400
6401 case MODE_INT:
6402 case MODE_COMPLEX_INT:
6403 /* If all arg slots are filled, then must pass on stack. */
6404 if (slotno >= SPARC_INT_ARG_MAX)
6405 return -1;
6406
6407 regno = regbase + slotno;
6408 break;
6409
6410 case MODE_RANDOM:
6411 if (mode == VOIDmode)
6412 /* MODE is VOIDmode when generating the actual call. */
6413 return -1;
6414
6415 gcc_assert (mode == BLKmode);
6416
6417 if (TARGET_ARCH32
6418 || !type
6419 || (TREE_CODE (type) != RECORD_TYPE
6420 && TREE_CODE (type) != VECTOR_TYPE))
6421 {
6422 /* If all arg slots are filled, then must pass on stack. */
6423 if (slotno >= SPARC_INT_ARG_MAX)
6424 return -1;
6425
6426 regno = regbase + slotno;
6427 }
6428 else /* TARGET_ARCH64 && type */
6429 {
6430 /* If all arg slots are filled, then must pass on stack. */
6431 if (slotno >= SPARC_FP_ARG_MAX)
6432 return -1;
6433
6434 if (TREE_CODE (type) == RECORD_TYPE)
6435 {
6436 classify_data_t data = { false, false };
6437 traverse_record_type<classify_data_t, classify_registers>
6438 (type, named, &data);
6439
6440 if (data.fp_regs)
6441 {
6442 /* If all FP slots are filled except for the last one and
6443 there is no FP field in the first word, then must pass
6444 on stack. */
6445 if (slotno >= SPARC_FP_ARG_MAX - 1
6446 && !data.fp_regs_in_first_word)
6447 return -1;
6448 }
6449 else
6450 {
6451 /* If all int slots are filled, then must pass on stack. */
6452 if (slotno >= SPARC_INT_ARG_MAX)
6453 return -1;
6454 }
6455 }
6456
6457 /* PREGNO isn't set since both int and FP regs can be used. */
6458 return slotno;
6459 }
6460 break;
6461
6462 default :
6463 gcc_unreachable ();
6464 }
6465
6466 *pregno = regno;
6467 return slotno;
6468 }
6469
6470 /* Handle recursive register counting/assigning for structure layout. */
6471
6472 typedef struct
6473 {
6474 int slotno; /* slot number of the argument. */
6475 int regbase; /* regno of the base register. */
6476 int intoffset; /* offset of the first pending integer field. */
6477 int nregs; /* number of words passed in registers. */
6478 bool stack; /* true if part of the argument is on the stack. */
6479 rtx ret; /* return expression being built. */
6480 } assign_data_t;
6481
6482 /* A subroutine of function_arg_record_value. Compute the number of integer
6483 registers to be assigned between PARMS->intoffset and BITPOS. Return
6484 true if at least one integer register is assigned or false otherwise. */
6485
6486 static bool
6487 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6488 {
6489 if (data->intoffset < 0)
6490 return false;
6491
6492 const int intoffset = data->intoffset;
6493 data->intoffset = -1;
6494
6495 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6496 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6497 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6498 int nregs = (endbit - startbit) / BITS_PER_WORD;
6499
6500 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6501 {
6502 nregs = SPARC_INT_ARG_MAX - this_slotno;
6503
6504 /* We need to pass this field (partly) on the stack. */
6505 data->stack = 1;
6506 }
6507
6508 if (nregs <= 0)
6509 return false;
6510
6511 *pnregs = nregs;
6512 return true;
6513 }
6514
6515 /* A subroutine of function_arg_record_value. Compute the number and the mode
6516 of the FP registers to be assigned for FIELD. Return true if at least one
6517 FP register is assigned or false otherwise. */
6518
6519 static bool
6520 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6521 assign_data_t *data,
6522 int *pnregs, machine_mode *pmode)
6523 {
6524 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6525 machine_mode mode = DECL_MODE (field);
6526 int nregs, nslots;
6527
6528 /* Slots are counted as words while regs are counted as having the size of
6529 the (inner) mode. */
6530 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6531 {
6532 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6533 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6534 }
6535 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6536 {
6537 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6538 nregs = 2;
6539 }
6540 else
6541 nregs = 1;
6542
6543 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6544
6545 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6546 {
6547 nslots = SPARC_FP_ARG_MAX - this_slotno;
6548 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6549
6550 /* We need to pass this field (partly) on the stack. */
6551 data->stack = 1;
6552
6553 if (nregs <= 0)
6554 return false;
6555 }
6556
6557 *pnregs = nregs;
6558 *pmode = mode;
6559 return true;
6560 }
6561
6562 /* A subroutine of function_arg_record_value. Count the number of registers
6563 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6564
6565 inline void
6566 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6567 assign_data_t *data)
6568 {
6569 if (fp)
6570 {
6571 int nregs;
6572 machine_mode mode;
6573
6574 if (compute_int_layout (bitpos, data, &nregs))
6575 data->nregs += nregs;
6576
6577 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6578 data->nregs += nregs;
6579 }
6580 else
6581 {
6582 if (data->intoffset < 0)
6583 data->intoffset = bitpos;
6584 }
6585 }
6586
6587 /* A subroutine of function_arg_record_value. Assign the bits of the
6588 structure between PARMS->intoffset and BITPOS to integer registers. */
6589
6590 static void
6591 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6592 {
6593 int intoffset = data->intoffset;
6594 machine_mode mode;
6595 int nregs;
6596
6597 if (!compute_int_layout (bitpos, data, &nregs))
6598 return;
6599
6600 /* If this is the trailing part of a word, only load that much into
6601 the register. Otherwise load the whole register. Note that in
6602 the latter case we may pick up unwanted bits. It's not a problem
6603 at the moment but we may wish to revisit it. */
6604 if (intoffset % BITS_PER_WORD != 0)
6605 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6606 MODE_INT);
6607 else
6608 mode = word_mode;
6609
6610 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6611 unsigned int regno = data->regbase + this_slotno;
6612 intoffset /= BITS_PER_UNIT;
6613
6614 do
6615 {
6616 rtx reg = gen_rtx_REG (mode, regno);
6617 XVECEXP (data->ret, 0, data->stack + data->nregs)
6618 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6619 data->nregs += 1;
6620 mode = word_mode;
6621 regno += 1;
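      /* Advance to the next word boundary.  */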
6622 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6623 }
6624 while (--nregs > 0);
6625 }
6626
6627 /* A subroutine of function_arg_record_value. Assign FIELD at position
6628 BITPOS to FP registers. */
6629
6630 static void
6631 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6632 assign_data_t *data)
6633 {
6634 int nregs;
6635 machine_mode mode;
6636
6637 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6638 return;
6639
6640 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6641 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
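  /* A field occupying a single FP register and located in the second half of
     its word is right-justified in the odd register of the pair.  */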
6642 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6643 regno++;
6644 int pos = bitpos / BITS_PER_UNIT;
6645
6646 do
6647 {
6648 rtx reg = gen_rtx_REG (mode, regno);
6649 XVECEXP (data->ret, 0, data->stack + data->nregs)
6650 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6651 data->nregs += 1;
6652 regno += GET_MODE_SIZE (mode) / 4;
6653 pos += GET_MODE_SIZE (mode);
6654 }
6655 while (--nregs > 0);
6656 }
6657
6658 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6659 the structure between PARMS->intoffset and BITPOS to registers. */
6660
6661 inline void
6662 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6663 assign_data_t *data)
6664 {
6665 if (fp)
6666 {
6667 assign_int_registers (bitpos, data);
6668
6669 assign_fp_registers (field, bitpos, data);
6670 }
6671 else
6672 {
6673 if (data->intoffset < 0)
6674 data->intoffset = bitpos;
6675 }
6676 }
6677
6678 /* Used by function_arg and sparc_function_value_1 to implement the complex
6679 conventions of the 64-bit ABI for passing and returning structures.
6680 Return an expression valid as a return value for the FUNCTION_ARG
6681 and TARGET_FUNCTION_VALUE.
6682
6683 TYPE is the data type of the argument (as a tree).
6684 This is null for libcalls where that information may
6685 not be available.
6686 MODE is the argument's machine mode.
6687 SLOTNO is the index number of the argument's slot in the parameter array.
6688 NAMED is true if this argument is a named parameter
6689 (otherwise it is an extra parameter matching an ellipsis).
6690 REGBASE is the regno of the base register for the parameter array. */
6691
6692 static rtx
6693 function_arg_record_value (const_tree type, machine_mode mode,
6694 int slotno, bool named, int regbase)
6695 {
6696 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6697 assign_data_t data;
6698 int nregs;
6699
6700 data.slotno = slotno;
6701 data.regbase = regbase;
6702
6703 /* Count how many registers we need. */
6704 data.nregs = 0;
6705 data.intoffset = 0;
6706 data.stack = false;
6707 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6708
6709 /* Take into account pending integer fields. */
6710 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6711 data.nregs += nregs;
6712
6713 /* Allocate the vector and handle some annoying special cases. */
6714 nregs = data.nregs;
6715
6716 if (nregs == 0)
6717 {
6718 /* ??? Empty structure has no value? Duh? */
6719 if (typesize <= 0)
6720 {
6721 /* Though there's nothing really to store, return a word register
6722 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6723 leads to breakage due to the fact that there are zero bytes to
6724 load. */
6725 return gen_rtx_REG (mode, regbase);
6726 }
6727
6728 /* ??? C++ has structures with no fields, and yet a size. Give up
6729 for now and pass everything back in integer registers. */
6730 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6731 if (nregs + slotno > SPARC_INT_ARG_MAX)
6732 nregs = SPARC_INT_ARG_MAX - slotno;
6733 }
6734
6735 gcc_assert (nregs > 0);
6736
6737 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6738
6739 /* If at least one field must be passed on the stack, generate
6740 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6741 also be passed on the stack. We can't do much better because the
6742 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6743 of structures for which the fields passed exclusively in registers
6744 are not at the beginning of the structure. */
6745 if (data.stack)
6746 XVECEXP (data.ret, 0, 0)
6747 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6748
6749 /* Assign the registers. */
6750 data.nregs = 0;
6751 data.intoffset = 0;
6752 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6753
6754 /* Assign pending integer fields. */
6755 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6756
6757 gcc_assert (data.nregs == nregs);
6758
6759 return data.ret;
6760 }
6761
6762 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6763 of the 64-bit ABI for passing and returning unions.
6764 Return an expression valid as a return value for the FUNCTION_ARG
6765 and TARGET_FUNCTION_VALUE.
6766
6767 SIZE is the size in bytes of the union.
6768 MODE is the argument's machine mode.
6769 REGNO is the hard register the union will be passed in. */
6770
6771 static rtx
6772 function_arg_union_value (int size, machine_mode mode, int slotno,
6773 int regno)
6774 {
6775 int nwords = CEIL_NWORDS (size), i;
6776 rtx regs;
6777
6778 /* See comment in previous function for empty structures. */
6779 if (nwords == 0)
6780 return gen_rtx_REG (mode, regno);
6781
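  /* A union starting in the last integer argument slot can use only that one
     register; the remainder goes on the stack (see sparc_arg_partial_bytes).  */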
6782 if (slotno == SPARC_INT_ARG_MAX - 1)
6783 nwords = 1;
6784
6785 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6786
6787 for (i = 0; i < nwords; i++)
6788 {
6789 /* Unions are passed left-justified. */
6790 XVECEXP (regs, 0, i)
6791 = gen_rtx_EXPR_LIST (VOIDmode,
6792 gen_rtx_REG (word_mode, regno),
6793 GEN_INT (UNITS_PER_WORD * i));
6794 regno++;
6795 }
6796
6797 return regs;
6798 }
6799
6800 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6801 for passing and returning BLKmode vectors.
6802 Return an expression valid as a return value for the FUNCTION_ARG
6803 and TARGET_FUNCTION_VALUE.
6804
6805 SIZE is the size in bytes of the vector.
6806 REGNO is the FP hard register the vector will be passed in. */
6807
6808 static rtx
6809 function_arg_vector_value (int size, int regno)
6810 {
6811 const int nregs = MAX (1, size / 8);
6812 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6813
6814 if (size < 8)
6815 XVECEXP (regs, 0, 0)
6816 = gen_rtx_EXPR_LIST (VOIDmode,
6817 gen_rtx_REG (SImode, regno),
6818 const0_rtx);
6819 else
6820 for (int i = 0; i < nregs; i++)
6821 XVECEXP (regs, 0, i)
6822 = gen_rtx_EXPR_LIST (VOIDmode,
6823 gen_rtx_REG (DImode, regno + 2*i),
6824 GEN_INT (i*8));
6825
6826 return regs;
6827 }
6828
6829 /* Determine where to put an argument to a function.
6830 Value is zero to push the argument on the stack,
6831 or a hard register in which to store the argument.
6832
6833 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6834 the preceding args and about the function being called.
6835 MODE is the argument's machine mode.
6836 TYPE is the data type of the argument (as a tree).
6837 This is null for libcalls where that information may
6838 not be available.
6839 NAMED is true if this argument is a named parameter
6840 (otherwise it is an extra parameter matching an ellipsis).
6841 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6842 TARGET_FUNCTION_INCOMING_ARG. */
6843
6844 static rtx
6845 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6846 const_tree type, bool named, bool incoming)
6847 {
6848 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6849
6850 int regbase = (incoming
6851 ? SPARC_INCOMING_INT_ARG_FIRST
6852 : SPARC_OUTGOING_INT_ARG_FIRST);
6853 int slotno, regno, padding;
6854 enum mode_class mclass = GET_MODE_CLASS (mode);
6855
6856 slotno = function_arg_slotno (cum, mode, type, named, incoming,
6857 &regno, &padding);
6858 if (slotno == -1)
6859 return 0;
6860
6861 /* Vector types deserve special treatment because they are polymorphic wrt
6862 their mode, depending upon whether VIS instructions are enabled. */
6863 if (type && TREE_CODE (type) == VECTOR_TYPE)
6864 {
6865 HOST_WIDE_INT size = int_size_in_bytes (type);
6866 gcc_assert ((TARGET_ARCH32 && size <= 8)
6867 || (TARGET_ARCH64 && size <= 16));
6868
6869 if (mode == BLKmode)
6870 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6871
6872 mclass = MODE_FLOAT;
6873 }
6874
6875 if (TARGET_ARCH32)
6876 return gen_rtx_REG (mode, regno);
6877
6878 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6879 and are promoted to registers if possible. */
6880 if (type && TREE_CODE (type) == RECORD_TYPE)
6881 {
6882 HOST_WIDE_INT size = int_size_in_bytes (type);
6883 gcc_assert (size <= 16);
6884
6885 return function_arg_record_value (type, mode, slotno, named, regbase);
6886 }
6887
6888 /* Unions up to 16 bytes in size are passed in integer registers. */
6889 else if (type && TREE_CODE (type) == UNION_TYPE)
6890 {
6891 HOST_WIDE_INT size = int_size_in_bytes (type);
6892 gcc_assert (size <= 16);
6893
6894 return function_arg_union_value (size, mode, slotno, regno);
6895 }
6896
6897 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6898 but also have the slot allocated for them.
6899 If no prototype is in scope fp values in register slots get passed
6900 in two places, either fp regs and int regs or fp regs and memory. */
6901 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6902 && SPARC_FP_REG_P (regno))
6903 {
6904 rtx reg = gen_rtx_REG (mode, regno);
6905 if (cum->prototype_p || cum->libcall_p)
6906 return reg;
6907 else
6908 {
6909 rtx v0, v1;
6910
6911 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6912 {
6913 int intreg;
6914
6915 /* On incoming, we don't need to know that the value
6916 is passed in %f0 and %i0, and it confuses other parts
6917 causing needless spillage even in the simplest cases. */
6918 if (incoming)
6919 return reg;
6920
6921 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6922 + (regno - SPARC_FP_ARG_FIRST) / 2);
6923
6924 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6925 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6926 const0_rtx);
6927 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6928 }
6929 else
6930 {
6931 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6932 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6933 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6934 }
6935 }
6936 }
6937
6938 /* All other aggregate types are passed in an integer register in a mode
6939 corresponding to the size of the type. */
6940 else if (type && AGGREGATE_TYPE_P (type))
6941 {
6942 HOST_WIDE_INT size = int_size_in_bytes (type);
6943 gcc_assert (size <= 16);
6944
6945 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6946 }
6947
6948 return gen_rtx_REG (mode, regno);
6949 }
6950
6951 /* Handle the TARGET_FUNCTION_ARG target hook. */
6952
6953 static rtx
6954 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6955 const_tree type, bool named)
6956 {
6957 return sparc_function_arg_1 (cum, mode, type, named, false);
6958 }
6959
6960 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6961
6962 static rtx
6963 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6964 const_tree type, bool named)
6965 {
6966 return sparc_function_arg_1 (cum, mode, type, named, true);
6967 }
6968
6969 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6970
6971 static unsigned int
6972 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6973 {
6974 return ((TARGET_ARCH64
6975 && (GET_MODE_ALIGNMENT (mode) == 128
6976 || (type && TYPE_ALIGN (type) == 128)))
6977 ? 128
6978 : PARM_BOUNDARY);
6979 }
6980
6981 /* For an arg passed partly in registers and partly in memory,
6982 this is the number of bytes of registers used.
6983 For args passed entirely in registers or entirely in memory, zero.
6984
6985 Any arg that starts in the first 6 regs but won't entirely fit in them
6986 needs partial registers on v8. On v9, structures with integer
6987 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6988 values that begin in the last fp reg [where "last fp reg" varies with the
6989 mode] will be split between that reg and memory. */
6990
6991 static int
6992 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
6993 tree type, bool named)
6994 {
6995 int slotno, regno, padding;
6996
6997 /* We pass false for incoming here; it doesn't matter. */
6998 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6999 false, &regno, &padding);
7000
7001 if (slotno == -1)
7002 return 0;
7003
7004 if (TARGET_ARCH32)
7005 {
7006 if ((slotno + (mode == BLKmode
7007 ? CEIL_NWORDS (int_size_in_bytes (type))
7008 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7009 > SPARC_INT_ARG_MAX)
7010 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7011 }
7012 else
7013 {
7014 /* We are guaranteed by pass_by_reference that the size of the
7015 argument is not greater than 16 bytes, so we only need to return
7016 one word if the argument is partially passed in registers. */
7017
7018 if (type && AGGREGATE_TYPE_P (type))
7019 {
7020 int size = int_size_in_bytes (type);
7021
7022 if (size > UNITS_PER_WORD
7023 && (slotno == SPARC_INT_ARG_MAX - 1
7024 || slotno == SPARC_FP_ARG_MAX - 1))
7025 return UNITS_PER_WORD;
7026 }
7027 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7028 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7029 && ! (TARGET_FPU && named)))
7030 {
7031 /* The complex types are passed as packed types. */
7032 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7033 && slotno == SPARC_INT_ARG_MAX - 1)
7034 return UNITS_PER_WORD;
7035 }
7036 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7037 {
7038 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7039 > SPARC_FP_ARG_MAX)
7040 return UNITS_PER_WORD;
7041 }
7042 }
7043
7044 return 0;
7045 }
7046
7047 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7048 Specify whether to pass the argument by reference. */
7049
7050 static bool
7051 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7052 machine_mode mode, const_tree type,
7053 bool named ATTRIBUTE_UNUSED)
7054 {
7055 if (TARGET_ARCH32)
7056 /* Original SPARC 32-bit ABI says that structures and unions,
7057 and quad-precision floats are passed by reference. For Pascal,
7058 also pass arrays by reference. All other base types are passed
7059 in registers.
7060
7061 Extended ABI (as implemented by the Sun compiler) says that all
7062 complex floats are passed by reference. Pass complex integers
7063 in registers up to 8 bytes. More generally, enforce the 2-word
7064 cap for passing arguments in registers.
7065
7066 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7067 integers are passed like floats of the same size, that is in
7068 registers up to 8 bytes. Pass all vector floats by reference
7069 like structure and unions. */
7070 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7071 || mode == SCmode
7072 /* Catch CDImode, TFmode, DCmode and TCmode. */
7073 || GET_MODE_SIZE (mode) > 8
7074 || (type
7075 && TREE_CODE (type) == VECTOR_TYPE
7076 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7077 else
7078 /* Original SPARC 64-bit ABI says that structures and unions
7079 smaller than 16 bytes are passed in registers, as well as
7080 all other base types.
7081
7082 Extended ABI (as implemented by the Sun compiler) says that
7083 complex floats are passed in registers up to 16 bytes. Pass
7084 all complex integers in registers up to 16 bytes. More generally,
7085 enforce the 2-word cap for passing arguments in registers.
7086
7087 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7088 integers are passed like floats of the same size, that is in
7089 registers (up to 16 bytes). Pass all vector floats like structure
7090 and unions. */
7091 return ((type
7092 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7093 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7094 /* Catch CTImode and TCmode. */
7095 || GET_MODE_SIZE (mode) > 16);
7096 }
7097
7098 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7099 Update the data in CUM to advance over an argument
7100 of mode MODE and data type TYPE.
7101 TYPE is null for libcalls where that information may not be available. */
7102
7103 static void
7104 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7105 const_tree type, bool named)
7106 {
7107 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7108 int regno, padding;
7109
7110 /* We pass false for incoming here; it doesn't matter. */
7111 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7112
7113 /* If argument requires leading padding, add it. */
7114 cum->words += padding;
7115
7116 if (TARGET_ARCH32)
7117 cum->words += (mode == BLKmode
7118 ? CEIL_NWORDS (int_size_in_bytes (type))
7119 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7120 else
7121 {
7122 if (type && AGGREGATE_TYPE_P (type))
7123 {
7124 int size = int_size_in_bytes (type);
7125
7126 if (size <= 8)
7127 ++cum->words;
7128 else if (size <= 16)
7129 cum->words += 2;
7130 else /* passed by reference */
7131 ++cum->words;
7132 }
7133 else
7134 cum->words += (mode == BLKmode
7135 ? CEIL_NWORDS (int_size_in_bytes (type))
7136 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7137 }
7138 }
7139
7140 /* Handle the FUNCTION_ARG_PADDING macro.
7141 For the 64 bit ABI structs are always stored left shifted in their
7142 argument slot. */
7143
7144 enum direction
7145 function_arg_padding (machine_mode mode, const_tree type)
7146 {
7147 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7148 return upward;
7149
7150 /* Fall back to the default. */
7151 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7152 }
7153
7154 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7155 Specify whether to return the return value in memory. */
7156
7157 static bool
7158 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7159 {
7160 if (TARGET_ARCH32)
7161 /* Original SPARC 32-bit ABI says that structures and unions,
7162 and quad-precision floats are returned in memory. All other
7163 base types are returned in registers.
7164
7165 Extended ABI (as implemented by the Sun compiler) says that
7166 all complex floats are returned in registers (8 FP registers
7167 at most for '_Complex long double'). Return all complex integers
7168 in registers (4 at most for '_Complex long long').
7169
7170 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7171 integers are returned like floats of the same size, that is in
7172 registers up to 8 bytes and in memory otherwise. Return all
7173 vector floats in memory like structure and unions; note that
7174 they always have BLKmode like the latter. */
7175 return (TYPE_MODE (type) == BLKmode
7176 || TYPE_MODE (type) == TFmode
7177 || (TREE_CODE (type) == VECTOR_TYPE
7178 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7179 else
7180 /* Original SPARC 64-bit ABI says that structures and unions
7181 smaller than 32 bytes are returned in registers, as well as
7182 all other base types.
7183
7184 Extended ABI (as implemented by the Sun compiler) says that all
7185 complex floats are returned in registers (8 FP registers at most
7186 for '_Complex long double'). Return all complex integers in
7187 registers (4 at most for '_Complex TItype').
7188
7189 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7190 integers are returned like floats of the same size, that is in
7191 registers. Return all vector floats like structure and unions;
7192 note that they always have BLKmode like the latter. */
7193 return (TYPE_MODE (type) == BLKmode
7194 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7195 }
7196
7197 /* Handle the TARGET_STRUCT_VALUE target hook.
7198 Return where to find the structure return value address. */
7199
7200 static rtx
7201 sparc_struct_value_rtx (tree fndecl, int incoming)
7202 {
7203 if (TARGET_ARCH64)
7204 return 0;
7205 else
7206 {
7207 rtx mem;
7208
7209 if (incoming)
7210 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7211 STRUCT_VALUE_OFFSET));
7212 else
7213 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7214 STRUCT_VALUE_OFFSET));
7215
7216 /* Only follow the SPARC ABI for fixed-size structure returns.
7217 Variable-size structure returns are handled per the normal
7218 procedures in GCC. This is enabled by -mstd-struct-return. */
7219 if (incoming == 2
7220 && sparc_std_struct_return
7221 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7222 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7223 {
7224 /* We must check and adjust the return address, as it is optional
7225 as to whether the return object is really provided. */
7226 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7227 rtx scratch = gen_reg_rtx (SImode);
7228 rtx_code_label *endlab = gen_label_rtx ();
7229
7230 /* Calculate the return object size. */
7231 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7232 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7233 /* Construct a temporary return value. */
7234 rtx temp_val
7235 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7236
7237 /* Implement SPARC 32-bit psABI callee return struct checking:
7238
7239 Fetch the instruction where we will return to and see if
7240 it's an unimp instruction (the most significant 10 bits
7241 will be zero). */
7242 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7243 plus_constant (Pmode,
7244 ret_reg, 8)));
7245 /* Assume the size is valid and pre-adjust. */
7246 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7247 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7248 0, endlab);
7249 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7250 /* Write the address of the memory pointed to by temp_val into
7251 the memory pointed to by mem. */
7252 emit_move_insn (mem, XEXP (temp_val, 0));
7253 emit_label (endlab);
7254 }
7255
7256 return mem;
7257 }
7258 }
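
/* In the 32-bit ABI the caller of a function returning an aggregate places
   an "unimp <size>" word right after the call's delay slot, and the callee
   normally returns 12 bytes past the call instead of 8 so as to skip it.
   The run-time check above, enabled by -mstd-struct-return, verifies that
   this word is present with the expected size; if it is not, the return
   address adjustment is undone and the return value is redirected into a
   local temporary.  */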
7259
7260 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7261 For v9, function return values are subject to the same rules as arguments,
7262 except that up to 32 bytes may be returned in registers. */
7263
7264 static rtx
7265 sparc_function_value_1 (const_tree type, machine_mode mode,
7266 bool outgoing)
7267 {
7268 /* Beware that the two values are swapped here wrt function_arg. */
7269 int regbase = (outgoing
7270 ? SPARC_INCOMING_INT_ARG_FIRST
7271 : SPARC_OUTGOING_INT_ARG_FIRST);
7272 enum mode_class mclass = GET_MODE_CLASS (mode);
7273 int regno;
7274
7275 /* Vector types deserve special treatment because they are polymorphic wrt
7276 their mode, depending upon whether VIS instructions are enabled. */
7277 if (type && TREE_CODE (type) == VECTOR_TYPE)
7278 {
7279 HOST_WIDE_INT size = int_size_in_bytes (type);
7280 gcc_assert ((TARGET_ARCH32 && size <= 8)
7281 || (TARGET_ARCH64 && size <= 32));
7282
7283 if (mode == BLKmode)
7284 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7285
7286 mclass = MODE_FLOAT;
7287 }
7288
7289 if (TARGET_ARCH64 && type)
7290 {
7291 /* Structures up to 32 bytes in size are returned in registers. */
7292 if (TREE_CODE (type) == RECORD_TYPE)
7293 {
7294 HOST_WIDE_INT size = int_size_in_bytes (type);
7295 gcc_assert (size <= 32);
7296
7297 return function_arg_record_value (type, mode, 0, 1, regbase);
7298 }
7299
7300 /* Unions up to 32 bytes in size are returned in integer registers. */
7301 else if (TREE_CODE (type) == UNION_TYPE)
7302 {
7303 HOST_WIDE_INT size = int_size_in_bytes (type);
7304 gcc_assert (size <= 32);
7305
7306 return function_arg_union_value (size, mode, 0, regbase);
7307 }
7308
7309 /* Objects that require it are returned in FP registers. */
7310 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7311 ;
7312
7313 /* All other aggregate types are returned in an integer register in a
7314 mode corresponding to the size of the type. */
7315 else if (AGGREGATE_TYPE_P (type))
7316 {
7317 /* All other aggregate types are passed in an integer register
7318 in a mode corresponding to the size of the type. */
7319 HOST_WIDE_INT size = int_size_in_bytes (type);
7320 gcc_assert (size <= 32);
7321
7322 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7323
7324 /* ??? We probably should have made the same ABI change in
7325 3.4.0 as the one we made for unions. The latter was
7326 required by the SCD though, while the former is not
7327 specified, so we favored compatibility and efficiency.
7328
7329 Now we're stuck for aggregates larger than 16 bytes,
7330 because OImode vanished in the meantime. Let's not
7331 try to be unduly clever, and simply follow the ABI
7332 for unions in that case. */
7333 if (mode == BLKmode)
7334 return function_arg_union_value (size, mode, 0, regbase);
7335 else
7336 mclass = MODE_INT;
7337 }
7338
7339 /* We should only have pointer and integer types at this point. This
7340 must match sparc_promote_function_mode. */
7341 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7342 mode = word_mode;
7343 }
7344
7345 /* We should only have pointer and integer types at this point, except with
7346 -freg-struct-return. This must match sparc_promote_function_mode. */
7347 else if (TARGET_ARCH32
7348 && !(type && AGGREGATE_TYPE_P (type))
7349 && mclass == MODE_INT
7350 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7351 mode = word_mode;
7352
7353 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7354 regno = SPARC_FP_ARG_FIRST;
7355 else
7356 regno = regbase;
7357
7358 return gen_rtx_REG (mode, regno);
7359 }
7360
7361 /* Handle TARGET_FUNCTION_VALUE.
7362 On the SPARC, the value is found in the first "output" register, but the
7363 called function leaves it in the first "input" register. */
7364
7365 static rtx
7366 sparc_function_value (const_tree valtype,
7367 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7368 bool outgoing)
7369 {
7370 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7371 }
7372
7373 /* Handle TARGET_LIBCALL_VALUE. */
7374
7375 static rtx
7376 sparc_libcall_value (machine_mode mode,
7377 const_rtx fun ATTRIBUTE_UNUSED)
7378 {
7379 return sparc_function_value_1 (NULL_TREE, mode, false);
7380 }
7381
7382 /* Handle FUNCTION_VALUE_REGNO_P.
7383 On the SPARC, the first "output" reg is used for integer values, and the
7384 first floating point register is used for floating point values. */
7385
7386 static bool
7387 sparc_function_value_regno_p (const unsigned int regno)
7388 {
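  /* Hard register 8 is %o0 and hard register 32 is %f0.  */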
7389 return (regno == 8 || (TARGET_FPU && regno == 32));
7390 }
7391
7392 /* Do what is necessary for `va_start'. We look at the current function
7393 to determine if stdarg or varargs is used and return the address of
7394 the first unnamed parameter. */
7395
7396 static rtx
7397 sparc_builtin_saveregs (void)
7398 {
7399 int first_reg = crtl->args.info.words;
7400 rtx address;
7401 int regno;
7402
7403 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7404 emit_move_insn (gen_rtx_MEM (word_mode,
7405 gen_rtx_PLUS (Pmode,
7406 frame_pointer_rtx,
7407 GEN_INT (FIRST_PARM_OFFSET (0)
7408 + (UNITS_PER_WORD
7409 * regno)))),
7410 gen_rtx_REG (word_mode,
7411 SPARC_INCOMING_INT_ARG_FIRST + regno));
7412
7413 address = gen_rtx_PLUS (Pmode,
7414 frame_pointer_rtx,
7415 GEN_INT (FIRST_PARM_OFFSET (0)
7416 + UNITS_PER_WORD * first_reg));
7417
7418 return address;
7419 }
7420
7421 /* Implement `va_start' for stdarg. */
7422
7423 static void
7424 sparc_va_start (tree valist, rtx nextarg)
7425 {
7426 nextarg = expand_builtin_saveregs ();
7427 std_expand_builtin_va_start (valist, nextarg);
7428 }
7429
7430 /* Implement `va_arg' for stdarg. */
7431
7432 static tree
7433 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7434 gimple_seq *post_p)
7435 {
7436 HOST_WIDE_INT size, rsize, align;
7437 tree addr, incr;
7438 bool indirect;
7439 tree ptrtype = build_pointer_type (type);
7440
7441 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7442 {
7443 indirect = true;
7444 size = rsize = UNITS_PER_WORD;
7445 align = 0;
7446 }
7447 else
7448 {
7449 indirect = false;
7450 size = int_size_in_bytes (type);
7451 rsize = ROUND_UP (size, UNITS_PER_WORD);
7452 align = 0;
7453
7454 if (TARGET_ARCH64)
7455 {
7456 /* For SPARC64, objects requiring 16-byte alignment get it. */
7457 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7458 align = 2 * UNITS_PER_WORD;
7459
7460 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7461 are left-justified in their slots. */
7462 if (AGGREGATE_TYPE_P (type))
7463 {
7464 if (size == 0)
7465 size = rsize = UNITS_PER_WORD;
7466 else
7467 size = rsize;
7468 }
7469 }
7470 }
7471
7472 incr = valist;
7473 if (align)
7474 {
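      /* E.g. for align == 16 this computes incr = (valist + 15) & -16.  */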
7475 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7476 incr = fold_convert (sizetype, incr);
7477 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7478 size_int (-align));
7479 incr = fold_convert (ptr_type_node, incr);
7480 }
7481
7482 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7483 addr = incr;
7484
7485 if (BYTES_BIG_ENDIAN && size < rsize)
7486 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7487
7488 if (indirect)
7489 {
7490 addr = fold_convert (build_pointer_type (ptrtype), addr);
7491 addr = build_va_arg_indirect_ref (addr);
7492 }
7493
7494 /* If the address isn't aligned properly for the type, we need a temporary.
7495 FIXME: This is inefficient; usually we can do this in registers. */
7496 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7497 {
7498 tree tmp = create_tmp_var (type, "va_arg_tmp");
7499 tree dest_addr = build_fold_addr_expr (tmp);
7500 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7501 3, dest_addr, addr, size_int (rsize));
7502 TREE_ADDRESSABLE (tmp) = 1;
7503 gimplify_and_add (copy, pre_p);
7504 addr = dest_addr;
7505 }
7506
7507 else
7508 addr = fold_convert (ptrtype, addr);
7509
7510 incr = fold_build_pointer_plus_hwi (incr, rsize);
7511 gimplify_assign (valist, incr, post_p);
7512
7513 return build_va_arg_indirect_ref (addr);
7514 }
7515 \f
7516 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7517 Specify whether the vector mode is supported by the hardware. */
7518
7519 static bool
7520 sparc_vector_mode_supported_p (machine_mode mode)
7521 {
7522 return TARGET_VIS && VECTOR_MODE_P (mode);
7523 }
7524 \f
7525 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7526
7527 static machine_mode
7528 sparc_preferred_simd_mode (machine_mode mode)
7529 {
7530 if (TARGET_VIS)
7531 switch (mode)
7532 {
7533 case SImode:
7534 return V2SImode;
7535 case HImode:
7536 return V4HImode;
7537 case QImode:
7538 return V8QImode;
7539
7540 default:;
7541 }
7542
7543 return word_mode;
7544 }
7545 \f
7546 /* Return the string to output an unconditional branch to LABEL, which is
7547 the operand number of the label.
7548
7549 DEST is the destination insn (i.e. the label), INSN is the source. */
7550
7551 const char *
7552 output_ubranch (rtx dest, rtx_insn *insn)
7553 {
7554 static char string[64];
7555 bool v9_form = false;
7556 int delta;
7557 char *p;
7558
7559 /* Even if we are trying to use cbcond for this, evaluate
7560 whether we can use V9 branches as our backup plan. */
7561
7562 delta = 5000000;
7563 if (INSN_ADDRESSES_SET_P ())
7564 delta = (INSN_ADDRESSES (INSN_UID (dest))
7565 - INSN_ADDRESSES (INSN_UID (insn)));
7566
7567 /* Leave some instructions for "slop". */
7568 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7569 v9_form = true;
7570
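  /* "cwbe %g0, %g0, label" compares %g0 with itself and therefore always
     branches, so it serves as the cbcond form of an unconditional branch.  */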
7571 if (TARGET_CBCOND)
7572 {
7573 bool emit_nop = emit_cbcond_nop (insn);
7574 bool far = false;
7575 const char *rval;
7576
7577 if (delta < -500 || delta > 500)
7578 far = true;
7579
7580 if (far)
7581 {
7582 if (v9_form)
7583 rval = "ba,a,pt\t%%xcc, %l0";
7584 else
7585 rval = "b,a\t%l0";
7586 }
7587 else
7588 {
7589 if (emit_nop)
7590 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7591 else
7592 rval = "cwbe\t%%g0, %%g0, %l0";
7593 }
7594 return rval;
7595 }
7596
7597 if (v9_form)
7598 strcpy (string, "ba%*,pt\t%%xcc, ");
7599 else
7600 strcpy (string, "b%*\t");
7601
7602 p = strchr (string, '\0');
7603 *p++ = '%';
7604 *p++ = 'l';
7605 *p++ = '0';
7606 *p++ = '%';
7607 *p++ = '(';
7608 *p = '\0';
7609
7610 return string;
7611 }
7612
7613 /* Return the string to output a conditional branch to LABEL, which is
7614 the operand number of the label. OP is the conditional expression.
7615 XEXP (OP, 0) is assumed to be a condition code register (integer or
7616 floating point) and its mode specifies what kind of comparison we made.
7617
7618 DEST is the destination insn (i.e. the label), INSN is the source.
7619
7620 REVERSED is nonzero if we should reverse the sense of the comparison.
7621
7622 ANNUL is nonzero if we should generate an annulling branch. */
7623
7624 const char *
7625 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7626 rtx_insn *insn)
7627 {
7628 static char string[64];
7629 enum rtx_code code = GET_CODE (op);
7630 rtx cc_reg = XEXP (op, 0);
7631 machine_mode mode = GET_MODE (cc_reg);
7632 const char *labelno, *branch;
7633 int spaces = 8, far;
7634 char *p;
7635
7636 /* v9 branches are limited to +-1MB. If it is too far away,
7637 change
7638
7639 bne,pt %xcc, .LC30
7640
7641 to
7642
7643 be,pn %xcc, .+12
7644 nop
7645 ba .LC30
7646
7647 and
7648
7649 fbne,a,pn %fcc2, .LC29
7650
7651 to
7652
7653 fbe,pt %fcc2, .+16
7654 nop
7655 ba .LC29 */
7656
7657 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7658 if (reversed ^ far)
7659 {
7660 /* Reversal of FP compares needs care -- an ordered compare
7661 becomes an unordered compare and vice versa. */
7662 if (mode == CCFPmode || mode == CCFPEmode)
7663 code = reverse_condition_maybe_unordered (code);
7664 else
7665 code = reverse_condition (code);
7666 }
7667
7668 /* Start by writing the branch condition. */
7669 if (mode == CCFPmode || mode == CCFPEmode)
7670 {
7671 switch (code)
7672 {
7673 case NE:
7674 branch = "fbne";
7675 break;
7676 case EQ:
7677 branch = "fbe";
7678 break;
7679 case GE:
7680 branch = "fbge";
7681 break;
7682 case GT:
7683 branch = "fbg";
7684 break;
7685 case LE:
7686 branch = "fble";
7687 break;
7688 case LT:
7689 branch = "fbl";
7690 break;
7691 case UNORDERED:
7692 branch = "fbu";
7693 break;
7694 case ORDERED:
7695 branch = "fbo";
7696 break;
7697 case UNGT:
7698 branch = "fbug";
7699 break;
7700 case UNLT:
7701 branch = "fbul";
7702 break;
7703 case UNEQ:
7704 branch = "fbue";
7705 break;
7706 case UNGE:
7707 branch = "fbuge";
7708 break;
7709 case UNLE:
7710 branch = "fbule";
7711 break;
7712 case LTGT:
7713 branch = "fblg";
7714 break;
7715 default:
7716 gcc_unreachable ();
7717 }
7718
7719 /* ??? !v9: FP branches cannot be preceded by another floating point
7720 insn. Because there is currently no concept of pre-delay slots,
7721 we can fix this only by always emitting a nop before a floating
7722 point branch. */
7723
7724 string[0] = '\0';
7725 if (! TARGET_V9)
7726 strcpy (string, "nop\n\t");
7727 strcat (string, branch);
7728 }
7729 else
7730 {
7731 switch (code)
7732 {
7733 case NE:
7734 branch = "bne";
7735 break;
7736 case EQ:
7737 branch = "be";
7738 break;
7739 case GE:
7740 if (mode == CCNZmode || mode == CCXNZmode)
7741 branch = "bpos";
7742 else
7743 branch = "bge";
7744 break;
7745 case GT:
7746 branch = "bg";
7747 break;
7748 case LE:
7749 branch = "ble";
7750 break;
7751 case LT:
7752 if (mode == CCNZmode || mode == CCXNZmode)
7753 branch = "bneg";
7754 else
7755 branch = "bl";
7756 break;
7757 case GEU:
7758 branch = "bgeu";
7759 break;
7760 case GTU:
7761 branch = "bgu";
7762 break;
7763 case LEU:
7764 branch = "bleu";
7765 break;
7766 case LTU:
7767 branch = "blu";
7768 break;
7769 default:
7770 gcc_unreachable ();
7771 }
7772 strcpy (string, branch);
7773 }
7774 spaces -= strlen (branch);
7775 p = strchr (string, '\0');
7776
7777 /* Now add the annulling, the label, and a possible noop. */
7778 if (annul && ! far)
7779 {
7780 strcpy (p, ",a");
7781 p += 2;
7782 spaces -= 2;
7783 }
7784
7785 if (TARGET_V9)
7786 {
7787 rtx note;
7788 int v8 = 0;
7789
7790 if (! far && insn && INSN_ADDRESSES_SET_P ())
7791 {
7792 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7793 - INSN_ADDRESSES (INSN_UID (insn)));
7794 /* Leave some instructions for "slop". */
7795 if (delta < -260000 || delta >= 260000)
7796 v8 = 1;
7797 }
7798
7799 switch (mode)
7800 {
7801 case CCmode:
7802 case CCNZmode:
7803 case CCCmode:
7804 labelno = "%%icc, ";
7805 if (v8)
7806 labelno = "";
7807 break;
7808 case CCXmode:
7809 case CCXNZmode:
7810 case CCXCmode:
7811 labelno = "%%xcc, ";
7812 gcc_assert (!v8);
7813 break;
7814 case CCFPmode:
7815 case CCFPEmode:
7816 {
7817 static char v9_fcc_labelno[] = "%%fccX, ";
7818 /* Set the char indicating the number of the fcc reg to use. */
7819 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7820 labelno = v9_fcc_labelno;
7821 if (v8)
7822 {
7823 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7824 labelno = "";
7825 }
7826 }
7827 break;
7828 default:
7829 gcc_unreachable ();
7830 }
7831
7832 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7833 {
7834 strcpy (p,
7835 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7836 ? ",pt" : ",pn");
7837 p += 3;
7838 spaces -= 3;
7839 }
7840 }
7841 else
7842 labelno = "";
7843
7844 if (spaces > 0)
7845 *p++ = '\t';
7846 else
7847 *p++ = ' ';
7848 strcpy (p, labelno);
7849 p = strchr (p, '\0');
7850 if (far)
7851 {
7852 strcpy (p, ".+12\n\t nop\n\tb\t");
7853 /* Skip the next insn if requested or
7854 if we know that it will be a nop. */
7855 if (annul || ! final_sequence)
7856 p[3] = '6';
7857 p += 14;
7858 }
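  /* Append "%l<label>" followed by "%#"; the latter makes
     sparc_print_operand emit a nop if the delay slot is left unfilled.  */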
7859 *p++ = '%';
7860 *p++ = 'l';
7861 *p++ = label + '0';
7862 *p++ = '%';
7863 *p++ = '#';
7864 *p = '\0';
7865
7866 return string;
7867 }
7868
7869 /* Emit a library call comparison between floating point X and Y.
7870 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7871 Return the new operator to be used in the comparison sequence.
7872
7873 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7874 values as arguments instead of the TFmode registers themselves,
7875 that's why we cannot call emit_float_lib_cmp. */
7876
7877 rtx
7878 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7879 {
7880 const char *qpfunc;
7881 rtx slot0, slot1, result, tem, tem2, libfunc;
7882 machine_mode mode;
7883 enum rtx_code new_comparison;
7884
7885 switch (comparison)
7886 {
7887 case EQ:
7888 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7889 break;
7890
7891 case NE:
7892 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7893 break;
7894
7895 case GT:
7896 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7897 break;
7898
7899 case GE:
7900 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7901 break;
7902
7903 case LT:
7904 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7905 break;
7906
7907 case LE:
7908 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7909 break;
7910
7911 case ORDERED:
7912 case UNORDERED:
7913 case UNGT:
7914 case UNLT:
7915 case UNEQ:
7916 case UNGE:
7917 case UNLE:
7918 case LTGT:
7919 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7920 break;
7921
7922 default:
7923 gcc_unreachable ();
7924 }
7925
7926 if (TARGET_ARCH64)
7927 {
7928 if (MEM_P (x))
7929 {
7930 tree expr = MEM_EXPR (x);
7931 if (expr)
7932 mark_addressable (expr);
7933 slot0 = x;
7934 }
7935 else
7936 {
7937 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7938 emit_move_insn (slot0, x);
7939 }
7940
7941 if (MEM_P (y))
7942 {
7943 tree expr = MEM_EXPR (y);
7944 if (expr)
7945 mark_addressable (expr);
7946 slot1 = y;
7947 }
7948 else
7949 {
7950 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7951 emit_move_insn (slot1, y);
7952 }
7953
7954 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7955 emit_library_call (libfunc, LCT_NORMAL,
7956 DImode, 2,
7957 XEXP (slot0, 0), Pmode,
7958 XEXP (slot1, 0), Pmode);
7959 mode = DImode;
7960 }
7961 else
7962 {
7963 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7964 emit_library_call (libfunc, LCT_NORMAL,
7965 SImode, 2,
7966 x, TFmode, y, TFmode);
7967 mode = SImode;
7968 }
7969
7970
7971 /* Immediately move the result of the libcall into a pseudo
7972 register so reload doesn't clobber the value if it needs
7973 the return register for a spill reg. */
7974 result = gen_reg_rtx (mode);
7975 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7976
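  /* The _Qp_cmp/_Q_cmp routines encode their result as 0 (equal), 1 (less),
     2 (greater) or 3 (unordered), which the cases below decode.  */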
7977 switch (comparison)
7978 {
7979 default:
7980 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7981 case ORDERED:
7982 case UNORDERED:
7983 new_comparison = (comparison == UNORDERED ? EQ : NE);
7984 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7985 case UNGT:
7986 case UNGE:
7987 new_comparison = (comparison == UNGT ? GT : NE);
7988 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7989 case UNLE:
7990 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7991 case UNLT:
7992 tem = gen_reg_rtx (mode);
7993 if (TARGET_ARCH32)
7994 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7995 else
7996 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7997 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7998 case UNEQ:
7999 case LTGT:
8000 tem = gen_reg_rtx (mode);
8001 if (TARGET_ARCH32)
8002 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8003 else
8004 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8005 tem2 = gen_reg_rtx (mode);
8006 if (TARGET_ARCH32)
8007 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8008 else
8009 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8010 new_comparison = (comparison == UNEQ ? EQ : NE);
8011 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8012 }
8013
8014 gcc_unreachable ();
8015 }
8016
8017 /* Generate an unsigned DImode to FP conversion. This is the same code
8018 optabs would emit if we didn't have TFmode patterns. */
8019
8020 void
8021 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8022 {
8023 rtx i0, i1, f0, in, out;
8024
8025 out = operands[0];
8026 in = force_reg (DImode, operands[1]);
8027 rtx_code_label *neglab = gen_label_rtx ();
8028 rtx_code_label *donelab = gen_label_rtx ();
8029 i0 = gen_reg_rtx (DImode);
8030 i1 = gen_reg_rtx (DImode);
8031 f0 = gen_reg_rtx (mode);
8032
8033 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8034
8035 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8036 emit_jump_insn (gen_jump (donelab));
8037 emit_barrier ();
8038
8039 emit_label (neglab);
8040
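  /* The value has bit 63 set, so it cannot be converted as a signed DImode
     directly.  Halve it, keeping the shifted-out low bit sticky so that
     rounding is unaffected, convert, then double the result.  */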
8041 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8042 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8043 emit_insn (gen_iordi3 (i0, i0, i1));
8044 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8045 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8046
8047 emit_label (donelab);
8048 }
8049
8050 /* Generate an FP to unsigned DImode conversion. This is the same code
8051 optabs would emit if we didn't have TFmode patterns. */
8052
8053 void
8054 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8055 {
8056 rtx i0, i1, f0, in, out, limit;
8057
8058 out = operands[0];
8059 in = force_reg (mode, operands[1]);
8060 rtx_code_label *neglab = gen_label_rtx ();
8061 rtx_code_label *donelab = gen_label_rtx ();
8062 i0 = gen_reg_rtx (DImode);
8063 i1 = gen_reg_rtx (DImode);
8064 limit = gen_reg_rtx (mode);
8065 f0 = gen_reg_rtx (mode);
8066
8067 emit_move_insn (limit,
8068 const_double_from_real_value (
8069 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8070 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8071
8072 emit_insn (gen_rtx_SET (out,
8073 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8074 emit_jump_insn (gen_jump (donelab));
8075 emit_barrier ();
8076
8077 emit_label (neglab);
8078
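  /* The value is at least 2^63: subtract 2^63 before the signed conversion
     and add it back afterwards by setting bit 63 of the integer result.  */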
8079 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8080 emit_insn (gen_rtx_SET (i0,
8081 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8082 emit_insn (gen_movdi (i1, const1_rtx));
8083 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8084 emit_insn (gen_xordi3 (out, i0, i1));
8085
8086 emit_label (donelab);
8087 }
8088
8089 /* Return the string to output a compare and branch instruction to DEST.
8090 DEST is the destination insn (i.e. the label), INSN is the source,
8091 and OP is the conditional expression. */
8092
8093 const char *
8094 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8095 {
8096 machine_mode mode = GET_MODE (XEXP (op, 0));
8097 enum rtx_code code = GET_CODE (op);
8098 const char *cond_str, *tmpl;
8099 int far, emit_nop, len;
8100 static char string[64];
8101 char size_char;
8102
8103 /* Compare and Branch is limited to +-2KB. If it is too far away,
8104 change
8105
8106 cxbne X, Y, .LC30
8107
8108 to
8109
8110 cxbe X, Y, .+16
8111 nop
8112 ba,pt xcc, .LC30
8113 nop */
8114
8115 len = get_attr_length (insn);
8116
8117 far = len == 4;
8118 emit_nop = len == 2;
8119
8120 if (far)
8121 code = reverse_condition (code);
8122
8123 size_char = ((mode == SImode) ? 'w' : 'x');
8124
8125 switch (code)
8126 {
8127 case NE:
8128 cond_str = "ne";
8129 break;
8130
8131 case EQ:
8132 cond_str = "e";
8133 break;
8134
8135 case GE:
8136 cond_str = "ge";
8137 break;
8138
8139 case GT:
8140 cond_str = "g";
8141 break;
8142
8143 case LE:
8144 cond_str = "le";
8145 break;
8146
8147 case LT:
8148 cond_str = "l";
8149 break;
8150
8151 case GEU:
8152 cond_str = "cc";
8153 break;
8154
8155 case GTU:
8156 cond_str = "gu";
8157 break;
8158
8159 case LEU:
8160 cond_str = "leu";
8161 break;
8162
8163 case LTU:
8164 cond_str = "cs";
8165 break;
8166
8167 default:
8168 gcc_unreachable ();
8169 }
8170
8171 if (far)
8172 {
8173 int veryfar = 1, delta;
8174
8175 if (INSN_ADDRESSES_SET_P ())
8176 {
8177 delta = (INSN_ADDRESSES (INSN_UID (dest))
8178 - INSN_ADDRESSES (INSN_UID (insn)));
8179 /* Leave some instructions for "slop". */
8180 if (delta >= -260000 && delta < 260000)
8181 veryfar = 0;
8182 }
8183
8184 if (veryfar)
8185 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8186 else
8187 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8188 }
8189 else
8190 {
8191 if (emit_nop)
8192 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8193 else
8194 tmpl = "c%cb%s\t%%1, %%2, %%3";
8195 }
8196
8197 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8198
8199 return string;
8200 }
8201
8202 /* Return the string to output a conditional branch to LABEL, testing
8203 register REG. LABEL is the operand number of the label; REG is the
8204 operand number of the reg. OP is the conditional expression. The mode
8205 of REG says what kind of comparison we made.
8206
8207 DEST is the destination insn (i.e. the label), INSN is the source.
8208
8209 REVERSED is nonzero if we should reverse the sense of the comparison.
8210
8211 ANNUL is nonzero if we should generate an annulling branch. */
8212
8213 const char *
8214 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8215 int annul, rtx_insn *insn)
8216 {
8217 static char string[64];
8218 enum rtx_code code = GET_CODE (op);
8219 machine_mode mode = GET_MODE (XEXP (op, 0));
8220 rtx note;
8221 int far;
8222 char *p;
8223
8224 /* Branches on registers are limited to +-128KB. If it is too far away,
8225 change
8226
8227 brnz,pt %g1, .LC30
8228
8229 to
8230
8231 brz,pn %g1, .+12
8232 nop
8233 ba,pt %xcc, .LC30
8234
8235 and
8236
8237 brgez,a,pn %o1, .LC29
8238
8239 to
8240
8241 brlz,pt %o1, .+16
8242 nop
8243 ba,pt %xcc, .LC29 */
8244
8245 far = get_attr_length (insn) >= 3;
8246
8247 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8248 if (reversed ^ far)
8249 code = reverse_condition (code);
8250
8251 /* Only 64 bit versions of these instructions exist. */
8252 gcc_assert (mode == DImode);
8253
8254 /* Start by writing the branch condition. */
8255
8256 switch (code)
8257 {
8258 case NE:
8259 strcpy (string, "brnz");
8260 break;
8261
8262 case EQ:
8263 strcpy (string, "brz");
8264 break;
8265
8266 case GE:
8267 strcpy (string, "brgez");
8268 break;
8269
8270 case LT:
8271 strcpy (string, "brlz");
8272 break;
8273
8274 case LE:
8275 strcpy (string, "brlez");
8276 break;
8277
8278 case GT:
8279 strcpy (string, "brgz");
8280 break;
8281
8282 default:
8283 gcc_unreachable ();
8284 }
8285
8286 p = strchr (string, '\0');
8287
8288 /* Now add the annulling, reg, label, and nop. */
8289 if (annul && ! far)
8290 {
8291 strcpy (p, ",a");
8292 p += 2;
8293 }
8294
8295 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8296 {
8297 strcpy (p,
8298 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8299 ? ",pt" : ",pn");
8300 p += 3;
8301 }
8302
8303 *p = p < string + 8 ? '\t' : ' ';
8304 p++;
8305 *p++ = '%';
8306 *p++ = '0' + reg;
8307 *p++ = ',';
8308 *p++ = ' ';
8309 if (far)
8310 {
8311 int veryfar = 1, delta;
8312
8313 if (INSN_ADDRESSES_SET_P ())
8314 {
8315 delta = (INSN_ADDRESSES (INSN_UID (dest))
8316 - INSN_ADDRESSES (INSN_UID (insn)));
8317 /* Leave some instructions for "slop". */
8318 if (delta >= -260000 && delta < 260000)
8319 veryfar = 0;
8320 }
8321
8322 strcpy (p, ".+12\n\t nop\n\t");
8323 /* Skip the next insn if requested or
8324 if we know that it will be a nop. */
8325 if (annul || ! final_sequence)
8326 p[3] = '6';
8327 p += 12;
8328 if (veryfar)
8329 {
8330 strcpy (p, "b\t");
8331 p += 2;
8332 }
8333 else
8334 {
8335 strcpy (p, "ba,pt\t%%xcc, ");
8336 p += 13;
8337 }
8338 }
8339 *p++ = '%';
8340 *p++ = 'l';
8341 *p++ = '0' + label;
8342 *p++ = '%';
8343 *p++ = '#';
8344 *p = '\0';
8345
8346 return string;
8347 }
8348
8349 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8350 Such instructions cannot be used in the delay slot of a return insn on v9.
8351 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8352 */
8353
8354 static int
8355 epilogue_renumber (register rtx *where, int test)
8356 {
8357 register const char *fmt;
8358 register int i;
8359 register enum rtx_code code;
8360
8361 if (*where == 0)
8362 return 0;
8363
8364 code = GET_CODE (*where);
8365
8366 switch (code)
8367 {
8368 case REG:
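      /* Hard registers 8-15 are %o0-%o7, 16-23 are %l0-%l7 and 24-31
         are %i0-%i7.  */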
8369 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8370 return 1;
8371 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8372 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8373 /* fallthrough */
8374 case SCRATCH:
8375 case CC0:
8376 case PC:
8377 case CONST_INT:
8378 case CONST_WIDE_INT:
8379 case CONST_DOUBLE:
8380 return 0;
8381
8382 /* Do not replace the frame pointer with the stack pointer because
8383 it can cause the delayed instruction to load below the stack.
8384 This occurs when instructions like:
8385
8386 (set (reg/i:SI 24 %i0)
8387 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8388 (const_int -20 [0xffffffec])) 0))
8389
8390 are in the return delay slot. */
8391 case PLUS:
8392 if (GET_CODE (XEXP (*where, 0)) == REG
8393 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8394 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8395 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8396 return 1;
8397 break;
8398
8399 case MEM:
8400 if (SPARC_STACK_BIAS
8401 && GET_CODE (XEXP (*where, 0)) == REG
8402 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8403 return 1;
8404 break;
8405
8406 default:
8407 break;
8408 }
8409
8410 fmt = GET_RTX_FORMAT (code);
8411
8412 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8413 {
8414 if (fmt[i] == 'E')
8415 {
8416 register int j;
8417 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8418 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8419 return 1;
8420 }
8421 else if (fmt[i] == 'e'
8422 && epilogue_renumber (&(XEXP (*where, i)), test))
8423 return 1;
8424 }
8425 return 0;
8426 }
8427 \f
8428 /* Leaf functions and non-leaf functions have different needs. */
8429
8430 static const int
8431 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8432
8433 static const int
8434 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8435
8436 static const int *const reg_alloc_orders[] = {
8437 reg_leaf_alloc_order,
8438 reg_nonleaf_alloc_order};
8439
8440 void
8441 order_regs_for_local_alloc (void)
8442 {
8443 static int last_order_nonleaf = 1;
8444
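  /* Hard register 15 is %o7, which receives the return address of call
     instructions, so it is ever live essentially only in non-leaf
     functions.  */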
8445 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8446 {
8447 last_order_nonleaf = !last_order_nonleaf;
8448 memcpy ((char *) reg_alloc_order,
8449 (const char *) reg_alloc_orders[last_order_nonleaf],
8450 FIRST_PSEUDO_REGISTER * sizeof (int));
8451 }
8452 }
8453 \f
8454 /* Return 1 if REG and MEM are legitimate enough to allow the various
8455 mem<-->reg splits to be run. */
8456
8457 int
8458 sparc_splitdi_legitimate (rtx reg, rtx mem)
8459 {
8460 /* Punt if we are here by mistake. */
8461 gcc_assert (reload_completed);
8462
8463 /* We must have an offsettable memory reference. */
8464 if (! offsettable_memref_p (mem))
8465 return 0;
8466
8467 /* If we have legitimate args for ldd/std, we do not want
8468 the split to happen. */
8469 if ((REGNO (reg) % 2) == 0
8470 && mem_min_alignment (mem, 8))
8471 return 0;
8472
8473 /* Success. */
8474 return 1;
8475 }
8476
8477 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8478
8479 int
8480 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8481 {
8482 int regno1, regno2;
8483
8484 if (GET_CODE (reg1) == SUBREG)
8485 reg1 = SUBREG_REG (reg1);
8486 if (GET_CODE (reg1) != REG)
8487 return 0;
8488 regno1 = REGNO (reg1);
8489
8490 if (GET_CODE (reg2) == SUBREG)
8491 reg2 = SUBREG_REG (reg2);
8492 if (GET_CODE (reg2) != REG)
8493 return 0;
8494 regno2 = REGNO (reg2);
8495
8496 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8497 return 1;
8498
8499 if (TARGET_VIS3)
8500 {
8501 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8502 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8503 return 1;
8504 }
8505
8506 return 0;
8507 }
8508
8509 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8510 This makes them candidates for using ldd and std insns.
8511
8512 Note reg1 and reg2 *must* be hard registers. */
8513
8514 int
8515 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8516 {
8517 /* We might have been passed a SUBREG. */
8518 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8519 return 0;
8520
8521 if (REGNO (reg1) % 2 != 0)
8522 return 0;
8523
8524 /* Integer ldd is deprecated in SPARC V9. */
8525 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8526 return 0;
8527
8528 return (REGNO (reg1) == REGNO (reg2) - 1);
8529 }
8530
8531 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8532 an ldd or std insn.
8533
8534 This can only happen when addr1 and addr2, the addresses in mem1
8535 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8536 addr1 must also be aligned on a 64-bit boundary.
8537
8538 Also iff dependent_reg_rtx is not null it should not be used to
8539 compute the address for mem1, i.e. we cannot optimize a sequence
8540 like:
8541 ld [%o0], %o0
8542 ld [%o0 + 4], %o1
8543 to
8544 ldd [%o0], %o0
8545 nor:
8546 ld [%g3 + 4], %g3
8547 ld [%g3], %g2
8548 to
8549 ldd [%g3], %g2
8550
8551 But, note that the transformation from:
8552 ld [%g2 + 4], %g3
8553 ld [%g2], %g2
8554 to
8555 ldd [%g2], %g2
8556 is perfectly fine. Thus, the peephole2 patterns always pass us
8557 the destination register of the first load, never the second one.
8558
8559 For stores we don't have a similar problem, so dependent_reg_rtx is
8560 NULL_RTX. */
8561
8562 int
8563 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8564 {
8565 rtx addr1, addr2;
8566 unsigned int reg1;
8567 HOST_WIDE_INT offset1;
8568
8569 /* The mems cannot be volatile. */
8570 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8571 return 0;
8572
8573 /* MEM1 should be aligned on a 64-bit boundary. */
8574 if (MEM_ALIGN (mem1) < 64)
8575 return 0;
8576
8577 addr1 = XEXP (mem1, 0);
8578 addr2 = XEXP (mem2, 0);
8579
8580 /* Extract a register number and offset (if used) from the first addr. */
8581 if (GET_CODE (addr1) == PLUS)
8582 {
8583 /* If not a REG, return zero. */
8584 if (GET_CODE (XEXP (addr1, 0)) != REG)
8585 return 0;
8586 else
8587 {
8588 reg1 = REGNO (XEXP (addr1, 0));
8589 /* The offset must be constant! */
8590 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8591 return 0;
8592 offset1 = INTVAL (XEXP (addr1, 1));
8593 }
8594 }
8595 else if (GET_CODE (addr1) != REG)
8596 return 0;
8597 else
8598 {
8599 reg1 = REGNO (addr1);
8600 /* This was a simple (mem (reg)) expression. Offset is 0. */
8601 offset1 = 0;
8602 }
8603
8604 /* Make sure the second address is of the form (plus (reg) (const_int)). */
8605 if (GET_CODE (addr2) != PLUS)
8606 return 0;
8607
8608 if (GET_CODE (XEXP (addr2, 0)) != REG
8609 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8610 return 0;
8611
8612 if (reg1 != REGNO (XEXP (addr2, 0)))
8613 return 0;
8614
8615 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8616 return 0;
8617
8618 /* The first offset must be evenly divisible by 8 to ensure the
8619 address is 64 bit aligned. */
8620 if (offset1 % 8 != 0)
8621 return 0;
8622
8623 /* The offset for the second addr must be 4 more than the first addr. */
8624 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8625 return 0;
8626
8627 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8628 instructions. */
8629 return 1;
8630 }
8631
8632 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8633
8634 rtx
8635 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8636 {
8637 rtx x = widen_memory_access (mem1, mode, 0);
8638 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8639 return x;
8640 }
8641
8642 /* Return 1 if reg is a pseudo, or is the first register in
8643 a hard register pair. This makes it suitable for use in
8644 ldd and std insns. */
8645
8646 int
8647 register_ok_for_ldd (rtx reg)
8648 {
8649 /* We might have been passed a SUBREG. */
8650 if (!REG_P (reg))
8651 return 0;
8652
8653 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8654 return (REGNO (reg) % 2 == 0);
8655
8656 return 1;
8657 }
8658
8659 /* Return 1 if OP, a MEM, has an address which is known to be
8660 aligned to an 8-byte boundary. */
8661
8662 int
8663 memory_ok_for_ldd (rtx op)
8664 {
8665 /* In 64-bit mode, we assume that the address is word-aligned. */
8666 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8667 return 0;
8668
8669 if (! can_create_pseudo_p ()
8670 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8671 return 0;
8672
8673 return 1;
8674 }
8675 \f
8676 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8677
8678 static bool
8679 sparc_print_operand_punct_valid_p (unsigned char code)
8680 {
8681 if (code == '#'
8682 || code == '*'
8683 || code == '('
8684 || code == ')'
8685 || code == '_'
8686 || code == '&')
8687 return true;
8688
8689 return false;
8690 }
8691
8692 /* Implement TARGET_PRINT_OPERAND.
8693 Print operand X (an rtx) in assembler syntax to file FILE.
8694 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8695 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8696
8697 static void
8698 sparc_print_operand (FILE *file, rtx x, int code)
8699 {
8700 const char *s;
8701
8702 switch (code)
8703 {
8704 case '#':
8705 /* Output an insn in a delay slot. */
8706 if (final_sequence)
8707 sparc_indent_opcode = 1;
8708 else
8709 fputs ("\n\t nop", file);
8710 return;
8711 case '*':
8712 /* Output an annul flag if there's nothing for the delay slot and we
8713 are optimizing. This is always used with '(' below.
8714 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8715 this is a dbx bug. So, we only do this when optimizing.
8716 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8717 Always emit a nop in case the next instruction is a branch. */
8718 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8719 fputs (",a", file);
8720 return;
8721 case '(':
8722 /* Output a 'nop' if there's nothing for the delay slot and we are
8723 not optimizing. This is always used with '*' above. */
8724 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8725 fputs ("\n\t nop", file);
8726 else if (final_sequence)
8727 sparc_indent_opcode = 1;
8728 return;
8729 case ')':
8730 /* Output the right displacement from the saved PC on function return.
8731 The caller may have placed an "unimp" insn immediately after the call
8732 so we have to account for it. This insn is used in the 32-bit ABI
8733 when calling a function that returns a non-zero-sized structure. The
8734 64-bit ABI doesn't have it. Be careful to have this test be the same
8735 as that for the call. The exception is when sparc_std_struct_return
8736 is enabled, the psABI is followed exactly and the adjustment is made
8737 by the code in sparc_struct_value_rtx. The call emitted is the same
8738 when sparc_std_struct_return is enabled. */
8739 if (!TARGET_ARCH64
8740 && cfun->returns_struct
8741 && !sparc_std_struct_return
8742 && DECL_SIZE (DECL_RESULT (current_function_decl))
8743 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8744 == INTEGER_CST
8745 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8746 fputs ("12", file);
8747 else
8748 fputc ('8', file);
8749 return;
8750 case '_':
8751 /* Output the Embedded Medium/Anywhere code model base register. */
8752 fputs (EMBMEDANY_BASE_REG, file);
8753 return;
8754 case '&':
8755 /* Print some local dynamic TLS name. */
8756 if (const char *name = get_some_local_dynamic_name ())
8757 assemble_name (file, name);
8758 else
8759 output_operand_lossage ("'%%&' used without any "
8760 "local dynamic TLS references");
8761 return;
8762
8763 case 'Y':
8764 /* Adjust the operand to take into account a RESTORE operation. */
8765 if (GET_CODE (x) == CONST_INT)
8766 break;
8767 else if (GET_CODE (x) != REG)
8768 output_operand_lossage ("invalid %%Y operand");
8769 else if (REGNO (x) < 8)
8770 fputs (reg_names[REGNO (x)], file);
8771 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8772 fputs (reg_names[REGNO (x)-16], file);
8773 else
8774 output_operand_lossage ("invalid %%Y operand");
8775 return;
8776 case 'L':
8777 /* Print out the low order register name of a register pair. */
8778 if (WORDS_BIG_ENDIAN)
8779 fputs (reg_names[REGNO (x)+1], file);
8780 else
8781 fputs (reg_names[REGNO (x)], file);
8782 return;
8783 case 'H':
8784 /* Print out the high order register name of a register pair. */
8785 if (WORDS_BIG_ENDIAN)
8786 fputs (reg_names[REGNO (x)], file);
8787 else
8788 fputs (reg_names[REGNO (x)+1], file);
8789 return;
8790 case 'R':
8791 /* Print out the second register name of a register pair or quad.
8792 I.e., R (%o0) => %o1. */
8793 fputs (reg_names[REGNO (x)+1], file);
8794 return;
8795 case 'S':
8796 /* Print out the third register name of a register quad.
8797 I.e., S (%o0) => %o2. */
8798 fputs (reg_names[REGNO (x)+2], file);
8799 return;
8800 case 'T':
8801 /* Print out the fourth register name of a register quad.
8802 I.e., T (%o0) => %o3. */
8803 fputs (reg_names[REGNO (x)+3], file);
8804 return;
8805 case 'x':
8806 /* Print a condition code register. */
8807 if (REGNO (x) == SPARC_ICC_REG)
8808 {
8809 switch (GET_MODE (x))
8810 {
8811 case CCmode:
8812 case CCNZmode:
8813 case CCCmode:
8814 s = "%icc";
8815 break;
8816 case CCXmode:
8817 case CCXNZmode:
8818 case CCXCmode:
8819 s = "%xcc";
8820 break;
8821 default:
8822 gcc_unreachable ();
8823 }
8824 fputs (s, file);
8825 }
8826 else
8827 /* %fccN register */
8828 fputs (reg_names[REGNO (x)], file);
8829 return;
8830 case 'm':
8831 /* Print the operand's address only. */
8832 output_address (GET_MODE (x), XEXP (x, 0));
8833 return;
8834 case 'r':
8835 /* In this case we need a register. Use %g0 if the
8836 operand is const0_rtx. */
8837 if (x == const0_rtx
8838 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8839 {
8840 fputs ("%g0", file);
8841 return;
8842 }
8843 else
8844 break;
8845
8846 case 'A':
8847 switch (GET_CODE (x))
8848 {
8849 case IOR:
8850 s = "or";
8851 break;
8852 case AND:
8853 s = "and";
8854 break;
8855 case XOR:
8856 s = "xor";
8857 break;
8858 default:
8859 output_operand_lossage ("invalid %%A operand");
8860 s = "";
8861 break;
8862 }
8863 fputs (s, file);
8864 return;
8865
8866 case 'B':
8867 switch (GET_CODE (x))
8868 {
8869 case IOR:
8870 s = "orn";
8871 break;
8872 case AND:
8873 s = "andn";
8874 break;
8875 case XOR:
8876 s = "xnor";
8877 break;
8878 default:
8879 output_operand_lossage ("invalid %%B operand");
8880 s = "";
8881 break;
8882 }
8883 fputs (s, file);
8884 return;
8885
8886 /* This is used by the conditional move instructions. */
8887 case 'C':
8888 {
8889 machine_mode mode = GET_MODE (XEXP (x, 0));
8890 switch (GET_CODE (x))
8891 {
8892 case NE:
8893 s = "ne";
8894 break;
8895 case EQ:
8896 s = "e";
8897 break;
8898 case GE:
8899 if (mode == CCNZmode || mode == CCXNZmode)
8900 s = "pos";
8901 else
8902 s = "ge";
8903 break;
8904 case GT:
8905 s = "g";
8906 break;
8907 case LE:
8908 s = "le";
8909 break;
8910 case LT:
8911 if (mode == CCNZmode || mode == CCXNZmode)
8912 s = "neg";
8913 else
8914 s = "l";
8915 break;
8916 case GEU:
8917 s = "geu";
8918 break;
8919 case GTU:
8920 s = "gu";
8921 break;
8922 case LEU:
8923 s = "leu";
8924 break;
8925 case LTU:
8926 s = "lu";
8927 break;
8928 case LTGT:
8929 s = "lg";
8930 break;
8931 case UNORDERED:
8932 s = "u";
8933 break;
8934 case ORDERED:
8935 s = "o";
8936 break;
8937 case UNLT:
8938 s = "ul";
8939 break;
8940 case UNLE:
8941 s = "ule";
8942 break;
8943 case UNGT:
8944 s = "ug";
8945 break;
8946 case UNGE:
8947 s = "uge"
8948 ; break;
8949 case UNEQ:
8950 s = "ue";
8951 break;
8952 default:
8953 output_operand_lossage ("invalid %%C operand");
8954 s = "";
8955 break;
8956 }
8957 fputs (s, file);
8958 return;
8959 }
8960
8961 /* These are used by the movr instruction pattern. */
8962 case 'D':
8963 {
8964 switch (GET_CODE (x))
8965 {
8966 case NE:
8967 s = "ne";
8968 break;
8969 case EQ:
8970 s = "e";
8971 break;
8972 case GE:
8973 s = "gez";
8974 break;
8975 case LT:
8976 s = "lz";
8977 break;
8978 case LE:
8979 s = "lez";
8980 break;
8981 case GT:
8982 s = "gz";
8983 break;
8984 default:
8985 output_operand_lossage ("invalid %%D operand");
8986 s = "";
8987 break;
8988 }
8989 fputs (s, file);
8990 return;
8991 }
8992
8993 case 'b':
8994 {
8995 /* Print a sign-extended character. */
8996 int i = trunc_int_for_mode (INTVAL (x), QImode);
8997 fprintf (file, "%d", i);
8998 return;
8999 }
9000
9001 case 'f':
9002 /* Operand must be a MEM; write its address. */
9003 if (GET_CODE (x) != MEM)
9004 output_operand_lossage ("invalid %%f operand");
9005 output_address (GET_MODE (x), XEXP (x, 0));
9006 return;
9007
9008 case 's':
9009 {
9010 /* Print a sign-extended 32-bit value. */
9011 HOST_WIDE_INT i;
9012 if (GET_CODE(x) == CONST_INT)
9013 i = INTVAL (x);
9014 else
9015 {
9016 output_operand_lossage ("invalid %%s operand");
9017 return;
9018 }
9019 i = trunc_int_for_mode (i, SImode);
9020 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9021 return;
9022 }
9023
9024 case 0:
9025 /* Do nothing special. */
9026 break;
9027
9028 default:
9029 /* Undocumented flag. */
9030 output_operand_lossage ("invalid operand output code");
9031 }
9032
9033 if (GET_CODE (x) == REG)
9034 fputs (reg_names[REGNO (x)], file);
9035 else if (GET_CODE (x) == MEM)
9036 {
9037 fputc ('[', file);
9038 /* Poor Sun assembler doesn't understand absolute addressing. */
9039 if (CONSTANT_P (XEXP (x, 0)))
9040 fputs ("%g0+", file);
9041 output_address (GET_MODE (x), XEXP (x, 0));
9042 fputc (']', file);
9043 }
9044 else if (GET_CODE (x) == HIGH)
9045 {
9046 fputs ("%hi(", file);
9047 output_addr_const (file, XEXP (x, 0));
9048 fputc (')', file);
9049 }
9050 else if (GET_CODE (x) == LO_SUM)
9051 {
9052 sparc_print_operand (file, XEXP (x, 0), 0);
9053 if (TARGET_CM_MEDMID)
9054 fputs ("+%l44(", file);
9055 else
9056 fputs ("+%lo(", file);
9057 output_addr_const (file, XEXP (x, 1));
9058 fputc (')', file);
9059 }
9060 else if (GET_CODE (x) == CONST_DOUBLE)
9061 output_operand_lossage ("floating-point constant not a valid immediate operand");
9062 else
9063 output_addr_const (file, x);
9064 }
9065
9066 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9067
9068 static void
9069 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9070 {
9071 register rtx base, index = 0;
9072 int offset = 0;
9073 register rtx addr = x;
9074
9075 if (REG_P (addr))
9076 fputs (reg_names[REGNO (addr)], file);
9077 else if (GET_CODE (addr) == PLUS)
9078 {
9079 if (CONST_INT_P (XEXP (addr, 0)))
9080 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9081 else if (CONST_INT_P (XEXP (addr, 1)))
9082 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9083 else
9084 base = XEXP (addr, 0), index = XEXP (addr, 1);
9085 if (GET_CODE (base) == LO_SUM)
9086 {
9087 gcc_assert (USE_AS_OFFSETABLE_LO10
9088 && TARGET_ARCH64
9089 && ! TARGET_CM_MEDMID);
9090 output_operand (XEXP (base, 0), 0);
9091 fputs ("+%lo(", file);
9092 output_address (VOIDmode, XEXP (base, 1));
9093 fprintf (file, ")+%d", offset);
9094 }
9095 else
9096 {
9097 fputs (reg_names[REGNO (base)], file);
9098 if (index == 0)
9099 fprintf (file, "%+d", offset);
9100 else if (REG_P (index))
9101 fprintf (file, "+%s", reg_names[REGNO (index)]);
9102 else if (GET_CODE (index) == SYMBOL_REF
9103 || GET_CODE (index) == LABEL_REF
9104 || GET_CODE (index) == CONST)
9105 fputc ('+', file), output_addr_const (file, index);
9106 else gcc_unreachable ();
9107 }
9108 }
9109 else if (GET_CODE (addr) == MINUS
9110 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9111 {
9112 output_addr_const (file, XEXP (addr, 0));
9113 fputs ("-(", file);
9114 output_addr_const (file, XEXP (addr, 1));
9115 fputs ("-.)", file);
9116 }
9117 else if (GET_CODE (addr) == LO_SUM)
9118 {
9119 output_operand (XEXP (addr, 0), 0);
9120 if (TARGET_CM_MEDMID)
9121 fputs ("+%l44(", file);
9122 else
9123 fputs ("+%lo(", file);
9124 output_address (VOIDmode, XEXP (addr, 1));
9125 fputc (')', file);
9126 }
9127 else if (flag_pic
9128 && GET_CODE (addr) == CONST
9129 && GET_CODE (XEXP (addr, 0)) == MINUS
9130 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9131 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9132 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9133 {
9134 addr = XEXP (addr, 0);
9135 output_addr_const (file, XEXP (addr, 0));
9136 /* Group the args of the second CONST in parentheses. */
9137 fputs ("-(", file);
9138 /* Skip past the second CONST--it does nothing for us. */
9139 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9140 /* Close the parenthesis. */
9141 fputc (')', file);
9142 }
9143 else
9144 {
9145 output_addr_const (file, addr);
9146 }
9147 }
9148 \f
9149 /* Target hook for assembling integer objects. The sparc version has
9150 special handling for aligned DI-mode objects. */
9151
9152 static bool
9153 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9154 {
9155 /* ??? We only output .xword's for symbols and only then in environments
9156 where the assembler can handle them. */
9157 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9158 {
9159 if (TARGET_V9)
9160 {
9161 assemble_integer_with_op ("\t.xword\t", x);
9162 return true;
9163 }
9164 else
9165 {
9166 assemble_aligned_integer (4, const0_rtx);
9167 assemble_aligned_integer (4, x);
9168 return true;
9169 }
9170 }
9171 return default_assemble_integer (x, size, aligned_p);
9172 }
9173 \f
9174 /* Return the value of a code used in the .proc pseudo-op that says
9175 what kind of result this function returns. For non-C types, we pick
9176 the closest C type. */
9177
9178 #ifndef SHORT_TYPE_SIZE
9179 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9180 #endif
9181
9182 #ifndef INT_TYPE_SIZE
9183 #define INT_TYPE_SIZE BITS_PER_WORD
9184 #endif
9185
9186 #ifndef LONG_TYPE_SIZE
9187 #define LONG_TYPE_SIZE BITS_PER_WORD
9188 #endif
9189
9190 #ifndef LONG_LONG_TYPE_SIZE
9191 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9192 #endif
9193
9194 #ifndef FLOAT_TYPE_SIZE
9195 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9196 #endif
9197
9198 #ifndef DOUBLE_TYPE_SIZE
9199 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9200 #endif
9201
9202 #ifndef LONG_DOUBLE_TYPE_SIZE
9203 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9204 #endif
9205
9206 unsigned long
9207 sparc_type_code (register tree type)
9208 {
9209 register unsigned long qualifiers = 0;
9210 register unsigned shift;
9211
9212 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9213 setting more, since some assemblers will give an error for this. Also,
9214 we must be careful to avoid shifts of 32 bits or more to avoid getting
9215 unpredictable results. */
9216
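  /* For example, the C type "int *" yields (1 << 6) | 4: one pointer level
     encoded in the first two-bit slot plus the base code for a signed
     int-sized integer.  */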
9217 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9218 {
9219 switch (TREE_CODE (type))
9220 {
9221 case ERROR_MARK:
9222 return qualifiers;
9223
9224 case ARRAY_TYPE:
9225 qualifiers |= (3 << shift);
9226 break;
9227
9228 case FUNCTION_TYPE:
9229 case METHOD_TYPE:
9230 qualifiers |= (2 << shift);
9231 break;
9232
9233 case POINTER_TYPE:
9234 case REFERENCE_TYPE:
9235 case OFFSET_TYPE:
9236 qualifiers |= (1 << shift);
9237 break;
9238
9239 case RECORD_TYPE:
9240 return (qualifiers | 8);
9241
9242 case UNION_TYPE:
9243 case QUAL_UNION_TYPE:
9244 return (qualifiers | 9);
9245
9246 case ENUMERAL_TYPE:
9247 return (qualifiers | 10);
9248
9249 case VOID_TYPE:
9250 return (qualifiers | 16);
9251
9252 case INTEGER_TYPE:
9253 /* If this is a range type, consider it to be the underlying
9254 type. */
9255 if (TREE_TYPE (type) != 0)
9256 break;
9257
9258 /* Carefully distinguish all the standard types of C,
9259 without messing up if the language is not C. We do this by
9260 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9261 look at both the names and the above fields, but that's redundant.
9262 Any type whose size is between two C types will be considered
9263 to be the wider of the two types. Also, we do not have a
9264 special code to use for "long long", so anything wider than
9265 long is treated the same. Note that we can't distinguish
9266 between "int" and "long" in this code if they are the same
9267 size, but that's fine, since neither can the assembler. */
9268
9269 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9270 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9271
9272 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9273 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9274
9275 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9276 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9277
9278 else
9279 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9280
9281 case REAL_TYPE:
9282 /* If this is a range type, consider it to be the underlying
9283 type. */
9284 if (TREE_TYPE (type) != 0)
9285 break;
9286
9287 /* Carefully distinguish all the standard types of C,
9288 without messing up if the language is not C. */
9289
9290 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9291 return (qualifiers | 6);
9292
9293 else
9294 return (qualifiers | 7);
9295
9296 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9297 /* ??? We need to distinguish between double and float complex types,
9298 but I don't know how yet because I can't reach this code from
9299 existing front-ends. */
9300 return (qualifiers | 7); /* Who knows? */
9301
9302 case VECTOR_TYPE:
9303 case BOOLEAN_TYPE: /* Boolean truth value type. */
9304 case LANG_TYPE:
9305 case NULLPTR_TYPE:
9306 return qualifiers;
9307
9308 default:
9309 gcc_unreachable (); /* Not a type! */
9310 }
9311 }
9312
9313 return qualifiers;
9314 }
9315 \f
9316 /* Nested function support. */
9317
9318 /* Emit RTL insns to initialize the variable parts of a trampoline.
9319 FNADDR is an RTX for the address of the function's pure code.
9320 CXT is an RTX for the static chain value for the function.
9321
9322 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9323 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9324 (to store insns). This is a bit excessive. Perhaps a different
9325 mechanism would be better here.
9326
9327 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9328
9329 static void
9330 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9331 {
9332 /* SPARC 32-bit trampoline:
9333
9334 sethi %hi(fn), %g1
9335 sethi %hi(static), %g2
9336 jmp %g1+%lo(fn)
9337 or %g2, %lo(static), %g2
9338
9339 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9340 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9341 */
9342
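  /* Each word stored below is an opcode template IOR'ed with an address
     field, e.g. the first one is 0x03000000 | (fnaddr >> 10), which is
     "sethi %hi(fn), %g1".  */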
9343 emit_move_insn
9344 (adjust_address (m_tramp, SImode, 0),
9345 expand_binop (SImode, ior_optab,
9346 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9347 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9348 NULL_RTX, 1, OPTAB_DIRECT));
9349
9350 emit_move_insn
9351 (adjust_address (m_tramp, SImode, 4),
9352 expand_binop (SImode, ior_optab,
9353 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9354 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9355 NULL_RTX, 1, OPTAB_DIRECT));
9356
9357 emit_move_insn
9358 (adjust_address (m_tramp, SImode, 8),
9359 expand_binop (SImode, ior_optab,
9360 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9361 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9362 NULL_RTX, 1, OPTAB_DIRECT));
9363
9364 emit_move_insn
9365 (adjust_address (m_tramp, SImode, 12),
9366 expand_binop (SImode, ior_optab,
9367 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9368 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9369 NULL_RTX, 1, OPTAB_DIRECT));
9370
9371 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9372 aligned on a 16 byte boundary so one flush clears it all. */
9373 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9374 if (sparc_cpu != PROCESSOR_ULTRASPARC
9375 && sparc_cpu != PROCESSOR_ULTRASPARC3
9376 && sparc_cpu != PROCESSOR_NIAGARA
9377 && sparc_cpu != PROCESSOR_NIAGARA2
9378 && sparc_cpu != PROCESSOR_NIAGARA3
9379 && sparc_cpu != PROCESSOR_NIAGARA4
9380 && sparc_cpu != PROCESSOR_NIAGARA7)
9381 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9382
9383 /* Call __enable_execute_stack after writing onto the stack to make sure
9384 the stack address is accessible. */
9385 #ifdef HAVE_ENABLE_EXECUTE_STACK
9386 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9387 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9388 #endif
9389
9390 }
9391
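/* For illustration only (a sketch, not used by the code): under the SETHI
   and JMPL bit layouts quoted in the comment above, the four words stored
   by sparc32_initialize_trampoline decode as

     (fnaddr >> 10)   | 0x03000000  ->  sethi %hi(fn), %g1
     (cxt >> 10)      | 0x05000000  ->  sethi %hi(static), %g2
     (fnaddr & 0x3ff) | 0x81c06000  ->  jmp   %g1 + %lo(fn)
     (cxt & 0x3ff)    | 0x8410a000  ->  or    %g2, %lo(static), %g2

   i.e. the upper 22 bits of each address go into a SETHI imm22 field and
   the low 10 bits into the simm13 field of the instruction that consumes
   them, matching the assembly sequence listed above.  */
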
9392 /* The 64-bit version is simpler because it makes more sense to load the
9393 values as "immediate" data out of the trampoline. It's also easier since
9394 we can read the PC without clobbering a register. */
9395
9396 static void
9397 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9398 {
9399 /* SPARC 64-bit trampoline:
9400
9401 rd %pc, %g1
9402 ldx [%g1+24], %g5
9403 jmp %g5
9404 ldx [%g1+16], %g5
9405 +16 bytes data
9406 */
9407
9408 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9409 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9410 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9411 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9412 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9413 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9414 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9415 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9416 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9417 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9418 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9419
9420 if (sparc_cpu != PROCESSOR_ULTRASPARC
9421 && sparc_cpu != PROCESSOR_ULTRASPARC3
9422 && sparc_cpu != PROCESSOR_NIAGARA
9423 && sparc_cpu != PROCESSOR_NIAGARA2
9424 && sparc_cpu != PROCESSOR_NIAGARA3
9425 && sparc_cpu != PROCESSOR_NIAGARA4
9426 && sparc_cpu != PROCESSOR_NIAGARA7)
9427 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9428
9429 /* Call __enable_execute_stack after writing onto the stack to make sure
9430 the stack address is accessible. */
9431 #ifdef HAVE_ENABLE_EXECUTE_STACK
9432 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9433 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9434 #endif
9435 }
9436
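/* Likewise for illustration only: the four opcode words stored by
   sparc64_initialize_trampoline correspond one-for-one to the assembly
   in the comment above --

     0x83414000  ->  rd  %pc, %g1
     0xca586018  ->  ldx [%g1 + 24], %g5
     0x81c14000  ->  jmp %g5
     0xca586010  ->  ldx [%g1 + 16], %g5

   -- with the static chain stored as data at offset 16 and the function
   address at offset 24, as done by the two DImode moves above.  */
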
9437 /* Worker for TARGET_TRAMPOLINE_INIT. */
9438
9439 static void
9440 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9441 {
9442 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9443 cxt = force_reg (Pmode, cxt);
9444 if (TARGET_ARCH64)
9445 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9446 else
9447 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9448 }
9449 \f
9450 /* Adjust the cost of a scheduling dependency. Return the new cost of
9451 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9452
9453 static int
9454 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9455 int cost)
9456 {
9457 enum attr_type insn_type;
9458
9459 if (recog_memoized (insn) < 0)
9460 return cost;
9461
9462 insn_type = get_attr_type (insn);
9463
9464 if (dep_type == 0)
9465 {
9466 /* Data dependency; DEP_INSN writes a register that INSN reads some
9467 cycles later. */
9468
9469 /* If a load, then the dependence must be on the memory address;
9470 add an extra "cycle". Note that the cost could be two cycles
9471 if the reg was written late in an instruction group; we cannot tell
9472 here. */
9473 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9474 return cost + 3;
9475
9476 /* Get the delay only if the address of the store is the dependence. */
9477 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9478 {
9479 rtx pat = PATTERN (insn);
9480 rtx dep_pat = PATTERN (dep_insn);
9481
9482 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9483 return cost; /* This should not happen! */
9484
9485 /* The dependency between the two instructions was on the data that
9486 is being stored. Assume that this implies that the address of the
9487 store is not dependent. */
9488 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9489 return cost;
9490
9491 return cost + 3; /* An approximation. */
9492 }
9493
9494 /* A shift instruction cannot receive its data from an instruction
9495 in the same cycle; add a one cycle penalty. */
9496 if (insn_type == TYPE_SHIFT)
9497 return cost + 3; /* Split before cascade into shift. */
9498 }
9499 else
9500 {
9501 /* Anti- or output-dependency; DEP_INSN reads/writes a register that
9502 INSN writes some cycles later. */
9503
9504 /* These are only significant for the fpu unit; writing a fp reg before
9505 the fpu has finished with it stalls the processor. */
9506
9507 /* Reusing an integer register causes no problems. */
9508 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9509 return 0;
9510 }
9511
9512 return cost;
9513 }
9514
9515 static int
9516 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9517 int cost)
9518 {
9519 enum attr_type insn_type, dep_type;
9520 rtx pat = PATTERN (insn);
9521 rtx dep_pat = PATTERN (dep_insn);
9522
9523 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9524 return cost;
9525
9526 insn_type = get_attr_type (insn);
9527 dep_type = get_attr_type (dep_insn);
9528
9529 switch (dtype)
9530 {
9531 case 0:
9532 /* Data dependency; DEP_INSN writes a register that INSN reads some
9533 cycles later. */
9534
9535 switch (insn_type)
9536 {
9537 case TYPE_STORE:
9538 case TYPE_FPSTORE:
9539 /* Get the delay iff the address of the store is the dependence. */
9540 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9541 return cost;
9542
9543 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9544 return cost;
9545 return cost + 3;
9546
9547 case TYPE_LOAD:
9548 case TYPE_SLOAD:
9549 case TYPE_FPLOAD:
9550 /* If a load, then the dependence must be on the memory address. If
9551 the addresses aren't equal, then it might be a false dependency. */
9552 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9553 {
9554 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9555 || GET_CODE (SET_DEST (dep_pat)) != MEM
9556 || GET_CODE (SET_SRC (pat)) != MEM
9557 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9558 XEXP (SET_SRC (pat), 0)))
9559 return cost + 2;
9560
9561 return cost + 8;
9562 }
9563 break;
9564
9565 case TYPE_BRANCH:
9566 /* Compare to branch latency is 0. There is no benefit from
9567 separating compare and branch. */
9568 if (dep_type == TYPE_COMPARE)
9569 return 0;
9570 /* Floating point compare to branch latency is less than
9571 compare to conditional move. */
9572 if (dep_type == TYPE_FPCMP)
9573 return cost - 1;
9574 break;
9575 default:
9576 break;
9577 }
9578 break;
9579
9580 case REG_DEP_ANTI:
9581 /* Anti-dependencies only penalize the fpu unit. */
9582 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9583 return 0;
9584 break;
9585
9586 default:
9587 break;
9588 }
9589
9590 return cost;
9591 }
9592
9593 static int
9594 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9595 unsigned int)
9596 {
9597 switch (sparc_cpu)
9598 {
9599 case PROCESSOR_SUPERSPARC:
9600 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9601 break;
9602 case PROCESSOR_HYPERSPARC:
9603 case PROCESSOR_SPARCLITE86X:
9604 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9605 break;
9606 default:
9607 break;
9608 }
9609 return cost;
9610 }
9611
9612 static void
9613 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9614 int sched_verbose ATTRIBUTE_UNUSED,
9615 int max_ready ATTRIBUTE_UNUSED)
9616 {}
9617
9618 static int
9619 sparc_use_sched_lookahead (void)
9620 {
9621 if (sparc_cpu == PROCESSOR_NIAGARA
9622 || sparc_cpu == PROCESSOR_NIAGARA2
9623 || sparc_cpu == PROCESSOR_NIAGARA3)
9624 return 0;
9625 if (sparc_cpu == PROCESSOR_NIAGARA4
9626 || sparc_cpu == PROCESSOR_NIAGARA7)
9627 return 2;
9628 if (sparc_cpu == PROCESSOR_ULTRASPARC
9629 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9630 return 4;
9631 if ((1 << sparc_cpu) &
9632 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9633 (1 << PROCESSOR_SPARCLITE86X)))
9634 return 3;
9635 return 0;
9636 }
9637
9638 static int
9639 sparc_issue_rate (void)
9640 {
9641 switch (sparc_cpu)
9642 {
9643 case PROCESSOR_NIAGARA:
9644 case PROCESSOR_NIAGARA2:
9645 case PROCESSOR_NIAGARA3:
9646 default:
9647 return 1;
9648 case PROCESSOR_NIAGARA4:
9649 case PROCESSOR_NIAGARA7:
9650 case PROCESSOR_V9:
9651 /* Assume V9 processors are capable of at least dual-issue. */
9652 return 2;
9653 case PROCESSOR_SUPERSPARC:
9654 return 3;
9655 case PROCESSOR_HYPERSPARC:
9656 case PROCESSOR_SPARCLITE86X:
9657 return 2;
9658 case PROCESSOR_ULTRASPARC:
9659 case PROCESSOR_ULTRASPARC3:
9660 return 4;
9661 }
9662 }
9663
9664 static int
9665 set_extends (rtx_insn *insn)
9666 {
9667 register rtx pat = PATTERN (insn);
9668
9669 switch (GET_CODE (SET_SRC (pat)))
9670 {
9671 /* Load and some shift instructions zero extend. */
9672 case MEM:
9673 case ZERO_EXTEND:
9674 /* sethi clears the high bits */
9675 case HIGH:
9676 /* LO_SUM is used with sethi. sethi cleared the high
9677 bits and the values used with lo_sum are positive */
9678 case LO_SUM:
9679 /* Store flag stores 0 or 1 */
9680 case LT: case LTU:
9681 case GT: case GTU:
9682 case LE: case LEU:
9683 case GE: case GEU:
9684 case EQ:
9685 case NE:
9686 return 1;
9687 case AND:
9688 {
9689 rtx op0 = XEXP (SET_SRC (pat), 0);
9690 rtx op1 = XEXP (SET_SRC (pat), 1);
9691 if (GET_CODE (op1) == CONST_INT)
9692 return INTVAL (op1) >= 0;
9693 if (GET_CODE (op0) != REG)
9694 return 0;
9695 if (sparc_check_64 (op0, insn) == 1)
9696 return 1;
9697 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9698 }
9699 case IOR:
9700 case XOR:
9701 {
9702 rtx op0 = XEXP (SET_SRC (pat), 0);
9703 rtx op1 = XEXP (SET_SRC (pat), 1);
9704 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9705 return 0;
9706 if (GET_CODE (op1) == CONST_INT)
9707 return INTVAL (op1) >= 0;
9708 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9709 }
9710 case LSHIFTRT:
9711 return GET_MODE (SET_SRC (pat)) == SImode;
9712 /* Positive integers leave the high bits zero. */
9713 case CONST_INT:
9714 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9715 case ASHIFTRT:
9716 case SIGN_EXTEND:
9717 return - (GET_MODE (SET_SRC (pat)) == SImode);
9718 case REG:
9719 return sparc_check_64 (SET_SRC (pat), insn);
9720 default:
9721 return 0;
9722 }
9723 }
9724
9725 /* We _ought_ to have only one kind per function, but... */
9726 static GTY(()) rtx sparc_addr_diff_list;
9727 static GTY(()) rtx sparc_addr_list;
9728
9729 void
9730 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9731 {
9732 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9733 if (diff)
9734 sparc_addr_diff_list
9735 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9736 else
9737 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9738 }
9739
9740 static void
9741 sparc_output_addr_vec (rtx vec)
9742 {
9743 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9744 int idx, vlen = XVECLEN (body, 0);
9745
9746 #ifdef ASM_OUTPUT_ADDR_VEC_START
9747 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9748 #endif
9749
9750 #ifdef ASM_OUTPUT_CASE_LABEL
9751 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9752 NEXT_INSN (lab));
9753 #else
9754 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9755 #endif
9756
9757 for (idx = 0; idx < vlen; idx++)
9758 {
9759 ASM_OUTPUT_ADDR_VEC_ELT
9760 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9761 }
9762
9763 #ifdef ASM_OUTPUT_ADDR_VEC_END
9764 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9765 #endif
9766 }
9767
9768 static void
9769 sparc_output_addr_diff_vec (rtx vec)
9770 {
9771 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9772 rtx base = XEXP (XEXP (body, 0), 0);
9773 int idx, vlen = XVECLEN (body, 1);
9774
9775 #ifdef ASM_OUTPUT_ADDR_VEC_START
9776 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9777 #endif
9778
9779 #ifdef ASM_OUTPUT_CASE_LABEL
9780 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9781 NEXT_INSN (lab));
9782 #else
9783 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9784 #endif
9785
9786 for (idx = 0; idx < vlen; idx++)
9787 {
9788 ASM_OUTPUT_ADDR_DIFF_ELT
9789 (asm_out_file,
9790 body,
9791 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9792 CODE_LABEL_NUMBER (base));
9793 }
9794
9795 #ifdef ASM_OUTPUT_ADDR_VEC_END
9796 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9797 #endif
9798 }
9799
9800 static void
9801 sparc_output_deferred_case_vectors (void)
9802 {
9803 rtx t;
9804 int align;
9805
9806 if (sparc_addr_list == NULL_RTX
9807 && sparc_addr_diff_list == NULL_RTX)
9808 return;
9809
9810 /* Align to cache line in the function's code section. */
9811 switch_to_section (current_function_section ());
9812
9813 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9814 if (align > 0)
9815 ASM_OUTPUT_ALIGN (asm_out_file, align);
9816
9817 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9818 sparc_output_addr_vec (XEXP (t, 0));
9819 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9820 sparc_output_addr_diff_vec (XEXP (t, 0));
9821
9822 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9823 }
9824
9825 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9826 unknown. Return 1 if the high bits are zero, -1 if the register is
9827 sign extended. */
9828 int
9829 sparc_check_64 (rtx x, rtx_insn *insn)
9830 {
9831 /* If a register is set only once it is safe to ignore insns this
9832 code does not know how to handle. The loop will either recognize
9833 the single set and return the correct value or fail to recognize
9834 it and return 0. */
9835 int set_once = 0;
9836 rtx y = x;
9837
9838 gcc_assert (GET_CODE (x) == REG);
9839
9840 if (GET_MODE (x) == DImode)
9841 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9842
9843 if (flag_expensive_optimizations
9844 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9845 set_once = 1;
9846
9847 if (insn == 0)
9848 {
9849 if (set_once)
9850 insn = get_last_insn_anywhere ();
9851 else
9852 return 0;
9853 }
9854
9855 while ((insn = PREV_INSN (insn)))
9856 {
9857 switch (GET_CODE (insn))
9858 {
9859 case JUMP_INSN:
9860 case NOTE:
9861 break;
9862 case CODE_LABEL:
9863 case CALL_INSN:
9864 default:
9865 if (! set_once)
9866 return 0;
9867 break;
9868 case INSN:
9869 {
9870 rtx pat = PATTERN (insn);
9871 if (GET_CODE (pat) != SET)
9872 return 0;
9873 if (rtx_equal_p (x, SET_DEST (pat)))
9874 return set_extends (insn);
9875 if (y && rtx_equal_p (y, SET_DEST (pat)))
9876 return set_extends (insn);
9877 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9878 return 0;
9879 }
9880 }
9881 }
9882 return 0;
9883 }
9884
9885 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9886 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9887
9888 const char *
9889 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9890 {
9891 static char asm_code[60];
9892
9893 /* The scratch register is only required when the destination
9894 register is not a 64-bit global or out register. */
9895 if (which_alternative != 2)
9896 operands[3] = operands[0];
9897
9898 /* We can only shift by constants <= 63. */
9899 if (GET_CODE (operands[2]) == CONST_INT)
9900 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9901
9902 if (GET_CODE (operands[1]) == CONST_INT)
9903 {
9904 output_asm_insn ("mov\t%1, %3", operands);
9905 }
9906 else
9907 {
9908 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9909 if (sparc_check_64 (operands[1], insn) <= 0)
9910 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9911 output_asm_insn ("or\t%L1, %3, %3", operands);
9912 }
9913
9914 strcpy (asm_code, opcode);
9915
9916 if (which_alternative != 2)
9917 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9918 else
9919 return
9920 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9921 }
9922 \f
9923 /* Output rtl to increment the profiler label LABELNO
9924 for profiling a function entry. */
9925
9926 void
9927 sparc_profile_hook (int labelno)
9928 {
9929 char buf[32];
9930 rtx lab, fun;
9931
9932 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9933 if (NO_PROFILE_COUNTERS)
9934 {
9935 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9936 }
9937 else
9938 {
9939 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9940 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9941 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9942 }
9943 }
9944 \f
9945 #ifdef TARGET_SOLARIS
9946 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9947
9948 static void
9949 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9950 tree decl ATTRIBUTE_UNUSED)
9951 {
9952 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9953 {
9954 solaris_elf_asm_comdat_section (name, flags, decl);
9955 return;
9956 }
9957
9958 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9959
9960 if (!(flags & SECTION_DEBUG))
9961 fputs (",#alloc", asm_out_file);
9962 if (flags & SECTION_WRITE)
9963 fputs (",#write", asm_out_file);
9964 if (flags & SECTION_TLS)
9965 fputs (",#tls", asm_out_file);
9966 if (flags & SECTION_CODE)
9967 fputs (",#execinstr", asm_out_file);
9968
9969 if (flags & SECTION_NOTYPE)
9970 ;
9971 else if (flags & SECTION_BSS)
9972 fputs (",#nobits", asm_out_file);
9973 else
9974 fputs (",#progbits", asm_out_file);
9975
9976 fputc ('\n', asm_out_file);
9977 }
9978 #endif /* TARGET_SOLARIS */
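
/* As an illustrative example (the section name is made up): for a writable
   data section with no special flags, the Solaris hook above emits

     .section "my_section",#alloc,#write,#progbits

   a section with SECTION_CODE set gets ",#execinstr" as well, and one with
   SECTION_BSS set ends in ",#nobits" instead of ",#progbits".  */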
9979
9980 /* We do not allow indirect calls to be optimized into sibling calls.
9981
9982 We cannot use sibling calls when delayed branches are disabled
9983 because they will likely require the call delay slot to be filled.
9984
9985 Also, on SPARC 32-bit we cannot emit a sibling call when the
9986 current function returns a structure. This is because the "unimp
9987 after call" convention would cause the callee to return to the
9988 wrong place. The generic code already disallows cases where the
9989 function being called returns a structure.
9990
9991 It may seem strange how this last case could occur. Usually there
9992 is code after the call which jumps to epilogue code which dumps the
9993 return value into the struct return area. That ought to invalidate
9994 the sibling call right? Well, in the C++ case we can end up passing
9995 the pointer to the struct return area to a constructor (which returns
9996 void) and then nothing else happens. Such a sibling call would look
9997 valid without the added check here.
9998
9999 VxWorks PIC PLT entries require the global pointer to be initialized
10000 on entry. We therefore can't emit sibling calls to them. */
10001 static bool
10002 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10003 {
10004 return (decl
10005 && flag_delayed_branch
10006 && (TARGET_ARCH64 || ! cfun->returns_struct)
10007 && !(TARGET_VXWORKS_RTP
10008 && flag_pic
10009 && !targetm.binds_local_p (decl)));
10010 }
10011 \f
10012 /* libfunc renaming. */
10013
10014 static void
10015 sparc_init_libfuncs (void)
10016 {
10017 if (TARGET_ARCH32)
10018 {
10019 /* Use the subroutines that Sun's library provides for integer
10020 multiply and divide. The `*' prevents an underscore from
10021 being prepended by the compiler. .umul is a little faster
10022 than .mul. */
10023 set_optab_libfunc (smul_optab, SImode, "*.umul");
10024 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10025 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10026 set_optab_libfunc (smod_optab, SImode, "*.rem");
10027 set_optab_libfunc (umod_optab, SImode, "*.urem");
10028
10029 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10030 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10031 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10032 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10033 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10034 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10035
10036 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10037 is because with soft-float, the SFmode and DFmode sqrt
10038 instructions will be absent, and the compiler will notice and
10039 try to use the TFmode sqrt instruction for calls to the
10040 builtin function sqrt, but this fails. */
10041 if (TARGET_FPU)
10042 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10043
10044 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10045 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10046 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10047 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10048 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10049 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10050
10051 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10052 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10053 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10054 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10055
10056 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10057 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10058 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10059 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10060
10061 if (DITF_CONVERSION_LIBFUNCS)
10062 {
10063 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10064 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10065 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10066 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10067 }
10068
10069 if (SUN_CONVERSION_LIBFUNCS)
10070 {
10071 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10072 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10073 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10074 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10075 }
10076 }
10077 if (TARGET_ARCH64)
10078 {
10079 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10080 do not exist in the library. Make sure the compiler does not
10081 emit calls to them by accident. (It should always use the
10082 hardware instructions.) */
10083 set_optab_libfunc (smul_optab, SImode, 0);
10084 set_optab_libfunc (sdiv_optab, SImode, 0);
10085 set_optab_libfunc (udiv_optab, SImode, 0);
10086 set_optab_libfunc (smod_optab, SImode, 0);
10087 set_optab_libfunc (umod_optab, SImode, 0);
10088
10089 if (SUN_INTEGER_MULTIPLY_64)
10090 {
10091 set_optab_libfunc (smul_optab, DImode, "__mul64");
10092 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10093 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10094 set_optab_libfunc (smod_optab, DImode, "__rem64");
10095 set_optab_libfunc (umod_optab, DImode, "__urem64");
10096 }
10097
10098 if (SUN_CONVERSION_LIBFUNCS)
10099 {
10100 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10101 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10102 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10103 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10104 }
10105 }
10106 }
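
/* For example (not an exhaustive list): with the ARCH32 settings above, a
   signed SImode division is emitted as a call to .div from Sun's library
   instead of the default __divsi3 from libgcc, and a TFmode addition
   becomes a call to _Q_add as required by the 32-bit ABI.  */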
10107 \f
10108 /* SPARC builtins. */
10109 enum sparc_builtins
10110 {
10111 /* FPU builtins. */
10112 SPARC_BUILTIN_LDFSR,
10113 SPARC_BUILTIN_STFSR,
10114
10115 /* VIS 1.0 builtins. */
10116 SPARC_BUILTIN_FPACK16,
10117 SPARC_BUILTIN_FPACK32,
10118 SPARC_BUILTIN_FPACKFIX,
10119 SPARC_BUILTIN_FEXPAND,
10120 SPARC_BUILTIN_FPMERGE,
10121 SPARC_BUILTIN_FMUL8X16,
10122 SPARC_BUILTIN_FMUL8X16AU,
10123 SPARC_BUILTIN_FMUL8X16AL,
10124 SPARC_BUILTIN_FMUL8SUX16,
10125 SPARC_BUILTIN_FMUL8ULX16,
10126 SPARC_BUILTIN_FMULD8SUX16,
10127 SPARC_BUILTIN_FMULD8ULX16,
10128 SPARC_BUILTIN_FALIGNDATAV4HI,
10129 SPARC_BUILTIN_FALIGNDATAV8QI,
10130 SPARC_BUILTIN_FALIGNDATAV2SI,
10131 SPARC_BUILTIN_FALIGNDATADI,
10132 SPARC_BUILTIN_WRGSR,
10133 SPARC_BUILTIN_RDGSR,
10134 SPARC_BUILTIN_ALIGNADDR,
10135 SPARC_BUILTIN_ALIGNADDRL,
10136 SPARC_BUILTIN_PDIST,
10137 SPARC_BUILTIN_EDGE8,
10138 SPARC_BUILTIN_EDGE8L,
10139 SPARC_BUILTIN_EDGE16,
10140 SPARC_BUILTIN_EDGE16L,
10141 SPARC_BUILTIN_EDGE32,
10142 SPARC_BUILTIN_EDGE32L,
10143 SPARC_BUILTIN_FCMPLE16,
10144 SPARC_BUILTIN_FCMPLE32,
10145 SPARC_BUILTIN_FCMPNE16,
10146 SPARC_BUILTIN_FCMPNE32,
10147 SPARC_BUILTIN_FCMPGT16,
10148 SPARC_BUILTIN_FCMPGT32,
10149 SPARC_BUILTIN_FCMPEQ16,
10150 SPARC_BUILTIN_FCMPEQ32,
10151 SPARC_BUILTIN_FPADD16,
10152 SPARC_BUILTIN_FPADD16S,
10153 SPARC_BUILTIN_FPADD32,
10154 SPARC_BUILTIN_FPADD32S,
10155 SPARC_BUILTIN_FPSUB16,
10156 SPARC_BUILTIN_FPSUB16S,
10157 SPARC_BUILTIN_FPSUB32,
10158 SPARC_BUILTIN_FPSUB32S,
10159 SPARC_BUILTIN_ARRAY8,
10160 SPARC_BUILTIN_ARRAY16,
10161 SPARC_BUILTIN_ARRAY32,
10162
10163 /* VIS 2.0 builtins. */
10164 SPARC_BUILTIN_EDGE8N,
10165 SPARC_BUILTIN_EDGE8LN,
10166 SPARC_BUILTIN_EDGE16N,
10167 SPARC_BUILTIN_EDGE16LN,
10168 SPARC_BUILTIN_EDGE32N,
10169 SPARC_BUILTIN_EDGE32LN,
10170 SPARC_BUILTIN_BMASK,
10171 SPARC_BUILTIN_BSHUFFLEV4HI,
10172 SPARC_BUILTIN_BSHUFFLEV8QI,
10173 SPARC_BUILTIN_BSHUFFLEV2SI,
10174 SPARC_BUILTIN_BSHUFFLEDI,
10175
10176 /* VIS 3.0 builtins. */
10177 SPARC_BUILTIN_CMASK8,
10178 SPARC_BUILTIN_CMASK16,
10179 SPARC_BUILTIN_CMASK32,
10180 SPARC_BUILTIN_FCHKSM16,
10181 SPARC_BUILTIN_FSLL16,
10182 SPARC_BUILTIN_FSLAS16,
10183 SPARC_BUILTIN_FSRL16,
10184 SPARC_BUILTIN_FSRA16,
10185 SPARC_BUILTIN_FSLL32,
10186 SPARC_BUILTIN_FSLAS32,
10187 SPARC_BUILTIN_FSRL32,
10188 SPARC_BUILTIN_FSRA32,
10189 SPARC_BUILTIN_PDISTN,
10190 SPARC_BUILTIN_FMEAN16,
10191 SPARC_BUILTIN_FPADD64,
10192 SPARC_BUILTIN_FPSUB64,
10193 SPARC_BUILTIN_FPADDS16,
10194 SPARC_BUILTIN_FPADDS16S,
10195 SPARC_BUILTIN_FPSUBS16,
10196 SPARC_BUILTIN_FPSUBS16S,
10197 SPARC_BUILTIN_FPADDS32,
10198 SPARC_BUILTIN_FPADDS32S,
10199 SPARC_BUILTIN_FPSUBS32,
10200 SPARC_BUILTIN_FPSUBS32S,
10201 SPARC_BUILTIN_FUCMPLE8,
10202 SPARC_BUILTIN_FUCMPNE8,
10203 SPARC_BUILTIN_FUCMPGT8,
10204 SPARC_BUILTIN_FUCMPEQ8,
10205 SPARC_BUILTIN_FHADDS,
10206 SPARC_BUILTIN_FHADDD,
10207 SPARC_BUILTIN_FHSUBS,
10208 SPARC_BUILTIN_FHSUBD,
10209 SPARC_BUILTIN_FNHADDS,
10210 SPARC_BUILTIN_FNHADDD,
10211 SPARC_BUILTIN_UMULXHI,
10212 SPARC_BUILTIN_XMULX,
10213 SPARC_BUILTIN_XMULXHI,
10214
10215 /* VIS 4.0 builtins. */
10216 SPARC_BUILTIN_FPADD8,
10217 SPARC_BUILTIN_FPADDS8,
10218 SPARC_BUILTIN_FPADDUS8,
10219 SPARC_BUILTIN_FPADDUS16,
10220 SPARC_BUILTIN_FPCMPLE8,
10221 SPARC_BUILTIN_FPCMPGT8,
10222 SPARC_BUILTIN_FPCMPULE16,
10223 SPARC_BUILTIN_FPCMPUGT16,
10224 SPARC_BUILTIN_FPCMPULE32,
10225 SPARC_BUILTIN_FPCMPUGT32,
10226 SPARC_BUILTIN_FPMAX8,
10227 SPARC_BUILTIN_FPMAX16,
10228 SPARC_BUILTIN_FPMAX32,
10229 SPARC_BUILTIN_FPMAXU8,
10230 SPARC_BUILTIN_FPMAXU16,
10231 SPARC_BUILTIN_FPMAXU32,
10232 SPARC_BUILTIN_FPMIN8,
10233 SPARC_BUILTIN_FPMIN16,
10234 SPARC_BUILTIN_FPMIN32,
10235 SPARC_BUILTIN_FPMINU8,
10236 SPARC_BUILTIN_FPMINU16,
10237 SPARC_BUILTIN_FPMINU32,
10238 SPARC_BUILTIN_FPSUB8,
10239 SPARC_BUILTIN_FPSUBS8,
10240 SPARC_BUILTIN_FPSUBUS8,
10241 SPARC_BUILTIN_FPSUBUS16,
10242
10243 SPARC_BUILTIN_MAX
10244 };
10245
10246 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10247 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10248
10249 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10250 function decl or NULL_TREE if the builtin was not added. */
10251
10252 static tree
10253 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10254 tree type)
10255 {
10256 tree t
10257 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10258
10259 if (t)
10260 {
10261 sparc_builtins[code] = t;
10262 sparc_builtins_icode[code] = icode;
10263 }
10264
10265 return t;
10266 }
10267
10268 /* Likewise, but also marks the function as "const". */
10269
10270 static tree
10271 def_builtin_const (const char *name, enum insn_code icode,
10272 enum sparc_builtins code, tree type)
10273 {
10274 tree t = def_builtin (name, icode, code, type);
10275
10276 if (t)
10277 TREE_READONLY (t) = 1;
10278
10279 return t;
10280 }
10281
10282 /* Implement the TARGET_INIT_BUILTINS target hook.
10283 Create builtin functions for special SPARC instructions. */
10284
10285 static void
10286 sparc_init_builtins (void)
10287 {
10288 if (TARGET_FPU)
10289 sparc_fpu_init_builtins ();
10290
10291 if (TARGET_VIS)
10292 sparc_vis_init_builtins ();
10293 }
10294
10295 /* Create builtin functions for FPU instructions. */
10296
10297 static void
10298 sparc_fpu_init_builtins (void)
10299 {
10300 tree ftype
10301 = build_function_type_list (void_type_node,
10302 build_pointer_type (unsigned_type_node), 0);
10303 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10304 SPARC_BUILTIN_LDFSR, ftype);
10305 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10306 SPARC_BUILTIN_STFSR, ftype);
10307 }
10308
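/* A minimal usage sketch (user code, not part of this file; the semantics
   follow the ldfsr/stfsr instructions behind the insn codes above):

     unsigned int fsr_image;
     __builtin_store_fsr (&fsr_image);    -- stfsr: copy %fsr out to memory
     ... inspect or adjust fsr_image ...
     __builtin_load_fsr (&fsr_image);     -- ldfsr: reload %fsr from memory

   Both builtins take a pointer to the 32-bit word used as the transfer
   buffer, per the function type built above.  */
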
10309 /* Create builtin functions for VIS instructions. */
10310
10311 static void
10312 sparc_vis_init_builtins (void)
10313 {
10314 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10315 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10316 tree v4hi = build_vector_type (intHI_type_node, 4);
10317 tree v2hi = build_vector_type (intHI_type_node, 2);
10318 tree v2si = build_vector_type (intSI_type_node, 2);
10319 tree v1si = build_vector_type (intSI_type_node, 1);
10320
10321 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10322 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10323 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10324 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10325 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10326 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10327 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10328 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10329 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10330 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10331 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10332 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10333 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10334 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10335 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10336 v8qi, v8qi,
10337 intDI_type_node, 0);
10338 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10339 v8qi, v8qi, 0);
10340 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10341 v8qi, v8qi, 0);
10342 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10343 intDI_type_node,
10344 intDI_type_node, 0);
10345 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10346 intSI_type_node,
10347 intSI_type_node, 0);
10348 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10349 ptr_type_node,
10350 intSI_type_node, 0);
10351 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10352 ptr_type_node,
10353 intDI_type_node, 0);
10354 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10355 ptr_type_node,
10356 ptr_type_node, 0);
10357 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10358 ptr_type_node,
10359 ptr_type_node, 0);
10360 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10361 v4hi, v4hi, 0);
10362 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10363 v2si, v2si, 0);
10364 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10365 v4hi, v4hi, 0);
10366 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10367 v2si, v2si, 0);
10368 tree void_ftype_di = build_function_type_list (void_type_node,
10369 intDI_type_node, 0);
10370 tree di_ftype_void = build_function_type_list (intDI_type_node,
10371 void_type_node, 0);
10372 tree void_ftype_si = build_function_type_list (void_type_node,
10373 intSI_type_node, 0);
10374 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10375 float_type_node,
10376 float_type_node, 0);
10377 tree df_ftype_df_df = build_function_type_list (double_type_node,
10378 double_type_node,
10379 double_type_node, 0);
10380
10381 /* Packing and expanding vectors. */
10382 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10383 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10384 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10385 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10386 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10387 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10388 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10389 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10390 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10391 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10392
10393 /* Multiplications. */
10394 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10395 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10396 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10397 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10398 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10399 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10400 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10401 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10402 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10403 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10404 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10405 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10406 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10407 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10408
10409 /* Data aligning. */
10410 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10411 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10412 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10413 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10414 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10415 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10416 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10417 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10418
10419 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10420 SPARC_BUILTIN_WRGSR, void_ftype_di);
10421 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10422 SPARC_BUILTIN_RDGSR, di_ftype_void);
10423
10424 if (TARGET_ARCH64)
10425 {
10426 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10427 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10428 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10429 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10430 }
10431 else
10432 {
10433 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10434 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10435 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10436 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10437 }
10438
10439 /* Pixel distance. */
10440 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10441 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10442
10443 /* Edge handling. */
10444 if (TARGET_ARCH64)
10445 {
10446 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10447 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10448 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10449 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10450 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10451 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10452 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10453 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10454 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10455 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10456 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10457 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10458 }
10459 else
10460 {
10461 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10462 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10463 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10464 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10465 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10466 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10467 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10468 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10469 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10470 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10471 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10472 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10473 }
10474
10475 /* Pixel compare. */
10476 if (TARGET_ARCH64)
10477 {
10478 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10479 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10480 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10481 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10482 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10483 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10484 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10485 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10486 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10487 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10488 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10489 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10490 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10491 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10492 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10493 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10494 }
10495 else
10496 {
10497 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10498 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10499 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10500 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10501 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10502 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10503 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10504 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10505 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10506 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10507 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10508 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10509 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10510 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10511 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10512 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10513 }
10514
10515 /* Addition and subtraction. */
10516 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10517 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10518 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10519 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10520 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10521 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10522 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10523 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10524 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10525 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10526 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10527 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10528 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10529 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10530 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10531 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10532
10533 /* Three-dimensional array addressing. */
10534 if (TARGET_ARCH64)
10535 {
10536 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10537 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10538 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10539 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10540 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10541 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10542 }
10543 else
10544 {
10545 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10546 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10547 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10548 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10549 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10550 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10551 }
10552
10553 if (TARGET_VIS2)
10554 {
10555 /* Edge handling. */
10556 if (TARGET_ARCH64)
10557 {
10558 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10559 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10560 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10561 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10562 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10563 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10564 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10565 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10566 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10567 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10568 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10569 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10570 }
10571 else
10572 {
10573 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10574 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10575 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10576 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10577 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10578 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10579 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10580 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10581 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10582 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10583 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10584 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10585 }
10586
10587 /* Byte mask and shuffle. */
10588 if (TARGET_ARCH64)
10589 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10590 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10591 else
10592 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10593 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10594 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10595 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10596 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10597 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10598 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10599 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10600 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10601 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10602 }
10603
10604 if (TARGET_VIS3)
10605 {
10606 if (TARGET_ARCH64)
10607 {
10608 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10609 SPARC_BUILTIN_CMASK8, void_ftype_di);
10610 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10611 SPARC_BUILTIN_CMASK16, void_ftype_di);
10612 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10613 SPARC_BUILTIN_CMASK32, void_ftype_di);
10614 }
10615 else
10616 {
10617 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10618 SPARC_BUILTIN_CMASK8, void_ftype_si);
10619 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10620 SPARC_BUILTIN_CMASK16, void_ftype_si);
10621 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10622 SPARC_BUILTIN_CMASK32, void_ftype_si);
10623 }
10624
10625 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10626 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10627
10628 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10629 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10630 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10631 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10632 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10633 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10634 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10635 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10636 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10637 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10638 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10639 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10640 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10641 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10642 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10643 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10644
10645 if (TARGET_ARCH64)
10646 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10647 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10648 else
10649 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10650 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10651
10652 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10653 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10654 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10655 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10656 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10657 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10658
10659 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10660 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10661 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10662 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10663 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10664 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10665 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10666 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10667 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10668 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10669 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10670 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10671 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10672 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10673 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10674 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10675
10676 if (TARGET_ARCH64)
10677 {
10678 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10679 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10680 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10681 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10682 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10683 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10684 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10685 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10686 }
10687 else
10688 {
10689 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10690 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10691 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10692 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10693 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10694 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10695 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10696 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10697 }
10698
10699 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10700 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10701 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10702 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10703 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10704 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10705 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10706 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10707 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10708 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10709 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10710 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10711
10712 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10713 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10714 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10715 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10716 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10717 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10718 }
10719
10720 if (TARGET_VIS4)
10721 {
10722 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10723 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10724 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10725 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10726 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10727 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10728 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10729 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10730
10731
10732 if (TARGET_ARCH64)
10733 {
10734 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10735 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10736 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10737 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10738 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10739 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10740 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10741 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10742 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10743 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10744 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10745 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10746 }
10747 else
10748 {
10749 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10750 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10751 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10752 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10753 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10754 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10755 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10756 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10757 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10758 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10759 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10760 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10761 }
10762
10763 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10764 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10765 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10766 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10767 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10768 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10769 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10770 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10771 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10772 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10773 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10774 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10775 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10776 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10777 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10778 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10779 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10780 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10781 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10782 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10783 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10784 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10785 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10786 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10787 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10788 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10789 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10790 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10791 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10792 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10793 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10794 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10795 }
10796 }
10797
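/* A minimal usage sketch for the vector builtins defined above (user code,
   not part of this file; the typedef is an assumption matching V4HImode and
   VIS3 must be enabled for fpadds16 to exist):

     typedef short vec4h __attribute__ ((vector_size (8)));

     vec4h saturating_add (vec4h a, vec4h b)
     {
       return __builtin_vis_fpadds16 (a, b);
     }

   The argument and return types must match the v4hi_ftype_v4hi_v4hi
   signature registered above.  */
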
10798 /* Implement TARGET_BUILTIN_DECL hook. */
10799
10800 static tree
10801 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10802 {
10803 if (code >= SPARC_BUILTIN_MAX)
10804 return error_mark_node;
10805
10806 return sparc_builtins[code];
10807 }
10808
10809 /* Implement TARGET_EXPAND_BUILTIN hook. */
10810
10811 static rtx
10812 sparc_expand_builtin (tree exp, rtx target,
10813 rtx subtarget ATTRIBUTE_UNUSED,
10814 machine_mode tmode ATTRIBUTE_UNUSED,
10815 int ignore ATTRIBUTE_UNUSED)
10816 {
10817 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10818 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10819 enum insn_code icode = sparc_builtins_icode[code];
10820 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10821 call_expr_arg_iterator iter;
10822 int arg_count = 0;
10823 rtx pat, op[4];
10824 tree arg;
10825
10826 if (nonvoid)
10827 {
10828 machine_mode tmode = insn_data[icode].operand[0].mode;
10829 if (!target
10830 || GET_MODE (target) != tmode
10831 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10832 op[0] = gen_reg_rtx (tmode);
10833 else
10834 op[0] = target;
10835 }
10836
10837 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10838 {
10839 const struct insn_operand_data *insn_op;
10840 int idx;
10841
10842 if (arg == error_mark_node)
10843 return NULL_RTX;
10844
10845 arg_count++;
10846 idx = arg_count - !nonvoid;
10847 insn_op = &insn_data[icode].operand[idx];
10848 op[arg_count] = expand_normal (arg);
10849
10850 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10851 {
10852 if (!address_operand (op[arg_count], SImode))
10853 {
10854 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10855 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10856 }
10857 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10858 }
10859
10860 else if (insn_op->mode == V1DImode
10861 && GET_MODE (op[arg_count]) == DImode)
10862 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10863
10864 else if (insn_op->mode == V1SImode
10865 && GET_MODE (op[arg_count]) == SImode)
10866 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10867
10868 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10869 insn_op->mode))
10870 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10871 }
10872
10873 switch (arg_count)
10874 {
10875 case 0:
10876 pat = GEN_FCN (icode) (op[0]);
10877 break;
10878 case 1:
10879 if (nonvoid)
10880 pat = GEN_FCN (icode) (op[0], op[1]);
10881 else
10882 pat = GEN_FCN (icode) (op[1]);
10883 break;
10884 case 2:
10885 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10886 break;
10887 case 3:
10888 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10889 break;
10890 default:
10891 gcc_unreachable ();
10892 }
10893
10894 if (!pat)
10895 return NULL_RTX;
10896
10897 emit_insn (pat);
10898
10899 return (nonvoid ? op[0] : const0_rtx);
10900 }
10901
10902 /* Return the upper 16 bits of the 8x16 multiplication. */
10903
10904 static int
10905 sparc_vis_mul8x16 (int e8, int e16)
10906 {
10907 return (e8 * e16 + 128) / 256;
10908 }
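
/* For instance, with e8 == 100 and e16 == 300 this yields
   (100 * 300 + 128) / 256 == 117, i.e. the 24-bit product scaled down
   by 256 with rounding to nearest.  */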
10909
10910 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10911 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10912
10913 static void
10914 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10915 tree inner_type, tree cst0, tree cst1)
10916 {
10917 unsigned i, num = VECTOR_CST_NELTS (cst0);
10918 int scale;
10919
10920 switch (fncode)
10921 {
10922 case SPARC_BUILTIN_FMUL8X16:
10923 for (i = 0; i < num; ++i)
10924 {
10925 int val
10926 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10927 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10928 n_elts[i] = build_int_cst (inner_type, val);
10929 }
10930 break;
10931
10932 case SPARC_BUILTIN_FMUL8X16AU:
10933 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10934
10935 for (i = 0; i < num; ++i)
10936 {
10937 int val
10938 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10939 scale);
10940 n_elts[i] = build_int_cst (inner_type, val);
10941 }
10942 break;
10943
10944 case SPARC_BUILTIN_FMUL8X16AL:
10945 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10946
10947 for (i = 0; i < num; ++i)
10948 {
10949 int val
10950 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10951 scale);
10952 n_elts[i] = build_int_cst (inner_type, val);
10953 }
10954 break;
10955
10956 default:
10957 gcc_unreachable ();
10958 }
10959 }
10960
10961 /* Implement TARGET_FOLD_BUILTIN hook.
10962
10963 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10964 result of the function call is ignored. NULL_TREE is returned if the
10965 function could not be folded. */
10966
10967 static tree
10968 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10969 tree *args, bool ignore)
10970 {
10971 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10972 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10973 tree arg0, arg1, arg2;
10974
10975 if (ignore)
10976 switch (code)
10977 {
10978 case SPARC_BUILTIN_LDFSR:
10979 case SPARC_BUILTIN_STFSR:
10980 case SPARC_BUILTIN_ALIGNADDR:
10981 case SPARC_BUILTIN_WRGSR:
10982 case SPARC_BUILTIN_BMASK:
10983 case SPARC_BUILTIN_CMASK8:
10984 case SPARC_BUILTIN_CMASK16:
10985 case SPARC_BUILTIN_CMASK32:
10986 break;
10987
10988 default:
10989 return build_zero_cst (rtype);
10990 }
10991
10992 switch (code)
10993 {
10994 case SPARC_BUILTIN_FEXPAND:
10995 arg0 = args[0];
10996 STRIP_NOPS (arg0);
10997
10998 if (TREE_CODE (arg0) == VECTOR_CST)
10999 {
11000 tree inner_type = TREE_TYPE (rtype);
11001 tree *n_elts;
11002 unsigned i;
11003
11004 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11005 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11006 n_elts[i] = build_int_cst (inner_type,
11007 TREE_INT_CST_LOW
11008 (VECTOR_CST_ELT (arg0, i)) << 4);
11009 return build_vector (rtype, n_elts);
11010 }
11011 break;
11012
11013 case SPARC_BUILTIN_FMUL8X16:
11014 case SPARC_BUILTIN_FMUL8X16AU:
11015 case SPARC_BUILTIN_FMUL8X16AL:
11016 arg0 = args[0];
11017 arg1 = args[1];
11018 STRIP_NOPS (arg0);
11019 STRIP_NOPS (arg1);
11020
11021 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11022 {
11023 tree inner_type = TREE_TYPE (rtype);
11024 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11025 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11026 return build_vector (rtype, n_elts);
11027 }
11028 break;
11029
11030 case SPARC_BUILTIN_FPMERGE:
11031 arg0 = args[0];
11032 arg1 = args[1];
11033 STRIP_NOPS (arg0);
11034 STRIP_NOPS (arg1);
11035
11036 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11037 {
11038 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11039 unsigned i;
11040 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11041 {
11042 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11043 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11044 }
11045
11046 return build_vector (rtype, n_elts);
11047 }
11048 break;
11049
11050 case SPARC_BUILTIN_PDIST:
11051 case SPARC_BUILTIN_PDISTN:
11052 arg0 = args[0];
11053 arg1 = args[1];
11054 STRIP_NOPS (arg0);
11055 STRIP_NOPS (arg1);
11056 if (code == SPARC_BUILTIN_PDIST)
11057 {
11058 arg2 = args[2];
11059 STRIP_NOPS (arg2);
11060 }
11061 else
11062 arg2 = integer_zero_node;
11063
11064 if (TREE_CODE (arg0) == VECTOR_CST
11065 && TREE_CODE (arg1) == VECTOR_CST
11066 && TREE_CODE (arg2) == INTEGER_CST)
11067 {
11068 bool overflow = false;
11069 widest_int result = wi::to_widest (arg2);
11070 widest_int tmp;
11071 unsigned i;
11072
11073 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11074 {
11075 tree e0 = VECTOR_CST_ELT (arg0, i);
11076 tree e1 = VECTOR_CST_ELT (arg1, i);
11077
11078 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11079
11080 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11081 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11082 if (wi::neg_p (tmp))
11083 tmp = wi::neg (tmp, &neg2_ovf);
11084 else
11085 neg2_ovf = false;
11086 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11087 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11088 }
11089
11090 gcc_assert (!overflow);
11091
11092 return wide_int_to_tree (rtype, result);
11093 }
11094
11095 default:
11096 break;
11097 }
11098
11099 return NULL_TREE;
11100 }
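
/* As a worked example of the PDIST folding above: with constant vectors
   arg0 = { 1, 200, 3, 4, 5, 6, 7, 8 }, arg1 = { 5, 100, 3, 4, 5, 6, 7, 8 }
   and an accumulator of 10, the loop adds |1-5| + |200-100| = 104 to the
   accumulator, so the call folds to the integer constant 114.  */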
11101 \f
11102 /* ??? This duplicates information provided to the compiler by the
11103 ??? scheduler description. Some day, teach genautomata to output
11104 ??? the latencies and then CSE will just use that. */
11105
11106 static bool
11107 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11108 int opno ATTRIBUTE_UNUSED,
11109 int *total, bool speed ATTRIBUTE_UNUSED)
11110 {
11111 int code = GET_CODE (x);
11112 bool float_mode_p = FLOAT_MODE_P (mode);
11113
11114 switch (code)
11115 {
11116 case CONST_INT:
11117 if (SMALL_INT (x))
11118 *total = 0;
11119 else
11120 *total = 2;
11121 return true;
11122
11123 case CONST_WIDE_INT:
11124 *total = 0;
11125 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11126 *total += 2;
11127 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11128 *total += 2;
11129 return true;
11130
11131 case HIGH:
11132 *total = 2;
11133 return true;
11134
11135 case CONST:
11136 case LABEL_REF:
11137 case SYMBOL_REF:
11138 *total = 4;
11139 return true;
11140
11141 case CONST_DOUBLE:
11142 *total = 8;
11143 return true;
11144
11145 case MEM:
11146 /* If outer-code was a sign or zero extension, a cost
11147 of COSTS_N_INSNS (1) was already added in. This is
11148 why we are subtracting it back out. */
11149 if (outer_code == ZERO_EXTEND)
11150 {
11151 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11152 }
11153 else if (outer_code == SIGN_EXTEND)
11154 {
11155 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11156 }
11157 else if (float_mode_p)
11158 {
11159 *total = sparc_costs->float_load;
11160 }
11161 else
11162 {
11163 *total = sparc_costs->int_load;
11164 }
11165
11166 return true;
11167
11168 case PLUS:
11169 case MINUS:
11170 if (float_mode_p)
11171 *total = sparc_costs->float_plusminus;
11172 else
11173 *total = COSTS_N_INSNS (1);
11174 return false;
11175
11176 case FMA:
11177 {
11178 rtx sub;
11179
11180 gcc_assert (float_mode_p);
11181 *total = sparc_costs->float_mul;
11182
11183 sub = XEXP (x, 0);
11184 if (GET_CODE (sub) == NEG)
11185 sub = XEXP (sub, 0);
11186 *total += rtx_cost (sub, mode, FMA, 0, speed);
11187
11188 sub = XEXP (x, 2);
11189 if (GET_CODE (sub) == NEG)
11190 sub = XEXP (sub, 0);
11191 *total += rtx_cost (sub, mode, FMA, 2, speed);
11192 return true;
11193 }
11194
11195 case MULT:
11196 if (float_mode_p)
11197 *total = sparc_costs->float_mul;
11198 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11199 *total = COSTS_N_INSNS (25);
11200 else
11201 {
11202 int bit_cost;
11203
11204 bit_cost = 0;
11205 if (sparc_costs->int_mul_bit_factor)
11206 {
11207 int nbits;
11208
11209 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11210 {
11211 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11212 for (nbits = 0; value != 0; value &= value - 1)
11213 nbits++;
11214 }
11215 else
11216 nbits = 7;
11217
11218 if (nbits < 3)
11219 nbits = 3;
11220 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11221 bit_cost = COSTS_N_INSNS (bit_cost);
11222 }
11223
11224 if (mode == DImode || !TARGET_HARD_MUL)
11225 *total = sparc_costs->int_mulX + bit_cost;
11226 else
11227 *total = sparc_costs->int_mul + bit_cost;
11228 }
11229 return false;
11230
11231 case ASHIFT:
11232 case ASHIFTRT:
11233 case LSHIFTRT:
11234 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11235 return false;
11236
11237 case DIV:
11238 case UDIV:
11239 case MOD:
11240 case UMOD:
11241 if (float_mode_p)
11242 {
11243 if (mode == DFmode)
11244 *total = sparc_costs->float_div_df;
11245 else
11246 *total = sparc_costs->float_div_sf;
11247 }
11248 else
11249 {
11250 if (mode == DImode)
11251 *total = sparc_costs->int_divX;
11252 else
11253 *total = sparc_costs->int_div;
11254 }
11255 return false;
11256
11257 case NEG:
11258 if (! float_mode_p)
11259 {
11260 *total = COSTS_N_INSNS (1);
11261 return false;
11262 }
11263 /* FALLTHRU */
11264
11265 case ABS:
11266 case FLOAT:
11267 case UNSIGNED_FLOAT:
11268 case FIX:
11269 case UNSIGNED_FIX:
11270 case FLOAT_EXTEND:
11271 case FLOAT_TRUNCATE:
11272 *total = sparc_costs->float_move;
11273 return false;
11274
11275 case SQRT:
11276 if (mode == DFmode)
11277 *total = sparc_costs->float_sqrt_df;
11278 else
11279 *total = sparc_costs->float_sqrt_sf;
11280 return false;
11281
11282 case COMPARE:
11283 if (float_mode_p)
11284 *total = sparc_costs->float_cmp;
11285 else
11286 *total = COSTS_N_INSNS (1);
11287 return false;
11288
11289 case IF_THEN_ELSE:
11290 if (float_mode_p)
11291 *total = sparc_costs->float_cmove;
11292 else
11293 *total = sparc_costs->int_cmove;
11294 return false;
11295
11296 case IOR:
11297 /* Handle the NAND vector patterns. */
11298 if (sparc_vector_mode_supported_p (mode)
11299 && GET_CODE (XEXP (x, 0)) == NOT
11300 && GET_CODE (XEXP (x, 1)) == NOT)
11301 {
11302 *total = COSTS_N_INSNS (1);
11303 return true;
11304 }
11305 else
11306 return false;
11307
11308 default:
11309 return false;
11310 }
11311 }
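
/* To illustrate the MULT costing above: a multiplication by the constant
   0x3f (six bits set) gives nbits == 6, so with an int_mul_bit_factor of,
   say, 2 the extra bit_cost is COSTS_N_INSNS ((6 - 3) / 2), i.e. one insn
   on top of the base int_mul or int_mulX cost.  The factor value is only
   illustrative here; the per-processor cost tables appear earlier in this
   file.  */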
11312
11313 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11314
11315 static inline bool
11316 general_or_i64_p (reg_class_t rclass)
11317 {
11318 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11319 }
11320
11321 /* Implement TARGET_REGISTER_MOVE_COST. */
11322
11323 static int
11324 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11325 reg_class_t from, reg_class_t to)
11326 {
11327 bool need_memory = false;
11328
11329 /* This helps postreload CSE to eliminate redundant comparisons. */
11330 if (from == NO_REGS || to == NO_REGS)
11331 return 100;
11332
11333 if (from == FPCC_REGS || to == FPCC_REGS)
11334 need_memory = true;
11335 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11336 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11337 {
11338 if (TARGET_VIS3)
11339 {
11340 int size = GET_MODE_SIZE (mode);
11341 if (size == 8 || size == 4)
11342 {
11343 if (! TARGET_ARCH32 || size == 4)
11344 return 4;
11345 else
11346 return 6;
11347 }
11348 }
11349 need_memory = true;
11350 }
11351
11352 if (need_memory)
11353 {
11354 if (sparc_cpu == PROCESSOR_ULTRASPARC
11355 || sparc_cpu == PROCESSOR_ULTRASPARC3
11356 || sparc_cpu == PROCESSOR_NIAGARA
11357 || sparc_cpu == PROCESSOR_NIAGARA2
11358 || sparc_cpu == PROCESSOR_NIAGARA3
11359 || sparc_cpu == PROCESSOR_NIAGARA4
11360 || sparc_cpu == PROCESSOR_NIAGARA7)
11361 return 12;
11362
11363 return 6;
11364 }
11365
11366 return 2;
11367 }
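
/* For example, with VIS3 an SImode move between the integer and FP
   register files costs 4 (a DImode move costs 6 in 32-bit mode), whereas
   without VIS3 such a move has to bounce through memory, costing 12 on
   the UltraSPARC/Niagara processors listed above and 6 elsewhere.  */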
11368
11369 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11370 This is achieved by means of a manual dynamic stack space allocation in
11371 the current frame. We make the assumption that SEQ doesn't contain any
11372 function calls, with the possible exception of calls to the GOT helper. */
11373
11374 static void
11375 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11376 {
11377 /* We must preserve the lowest 16 words for the register save area. */
11378 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11379 /* We really need only 2 words of fresh stack space. */
11380 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11381
11382 rtx slot
11383 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11384 SPARC_STACK_BIAS + offset));
11385
11386 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11387 emit_insn (gen_rtx_SET (slot, reg));
11388 if (reg2)
11389 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11390 reg2));
11391 emit_insn (seq);
11392 if (reg2)
11393 emit_insn (gen_rtx_SET (reg2,
11394 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11395 emit_insn (gen_rtx_SET (reg, slot));
11396 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11397 }
11398
11399 /* Output the assembler code for a thunk function. THUNK_DECL is the
11400 declaration for the thunk function itself, FUNCTION is the decl for
11401 the target function. DELTA is an immediate constant offset to be
11402 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11403 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11404
11405 static void
11406 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11407 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11408 tree function)
11409 {
11410 rtx this_rtx, funexp;
11411 rtx_insn *insn;
11412 unsigned int int_arg_first;
11413
11414 reload_completed = 1;
11415 epilogue_completed = 1;
11416
11417 emit_note (NOTE_INSN_PROLOGUE_END);
11418
11419 if (TARGET_FLAT)
11420 {
11421 sparc_leaf_function_p = 1;
11422
11423 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11424 }
11425 else if (flag_delayed_branch)
11426 {
11427 /* We will emit a regular sibcall below, so we need to instruct
11428 output_sibcall that we are in a leaf function. */
11429 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11430
11431 /* This will cause final.c to invoke leaf_renumber_regs so we
11432 must behave as if we were in a not-yet-leafified function. */
11433 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11434 }
11435 else
11436 {
11437 /* We will emit the sibcall manually below, so we will need to
11438 manually spill non-leaf registers. */
11439 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11440
11441 /* We really are in a leaf function. */
11442 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11443 }
11444
11445 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11446 returns a structure, the structure return pointer is there instead. */
11447 if (TARGET_ARCH64
11448 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11449 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11450 else
11451 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11452
11453 /* Add DELTA. When possible use a plain add, otherwise load it into
11454 a register first. */
11455 if (delta)
11456 {
11457 rtx delta_rtx = GEN_INT (delta);
11458
11459 if (! SPARC_SIMM13_P (delta))
11460 {
11461 rtx scratch = gen_rtx_REG (Pmode, 1);
11462 emit_move_insn (scratch, delta_rtx);
11463 delta_rtx = scratch;
11464 }
11465
11466 /* THIS_RTX += DELTA. */
11467 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11468 }
11469
11470 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11471 if (vcall_offset)
11472 {
11473 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11474 rtx scratch = gen_rtx_REG (Pmode, 1);
11475
11476 gcc_assert (vcall_offset < 0);
11477
11478 /* SCRATCH = *THIS_RTX. */
11479 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11480
11481 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11482 may not have any available scratch register at this point. */
11483 if (SPARC_SIMM13_P (vcall_offset))
11484 ;
11485 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11486 else if (! fixed_regs[5]
11487 /* The below sequence is made up of at least 2 insns,
11488 while the default method may need only one. */
11489 && vcall_offset < -8192)
11490 {
11491 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11492 emit_move_insn (scratch2, vcall_offset_rtx);
11493 vcall_offset_rtx = scratch2;
11494 }
11495 else
11496 {
11497 rtx increment = GEN_INT (-4096);
11498
11499 /* VCALL_OFFSET is a negative number whose typical range can be
11500 estimated as -32768..0 in 32-bit mode. In almost all cases
11501 it is therefore cheaper to emit multiple add insns than
11502 spilling and loading the constant into a register (at least
11503 6 insns). */
11504 while (! SPARC_SIMM13_P (vcall_offset))
11505 {
11506 emit_insn (gen_add2_insn (scratch, increment));
11507 vcall_offset += 4096;
11508 }
11509 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11510 }
11511
11512 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11513 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11514 gen_rtx_PLUS (Pmode,
11515 scratch,
11516 vcall_offset_rtx)));
11517
11518 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11519 emit_insn (gen_add2_insn (this_rtx, scratch));
11520 }
11521
11522 /* Generate a tail call to the target function. */
11523 if (! TREE_USED (function))
11524 {
11525 assemble_external (function);
11526 TREE_USED (function) = 1;
11527 }
11528 funexp = XEXP (DECL_RTL (function), 0);
11529
11530 if (flag_delayed_branch)
11531 {
11532 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11533 insn = emit_call_insn (gen_sibcall (funexp));
11534 SIBLING_CALL_P (insn) = 1;
11535 }
11536 else
11537 {
11538 /* The hoops we have to jump through in order to generate a sibcall
11539 without using delay slots... */
11540 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11541
11542 if (flag_pic)
11543 {
11544 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11545 start_sequence ();
11546 load_got_register (); /* clobbers %o7 */
11547 scratch = sparc_legitimize_pic_address (funexp, scratch);
11548 seq = get_insns ();
11549 end_sequence ();
11550 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11551 }
11552 else if (TARGET_ARCH32)
11553 {
11554 emit_insn (gen_rtx_SET (scratch,
11555 gen_rtx_HIGH (SImode, funexp)));
11556 emit_insn (gen_rtx_SET (scratch,
11557 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11558 }
11559 else /* TARGET_ARCH64 */
11560 {
11561 switch (sparc_cmodel)
11562 {
11563 case CM_MEDLOW:
11564 case CM_MEDMID:
11565 /* The destination can serve as a temporary. */
11566 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11567 break;
11568
11569 case CM_MEDANY:
11570 case CM_EMBMEDANY:
11571 /* The destination cannot serve as a temporary. */
11572 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11573 start_sequence ();
11574 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11575 seq = get_insns ();
11576 end_sequence ();
11577 emit_and_preserve (seq, spill_reg, 0);
11578 break;
11579
11580 default:
11581 gcc_unreachable ();
11582 }
11583 }
11584
11585 emit_jump_insn (gen_indirect_jump (scratch));
11586 }
11587
11588 emit_barrier ();
11589
11590 /* Run just enough of rest_of_compilation to get the insns emitted.
11591 There's not really enough bulk here to make other passes such as
11592 instruction scheduling worth while. Note that use_thunk calls
11593 assemble_start_function and assemble_end_function. */
11594 insn = get_insns ();
11595 shorten_branches (insn);
11596 final_start_function (insn, file, 1);
11597 final (insn, file, 1);
11598 final_end_function ();
11599
11600 reload_completed = 0;
11601 epilogue_completed = 0;
11602 }
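
/* To make the VCALL_OFFSET handling above concrete: a vcall offset of
   -8000 is outside the simm13 range but not below -8192, so the loop
   emits a single add of -4096 into the scratch register and the final
   load then uses the remaining displacement of -3904.  */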
11603
11604 /* Return true if sparc_output_mi_thunk would be able to output the
11605 assembler code for the thunk function specified by the arguments
11606 it is passed, and false otherwise. */
11607 static bool
11608 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11609 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11610 HOST_WIDE_INT vcall_offset,
11611 const_tree function ATTRIBUTE_UNUSED)
11612 {
11613 /* Bound the loop used in the default method above. */
11614 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11615 }
11616
11617 /* How to allocate a 'struct machine_function'. */
11618
11619 static struct machine_function *
11620 sparc_init_machine_status (void)
11621 {
11622 return ggc_cleared_alloc<machine_function> ();
11623 }
11624
11625 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11626 We need to emit DTP-relative relocations. */
11627
11628 static void
11629 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11630 {
11631 switch (size)
11632 {
11633 case 4:
11634 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11635 break;
11636 case 8:
11637 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11638 break;
11639 default:
11640 gcc_unreachable ();
11641 }
11642 output_addr_const (file, x);
11643 fputs (")", file);
11644 }
11645
11646 /* Do whatever processing is required at the end of a file. */
11647
11648 static void
11649 sparc_file_end (void)
11650 {
11651 /* If we need to emit the special GOT helper function, do so now. */
11652 if (got_helper_rtx)
11653 {
11654 const char *name = XSTR (got_helper_rtx, 0);
11655 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11656 #ifdef DWARF2_UNWIND_INFO
11657 bool do_cfi;
11658 #endif
11659
11660 if (USE_HIDDEN_LINKONCE)
11661 {
11662 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11663 get_identifier (name),
11664 build_function_type_list (void_type_node,
11665 NULL_TREE));
11666 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11667 NULL_TREE, void_type_node);
11668 TREE_PUBLIC (decl) = 1;
11669 TREE_STATIC (decl) = 1;
11670 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11671 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11672 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11673 resolve_unique_section (decl, 0, flag_function_sections);
11674 allocate_struct_function (decl, true);
11675 cfun->is_thunk = 1;
11676 current_function_decl = decl;
11677 init_varasm_status ();
11678 assemble_start_function (decl, name);
11679 }
11680 else
11681 {
11682 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11683 switch_to_section (text_section);
11684 if (align > 0)
11685 ASM_OUTPUT_ALIGN (asm_out_file, align);
11686 ASM_OUTPUT_LABEL (asm_out_file, name);
11687 }
11688
11689 #ifdef DWARF2_UNWIND_INFO
11690 do_cfi = dwarf2out_do_cfi_asm ();
11691 if (do_cfi)
11692 fprintf (asm_out_file, "\t.cfi_startproc\n");
11693 #endif
11694 if (flag_delayed_branch)
11695 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11696 reg_name, reg_name);
11697 else
11698 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11699 reg_name, reg_name);
11700 #ifdef DWARF2_UNWIND_INFO
11701 if (do_cfi)
11702 fprintf (asm_out_file, "\t.cfi_endproc\n");
11703 #endif
11704 }
11705
11706 if (NEED_INDICATE_EXEC_STACK)
11707 file_end_indicate_exec_stack ();
11708
11709 #ifdef TARGET_SOLARIS
11710 solaris_file_end ();
11711 #endif
11712 }
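
/* With delayed branches enabled, the helper body emitted above is just

     jmp  %o7+8
      add %o7, %l7, %l7

   assuming the usual %l7 GOT register: the caller reaches the helper via
   a call instruction that leaves the caller's PC in %o7, and the add
   folds that PC into the GOT register.  */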
11713
11714 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11715 /* Implement TARGET_MANGLE_TYPE. */
11716
11717 static const char *
11718 sparc_mangle_type (const_tree type)
11719 {
11720 if (!TARGET_64BIT
11721 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11722 && TARGET_LONG_DOUBLE_128)
11723 return "g";
11724
11725 /* For all other types, use normal C++ mangling. */
11726 return NULL;
11727 }
11728 #endif
11729
11730 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11731 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11732 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11733
11734 void
11735 sparc_emit_membar_for_model (enum memmodel model,
11736 int load_store, int before_after)
11737 {
11738 /* Bits for the MEMBAR mmask field. */
11739 const int LoadLoad = 1;
11740 const int StoreLoad = 2;
11741 const int LoadStore = 4;
11742 const int StoreStore = 8;
11743
11744 int mm = 0, implied = 0;
11745
11746 switch (sparc_memory_model)
11747 {
11748 case SMM_SC:
11749 /* Sequential Consistency. All memory transactions are immediately
11750 visible in sequential execution order. No barriers needed. */
11751 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11752 break;
11753
11754 case SMM_TSO:
11755 /* Total Store Ordering: all memory transactions with store semantics
11756 are followed by an implied StoreStore. */
11757 implied |= StoreStore;
11758
11759 /* If we're not looking for a raw barrier (before+after), then atomic
11760 operations get the benefit of being both load and store. */
11761 if (load_store == 3 && before_after == 1)
11762 implied |= StoreLoad;
11763 /* FALLTHRU */
11764
11765 case SMM_PSO:
11766 /* Partial Store Ordering: all memory transactions with load semantics
11767 are followed by an implied LoadLoad | LoadStore. */
11768 implied |= LoadLoad | LoadStore;
11769
11770 /* If we're not looking for a raw barrier (before+after), then atomic
11771 operations get the benefit of being both load and store. */
11772 if (load_store == 3 && before_after == 2)
11773 implied |= StoreLoad | StoreStore;
11774 /* FALLTHRU */
11775
11776 case SMM_RMO:
11777 /* Relaxed Memory Ordering: no implicit bits. */
11778 break;
11779
11780 default:
11781 gcc_unreachable ();
11782 }
11783
11784 if (before_after & 1)
11785 {
11786 if (is_mm_release (model) || is_mm_acq_rel (model)
11787 || is_mm_seq_cst (model))
11788 {
11789 if (load_store & 1)
11790 mm |= LoadLoad | StoreLoad;
11791 if (load_store & 2)
11792 mm |= LoadStore | StoreStore;
11793 }
11794 }
11795 if (before_after & 2)
11796 {
11797 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11798 || is_mm_seq_cst (model))
11799 {
11800 if (load_store & 1)
11801 mm |= LoadLoad | LoadStore;
11802 if (load_store & 2)
11803 mm |= StoreLoad | StoreStore;
11804 }
11805 }
11806
11807 /* Remove the bits implied by the system memory model. */
11808 mm &= ~implied;
11809
11810 /* For raw barriers (before+after), always emit a barrier.
11811 This will become a compile-time barrier if needed. */
11812 if (mm || before_after == 3)
11813 emit_insn (gen_membar (GEN_INT (mm)));
11814 }
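
/* For example, a full SEQ_CST fence (load_store == 3, before_after == 3)
   under the TSO memory model asks for all four ordering bits, but TSO
   already implies LoadLoad, LoadStore and StoreStore, so only StoreLoad
   survives the masking and a single "membar #StoreLoad" is emitted.  */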
11815
11816 /* Expand code to perform an 8 or 16-bit compare and swap by doing a 32-bit
11817 compare and swap on the word containing the byte or half-word. */
11818
11819 static void
11820 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11821 rtx oldval, rtx newval)
11822 {
11823 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11824 rtx addr = gen_reg_rtx (Pmode);
11825 rtx off = gen_reg_rtx (SImode);
11826 rtx oldv = gen_reg_rtx (SImode);
11827 rtx newv = gen_reg_rtx (SImode);
11828 rtx oldvalue = gen_reg_rtx (SImode);
11829 rtx newvalue = gen_reg_rtx (SImode);
11830 rtx res = gen_reg_rtx (SImode);
11831 rtx resv = gen_reg_rtx (SImode);
11832 rtx memsi, val, mask, cc;
11833
11834 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11835
11836 if (Pmode != SImode)
11837 addr1 = gen_lowpart (SImode, addr1);
11838 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11839
11840 memsi = gen_rtx_MEM (SImode, addr);
11841 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11842 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11843
11844 val = copy_to_reg (memsi);
11845
11846 emit_insn (gen_rtx_SET (off,
11847 gen_rtx_XOR (SImode, off,
11848 GEN_INT (GET_MODE (mem) == QImode
11849 ? 3 : 2))));
11850
11851 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11852
11853 if (GET_MODE (mem) == QImode)
11854 mask = force_reg (SImode, GEN_INT (0xff));
11855 else
11856 mask = force_reg (SImode, GEN_INT (0xffff));
11857
11858 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11859
11860 emit_insn (gen_rtx_SET (val,
11861 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11862 val)));
11863
11864 oldval = gen_lowpart (SImode, oldval);
11865 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11866
11867 newval = gen_lowpart_common (SImode, newval);
11868 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11869
11870 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11871
11872 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11873
11874 rtx_code_label *end_label = gen_label_rtx ();
11875 rtx_code_label *loop_label = gen_label_rtx ();
11876 emit_label (loop_label);
11877
11878 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11879
11880 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11881
11882 emit_move_insn (bool_result, const1_rtx);
11883
11884 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11885
11886 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11887
11888 emit_insn (gen_rtx_SET (resv,
11889 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11890 res)));
11891
11892 emit_move_insn (bool_result, const0_rtx);
11893
11894 cc = gen_compare_reg_1 (NE, resv, val);
11895 emit_insn (gen_rtx_SET (val, resv));
11896
11897 /* Use cbranchcc4 to separate the compare and branch! */
11898 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11899 cc, const0_rtx, loop_label));
11900
11901 emit_label (end_label);
11902
11903 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11904
11905 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11906
11907 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11908 }
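
/* A rough C-level sketch of the sequence emitted above, with hypothetical
   names (cas32 stands for the 32-bit compare-and-swap insn):

     old_sub = (oldval << shift) & mask;
     new_sub = (newval << shift) & mask;
     word = *wordp & ~mask;
     for (;;)
       {
         expected = word | old_sub;
         desired  = word | new_sub;
         success = 1;
         cur = cas32 (wordp, expected, desired);
         if (cur == expected)
           break;
         success = 0;
         if ((cur & ~mask) == word)
           break;
         word = cur & ~mask;
       }
     result = (cur & mask) >> shift;

   If the compare-and-swap fails only because bytes outside the subword
   changed, the loop refreshes those bytes and retries; if the subword
   itself mismatched, it exits with the boolean result cleared.  */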
11909
11910 /* Expand code to perform a compare-and-swap. */
11911
11912 void
11913 sparc_expand_compare_and_swap (rtx operands[])
11914 {
11915 rtx bval, retval, mem, oldval, newval;
11916 machine_mode mode;
11917 enum memmodel model;
11918
11919 bval = operands[0];
11920 retval = operands[1];
11921 mem = operands[2];
11922 oldval = operands[3];
11923 newval = operands[4];
11924 model = (enum memmodel) INTVAL (operands[6]);
11925 mode = GET_MODE (mem);
11926
11927 sparc_emit_membar_for_model (model, 3, 1);
11928
11929 if (reg_overlap_mentioned_p (retval, oldval))
11930 oldval = copy_to_reg (oldval);
11931
11932 if (mode == QImode || mode == HImode)
11933 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11934 else
11935 {
11936 rtx (*gen) (rtx, rtx, rtx, rtx);
11937 rtx x;
11938
11939 if (mode == SImode)
11940 gen = gen_atomic_compare_and_swapsi_1;
11941 else
11942 gen = gen_atomic_compare_and_swapdi_1;
11943 emit_insn (gen (retval, mem, oldval, newval));
11944
11945 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11946 if (x != bval)
11947 convert_move (bval, x, 1);
11948 }
11949
11950 sparc_emit_membar_for_model (model, 3, 2);
11951 }
11952
11953 void
11954 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11955 {
11956 rtx t_1, t_2, t_3;
11957
11958 sel = gen_lowpart (DImode, sel);
11959 switch (vmode)
11960 {
11961 case V2SImode:
11962 /* inp = xxxxxxxAxxxxxxxB */
11963 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11964 NULL_RTX, 1, OPTAB_DIRECT);
11965 /* t_1 = ....xxxxxxxAxxx. */
11966 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11967 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11968 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11969 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11970 /* sel = .......B */
11971 /* t_1 = ...A.... */
11972 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11973 /* sel = ...A...B */
11974 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11975 /* sel = AAAABBBB * 4 */
11976 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11977 /* sel = { A*4, A*4+1, A*4+2, ... } */
11978 break;
11979
11980 case V4HImode:
11981 /* inp = xxxAxxxBxxxCxxxD */
11982 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11983 NULL_RTX, 1, OPTAB_DIRECT);
11984 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11985 NULL_RTX, 1, OPTAB_DIRECT);
11986 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11987 NULL_RTX, 1, OPTAB_DIRECT);
11988 /* t_1 = ..xxxAxxxBxxxCxx */
11989 /* t_2 = ....xxxAxxxBxxxC */
11990 /* t_3 = ......xxxAxxxBxx */
11991 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11992 GEN_INT (0x07),
11993 NULL_RTX, 1, OPTAB_DIRECT);
11994 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11995 GEN_INT (0x0700),
11996 NULL_RTX, 1, OPTAB_DIRECT);
11997 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11998 GEN_INT (0x070000),
11999 NULL_RTX, 1, OPTAB_DIRECT);
12000 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12001 GEN_INT (0x07000000),
12002 NULL_RTX, 1, OPTAB_DIRECT);
12003 /* sel = .......D */
12004 /* t_1 = .....C.. */
12005 /* t_2 = ...B.... */
12006 /* t_3 = .A...... */
12007 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12008 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12009 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12010 /* sel = .A.B.C.D */
12011 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12012 /* sel = AABBCCDD * 2 */
12013 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12014 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12015 break;
12016
12017 case V8QImode:
12018 /* input = xAxBxCxDxExFxGxH */
12019 sel = expand_simple_binop (DImode, AND, sel,
12020 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12021 | 0x0f0f0f0f),
12022 NULL_RTX, 1, OPTAB_DIRECT);
12023 /* sel = .A.B.C.D.E.F.G.H */
12024 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12025 NULL_RTX, 1, OPTAB_DIRECT);
12026 /* t_1 = ..A.B.C.D.E.F.G. */
12027 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12028 NULL_RTX, 1, OPTAB_DIRECT);
12029 /* sel = .AABBCCDDEEFFGGH */
12030 sel = expand_simple_binop (DImode, AND, sel,
12031 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12032 | 0xff00ff),
12033 NULL_RTX, 1, OPTAB_DIRECT);
12034 /* sel = ..AB..CD..EF..GH */
12035 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12036 NULL_RTX, 1, OPTAB_DIRECT);
12037 /* t_1 = ....AB..CD..EF.. */
12038 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12039 NULL_RTX, 1, OPTAB_DIRECT);
12040 /* sel = ..ABABCDCDEFEFGH */
12041 sel = expand_simple_binop (DImode, AND, sel,
12042 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12043 NULL_RTX, 1, OPTAB_DIRECT);
12044 /* sel = ....ABCD....EFGH */
12045 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12046 NULL_RTX, 1, OPTAB_DIRECT);
12047 /* t_1 = ........ABCD.... */
12048 sel = gen_lowpart (SImode, sel);
12049 t_1 = gen_lowpart (SImode, t_1);
12050 break;
12051
12052 default:
12053 gcc_unreachable ();
12054 }
12055
12056 /* Always perform the final addition/merge within the bmask insn. */
12057 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
12058 }
12059
12060 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12061
12062 static bool
12063 sparc_frame_pointer_required (void)
12064 {
12065 /* If the stack pointer is dynamically modified in the function, it cannot
12066 serve as the frame pointer. */
12067 if (cfun->calls_alloca)
12068 return true;
12069
12070 /* If the function receives nonlocal gotos, it needs to save the frame
12071 pointer in the nonlocal_goto_save_area object. */
12072 if (cfun->has_nonlocal_label)
12073 return true;
12074
12075 /* In flat mode, that's it. */
12076 if (TARGET_FLAT)
12077 return false;
12078
12079 /* Otherwise, the frame pointer is required if the function isn't leaf. */
12080 return !(crtl->is_leaf && only_leaf_regs_used ());
12081 }
12082
12083 /* The way this is structured, we can't eliminate SFP in favor of SP
12084 if the frame pointer is required: we want to use the SFP->HFP elimination
12085 in that case. But the test in update_eliminables doesn't know we are
12086 assuming below that we only do the former elimination. */
12087
12088 static bool
12089 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12090 {
12091 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12092 }
12093
12094 /* Return the hard frame pointer directly to bypass the stack bias. */
12095
12096 static rtx
12097 sparc_builtin_setjmp_frame_value (void)
12098 {
12099 return hard_frame_pointer_rtx;
12100 }
12101
12102 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12103 they won't be allocated. */
12104
12105 static void
12106 sparc_conditional_register_usage (void)
12107 {
12108 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12109 {
12110 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12111 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12112 }
12113 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12114 then honor it. */
12115 if (TARGET_ARCH32 && fixed_regs[5])
12116 fixed_regs[5] = 1;
12117 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12118 fixed_regs[5] = 0;
12119 if (! TARGET_V9)
12120 {
12121 int regno;
12122 for (regno = SPARC_FIRST_V9_FP_REG;
12123 regno <= SPARC_LAST_V9_FP_REG;
12124 regno++)
12125 fixed_regs[regno] = 1;
12126 /* %fcc0 is used by v8 and v9. */
12127 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12128 regno <= SPARC_LAST_V9_FCC_REG;
12129 regno++)
12130 fixed_regs[regno] = 1;
12131 }
12132 if (! TARGET_FPU)
12133 {
12134 int regno;
12135 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12136 fixed_regs[regno] = 1;
12137 }
12138 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12139 then honor it. Likewise with g3 and g4. */
12140 if (fixed_regs[2] == 2)
12141 fixed_regs[2] = ! TARGET_APP_REGS;
12142 if (fixed_regs[3] == 2)
12143 fixed_regs[3] = ! TARGET_APP_REGS;
12144 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12145 fixed_regs[4] = ! TARGET_APP_REGS;
12146 else if (TARGET_CM_EMBMEDANY)
12147 fixed_regs[4] = 1;
12148 else if (fixed_regs[4] == 2)
12149 fixed_regs[4] = 0;
12150 if (TARGET_FLAT)
12151 {
12152 int regno;
12153 /* Disable leaf functions. */
12154 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12155 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12156 leaf_reg_remap [regno] = regno;
12157 }
12158 if (TARGET_VIS)
12159 global_regs[SPARC_GSR_REG] = 1;
12160 }
12161
12162 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12163
12164 - We can't load constants into FP registers.
12165 - We can't load FP constants into integer registers when soft-float,
12166 because there is no soft-float pattern with a r/F constraint.
12167 - We can't load FP constants into integer registers for TFmode unless
12168 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12169 - Try and reload integer constants (symbolic or otherwise) back into
12170 registers directly, rather than having them dumped to memory. */
12171
12172 static reg_class_t
12173 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12174 {
12175 machine_mode mode = GET_MODE (x);
12176 if (CONSTANT_P (x))
12177 {
12178 if (FP_REG_CLASS_P (rclass)
12179 || rclass == GENERAL_OR_FP_REGS
12180 || rclass == GENERAL_OR_EXTRA_FP_REGS
12181 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12182 || (mode == TFmode && ! const_zero_operand (x, mode)))
12183 return NO_REGS;
12184
12185 if (GET_MODE_CLASS (mode) == MODE_INT)
12186 return GENERAL_REGS;
12187
12188 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12189 {
12190 if (! FP_REG_CLASS_P (rclass)
12191 || !(const_zero_operand (x, mode)
12192 || const_all_ones_operand (x, mode)))
12193 return NO_REGS;
12194 }
12195 }
12196
12197 if (TARGET_VIS3
12198 && ! TARGET_ARCH64
12199 && (rclass == EXTRA_FP_REGS
12200 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12201 {
12202 int regno = true_regnum (x);
12203
12204 if (SPARC_INT_REG_P (regno))
12205 return (rclass == EXTRA_FP_REGS
12206 ? FP_REGS : GENERAL_OR_FP_REGS);
12207 }
12208
12209 return rclass;
12210 }
12211
12212 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12213 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12214
12215 const char *
12216 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12217 {
12218 char mulstr[32];
12219
12220 gcc_assert (! TARGET_ARCH64);
12221
12222 if (sparc_check_64 (operands[1], insn) <= 0)
12223 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12224 if (which_alternative == 1)
12225 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12226 if (GET_CODE (operands[2]) == CONST_INT)
12227 {
12228 if (which_alternative == 1)
12229 {
12230 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12231 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12232 output_asm_insn (mulstr, operands);
12233 return "srlx\t%L0, 32, %H0";
12234 }
12235 else
12236 {
12237 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12238 output_asm_insn ("or\t%L1, %3, %3", operands);
12239 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12240 output_asm_insn (mulstr, operands);
12241 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12242 return "mov\t%3, %L0";
12243 }
12244 }
12245 else if (rtx_equal_p (operands[1], operands[2]))
12246 {
12247 if (which_alternative == 1)
12248 {
12249 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12250 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12251 output_asm_insn (mulstr, operands);
12252 return "srlx\t%L0, 32, %H0";
12253 }
12254 else
12255 {
12256 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12257 output_asm_insn ("or\t%L1, %3, %3", operands);
12258 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12259 output_asm_insn (mulstr, operands);
12260 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12261 return "mov\t%3, %L0";
12262 }
12263 }
12264 if (sparc_check_64 (operands[2], insn) <= 0)
12265 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12266 if (which_alternative == 1)
12267 {
12268 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12269 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12270 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12271 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12272 output_asm_insn (mulstr, operands);
12273 return "srlx\t%L0, 32, %H0";
12274 }
12275 else
12276 {
12277 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12278 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12279 output_asm_insn ("or\t%L1, %3, %3", operands);
12280 output_asm_insn ("or\t%L2, %4, %4", operands);
12281 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12282 output_asm_insn (mulstr, operands);
12283 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12284 return "mov\t%3, %L0";
12285 }
12286 }
12287
12288 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12289 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12290 and INNER_MODE are the modes describing TARGET. */
12291
12292 static void
12293 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12294 machine_mode inner_mode)
12295 {
12296 rtx t1, final_insn, sel;
12297 int bmask;
12298
12299 t1 = gen_reg_rtx (mode);
12300
12301 elt = convert_modes (SImode, inner_mode, elt, true);
12302 emit_move_insn (gen_lowpart(SImode, t1), elt);
12303
12304 switch (mode)
12305 {
12306 case V2SImode:
12307 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12308 bmask = 0x45674567;
12309 break;
12310 case V4HImode:
12311 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12312 bmask = 0x67676767;
12313 break;
12314 case V8QImode:
12315 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12316 bmask = 0x77777777;
12317 break;
12318 default:
12319 gcc_unreachable ();
12320 }
12321
12322 sel = force_reg (SImode, GEN_INT (bmask));
12323 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12324 emit_insn (final_insn);
12325 }
12326
12327 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12328 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12329
12330 static void
12331 vector_init_fpmerge (rtx target, rtx elt)
12332 {
12333 rtx t1, t2, t2_low, t3, t3_low;
12334
12335 t1 = gen_reg_rtx (V4QImode);
12336 elt = convert_modes (SImode, QImode, elt, true);
12337 emit_move_insn (gen_lowpart (SImode, t1), elt);
12338
12339 t2 = gen_reg_rtx (V8QImode);
12340 t2_low = gen_lowpart (V4QImode, t2);
12341 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12342
12343 t3 = gen_reg_rtx (V8QImode);
12344 t3_low = gen_lowpart (V4QImode, t3);
12345 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12346
12347 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12348 }
12349
12350 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12351 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12352
12353 static void
12354 vector_init_faligndata (rtx target, rtx elt)
12355 {
12356 rtx t1 = gen_reg_rtx (V4HImode);
12357 int i;
12358
12359 elt = convert_modes (SImode, HImode, elt, true);
12360 emit_move_insn (gen_lowpart (SImode, t1), elt);
12361
12362 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12363 force_reg (SImode, GEN_INT (6)),
12364 const0_rtx));
12365
12366 for (i = 0; i < 4; i++)
12367 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12368 }
12369
12370 /* Emit code to initialize TARGET to values for individual fields VALS. */
12371
12372 void
12373 sparc_expand_vector_init (rtx target, rtx vals)
12374 {
12375 const machine_mode mode = GET_MODE (target);
12376 const machine_mode inner_mode = GET_MODE_INNER (mode);
12377 const int n_elts = GET_MODE_NUNITS (mode);
12378 int i, n_var = 0;
12379 bool all_same;
12380 rtx mem;
12381
12382 all_same = true;
12383 for (i = 0; i < n_elts; i++)
12384 {
12385 rtx x = XVECEXP (vals, 0, i);
12386 if (!CONSTANT_P (x))
12387 n_var++;
12388
12389 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12390 all_same = false;
12391 }
12392
12393 if (n_var == 0)
12394 {
12395 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12396 return;
12397 }
12398
12399 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12400 {
12401 if (GET_MODE_SIZE (inner_mode) == 4)
12402 {
12403 emit_move_insn (gen_lowpart (SImode, target),
12404 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12405 return;
12406 }
12407 else if (GET_MODE_SIZE (inner_mode) == 8)
12408 {
12409 emit_move_insn (gen_lowpart (DImode, target),
12410 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12411 return;
12412 }
12413 }
12414 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12415 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12416 {
12417 emit_move_insn (gen_highpart (word_mode, target),
12418 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12419 emit_move_insn (gen_lowpart (word_mode, target),
12420 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12421 return;
12422 }
12423
12424 if (all_same && GET_MODE_SIZE (mode) == 8)
12425 {
12426 if (TARGET_VIS2)
12427 {
12428 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12429 return;
12430 }
12431 if (mode == V8QImode)
12432 {
12433 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12434 return;
12435 }
12436 if (mode == V4HImode)
12437 {
12438 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12439 return;
12440 }
12441 }
12442
12443 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12444 for (i = 0; i < n_elts; i++)
12445 emit_move_insn (adjust_address_nv (mem, inner_mode,
12446 i * GET_MODE_SIZE (inner_mode)),
12447 XVECEXP (vals, 0, i));
12448 emit_move_insn (target, mem);
12449 }
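
/* For instance, splatting a single non-constant element into a V4HImode
   vector goes through vector_init_bshuffle when VIS2 is available and
   through vector_init_faligndata otherwise, while vectors with differing
   non-constant elements fall back to the stack temporary above.  */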
12450
12451 /* Implement TARGET_SECONDARY_RELOAD. */
12452
12453 static reg_class_t
12454 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12455 machine_mode mode, secondary_reload_info *sri)
12456 {
12457 enum reg_class rclass = (enum reg_class) rclass_i;
12458
12459 sri->icode = CODE_FOR_nothing;
12460 sri->extra_cost = 0;
12461
12462 /* We need a temporary when loading/storing a HImode/QImode value
12463 between memory and the FPU registers. This can happen when combine puts
12464 a paradoxical subreg in a float/fix conversion insn. */
12465 if (FP_REG_CLASS_P (rclass)
12466 && (mode == HImode || mode == QImode)
12467 && (GET_CODE (x) == MEM
12468 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12469 && true_regnum (x) == -1)))
12470 return GENERAL_REGS;
12471
12472 /* On 32-bit we need a temporary when loading/storing a DFmode value
12473 between unaligned memory and the upper FPU registers. */
12474 if (TARGET_ARCH32
12475 && rclass == EXTRA_FP_REGS
12476 && mode == DFmode
12477 && GET_CODE (x) == MEM
12478 && ! mem_min_alignment (x, 8))
12479 return FP_REGS;
12480
12481 if (((TARGET_CM_MEDANY
12482 && symbolic_operand (x, mode))
12483 || (TARGET_CM_EMBMEDANY
12484 && text_segment_operand (x, mode)))
12485 && ! flag_pic)
12486 {
12487 if (in_p)
12488 sri->icode = direct_optab_handler (reload_in_optab, mode);
12489 else
12490 sri->icode = direct_optab_handler (reload_out_optab, mode);
12491 return NO_REGS;
12492 }
12493
12494 if (TARGET_VIS3 && TARGET_ARCH32)
12495 {
12496 int regno = true_regnum (x);
12497
12498 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12499 to move 8-byte values in 4-byte pieces. This only works via
12500 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12501 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12502 an FP_REGS intermediate move. */
12503 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12504 || ((general_or_i64_p (rclass)
12505 || rclass == GENERAL_OR_FP_REGS)
12506 && SPARC_FP_REG_P (regno)))
12507 {
12508 sri->extra_cost = 2;
12509 return FP_REGS;
12510 }
12511 }
12512
12513 return NO_REGS;
12514 }
12515
12516 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12517 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12518
12519 bool
12520 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12521 {
12522 enum rtx_code rc = GET_CODE (operands[1]);
12523 machine_mode cmp_mode;
12524 rtx cc_reg, dst, cmp;
12525
12526 cmp = operands[1];
12527 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12528 return false;
12529
12530 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12531 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12532
12533 cmp_mode = GET_MODE (XEXP (cmp, 0));
12534 rc = GET_CODE (cmp);
12535
12536 dst = operands[0];
12537 if (! rtx_equal_p (operands[2], dst)
12538 && ! rtx_equal_p (operands[3], dst))
12539 {
12540 if (reg_overlap_mentioned_p (dst, cmp))
12541 dst = gen_reg_rtx (mode);
12542
12543 emit_move_insn (dst, operands[3]);
12544 }
12545 else if (operands[2] == dst)
12546 {
12547 operands[2] = operands[3];
12548
12549 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12550 rc = reverse_condition_maybe_unordered (rc);
12551 else
12552 rc = reverse_condition (rc);
12553 }
12554
12555 if (XEXP (cmp, 1) == const0_rtx
12556 && GET_CODE (XEXP (cmp, 0)) == REG
12557 && cmp_mode == DImode
12558 && v9_regcmp_p (rc))
12559 cc_reg = XEXP (cmp, 0);
12560 else
12561 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12562
12563 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12564
12565 emit_insn (gen_rtx_SET (dst,
12566 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12567
12568 if (dst != operands[0])
12569 emit_move_insn (operands[0], dst);
12570
12571 return true;
12572 }
12573
12574 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12575 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12576 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12577 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12578 code to be used for the condition mask. */
12579
12580 void
12581 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12582 {
12583 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12584 enum rtx_code code = GET_CODE (operands[3]);
12585
12586 mask = gen_reg_rtx (Pmode);
12587 cop0 = operands[4];
12588 cop1 = operands[5];
12589 if (code == LT || code == GE)
12590 {
12591 rtx t;
12592
12593 code = swap_condition (code);
12594 t = cop0; cop0 = cop1; cop1 = t;
12595 }
12596
12597 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12598
12599 fcmp = gen_rtx_UNSPEC (Pmode,
12600 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12601 fcode);
12602
12603 cmask = gen_rtx_UNSPEC (DImode,
12604 gen_rtvec (2, mask, gsr),
12605 ccode);
12606
12607 bshuf = gen_rtx_UNSPEC (mode,
12608 gen_rtvec (3, operands[1], operands[2], gsr),
12609 UNSPEC_BSHUFFLE);
12610
12611 emit_insn (gen_rtx_SET (mask, fcmp));
12612 emit_insn (gen_rtx_SET (gsr, cmask));
12613
12614 emit_insn (gen_rtx_SET (operands[0], bshuf));
12615 }
12616
12617 /* On sparc, any mode which naturally allocates into the float
12618 registers should return 4 here. */
12619
12620 unsigned int
12621 sparc_regmode_natural_size (machine_mode mode)
12622 {
12623 int size = UNITS_PER_WORD;
12624
12625 if (TARGET_ARCH64)
12626 {
12627 enum mode_class mclass = GET_MODE_CLASS (mode);
12628
12629 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12630 size = 4;
12631 }
12632
12633 return size;
12634 }
12635
12636 /* Return TRUE if it is a good idea to tie two pseudo registers
12637 when one has mode MODE1 and one has mode MODE2.
12638 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12639 for any hard reg, then this must be FALSE for correct output.
12640
12641 For V9 we have to deal with the fact that only the lower 32 floating
12642 point registers are 32-bit addressable. */
12643
12644 bool
12645 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12646 {
12647 enum mode_class mclass1, mclass2;
12648 unsigned short size1, size2;
12649
12650 if (mode1 == mode2)
12651 return true;
12652
12653 mclass1 = GET_MODE_CLASS (mode1);
12654 mclass2 = GET_MODE_CLASS (mode2);
12655 if (mclass1 != mclass2)
12656 return false;
12657
12658 if (! TARGET_V9)
12659 return true;
12660
12661 /* Classes are the same and we are V9 so we have to deal with upper
12662 vs. lower floating point registers. If one of the modes is a
12663 4-byte mode, and the other is not, we have to mark them as not
12664 tieable because only the lower 32 floating point register are
12665 addressable 32-bits at a time.
12666
12667 We can't just test explicitly for SFmode, otherwise we won't
12668 cover the vector mode cases properly. */
12669
12670 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12671 return true;
12672
12673 size1 = GET_MODE_SIZE (mode1);
12674 size2 = GET_MODE_SIZE (mode2);
12675 if ((size1 > 4 && size2 == 4)
12676 || (size2 > 4 && size1 == 4))
12677 return false;
12678
12679 return true;
12680 }
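
/* So, for example, SFmode and DFmode are not tieable on V9, since only
   the lower 32 floating-point registers can hold 4-byte values.  */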
12681
12682 /* Implement TARGET_CSTORE_MODE. */
12683
12684 static machine_mode
12685 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12686 {
12687 return (TARGET_ARCH64 ? DImode : SImode);
12688 }
12689
12690 /* Return the compound expression made of T1 and T2. */
12691
12692 static inline tree
12693 compound_expr (tree t1, tree t2)
12694 {
12695 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12696 }
12697
12698 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12699
12700 static void
12701 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12702 {
12703 if (!TARGET_FPU)
12704 return;
12705
12706 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12707 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12708
12709 /* We generate the equivalent of feholdexcept (&fenv_var):
12710
12711 unsigned int fenv_var;
12712 __builtin_store_fsr (&fenv_var);
12713
12714 unsigned int tmp1_var;
12715 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12716
12717 __builtin_load_fsr (&tmp1_var); */
12718
12719 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12720 TREE_ADDRESSABLE (fenv_var) = 1;
12721 tree fenv_addr = build_fold_addr_expr (fenv_var);
12722 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12723 tree hold_stfsr
12724 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12725 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12726
12727 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12728 TREE_ADDRESSABLE (tmp1_var) = 1;
12729 tree masked_fenv_var
12730 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12731 build_int_cst (unsigned_type_node,
12732 ~(accrued_exception_mask | trap_enable_mask)));
12733 tree hold_mask
12734 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12735 NULL_TREE, NULL_TREE);
12736
12737 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12738 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12739 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12740
12741 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12742
12743 /* We reload the value of tmp1_var to clear the exceptions:
12744
12745 __builtin_load_fsr (&tmp1_var); */
12746
12747 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12748
12749 /* We generate the equivalent of feupdateenv (&fenv_var):
12750
12751 unsigned int tmp2_var;
12752 __builtin_store_fsr (&tmp2_var);
12753
12754 __builtin_load_fsr (&fenv_var);
12755
12756 if (SPARC_LOW_FE_EXCEPT_VALUES)
12757 tmp2_var >>= 5;
12758 __atomic_feraiseexcept ((int) tmp2_var); */
12759
12760 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12761 TREE_ADDRESSABLE (tmp2_var) = 1;
12762 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12763 tree update_stfsr
12764 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12765 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12766
12767 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12768
12769 tree atomic_feraiseexcept
12770 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12771 tree update_call
12772 = build_call_expr (atomic_feraiseexcept, 1,
12773 fold_convert (integer_type_node, tmp2_var));
12774
12775 if (SPARC_LOW_FE_EXCEPT_VALUES)
12776 {
12777 tree shifted_tmp2_var
12778 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12779 build_int_cst (unsigned_type_node, 5));
12780 tree update_shift
12781 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12782 update_call = compound_expr (update_shift, update_call);
12783 }
12784
12785 *update
12786 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12787 }
12788
12789 #include "gt-sparc.h"