/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
     A value of zero indicates that the multiply cost is fixed
     and not variable.  */
  const int int_mul_bit_factor;
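
  /* As a hypothetical worked example of the formula above: with
     int_mul = COSTS_N_INSNS (4) and int_mul_bit_factor = 2 (the values
     used in the ultrasparc entry below), a multiply whose rs1 operand
     has its highest set bit at position 11 would cost
     int_mul + (11 - 3) / 2, i.e. 4 units on top of the base multiply
     cost.  */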

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
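
/* One table of this shape is defined per supported processor below;
   sparc_option_override points sparc_costs at the entry matching
   sparc_cpu, and the cost hooks (sparc_rtx_costs, presumably) then
   consult the selected table.  */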

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out if
   somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
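
/* Informally: in a function that gets the leaf treatment, the incoming
   argument registers %i0-%i5 (hard regs 24-29) are rewritten as the
   caller's outgoing registers %o0-%o5 (hard regs 8-13) and %i7 as %o7
   (hard reg 15), because no register window is allocated; entries of -1
   are registers that must not appear in such a function.  */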

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
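
/* For instance, given (zero_extend:DI (mem:SI (reg:DI %o0))) this
   returns the inner MEM, while for a plain (reg:SI %o1) it returns
   NULL_RTX.  */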

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is an md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
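
      /* As a hypothetical illustration of sequence A:

	   stb  %g1, [%o0]	! 1. byte store
	   add  %o1, %o2, %o3	! 2. not a load or store
	   st   %o4, [%o5]	! 3. any store

	 the code below would break the sequence up by emitting a nop
	 right after the first store.  */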
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && MEM_P (SET_DEST (set)))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* Skip empty assembly statements.  */
	      if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
		  || (USEFUL_INSN_P (after)
		      && (asm_noperands (PATTERN (after)) >= 0)
		      && !strcmp (decode_asm_operands (PATTERN (after),
						       NULL, NULL, NULL,
						       NULL, NULL), "")))
		after = next_active_insn (after);
	      if (!after)
		break;

	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if ((set = single_set (after)) != NULL_RTX
		      && MEM_P (SET_DEST (set)))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if (((set = single_set (after)) != NULL_RTX)
		      && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
		    break;

		  after = next_active_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && ((set = single_set (after)) != NULL_RTX)
		  && MEM_P (SET_DEST (set)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && MEM_P (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		     ld [address], %fx+1
		     FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		     ld [address], %fx+1
		     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		 ld [address], %fx+1
		 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA|MASK_FSMULD, 0 },
    { "cypress", MASK_ISA|MASK_FSMULD, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
    { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, using them is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }
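
  /* Thus, as a hypothetical worked example, -mdebug=all,!options would
     first set all the MASK_DEBUG_ALL bits and then clear
     MASK_DEBUG_OPTIONS again.  */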

  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may be later disabled by the CPU (explicitly or not).  */
  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
    target_flags |= MASK_FSMULD;

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32-bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  /* Set the default CPU if no -mcpu option was specified.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  /* Set the default CPU if no -mtune option was specified.  */
  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_SPARC6
		   & ~(MASK_VIS4B)
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   & ~(target_flags_explicit & MASK_FEATURES)
		   );
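
  /* As an illustration of the computation above: -mcpu=niagara4 clears
     the MASK_ISA bits and then sets
     MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND, except for
     features the user explicitly enabled or disabled on the command
     line and features the assembler cannot support.  */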
1595
1596 /* -mvis2 implies -mvis. */
1597 if (TARGET_VIS2)
1598 target_flags |= MASK_VIS;
1599
1600 /* -mvis3 implies -mvis2 and -mvis. */
1601 if (TARGET_VIS3)
1602 target_flags |= MASK_VIS2 | MASK_VIS;
1603
1604 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1605 if (TARGET_VIS4)
1606 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1607
1608 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1609 if (TARGET_VIS4B)
1610 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1611
1612 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1613 FPU is disabled. */
1614 if (!TARGET_FPU)
1615 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1616 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1617
1618 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1619 are available; -m64 also implies v9. */
1620 if (TARGET_VIS || TARGET_ARCH64)
1621 {
1622 target_flags |= MASK_V9;
1623 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1624 }
1625
1626 /* -mvis also implies -mv8plus on 32-bit. */
1627 if (TARGET_VIS && !TARGET_ARCH64)
1628 target_flags |= MASK_V8PLUS;
1629
1630 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1631 if (TARGET_V9 && TARGET_ARCH32)
1632 target_flags |= MASK_DEPRECATED_V8_INSNS;
1633
1634 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1635 if (!TARGET_V9 || TARGET_ARCH64)
1636 target_flags &= ~MASK_V8PLUS;
1637
1638 /* Don't use stack biasing in 32-bit mode. */
1639 if (TARGET_ARCH32)
1640 target_flags &= ~MASK_STACK_BIAS;
1641
1642 /* Use LRA instead of reload, unless otherwise instructed. */
1643 if (!(target_flags_explicit & MASK_LRA))
1644 target_flags |= MASK_LRA;
1645
1646 /* Enable the back-to-back store errata workaround for LEON3FT. */
1647 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1648 sparc_fix_b2bst = 1;
1649
1650 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1651 if (sparc_fix_ut699)
1652 target_flags &= ~MASK_FSMULD;
1653
1654 /* Supply a default value for align_functions. */
1655 if (align_functions == 0)
1656 {
1657 if (sparc_cpu == PROCESSOR_ULTRASPARC
1658 || sparc_cpu == PROCESSOR_ULTRASPARC3
1659 || sparc_cpu == PROCESSOR_NIAGARA
1660 || sparc_cpu == PROCESSOR_NIAGARA2
1661 || sparc_cpu == PROCESSOR_NIAGARA3
1662 || sparc_cpu == PROCESSOR_NIAGARA4)
1663 align_functions = 32;
1664 else if (sparc_cpu == PROCESSOR_NIAGARA7
1665 || sparc_cpu == PROCESSOR_M8)
1666 align_functions = 64;
1667 }
1668
1669 /* Validate PCC_STRUCT_RETURN. */
1670 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1671 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1672
1673 /* Only use .uaxword when compiling for a 64-bit target. */
1674 if (!TARGET_ARCH64)
1675 targetm.asm_out.unaligned_op.di = NULL;
1676
1677 /* Do various machine dependent initializations. */
1678 sparc_init_modes ();
1679
1680 /* Set up function hooks. */
1681 init_machine_status = sparc_init_machine_status;
1682
1683 switch (sparc_cpu)
1684 {
1685 case PROCESSOR_V7:
1686 case PROCESSOR_CYPRESS:
1687 sparc_costs = &cypress_costs;
1688 break;
1689 case PROCESSOR_V8:
1690 case PROCESSOR_SPARCLITE:
1691 case PROCESSOR_SUPERSPARC:
1692 sparc_costs = &supersparc_costs;
1693 break;
1694 case PROCESSOR_F930:
1695 case PROCESSOR_F934:
1696 case PROCESSOR_HYPERSPARC:
1697 case PROCESSOR_SPARCLITE86X:
1698 sparc_costs = &hypersparc_costs;
1699 break;
1700 case PROCESSOR_LEON:
1701 sparc_costs = &leon_costs;
1702 break;
1703 case PROCESSOR_LEON3:
1704 case PROCESSOR_LEON3V7:
1705 sparc_costs = &leon3_costs;
1706 break;
1707 case PROCESSOR_SPARCLET:
1708 case PROCESSOR_TSC701:
1709 sparc_costs = &sparclet_costs;
1710 break;
1711 case PROCESSOR_V9:
1712 case PROCESSOR_ULTRASPARC:
1713 sparc_costs = &ultrasparc_costs;
1714 break;
1715 case PROCESSOR_ULTRASPARC3:
1716 sparc_costs = &ultrasparc3_costs;
1717 break;
1718 case PROCESSOR_NIAGARA:
1719 sparc_costs = &niagara_costs;
1720 break;
1721 case PROCESSOR_NIAGARA2:
1722 sparc_costs = &niagara2_costs;
1723 break;
1724 case PROCESSOR_NIAGARA3:
1725 sparc_costs = &niagara3_costs;
1726 break;
1727 case PROCESSOR_NIAGARA4:
1728 sparc_costs = &niagara4_costs;
1729 break;
1730 case PROCESSOR_NIAGARA7:
1731 sparc_costs = &niagara7_costs;
1732 break;
1733 case PROCESSOR_M8:
1734 sparc_costs = &m8_costs;
1735 break;
1736 case PROCESSOR_NATIVE:
1737 gcc_unreachable ();
1738 };
1739
1740 if (sparc_memory_model == SMM_DEFAULT)
1741 {
1742 /* Choose the memory model for the operating system. */
1743 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1744 if (os_default != SMM_DEFAULT)
1745 sparc_memory_model = os_default;
1746 /* Choose the most relaxed model for the processor. */
1747 else if (TARGET_V9)
1748 sparc_memory_model = SMM_RMO;
1749 else if (TARGET_LEON3)
1750 sparc_memory_model = SMM_TSO;
1751 else if (TARGET_LEON)
1752 sparc_memory_model = SMM_SC;
1753 else if (TARGET_V8)
1754 sparc_memory_model = SMM_PSO;
1755 else
1756 sparc_memory_model = SMM_SC;
1757 }
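
/* To illustrate: with no explicit -mmemory-model and a subtarget whose
   SUBTARGET_DEFAULT_MEMORY_MODEL is SMM_DEFAULT, the selection above
   reduces to:

     -mcpu=ultrasparc  (TARGET_V9)     ->  SMM_RMO
     -mcpu=leon3       (TARGET_LEON3)  ->  SMM_TSO
     -mcpu=leon        (TARGET_LEON)   ->  SMM_SC
     -mcpu=supersparc  (TARGET_V8)     ->  SMM_PSO
     anything else                     ->  SMM_SC  */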
1758
1759 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1760 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1761 target_flags |= MASK_LONG_DOUBLE_128;
1762 #endif
1763
1764 if (TARGET_DEBUG_OPTIONS)
1765 dump_target_flags ("Final target_flags", target_flags);
1766
1767 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1768 can run at the same time. More important, it is the threshold
1769 defining when additional prefetches will be dropped by the
1770 hardware.
1771
1772 The UltraSPARC-III features a documented prefetch queue with a
1773 size of 8. Additional prefetches issued in the cpu are
1774 dropped.
1775
1776 Niagara processors are different. In these processors prefetches
1777 are handled much like regular loads. The L1 miss buffer is 32
1778 entries, but prefetches start getting affected when 30 entries
1779 become occupied. That occupancy can be a mix of regular loads
1780 and prefetches, and the buffer is shared by all threads.
1781 Once the threshold is reached, if the core is running a single
1782 thread the prefetch will retry. If more than one thread is
1783 running, the prefetch will be dropped.
1784
1785 All this makes it very difficult to determine how many
1786 prefetches can be issued simultaneously, even in a
1787 single-threaded program. Experimental results show that setting
1788 this parameter to 32 works well when the number of threads is not
1789 high. */
1790 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1791 ((sparc_cpu == PROCESSOR_ULTRASPARC
1792 || sparc_cpu == PROCESSOR_NIAGARA
1793 || sparc_cpu == PROCESSOR_NIAGARA2
1794 || sparc_cpu == PROCESSOR_NIAGARA3
1795 || sparc_cpu == PROCESSOR_NIAGARA4)
1796 ? 2
1797 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1798 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1799 || sparc_cpu == PROCESSOR_M8)
1800 ? 32 : 3))),
1801 global_options.x_param_values,
1802 global_options_set.x_param_values);
1803
1804 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1805 bytes.
1806
1807 The Oracle SPARC Architecture (previously the UltraSPARC
1808 Architecture) specification states that when a PREFETCH[A]
1809 instruction is executed an implementation-specific amount of data
1810 is prefetched, and that it is at least 64 bytes long (aligned to
1811 at least 64 bytes).
1812
1813 However, this is not correct. The M7 (and implementations prior
1814 to that) does not guarantee a 64B prefetch into a cache if the
1815 line size is smaller. A single cache line is all that is ever
1816 prefetched. So for the M7, where the L1D$ has 32B lines and the
1817 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1818 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1819 is a read_n prefetch, which is the only type which allocates to
1820 the L1.) */
1821 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1822 (sparc_cpu == PROCESSOR_M8
1823 ? 64 : 32),
1824 global_options.x_param_values,
1825 global_options_set.x_param_values);
1826
1827 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1828 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1829 Niagara processors feature an L1D$ of 16KB. */
1830 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1831 ((sparc_cpu == PROCESSOR_ULTRASPARC
1832 || sparc_cpu == PROCESSOR_ULTRASPARC3
1833 || sparc_cpu == PROCESSOR_NIAGARA
1834 || sparc_cpu == PROCESSOR_NIAGARA2
1835 || sparc_cpu == PROCESSOR_NIAGARA3
1836 || sparc_cpu == PROCESSOR_NIAGARA4
1837 || sparc_cpu == PROCESSOR_NIAGARA7
1838 || sparc_cpu == PROCESSOR_M8)
1839 ? 16 : 64),
1840 global_options.x_param_values,
1841 global_options_set.x_param_values);
1842
1843
1844 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1845 that 512 is the default in params.def. */
1846 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1847 ((sparc_cpu == PROCESSOR_NIAGARA4
1848 || sparc_cpu == PROCESSOR_M8)
1849 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1850 ? 256 : 512)),
1851 global_options.x_param_values,
1852 global_options_set.x_param_values);
1853
1854
1855 /* Disable save slot sharing for call-clobbered registers by default.
1856 The IRA sharing algorithm works on single registers only and this
1857 pessimizes for double floating-point registers. */
1858 if (!global_options_set.x_flag_ira_share_save_slots)
1859 flag_ira_share_save_slots = 0;
1860
1861 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1862 redundant 32-to-64-bit extensions. */
1863 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1864 flag_ree = 0;
1865 }
1866 \f
1867 /* Miscellaneous utilities. */
1868
1869 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1870 or branch on register contents instructions. */
1871
1872 int
1873 v9_regcmp_p (enum rtx_code code)
1874 {
1875 return (code == EQ || code == NE || code == GE || code == LT
1876 || code == LE || code == GT);
1877 }
1878
1879 /* Nonzero if OP is a floating point constant which can
1880 be loaded into an integer register using a single
1881 sethi instruction. */
1882
1883 int
1884 fp_sethi_p (rtx op)
1885 {
1886 if (GET_CODE (op) == CONST_DOUBLE)
1887 {
1888 long i;
1889
1890 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1891 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1892 }
1893
1894 return 0;
1895 }
1896
1897 /* Nonzero if OP is a floating point constant which can
1898 be loaded into an integer register using a single
1899 mov instruction. */
1900
1901 int
1902 fp_mov_p (rtx op)
1903 {
1904 if (GET_CODE (op) == CONST_DOUBLE)
1905 {
1906 long i;
1907
1908 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1909 return SPARC_SIMM13_P (i);
1910 }
1911
1912 return 0;
1913 }
1914
1915 /* Nonzero if OP is a floating point constant which can
1916 be loaded into an integer register using a high/losum
1917 instruction sequence. */
1918
1919 int
1920 fp_high_losum_p (rtx op)
1921 {
1922 /* The constraints calling this should only be in
1923 SFmode move insns, so any constant which cannot
1924 be moved using a single insn will do. */
1925 if (GET_CODE (op) == CONST_DOUBLE)
1926 {
1927 long i;
1928
1929 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1930 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1931 }
1932
1933 return 0;
1934 }
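
/* A worked example for the three predicates above, using the IEEE
   single-precision images of a few constants (a sketch, not an
   exhaustive classification):

     0.0f  ->  0x00000000   SIMM13             ->  fp_mov_p
     1.0f  ->  0x3f800000   low 10 bits clear  ->  fp_sethi_p
     1.1f  ->  0x3f8ccccd   neither            ->  fp_high_losum_p

   i.e. respectively a single mov, a single sethi, and a sethi/or
   pair when the constant is loaded into an integer register.  */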
1935
1936 /* Return true if the address of LABEL can be loaded by means of the
1937 mov{si,di}_pic_label_ref patterns in PIC mode. */
1938
1939 static bool
1940 can_use_mov_pic_label_ref (rtx label)
1941 {
1942 /* VxWorks does not impose a fixed gap between segments; the run-time
1943 gap can be different from the object-file gap. We therefore can't
1944 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1945 are absolutely sure that X is in the same segment as the GOT.
1946 Unfortunately, the flexibility of linker scripts means that we
1947 can't be sure of that in general, so assume that GOT-relative
1948 accesses are never valid on VxWorks. */
1949 if (TARGET_VXWORKS_RTP)
1950 return false;
1951
1952 /* Similarly, if the label is non-local, it might end up being placed
1953 in a different section than the current one; but mov_pic_label_ref
1954 requires the label and the code to be in the same section. */
1955 if (LABEL_REF_NONLOCAL_P (label))
1956 return false;
1957
1958 /* Finally, if we are reordering basic blocks and partitioning into
1959 hot and cold sections, this might happen for any label. */
1960 if (flag_reorder_blocks_and_partition)
1961 return false;
1962
1963 return true;
1964 }
1965
1966 /* Expand a move instruction. Return true if all work is done. */
1967
1968 bool
1969 sparc_expand_move (machine_mode mode, rtx *operands)
1970 {
1971 /* Handle sets of MEM first. */
1972 if (GET_CODE (operands[0]) == MEM)
1973 {
1974 /* 0 is a register (or a pair of registers) on SPARC. */
1975 if (register_or_zero_operand (operands[1], mode))
1976 return false;
1977
1978 if (!reload_in_progress)
1979 {
1980 operands[0] = validize_mem (operands[0]);
1981 operands[1] = force_reg (mode, operands[1]);
1982 }
1983 }
1984
1985 /* Fixup TLS cases. */
1986 if (TARGET_HAVE_TLS
1987 && CONSTANT_P (operands[1])
1988 && sparc_tls_referenced_p (operands [1]))
1989 {
1990 operands[1] = sparc_legitimize_tls_address (operands[1]);
1991 return false;
1992 }
1993
1994 /* Fixup PIC cases. */
1995 if (flag_pic && CONSTANT_P (operands[1]))
1996 {
1997 if (pic_address_needs_scratch (operands[1]))
1998 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1999
2000 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2001 if (GET_CODE (operands[1]) == LABEL_REF
2002 && can_use_mov_pic_label_ref (operands[1]))
2003 {
2004 if (mode == SImode)
2005 {
2006 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2007 return true;
2008 }
2009
2010 if (mode == DImode)
2011 {
2012 gcc_assert (TARGET_ARCH64);
2013 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2014 return true;
2015 }
2016 }
2017
2018 if (symbolic_operand (operands[1], mode))
2019 {
2020 operands[1]
2021 = sparc_legitimize_pic_address (operands[1],
2022 reload_in_progress
2023 ? operands[0] : NULL_RTX);
2024 return false;
2025 }
2026 }
2027
2028 /* If we are trying to toss an integer constant into FP registers,
2029 or loading a FP or vector constant, force it into memory. */
2030 if (CONSTANT_P (operands[1])
2031 && REG_P (operands[0])
2032 && (SPARC_FP_REG_P (REGNO (operands[0]))
2033 || SCALAR_FLOAT_MODE_P (mode)
2034 || VECTOR_MODE_P (mode)))
2035 {
2036 /* emit_group_store will send such bogosity to us when it is
2037 not storing directly into memory. So fix this up to avoid
2038 crashes in output_constant_pool. */
2039 if (operands [1] == const0_rtx)
2040 operands[1] = CONST0_RTX (mode);
2041
2042 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2043 always other regs. */
2044 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2045 && (const_zero_operand (operands[1], mode)
2046 || const_all_ones_operand (operands[1], mode)))
2047 return false;
2048
2049 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2050 /* We are able to build any SF constant in integer registers
2051 with at most 2 instructions. */
2052 && (mode == SFmode
2053 /* And any DF constant in integer registers if needed. */
2054 || (mode == DFmode && !can_create_pseudo_p ())))
2055 return false;
2056
2057 operands[1] = force_const_mem (mode, operands[1]);
2058 if (!reload_in_progress)
2059 operands[1] = validize_mem (operands[1]);
2060 return false;
2061 }
2062
2063 /* Accept non-constants and valid constants unmodified. */
2064 if (!CONSTANT_P (operands[1])
2065 || GET_CODE (operands[1]) == HIGH
2066 || input_operand (operands[1], mode))
2067 return false;
2068
2069 switch (mode)
2070 {
2071 case QImode:
2072 /* All QImode constants require only one insn, so proceed. */
2073 break;
2074
2075 case HImode:
2076 case SImode:
2077 sparc_emit_set_const32 (operands[0], operands[1]);
2078 return true;
2079
2080 case DImode:
2081 /* input_operand should have filtered out 32-bit mode. */
2082 sparc_emit_set_const64 (operands[0], operands[1]);
2083 return true;
2084
2085 case TImode:
2086 {
2087 rtx high, low;
2088 /* TImode isn't available in 32-bit mode. */
2089 split_double (operands[1], &high, &low);
2090 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2091 high));
2092 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2093 low));
2094 }
2095 return true;
2096
2097 default:
2098 gcc_unreachable ();
2099 }
2100
2101 return false;
2102 }
2103
2104 /* Load OP1, a 32-bit constant, into OP0, a register.
2105 We know it can't be done in one insn when we get
2106 here; the move expander guarantees this. */
2107
2108 static void
2109 sparc_emit_set_const32 (rtx op0, rtx op1)
2110 {
2111 machine_mode mode = GET_MODE (op0);
2112 rtx temp = op0;
2113
2114 if (can_create_pseudo_p ())
2115 temp = gen_reg_rtx (mode);
2116
2117 if (GET_CODE (op1) == CONST_INT)
2118 {
2119 gcc_assert (!small_int_operand (op1, mode)
2120 && !const_high_operand (op1, mode));
2121
2122 /* Emit them as real moves instead of a HIGH/LO_SUM,
2123 this way CSE can see everything and reuse intermediate
2124 values if it wants. */
2125 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2126 & ~(HOST_WIDE_INT) 0x3ff)));
2127
2128 emit_insn (gen_rtx_SET (op0,
2129 gen_rtx_IOR (mode, temp,
2130 GEN_INT (INTVAL (op1) & 0x3ff))));
2131 }
2132 else
2133 {
2134 /* A symbol, emit in the traditional way. */
2135 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2136 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2137 }
2138 }
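
/* A concrete trace of the CONST_INT path above, assuming op1 is
   0x12345678 and a fresh pseudo is available for temp:

     (set (reg temp) (const_int 0x12345400))            ; sethi
     (set (reg op0) (ior (reg temp) (const_int 0x278))) ; or

   since 0x12345678 & ~0x3ff == 0x12345400 and
   0x12345678 & 0x3ff == 0x278.  */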
2139
2140 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2141 If TEMP is nonzero, we are forbidden to use any other scratch
2142 registers. Otherwise, we are allowed to generate them as needed.
2143
2144 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2145 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2146
2147 void
2148 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2149 {
2150 rtx temp1, temp2, temp3, temp4, temp5;
2151 rtx ti_temp = 0;
2152
2153 if (temp && GET_MODE (temp) == TImode)
2154 {
2155 ti_temp = temp;
2156 temp = gen_rtx_REG (DImode, REGNO (temp));
2157 }
2158
2159 /* SPARC-V9 code-model support. */
2160 switch (sparc_cmodel)
2161 {
2162 case CM_MEDLOW:
2163 /* The range spanned by all instructions in the object is less
2164 than 2^31 bytes (2GB) and the distance from any instruction
2165 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2166 than 2^31 bytes (2GB).
2167
2168 The executable must be in the low 4TB of the virtual address
2169 space.
2170
2171 sethi %hi(symbol), %temp1
2172 or %temp1, %lo(symbol), %reg */
2173 if (temp)
2174 temp1 = temp; /* op0 is allowed. */
2175 else
2176 temp1 = gen_reg_rtx (DImode);
2177
2178 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2179 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2180 break;
2181
2182 case CM_MEDMID:
2183 /* The range spanned by all instructions in the object is less
2184 than 2^31 bytes (2GB) and the distance from any instruction
2185 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2186 than 2^31 bytes (2GB).
2187
2188 The executable must be in the low 16TB of the virtual address
2189 space.
2190
2191 sethi %h44(symbol), %temp1
2192 or %temp1, %m44(symbol), %temp2
2193 sllx %temp2, 12, %temp3
2194 or %temp3, %l44(symbol), %reg */
2195 if (temp)
2196 {
2197 temp1 = op0;
2198 temp2 = op0;
2199 temp3 = temp; /* op0 is allowed. */
2200 }
2201 else
2202 {
2203 temp1 = gen_reg_rtx (DImode);
2204 temp2 = gen_reg_rtx (DImode);
2205 temp3 = gen_reg_rtx (DImode);
2206 }
2207
2208 emit_insn (gen_seth44 (temp1, op1));
2209 emit_insn (gen_setm44 (temp2, temp1, op1));
2210 emit_insn (gen_rtx_SET (temp3,
2211 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2212 emit_insn (gen_setl44 (op0, temp3, op1));
2213 break;
2214
2215 case CM_MEDANY:
2216 /* The range spanned by all instructions in the object is less
2217 than 2^31 bytes (2GB) and the distance from any instruction
2218 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2219 than 2^31 bytes (2GB).
2220
2221 The executable can be placed anywhere in the virtual address
2222 space.
2223
2224 sethi %hh(symbol), %temp1
2225 sethi %lm(symbol), %temp2
2226 or %temp1, %hm(symbol), %temp3
2227 sllx %temp3, 32, %temp4
2228 or %temp4, %temp2, %temp5
2229 or %temp5, %lo(symbol), %reg */
2230 if (temp)
2231 {
2232 /* It is possible that one of the registers we got for operands[2]
2233 might coincide with that of operands[0] (which is why we made
2234 it TImode). Pick the other one to use as our scratch. */
2235 if (rtx_equal_p (temp, op0))
2236 {
2237 gcc_assert (ti_temp);
2238 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2239 }
2240 temp1 = op0;
2241 temp2 = temp; /* op0 is _not_ allowed, see above. */
2242 temp3 = op0;
2243 temp4 = op0;
2244 temp5 = op0;
2245 }
2246 else
2247 {
2248 temp1 = gen_reg_rtx (DImode);
2249 temp2 = gen_reg_rtx (DImode);
2250 temp3 = gen_reg_rtx (DImode);
2251 temp4 = gen_reg_rtx (DImode);
2252 temp5 = gen_reg_rtx (DImode);
2253 }
2254
2255 emit_insn (gen_sethh (temp1, op1));
2256 emit_insn (gen_setlm (temp2, op1));
2257 emit_insn (gen_sethm (temp3, temp1, op1));
2258 emit_insn (gen_rtx_SET (temp4,
2259 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2260 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2261 emit_insn (gen_setlo (op0, temp5, op1));
2262 break;
2263
2264 case CM_EMBMEDANY:
2265 /* Old old old backwards compatibility cruft here.
2266 Essentially it is MEDLOW with a fixed 64-bit
2267 virtual base added to all data segment addresses.
2268 Text-segment stuff is computed like MEDANY, we can't
2269 reuse the code above because the relocation knobs
2270 look different.
2271
2272 Data segment: sethi %hi(symbol), %temp1
2273 add %temp1, EMBMEDANY_BASE_REG, %temp2
2274 or %temp2, %lo(symbol), %reg */
2275 if (data_segment_operand (op1, GET_MODE (op1)))
2276 {
2277 if (temp)
2278 {
2279 temp1 = temp; /* op0 is allowed. */
2280 temp2 = op0;
2281 }
2282 else
2283 {
2284 temp1 = gen_reg_rtx (DImode);
2285 temp2 = gen_reg_rtx (DImode);
2286 }
2287
2288 emit_insn (gen_embmedany_sethi (temp1, op1));
2289 emit_insn (gen_embmedany_brsum (temp2, temp1));
2290 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2291 }
2292
2293 /* Text segment: sethi %uhi(symbol), %temp1
2294 sethi %hi(symbol), %temp2
2295 or %temp1, %ulo(symbol), %temp3
2296 sllx %temp3, 32, %temp4
2297 or %temp4, %temp2, %temp5
2298 or %temp5, %lo(symbol), %reg */
2299 else
2300 {
2301 if (temp)
2302 {
2303 /* It is possible that one of the registers we got for operands[2]
2304 might coincide with that of operands[0] (which is why we made
2305 it TImode). Pick the other one to use as our scratch. */
2306 if (rtx_equal_p (temp, op0))
2307 {
2308 gcc_assert (ti_temp);
2309 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2310 }
2311 temp1 = op0;
2312 temp2 = temp; /* op0 is _not_ allowed, see above. */
2313 temp3 = op0;
2314 temp4 = op0;
2315 temp5 = op0;
2316 }
2317 else
2318 {
2319 temp1 = gen_reg_rtx (DImode);
2320 temp2 = gen_reg_rtx (DImode);
2321 temp3 = gen_reg_rtx (DImode);
2322 temp4 = gen_reg_rtx (DImode);
2323 temp5 = gen_reg_rtx (DImode);
2324 }
2325
2326 emit_insn (gen_embmedany_textuhi (temp1, op1));
2327 emit_insn (gen_embmedany_texthi (temp2, op1));
2328 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2329 emit_insn (gen_rtx_SET (temp4,
2330 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2331 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2332 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2333 }
2334 break;
2335
2336 default:
2337 gcc_unreachable ();
2338 }
2339 }
2340
2341 /* These avoid problems when cross compiling. If we do not
2342 go through all this hair then the optimizer will see
2343 invalid REG_EQUAL notes or in some cases none at all. */
2344 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2345 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2346 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2347 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2348
2349 /* The optimizer is not to assume anything about exactly
2350 which bits are set for a HIGH; they are unspecified.
2351 Unfortunately this leads to many missed optimizations
2352 during CSE. We mask out the non-HIGH bits so that the
2353 result matches a plain movdi, to alleviate this problem. */
2354 static rtx
2355 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2356 {
2357 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2358 }
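
/* E.g. gen_safe_HIGH64 (reg, 0x12345678) yields
   (set (reg) (const_int 0x12345400)), which is exactly the value the
   corresponding sethi leaves in the register, so CSE can reason
   about it like any other move.  */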
2359
2360 static rtx
2361 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2362 {
2363 return gen_rtx_SET (dest, GEN_INT (val));
2364 }
2365
2366 static rtx
2367 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2368 {
2369 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2370 }
2371
2372 static rtx
2373 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2374 {
2375 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2376 }
2377
2378 /* Worker routines for 64-bit constant formation on arch64.
2379 One of the key things to be doing in these emissions is
2380 to create as many temp REGs as possible. This makes it
2381 possible for half-built constants to be used later when
2382 such values are similar to something required later on.
2383 Without doing this, the optimizer cannot see such
2384 opportunities. */
2385
2386 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2387 unsigned HOST_WIDE_INT, int);
2388
2389 static void
2390 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2391 unsigned HOST_WIDE_INT low_bits, int is_neg)
2392 {
2393 unsigned HOST_WIDE_INT high_bits;
2394
2395 if (is_neg)
2396 high_bits = (~low_bits) & 0xffffffff;
2397 else
2398 high_bits = low_bits;
2399
2400 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2401 if (!is_neg)
2402 {
2403 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2404 }
2405 else
2406 {
2407 /* If we are XOR'ing with -1, then we should emit a one's complement
2408 instead. This way the combiner will notice logical operations
2409 such as ANDN later on and substitute. */
2410 if ((low_bits & 0x3ff) == 0x3ff)
2411 {
2412 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2413 }
2414 else
2415 {
2416 emit_insn (gen_rtx_SET (op0,
2417 gen_safe_XOR64 (temp,
2418 (-(HOST_WIDE_INT)0x400
2419 | (low_bits & 0x3ff)))));
2420 }
2421 }
2422 }
2423
2424 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2425 unsigned HOST_WIDE_INT, int);
2426
2427 static void
2428 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2429 unsigned HOST_WIDE_INT high_bits,
2430 unsigned HOST_WIDE_INT low_immediate,
2431 int shift_count)
2432 {
2433 rtx temp2 = op0;
2434
2435 if ((high_bits & 0xfffffc00) != 0)
2436 {
2437 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2438 if ((high_bits & ~0xfffffc00) != 0)
2439 emit_insn (gen_rtx_SET (op0,
2440 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2441 else
2442 temp2 = temp;
2443 }
2444 else
2445 {
2446 emit_insn (gen_safe_SET64 (temp, high_bits));
2447 temp2 = temp;
2448 }
2449
2450 /* Now shift it up into place. */
2451 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2452 GEN_INT (shift_count))));
2453
2454 /* If there is a low immediate part piece, finish up by
2455 putting that in as well. */
2456 if (low_immediate != 0)
2457 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2458 }
2459
2460 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2461 unsigned HOST_WIDE_INT);
2462
2463 /* Full 64-bit constant decomposition. Even though this is the
2464 'worst' case, we still optimize a few things away. */
2465 static void
2466 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2467 unsigned HOST_WIDE_INT high_bits,
2468 unsigned HOST_WIDE_INT low_bits)
2469 {
2470 rtx sub_temp = op0;
2471
2472 if (can_create_pseudo_p ())
2473 sub_temp = gen_reg_rtx (DImode);
2474
2475 if ((high_bits & 0xfffffc00) != 0)
2476 {
2477 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2478 if ((high_bits & ~0xfffffc00) != 0)
2479 emit_insn (gen_rtx_SET (sub_temp,
2480 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2481 else
2482 sub_temp = temp;
2483 }
2484 else
2485 {
2486 emit_insn (gen_safe_SET64 (temp, high_bits));
2487 sub_temp = temp;
2488 }
2489
2490 if (can_create_pseudo_p ())
2491 {
2492 rtx temp2 = gen_reg_rtx (DImode);
2493 rtx temp3 = gen_reg_rtx (DImode);
2494 rtx temp4 = gen_reg_rtx (DImode);
2495
2496 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2497 GEN_INT (32))));
2498
2499 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2500 if ((low_bits & ~0xfffffc00) != 0)
2501 {
2502 emit_insn (gen_rtx_SET (temp3,
2503 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2504 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2505 }
2506 else
2507 {
2508 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2509 }
2510 }
2511 else
2512 {
2513 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2514 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2515 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2516 int to_shift = 12;
2517
2518 /* We are in the middle of reload, so this is really
2519 painful. However we do still make an attempt to
2520 avoid emitting truly stupid code. */
2521 if (low1 != const0_rtx)
2522 {
2523 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2524 GEN_INT (to_shift))));
2525 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2526 sub_temp = op0;
2527 to_shift = 12;
2528 }
2529 else
2530 {
2531 to_shift += 12;
2532 }
2533 if (low2 != const0_rtx)
2534 {
2535 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2536 GEN_INT (to_shift))));
2537 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2538 sub_temp = op0;
2539 to_shift = 8;
2540 }
2541 else
2542 {
2543 to_shift += 8;
2544 }
2545 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2546 GEN_INT (to_shift))));
2547 if (low3 != const0_rtx)
2548 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2549 /* phew... */
2550 }
2551 }
2552
2553 /* Analyze a 64-bit constant: find the highest and lowest set bits and whether all the bits in between are set. */
2554 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2555 unsigned HOST_WIDE_INT,
2556 int *, int *, int *);
2557
2558 static void
2559 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2560 unsigned HOST_WIDE_INT low_bits,
2561 int *hbsp, int *lbsp, int *abbasp)
2562 {
2563 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2564 int i;
2565
2566 lowest_bit_set = highest_bit_set = -1;
2567 i = 0;
2568 do
2569 {
2570 if ((lowest_bit_set == -1)
2571 && ((low_bits >> i) & 1))
2572 lowest_bit_set = i;
2573 if ((highest_bit_set == -1)
2574 && ((high_bits >> (32 - i - 1)) & 1))
2575 highest_bit_set = (64 - i - 1);
2576 }
2577 while (++i < 32
2578 && ((highest_bit_set == -1)
2579 || (lowest_bit_set == -1)));
2580 if (i == 32)
2581 {
2582 i = 0;
2583 do
2584 {
2585 if ((lowest_bit_set == -1)
2586 && ((high_bits >> i) & 1))
2587 lowest_bit_set = i + 32;
2588 if ((highest_bit_set == -1)
2589 && ((low_bits >> (32 - i - 1)) & 1))
2590 highest_bit_set = 32 - i - 1;
2591 }
2592 while (++i < 32
2593 && ((highest_bit_set == -1)
2594 || (lowest_bit_set == -1)));
2595 }
2596 /* If there are no bits set this should have gone out
2597 as one instruction! */
2598 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2599 all_bits_between_are_set = 1;
2600 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2601 {
2602 if (i < 32)
2603 {
2604 if ((low_bits & (1 << i)) != 0)
2605 continue;
2606 }
2607 else
2608 {
2609 if ((high_bits & (1 << (i - 32))) != 0)
2610 continue;
2611 }
2612 all_bits_between_are_set = 0;
2613 break;
2614 }
2615 *hbsp = highest_bit_set;
2616 *lbsp = lowest_bit_set;
2617 *abbasp = all_bits_between_are_set;
2618 }
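
/* E.g. for the constant 0x0000000300000000 (high_bits == 0x3,
   low_bits == 0), the first loop above finds highest_bit_set == 33,
   the second finds lowest_bit_set == 32, and since both bits in that
   range are set, *abbasp is 1.  */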
2619
2620 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2621
2622 static int
2623 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2624 unsigned HOST_WIDE_INT low_bits)
2625 {
2626 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2627
2628 if (high_bits == 0
2629 || high_bits == 0xffffffff)
2630 return 1;
2631
2632 analyze_64bit_constant (high_bits, low_bits,
2633 &highest_bit_set, &lowest_bit_set,
2634 &all_bits_between_are_set);
2635
2636 if ((highest_bit_set == 63
2637 || lowest_bit_set == 0)
2638 && all_bits_between_are_set != 0)
2639 return 1;
2640
2641 if ((highest_bit_set - lowest_bit_set) < 21)
2642 return 1;
2643
2644 return 0;
2645 }
2646
2647 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2648 unsigned HOST_WIDE_INT,
2649 int, int);
2650
2651 static unsigned HOST_WIDE_INT
2652 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2653 unsigned HOST_WIDE_INT low_bits,
2654 int lowest_bit_set, int shift)
2655 {
2656 HOST_WIDE_INT hi, lo;
2657
2658 if (lowest_bit_set < 32)
2659 {
2660 lo = (low_bits >> lowest_bit_set) << shift;
2661 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2662 }
2663 else
2664 {
2665 lo = 0;
2666 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2667 }
2668 gcc_assert (! (hi & lo));
2669 return (hi | lo);
2670 }
2671
2672 /* Here we are sure to be arch64 and this is an integer constant
2673 being loaded into a register. Emit the most efficient
2674 insn sequence possible. Detection of all the 1-insn cases
2675 has been done already. */
2676 static void
2677 sparc_emit_set_const64 (rtx op0, rtx op1)
2678 {
2679 unsigned HOST_WIDE_INT high_bits, low_bits;
2680 int lowest_bit_set, highest_bit_set;
2681 int all_bits_between_are_set;
2682 rtx temp = 0;
2683
2684 /* Sanity check that we know what we are working with. */
2685 gcc_assert (TARGET_ARCH64
2686 && (GET_CODE (op0) == SUBREG
2687 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2688
2689 if (! can_create_pseudo_p ())
2690 temp = op0;
2691
2692 if (GET_CODE (op1) != CONST_INT)
2693 {
2694 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2695 return;
2696 }
2697
2698 if (! temp)
2699 temp = gen_reg_rtx (DImode);
2700
2701 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2702 low_bits = (INTVAL (op1) & 0xffffffff);
2703
2704 /* low_bits bits 0 --> 31
2705 high_bits bits 32 --> 63 */
2706
2707 analyze_64bit_constant (high_bits, low_bits,
2708 &highest_bit_set, &lowest_bit_set,
2709 &all_bits_between_are_set);
2710
2711 /* First try for a 2-insn sequence. */
2712
2713 /* These situations are preferred because the optimizer can
2714 * do more things with them:
2715 * 1) mov -1, %reg
2716 * sllx %reg, shift, %reg
2717 * 2) mov -1, %reg
2718 * srlx %reg, shift, %reg
2719 * 3) mov some_small_const, %reg
2720 * sllx %reg, shift, %reg
2721 */
2722 if (((highest_bit_set == 63
2723 || lowest_bit_set == 0)
2724 && all_bits_between_are_set != 0)
2725 || ((highest_bit_set - lowest_bit_set) < 12))
2726 {
2727 HOST_WIDE_INT the_const = -1;
2728 int shift = lowest_bit_set;
2729
2730 if ((highest_bit_set != 63
2731 && lowest_bit_set != 0)
2732 || all_bits_between_are_set == 0)
2733 {
2734 the_const =
2735 create_simple_focus_bits (high_bits, low_bits,
2736 lowest_bit_set, 0);
2737 }
2738 else if (lowest_bit_set == 0)
2739 shift = -(63 - highest_bit_set);
2740
2741 gcc_assert (SPARC_SIMM13_P (the_const));
2742 gcc_assert (shift != 0);
2743
2744 emit_insn (gen_safe_SET64 (temp, the_const));
2745 if (shift > 0)
2746 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2747 GEN_INT (shift))));
2748 else if (shift < 0)
2749 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2750 GEN_INT (-shift))));
2751 return;
2752 }
2753
2754 /* Now a range of 22 or fewer bits set somewhere.
2755 * 1) sethi %hi(focus_bits), %reg
2756 * sllx %reg, shift, %reg
2757 * 2) sethi %hi(focus_bits), %reg
2758 * srlx %reg, shift, %reg
2759 */
2760 if ((highest_bit_set - lowest_bit_set) < 21)
2761 {
2762 unsigned HOST_WIDE_INT focus_bits =
2763 create_simple_focus_bits (high_bits, low_bits,
2764 lowest_bit_set, 10);
2765
2766 gcc_assert (SPARC_SETHI_P (focus_bits));
2767 gcc_assert (lowest_bit_set != 10);
2768
2769 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2770
2771 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2772 if (lowest_bit_set < 10)
2773 emit_insn (gen_rtx_SET (op0,
2774 gen_rtx_LSHIFTRT (DImode, temp,
2775 GEN_INT (10 - lowest_bit_set))));
2776 else if (lowest_bit_set > 10)
2777 emit_insn (gen_rtx_SET (op0,
2778 gen_rtx_ASHIFT (DImode, temp,
2779 GEN_INT (lowest_bit_set - 10))));
2780 return;
2781 }
2782
2783 /* 1) sethi %hi(low_bits), %reg
2784 * or %reg, %lo(low_bits), %reg
2785 * 2) sethi %hi(~low_bits), %reg
2786 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2787 */
2788 if (high_bits == 0
2789 || high_bits == 0xffffffff)
2790 {
2791 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2792 (high_bits == 0xffffffff));
2793 return;
2794 }
2795
2796 /* Now, try 3-insn sequences. */
2797
2798 /* 1) sethi %hi(high_bits), %reg
2799 * or %reg, %lo(high_bits), %reg
2800 * sllx %reg, 32, %reg
2801 */
2802 if (low_bits == 0)
2803 {
2804 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2805 return;
2806 }
2807
2808 /* We may be able to do something quick
2809 when the constant is negated, so try that. */
2810 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2811 (~low_bits) & 0xfffffc00))
2812 {
2813 /* NOTE: The trailing bits get XOR'd so we need the
2814 non-negated bits, not the negated ones. */
2815 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2816
2817 if ((((~high_bits) & 0xffffffff) == 0
2818 && ((~low_bits) & 0x80000000) == 0)
2819 || (((~high_bits) & 0xffffffff) == 0xffffffff
2820 && ((~low_bits) & 0x80000000) != 0))
2821 {
2822 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2823
2824 if ((SPARC_SETHI_P (fast_int)
2825 && (~high_bits & 0xffffffff) == 0)
2826 || SPARC_SIMM13_P (fast_int))
2827 emit_insn (gen_safe_SET64 (temp, fast_int));
2828 else
2829 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2830 }
2831 else
2832 {
2833 rtx negated_const;
2834 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2835 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2836 sparc_emit_set_const64 (temp, negated_const);
2837 }
2838
2839 /* If we are XOR'ing with -1, then we should emit a one's complement
2840 instead. This way the combiner will notice logical operations
2841 such as ANDN later on and substitute. */
2842 if (trailing_bits == 0x3ff)
2843 {
2844 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2845 }
2846 else
2847 {
2848 emit_insn (gen_rtx_SET (op0,
2849 gen_safe_XOR64 (temp,
2850 (-0x400 | trailing_bits))));
2851 }
2852 return;
2853 }
2854
2855 /* 1) sethi %hi(xxx), %reg
2856 * or %reg, %lo(xxx), %reg
2857 * sllx %reg, yyy, %reg
2858 *
2859 * ??? This is just a generalized version of the low_bits==0
2860 * thing above, FIXME...
2861 */
2862 if ((highest_bit_set - lowest_bit_set) < 32)
2863 {
2864 unsigned HOST_WIDE_INT focus_bits =
2865 create_simple_focus_bits (high_bits, low_bits,
2866 lowest_bit_set, 0);
2867
2868 /* We can't get here in this state. */
2869 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2870
2871 /* So what we know is that the set bits straddle the
2872 middle of the 64-bit word. */
2873 sparc_emit_set_const64_quick2 (op0, temp,
2874 focus_bits, 0,
2875 lowest_bit_set);
2876 return;
2877 }
2878
2879 /* 1) sethi %hi(high_bits), %reg
2880 * or %reg, %lo(high_bits), %reg
2881 * sllx %reg, 32, %reg
2882 * or %reg, low_bits, %reg
2883 */
2884 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2885 {
2886 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2887 return;
2888 }
2889
2890 /* The easiest way when all else fails, is full decomposition. */
2891 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2892 }
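
/* Tying the helpers together, a sketch of the sequence chosen for the
   constant 0x0000000300000000 analyzed in the example further above:
   the span of set bits (33 - 32) is below 12, so the first 2-insn
   case fires with the_const == 3 and shift == 32, giving

     mov  3, %reg
     sllx %reg, 32, %reg  */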
2893
2894 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2895
2896 static bool
2897 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2898 {
2899 *p1 = SPARC_ICC_REG;
2900 *p2 = SPARC_FCC_REG;
2901 return true;
2902 }
2903
2904 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2905
2906 static unsigned int
2907 sparc_min_arithmetic_precision (void)
2908 {
2909 return 32;
2910 }
2911
2912 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2913 return the mode to be used for the comparison. For floating-point,
2914 CCFP[E]mode is used. CCNZmode should be used when the first operand
2915 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2916 processing is needed. */
2917
2918 machine_mode
2919 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2920 {
2921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2922 {
2923 switch (op)
2924 {
2925 case EQ:
2926 case NE:
2927 case UNORDERED:
2928 case ORDERED:
2929 case UNLT:
2930 case UNLE:
2931 case UNGT:
2932 case UNGE:
2933 case UNEQ:
2934 case LTGT:
2935 return CCFPmode;
2936
2937 case LT:
2938 case LE:
2939 case GT:
2940 case GE:
2941 return CCFPEmode;
2942
2943 default:
2944 gcc_unreachable ();
2945 }
2946 }
2947 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2948 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2949 && y == const0_rtx)
2950 {
2951 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2952 return CCXNZmode;
2953 else
2954 return CCNZmode;
2955 }
2956 else
2957 {
2958 /* This is for the cmp<mode>_sne pattern. */
2959 if (GET_CODE (x) == NOT && y == constm1_rtx)
2960 {
2961 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2962 return CCXCmode;
2963 else
2964 return CCCmode;
2965 }
2966
2967 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
2968 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
2969 {
2970 if (GET_CODE (y) == UNSPEC
2971 && (XINT (y, 1) == UNSPEC_ADDV
2972 || XINT (y, 1) == UNSPEC_SUBV
2973 || XINT (y, 1) == UNSPEC_NEGV))
2974 return CCVmode;
2975 else
2976 return CCCmode;
2977 }
2978
2979 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2980 return CCXmode;
2981 else
2982 return CCmode;
2983 }
2984 }
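
/* For example, (ne (plus:SI (reg) (reg)) (const_int 0)) selects
   CCNZmode, since only the N and Z bits of the condition codes are
   meaningful after an addcc, whereas a plain register-to-register
   (gt:SI (reg) (reg)) comparison selects CCmode.  */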
2985
2986 /* Emit the compare insn and return the CC reg for a CODE comparison
2987 with operands X and Y. */
2988
2989 static rtx
2990 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2991 {
2992 machine_mode mode;
2993 rtx cc_reg;
2994
2995 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2996 return x;
2997
2998 mode = SELECT_CC_MODE (code, x, y);
2999
3000 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3001 fcc regs (cse can't tell they're really call clobbered regs and will
3002 remove a duplicate comparison even if there is an intervening function
3003 call - it will then try to reload the cc reg via an int reg which is why
3004 we need the movcc patterns). It is possible to provide the movcc
3005 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3006 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3007 to tell cse that CCFPE mode registers (even pseudos) are call
3008 clobbered. */
3009
3010 /* ??? This is an experiment. Rather than making changes to cse which may
3011 or may not be easy/clean, we do our own cse. This is possible because
3012 we will generate hard registers. Cse knows they're call clobbered (it
3013 doesn't know the same thing about pseudos). If we guess wrong, no big
3014 deal, but if we win, great! */
3015
3016 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3017 #if 1 /* experiment */
3018 {
3019 int reg;
3020 /* We cycle through the registers to ensure they're all exercised. */
3021 static int next_fcc_reg = 0;
3022 /* Previous x,y for each fcc reg. */
3023 static rtx prev_args[4][2];
3024
3025 /* Scan prev_args for x,y. */
3026 for (reg = 0; reg < 4; reg++)
3027 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3028 break;
3029 if (reg == 4)
3030 {
3031 reg = next_fcc_reg;
3032 prev_args[reg][0] = x;
3033 prev_args[reg][1] = y;
3034 next_fcc_reg = (next_fcc_reg + 1) & 3;
3035 }
3036 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3037 }
3038 #else
3039 cc_reg = gen_reg_rtx (mode);
3040 #endif /* ! experiment */
3041 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3042 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3043 else
3044 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3045
3046 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3047 will only result in an unrecognizable insn so no point in asserting. */
3048 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3049
3050 return cc_reg;
3051 }
3052
3053
3054 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3055
3056 rtx
3057 gen_compare_reg (rtx cmp)
3058 {
3059 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3060 }
3061
3062 /* This function is used for v9 only.
3063 DEST is the target of the Scc insn.
3064 CODE is the code for an Scc's comparison.
3065 X and Y are the values we compare.
3066
3067 This function is needed to turn
3068
3069 (set (reg:SI 110)
3070 (gt (reg:CCX 100 %icc)
3071 (const_int 0)))
3072 into
3073 (set (reg:SI 110)
3074 (gt:DI (reg:CCX 100 %icc)
3075 (const_int 0)))
3076
3077 IE: The instruction recognizer needs to see the mode of the comparison to
3078 find the right instruction. We could use "gt:DI" right in the
3079 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3080
3081 static int
3082 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3083 {
3084 if (! TARGET_ARCH64
3085 && (GET_MODE (x) == DImode
3086 || GET_MODE (dest) == DImode))
3087 return 0;
3088
3089 /* Try to use the movrCC insns. */
3090 if (TARGET_ARCH64
3091 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3092 && y == const0_rtx
3093 && v9_regcmp_p (compare_code))
3094 {
3095 rtx op0 = x;
3096 rtx temp;
3097
3098 /* Special case for op0 != 0. This can be done with one instruction if
3099 dest == x. */
3100
3101 if (compare_code == NE
3102 && GET_MODE (dest) == DImode
3103 && rtx_equal_p (op0, dest))
3104 {
3105 emit_insn (gen_rtx_SET (dest,
3106 gen_rtx_IF_THEN_ELSE (DImode,
3107 gen_rtx_fmt_ee (compare_code, DImode,
3108 op0, const0_rtx),
3109 const1_rtx,
3110 dest)));
3111 return 1;
3112 }
3113
3114 if (reg_overlap_mentioned_p (dest, op0))
3115 {
3116 /* Handle the case where dest == x.
3117 We "early clobber" the result. */
3118 op0 = gen_reg_rtx (GET_MODE (x));
3119 emit_move_insn (op0, x);
3120 }
3121
3122 emit_insn (gen_rtx_SET (dest, const0_rtx));
3123 if (GET_MODE (op0) != DImode)
3124 {
3125 temp = gen_reg_rtx (DImode);
3126 convert_move (temp, op0, 0);
3127 }
3128 else
3129 temp = op0;
3130 emit_insn (gen_rtx_SET (dest,
3131 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3132 gen_rtx_fmt_ee (compare_code, DImode,
3133 temp, const0_rtx),
3134 const1_rtx,
3135 dest)));
3136 return 1;
3137 }
3138 else
3139 {
3140 x = gen_compare_reg_1 (compare_code, x, y);
3141 y = const0_rtx;
3142
3143 emit_insn (gen_rtx_SET (dest, const0_rtx));
3144 emit_insn (gen_rtx_SET (dest,
3145 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3146 gen_rtx_fmt_ee (compare_code,
3147 GET_MODE (x), x, y),
3148 const1_rtx, dest)));
3149 return 1;
3150 }
3151 }
3152
3153
3154 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3155 without jumps using the addx/subx instructions. */
3156
3157 bool
3158 emit_scc_insn (rtx operands[])
3159 {
3160 rtx tem, x, y;
3161 enum rtx_code code;
3162 machine_mode mode;
3163
3164 /* The quad-word fp compare library routines all return nonzero to indicate
3165 true, which is different from the equivalent libgcc routines, so we must
3166 handle them specially here. */
3167 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3168 {
3169 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3170 GET_CODE (operands[1]));
3171 operands[2] = XEXP (operands[1], 0);
3172 operands[3] = XEXP (operands[1], 1);
3173 }
3174
3175 code = GET_CODE (operands[1]);
3176 x = operands[2];
3177 y = operands[3];
3178 mode = GET_MODE (x);
3179
3180 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3181 more applications). The exception to this is "reg != 0" which can
3182 be done in one instruction on v9 (so we do it). */
3183 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3184 {
3185 if (y != const0_rtx)
3186 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3187
3188 rtx pat = gen_rtx_SET (operands[0],
3189 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3190 x, const0_rtx));
3191
3192 /* If we can use addx/subx or addxc, add a clobber for CC. */
3193 if (mode == SImode || (code == NE && TARGET_VIS3))
3194 {
3195 rtx clobber
3196 = gen_rtx_CLOBBER (VOIDmode,
3197 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3198 SPARC_ICC_REG));
3199 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3200 }
3201
3202 emit_insn (pat);
3203 return true;
3204 }
3205
3206 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3207 if (TARGET_ARCH64
3208 && mode == DImode
3209 && !((code == LTU || code == GTU) && TARGET_VIS3)
3210 && gen_v9_scc (operands[0], code, x, y))
3211 return true;
3212
3213 /* We can do LTU and GEU using the addx/subx instructions too. And
3214 for GTU/LEU, if both operands are registers, swap them and fall
3215 back to the easy case. */
3216 if (code == GTU || code == LEU)
3217 {
3218 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3219 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3220 {
3221 tem = x;
3222 x = y;
3223 y = tem;
3224 code = swap_condition (code);
3225 }
3226 }
3227
3228 if (code == LTU || code == GEU)
3229 {
3230 emit_insn (gen_rtx_SET (operands[0],
3231 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3232 gen_compare_reg_1 (code, x, y),
3233 const0_rtx)));
3234 return true;
3235 }
3236
3237 /* All the possibilities to use addx/subx-based sequences have been
3238 exhausted; try for a 3-instruction sequence using v9 conditional
3239 moves. */
3240 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3241 return true;
3242
3243 /* Nope, do branches. */
3244 return false;
3245 }
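
/* For reference, a sketch of the addx/subx idiom the SImode path
   above relies on, for r = (x == y):

     xor   %x, %y, %t      ! %t is 0 iff x == y
     subcc %g0, %t, %g0    ! carry set iff %t != 0
     subx  %g0, -1, %r     ! %r = 0 - (-1) - C = 1 - C

   This is only illustrative; the exact patterns live in sparc.md.  */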
3246
3247 /* Emit a conditional jump insn for the v9 architecture using comparison code
3248 CODE and jump target LABEL.
3249 This function exists to take advantage of the v9 brxx insns. */
3250
3251 static void
3252 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3253 {
3254 emit_jump_insn (gen_rtx_SET (pc_rtx,
3255 gen_rtx_IF_THEN_ELSE (VOIDmode,
3256 gen_rtx_fmt_ee (code, GET_MODE (op0),
3257 op0, const0_rtx),
3258 gen_rtx_LABEL_REF (VOIDmode, label),
3259 pc_rtx)));
3260 }
3261
3262 /* Emit a conditional jump insn for the UA2011 architecture using
3263 comparison code CODE and jump target LABEL. This function exists
3264 to take advantage of the UA2011 Compare and Branch insns. */
3265
3266 static void
3267 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3268 {
3269 rtx if_then_else;
3270
3271 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3272 gen_rtx_fmt_ee (code, GET_MODE (op0),
3273 op0, op1),
3274 gen_rtx_LABEL_REF (VOIDmode, label),
3275 pc_rtx);
3276
3277 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3278 }
3279
3280 void
3281 emit_conditional_branch_insn (rtx operands[])
3282 {
3283 /* The quad-word fp compare library routines all return nonzero to indicate
3284 true, which is different from the equivalent libgcc routines, so we must
3285 handle them specially here. */
3286 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3287 {
3288 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3289 GET_CODE (operands[0]));
3290 operands[1] = XEXP (operands[0], 0);
3291 operands[2] = XEXP (operands[0], 1);
3292 }
3293
3294 /* If we can tell early on that the comparison is against a constant
3295 that won't fit in the 5-bit signed immediate field of a cbcond,
3296 use one of the other v9 conditional branch sequences. */
3297 if (TARGET_CBCOND
3298 && GET_CODE (operands[1]) == REG
3299 && (GET_MODE (operands[1]) == SImode
3300 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3301 && (GET_CODE (operands[2]) != CONST_INT
3302 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3303 {
3304 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3305 return;
3306 }
3307
3308 if (TARGET_ARCH64 && operands[2] == const0_rtx
3309 && GET_CODE (operands[1]) == REG
3310 && GET_MODE (operands[1]) == DImode)
3311 {
3312 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3313 return;
3314 }
3315
3316 operands[1] = gen_compare_reg (operands[0]);
3317 operands[2] = const0_rtx;
3318 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3319 operands[1], operands[2]);
3320 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3321 operands[3]));
3322 }
3323
3324
3325 /* Generate a DFmode part of a hard TFmode register.
3326 REG is the TFmode hard register, LOW is 1 for the
3327 low 64 bits of the register and 0 otherwise.
3328 */
3329 rtx
3330 gen_df_reg (rtx reg, int low)
3331 {
3332 int regno = REGNO (reg);
3333
3334 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3335 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3336 return gen_rtx_REG (DFmode, regno);
3337 }
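
/* E.g. on this big-endian target, for the TFmode register %f4 the
   high-order DFmode part is %f4 itself and the low-order part is %f6,
   an offset of 2 in the floating-point register numbering.  */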
3338 \f
3339 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3340 Unlike normal calls, TFmode operands are passed by reference. It is
3341 assumed that no more than 3 operands are required. */
3342
3343 static void
3344 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3345 {
3346 rtx ret_slot = NULL, arg[3], func_sym;
3347 int i;
3348
3349 /* We only expect to be called for conversions, unary, and binary ops. */
3350 gcc_assert (nargs == 2 || nargs == 3);
3351
3352 for (i = 0; i < nargs; ++i)
3353 {
3354 rtx this_arg = operands[i];
3355 rtx this_slot;
3356
3357 /* TFmode arguments and return values are passed by reference. */
3358 if (GET_MODE (this_arg) == TFmode)
3359 {
3360 int force_stack_temp;
3361
3362 force_stack_temp = 0;
3363 if (TARGET_BUGGY_QP_LIB && i == 0)
3364 force_stack_temp = 1;
3365
3366 if (GET_CODE (this_arg) == MEM
3367 && ! force_stack_temp)
3368 {
3369 tree expr = MEM_EXPR (this_arg);
3370 if (expr)
3371 mark_addressable (expr);
3372 this_arg = XEXP (this_arg, 0);
3373 }
3374 else if (CONSTANT_P (this_arg)
3375 && ! force_stack_temp)
3376 {
3377 this_slot = force_const_mem (TFmode, this_arg);
3378 this_arg = XEXP (this_slot, 0);
3379 }
3380 else
3381 {
3382 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3383
3384 /* Operand 0 is the return value. We'll copy it out later. */
3385 if (i > 0)
3386 emit_move_insn (this_slot, this_arg);
3387 else
3388 ret_slot = this_slot;
3389
3390 this_arg = XEXP (this_slot, 0);
3391 }
3392 }
3393
3394 arg[i] = this_arg;
3395 }
3396
3397 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3398
3399 if (GET_MODE (operands[0]) == TFmode)
3400 {
3401 if (nargs == 2)
3402 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3403 arg[0], GET_MODE (arg[0]),
3404 arg[1], GET_MODE (arg[1]));
3405 else
3406 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3407 arg[0], GET_MODE (arg[0]),
3408 arg[1], GET_MODE (arg[1]),
3409 arg[2], GET_MODE (arg[2]));
3410
3411 if (ret_slot)
3412 emit_move_insn (operands[0], ret_slot);
3413 }
3414 else
3415 {
3416 rtx ret;
3417
3418 gcc_assert (nargs == 2);
3419
3420 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3421 GET_MODE (operands[0]), 1,
3422 arg[1], GET_MODE (arg[1]));
3423
3424 if (ret != operands[0])
3425 emit_move_insn (operands[0], ret);
3426 }
3427 }
3428
3429 /* Expand soft-float TFmode calls to sparc abi routines. */
3430
3431 static void
3432 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3433 {
3434 const char *func;
3435
3436 switch (code)
3437 {
3438 case PLUS:
3439 func = "_Qp_add";
3440 break;
3441 case MINUS:
3442 func = "_Qp_sub";
3443 break;
3444 case MULT:
3445 func = "_Qp_mul";
3446 break;
3447 case DIV:
3448 func = "_Qp_div";
3449 break;
3450 default:
3451 gcc_unreachable ();
3452 }
3453
3454 emit_soft_tfmode_libcall (func, 3, operands);
3455 }
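
/* So a TFmode addition c = a + b compiled without hard quad support
   ends up, in effect, as the libcall _Qp_add (&c, &a, &b), with all
   three TFmode values passed by reference as sketched in
   emit_soft_tfmode_libcall above.  */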
3456
3457 static void
3458 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3459 {
3460 const char *func;
3461
3462 gcc_assert (code == SQRT);
3463 func = "_Qp_sqrt";
3464
3465 emit_soft_tfmode_libcall (func, 2, operands);
3466 }
3467
3468 static void
3469 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3470 {
3471 const char *func;
3472
3473 switch (code)
3474 {
3475 case FLOAT_EXTEND:
3476 switch (GET_MODE (operands[1]))
3477 {
3478 case SFmode:
3479 func = "_Qp_stoq";
3480 break;
3481 case DFmode:
3482 func = "_Qp_dtoq";
3483 break;
3484 default:
3485 gcc_unreachable ();
3486 }
3487 break;
3488
3489 case FLOAT_TRUNCATE:
3490 switch (GET_MODE (operands[0]))
3491 {
3492 case SFmode:
3493 func = "_Qp_qtos";
3494 break;
3495 case DFmode:
3496 func = "_Qp_qtod";
3497 break;
3498 default:
3499 gcc_unreachable ();
3500 }
3501 break;
3502
3503 case FLOAT:
3504 switch (GET_MODE (operands[1]))
3505 {
3506 case SImode:
3507 func = "_Qp_itoq";
3508 if (TARGET_ARCH64)
3509 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3510 break;
3511 case DImode:
3512 func = "_Qp_xtoq";
3513 break;
3514 default:
3515 gcc_unreachable ();
3516 }
3517 break;
3518
3519 case UNSIGNED_FLOAT:
3520 switch (GET_MODE (operands[1]))
3521 {
3522 case SImode:
3523 func = "_Qp_uitoq";
3524 if (TARGET_ARCH64)
3525 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3526 break;
3527 case DImode:
3528 func = "_Qp_uxtoq";
3529 break;
3530 default:
3531 gcc_unreachable ();
3532 }
3533 break;
3534
3535 case FIX:
3536 switch (GET_MODE (operands[0]))
3537 {
3538 case SImode:
3539 func = "_Qp_qtoi";
3540 break;
3541 case DImode:
3542 func = "_Qp_qtox";
3543 break;
3544 default:
3545 gcc_unreachable ();
3546 }
3547 break;
3548
3549 case UNSIGNED_FIX:
3550 switch (GET_MODE (operands[0]))
3551 {
3552 case SImode:
3553 func = "_Qp_qtoui";
3554 break;
3555 case DImode:
3556 func = "_Qp_qtoux";
3557 break;
3558 default:
3559 gcc_unreachable ();
3560 }
3561 break;
3562
3563 default:
3564 gcc_unreachable ();
3565 }
3566
3567 emit_soft_tfmode_libcall (func, 2, operands);
3568 }
3569
3570 /* Expand a hard-float tfmode operation. All arguments must be in
3571 registers. */
3572
3573 static void
3574 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3575 {
3576 rtx op, dest;
3577
3578 if (GET_RTX_CLASS (code) == RTX_UNARY)
3579 {
3580 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3581 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3582 }
3583 else
3584 {
3585 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3586 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3587 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3588 operands[1], operands[2]);
3589 }
3590
3591 if (register_operand (operands[0], VOIDmode))
3592 dest = operands[0];
3593 else
3594 dest = gen_reg_rtx (GET_MODE (operands[0]));
3595
3596 emit_insn (gen_rtx_SET (dest, op));
3597
3598 if (dest != operands[0])
3599 emit_move_insn (operands[0], dest);
3600 }
3601
3602 void
3603 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3604 {
3605 if (TARGET_HARD_QUAD)
3606 emit_hard_tfmode_operation (code, operands);
3607 else
3608 emit_soft_tfmode_binop (code, operands);
3609 }
3610
3611 void
3612 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3613 {
3614 if (TARGET_HARD_QUAD)
3615 emit_hard_tfmode_operation (code, operands);
3616 else
3617 emit_soft_tfmode_unop (code, operands);
3618 }
3619
3620 void
3621 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3622 {
3623 if (TARGET_HARD_QUAD)
3624 emit_hard_tfmode_operation (code, operands);
3625 else
3626 emit_soft_tfmode_cvt (code, operands);
3627 }
3628 \f
3629 /* Return nonzero if a branch/jump/call instruction will have a nop
3630 emitted into its delay slot. */
3631
3632 int
3633 empty_delay_slot (rtx_insn *insn)
3634 {
3635 rtx seq;
3636
3637 /* If there is no previous instruction (should not happen), return true. */
3638 if (PREV_INSN (insn) == NULL)
3639 return 1;
3640
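/* After delayed-branch scheduling, an insn whose delay slot was filled is
wrapped in a SEQUENCE rtx together with its delay insn. In that case,
NEXT_INSN (PREV_INSN (insn)) yields the enclosing SEQUENCE rather than
INSN itself, which is what the test below relies on. */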
3641 seq = NEXT_INSN (PREV_INSN (insn));
3642 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3643 return 0;
3644
3645 return 1;
3646 }
3647
3648 /* Return nonzero if we should emit a nop after a cbcond instruction.
3649 The cbcond instruction does not have a delay slot; however, there is
3650 a severe performance penalty if a control transfer appears right
3651 after a cbcond. Therefore we emit a nop when we detect this
3652 situation. */
3653
3654 int
3655 emit_cbcond_nop (rtx_insn *insn)
3656 {
3657 rtx next = next_active_insn (insn);
3658
3659 if (!next)
3660 return 1;
3661
3662 if (NONJUMP_INSN_P (next)
3663 && GET_CODE (PATTERN (next)) == SEQUENCE)
3664 next = XVECEXP (PATTERN (next), 0, 0);
3665 else if (CALL_P (next)
3666 && GET_CODE (PATTERN (next)) == PARALLEL)
3667 {
3668 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3669
3670 if (GET_CODE (delay) == RETURN)
3671 {
3672 /* It's a sibling call. Do not emit the nop if we're going
3673 to emit something other than the jump itself as the first
3674 instruction of the sibcall sequence. */
3675 if (sparc_leaf_function_p || TARGET_FLAT)
3676 return 0;
3677 }
3678 }
3679
3680 if (NONJUMP_INSN_P (next))
3681 return 0;
3682
3683 return 1;
3684 }
3685
3686 /* Return nonzero if TRIAL can go into the call delay slot. */
3687
3688 int
3689 eligible_for_call_delay (rtx_insn *trial)
3690 {
3691 rtx pat;
3692
3693 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3694 return 0;
3695
3696 /* Binutils allows
3697 call __tls_get_addr, %tgd_call (foo)
3698 add %l7, %o0, %o0, %tgd_add (foo)
3699 while Sun as/ld does not. */
3700 if (TARGET_GNU_TLS || !TARGET_TLS)
3701 return 1;
3702
3703 pat = PATTERN (trial);
3704
3705 /* We must reject tgd_add{32|64}, i.e.
3706 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3707 and tldm_add{32|64}, i.e.
3708 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3709 for Sun as/ld. */
3710 if (GET_CODE (pat) == SET
3711 && GET_CODE (SET_SRC (pat)) == PLUS)
3712 {
3713 rtx unspec = XEXP (SET_SRC (pat), 1);
3714
3715 if (GET_CODE (unspec) == UNSPEC
3716 && (XINT (unspec, 1) == UNSPEC_TLSGD
3717 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3718 return 0;
3719 }
3720
3721 return 1;
3722 }
3723
3724 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3725 instruction. RETURN_P is true if the v9 variant 'return' is to be
3726 considered in the test too.
3727
3728 TRIAL must be a SET whose destination is a REG appropriate for the
3729 'restore' instruction or, if RETURN_P is true, for the 'return'
3730 instruction. */
3731
3732 static int
3733 eligible_for_restore_insn (rtx trial, bool return_p)
3734 {
3735 rtx pat = PATTERN (trial);
3736 rtx src = SET_SRC (pat);
3737 bool src_is_freg = false;
3738 rtx src_reg;
3739
3740 /* Since we can now do moves between float and integer registers when
3741 VIS3 is enabled, we have to catch this case. We can allow such
3742 moves when doing a 'return', however. */
3743 src_reg = src;
3744 if (GET_CODE (src_reg) == SUBREG)
3745 src_reg = SUBREG_REG (src_reg);
3746 if (GET_CODE (src_reg) == REG
3747 && SPARC_FP_REG_P (REGNO (src_reg)))
3748 src_is_freg = true;
3749
3750 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3751 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3752 && arith_operand (src, GET_MODE (src))
3753 && ! src_is_freg)
3754 {
3755 if (TARGET_ARCH64)
3756 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3757 else
3758 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3759 }
3760
3761 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3762 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3763 && arith_double_operand (src, GET_MODE (src))
3764 && ! src_is_freg)
3765 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3766
3767 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3768 else if (! TARGET_FPU && register_operand (src, SFmode))
3769 return 1;
3770
3771 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3772 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3773 return 1;
3774
3775 /* If we have the 'return' instruction, anything that does not use
3776 local or output registers and can go into a delay slot wins. */
3777 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3778 return 1;
3779
3780 /* The 'restore src1,src2,dest' pattern for SImode. */
3781 else if (GET_CODE (src) == PLUS
3782 && register_operand (XEXP (src, 0), SImode)
3783 && arith_operand (XEXP (src, 1), SImode))
3784 return 1;
3785
3786 /* The 'restore src1,src2,dest' pattern for DImode. */
3787 else if (GET_CODE (src) == PLUS
3788 && register_operand (XEXP (src, 0), DImode)
3789 && arith_double_operand (XEXP (src, 1), DImode))
3790 return 1;
3791
3792 /* The 'restore src1,%lo(src2),dest' pattern. */
3793 else if (GET_CODE (src) == LO_SUM
3794 && ! TARGET_CM_MEDMID
3795 && ((register_operand (XEXP (src, 0), SImode)
3796 && immediate_operand (XEXP (src, 1), SImode))
3797 || (TARGET_ARCH64
3798 && register_operand (XEXP (src, 0), DImode)
3799 && immediate_operand (XEXP (src, 1), DImode))))
3800 return 1;
3801
3802 /* The 'restore src,src,dest' pattern: a shift left by one implemented as src + src. */
3803 else if (GET_CODE (src) == ASHIFT
3804 && (register_operand (XEXP (src, 0), SImode)
3805 || register_operand (XEXP (src, 0), DImode))
3806 && XEXP (src, 1) == const1_rtx)
3807 return 1;
3808
3809 return 0;
3810 }
3811
3812 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3813
3814 int
3815 eligible_for_return_delay (rtx_insn *trial)
3816 {
3817 int regno;
3818 rtx pat;
3819
3820 /* If the function uses __builtin_eh_return, the eh_return machinery
3821 occupies the delay slot. */
3822 if (crtl->calls_eh_return)
3823 return 0;
3824
3825 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3826 return 0;
3827
3828 /* In the case of a leaf or flat function, anything can go into the slot. */
3829 if (sparc_leaf_function_p || TARGET_FLAT)
3830 return 1;
3831
3832 if (!NONJUMP_INSN_P (trial))
3833 return 0;
3834
3835 pat = PATTERN (trial);
3836 if (GET_CODE (pat) == PARALLEL)
3837 {
3838 int i;
3839
3840 if (! TARGET_V9)
3841 return 0;
3842 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3843 {
3844 rtx expr = XVECEXP (pat, 0, i);
3845 if (GET_CODE (expr) != SET)
3846 return 0;
3847 if (GET_CODE (SET_DEST (expr)) != REG)
3848 return 0;
3849 regno = REGNO (SET_DEST (expr));
3850 if (regno >= 8 && regno < 24)
3851 return 0;
3852 }
3853 return !epilogue_renumber (&pat, 1);
3854 }
3855
3856 if (GET_CODE (pat) != SET)
3857 return 0;
3858
3859 if (GET_CODE (SET_DEST (pat)) != REG)
3860 return 0;
3861
3862 regno = REGNO (SET_DEST (pat));
3863
3864 /* Otherwise, only operations which can be done in tandem with
3865 a `restore' or `return' insn can go into the delay slot. */
3866 if (regno >= 8 && regno < 24)
3867 return 0;
3868
3869 /* If this instruction sets up a floating-point register and we have a
3870 return instruction, it can probably go in. But 'restore' will not
3871 work with FP_REGS. */
3872 if (! SPARC_INT_REG_P (regno))
3873 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3874
3875 return eligible_for_restore_insn (trial, true);
3876 }
3877
3878 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3879
3880 int
3881 eligible_for_sibcall_delay (rtx_insn *trial)
3882 {
3883 rtx pat;
3884
3885 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3886 return 0;
3887
3888 if (!NONJUMP_INSN_P (trial))
3889 return 0;
3890
3891 pat = PATTERN (trial);
3892
3893 if (sparc_leaf_function_p || TARGET_FLAT)
3894 {
3895 /* If the tail call is done using the call instruction,
3896 we have to restore %o7 in the delay slot. */
3897 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3898 return 0;
3899
3900 /* %g1 is used to build the function address. */
3901 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3902 return 0;
3903
3904 return 1;
3905 }
3906
3907 if (GET_CODE (pat) != SET)
3908 return 0;
3909
3910 /* Otherwise, only operations which can be done in tandem with
3911 a `restore' insn can go into the delay slot. */
3912 if (GET_CODE (SET_DEST (pat)) != REG
3913 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3914 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3915 return 0;
3916
3917 /* If it mentions %o7, it can't go in, because the sibcall will clobber
3918 it in most cases. */
3919 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3920 return 0;
3921
3922 return eligible_for_restore_insn (trial, false);
3923 }
3924 \f
3925 /* Determine if it's legal to put X into the constant pool. This
3926 is not possible if X contains the address of a symbol that is
3927 not constant (TLS) or not known at final link time (PIC). */
3928
3929 static bool
3930 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3931 {
3932 switch (GET_CODE (x))
3933 {
3934 case CONST_INT:
3935 case CONST_WIDE_INT:
3936 case CONST_DOUBLE:
3937 case CONST_VECTOR:
3938 /* Accept all non-symbolic constants. */
3939 return false;
3940
3941 case LABEL_REF:
3942 /* Labels are OK iff we are non-PIC. */
3943 return flag_pic != 0;
3944
3945 case SYMBOL_REF:
3946 /* 'Naked' TLS symbol references are never OK;
3947 non-TLS symbols are OK iff we are non-PIC. */
3948 if (SYMBOL_REF_TLS_MODEL (x))
3949 return true;
3950 else
3951 return flag_pic != 0;
3952
3953 case CONST:
3954 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3955 case PLUS:
3956 case MINUS:
3957 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3958 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3959 case UNSPEC:
3960 return true;
3961 default:
3962 gcc_unreachable ();
3963 }
3964 }
3965 \f
3966 /* Global Offset Table support. */
3967 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3968 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3969
3970 /* Return the SYMBOL_REF for the Global Offset Table. */
3971
3972 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3973
3974 static rtx
3975 sparc_got (void)
3976 {
3977 if (!sparc_got_symbol)
3978 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3979
3980 return sparc_got_symbol;
3981 }
3982
3983 /* Ensure that we are not using patterns that are not OK with PIC. */
3984
3985 int
3986 check_pic (int i)
3987 {
3988 rtx op;
3989
3990 switch (flag_pic)
3991 {
3992 case 1:
3993 op = recog_data.operand[i];
3994 gcc_assert (GET_CODE (op) != SYMBOL_REF
3995 && (GET_CODE (op) != CONST
3996 || (GET_CODE (XEXP (op, 0)) == MINUS
3997 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3998 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3999 /* fallthrough */
4000 case 2:
4001 default:
4002 return 1;
4003 }
4004 }
4005
4006 /* Return true if X is an address which needs a temporary register when
4007 reloaded while generating PIC code. */
4008
4009 int
4010 pic_address_needs_scratch (rtx x)
4011 {
4012 /* An address that is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
4013 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4014 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4015 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4016 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4017 return 1;
4018
4019 return 0;
4020 }
4021
4022 /* Determine if a given RTX is a valid constant. We already know this
4023 satisfies CONSTANT_P. */
4024
4025 static bool
4026 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4027 {
4028 switch (GET_CODE (x))
4029 {
4030 case CONST:
4031 case SYMBOL_REF:
4032 if (sparc_tls_referenced_p (x))
4033 return false;
4034 break;
4035
4036 case CONST_DOUBLE:
4037 /* Floating-point constants are generally not OK.
4038 The only exceptions are 0.0 and all-ones in VIS. */
4039 if (TARGET_VIS
4040 && SCALAR_FLOAT_MODE_P (mode)
4041 && (const_zero_operand (x, mode)
4042 || const_all_ones_operand (x, mode)))
4043 return true;
4044
4045 return false;
4046
4047 case CONST_VECTOR:
4048 /* Vector constants are generally not OK.
4049 The only exceptions are 0 and -1 in VIS. */
4050 if (TARGET_VIS
4051 && (const_zero_operand (x, mode)
4052 || const_all_ones_operand (x, mode)))
4053 return true;
4054
4055 return false;
4056
4057 default:
4058 break;
4059 }
4060
4061 return true;
4062 }
4063
4064 /* Determine if a given RTX is a valid constant address. */
4065
4066 bool
4067 constant_address_p (rtx x)
4068 {
4069 switch (GET_CODE (x))
4070 {
4071 case LABEL_REF:
4072 case CONST_INT:
4073 case HIGH:
4074 return true;
4075
4076 case CONST:
4077 if (flag_pic && pic_address_needs_scratch (x))
4078 return false;
4079 return sparc_legitimate_constant_p (Pmode, x);
4080
4081 case SYMBOL_REF:
4082 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4083
4084 default:
4085 return false;
4086 }
4087 }
4088
4089 /* Nonzero if the constant value X is a legitimate general operand
4090 when generating PIC code. It is given that flag_pic is on and
4091 that X satisfies CONSTANT_P. */
4092
4093 bool
4094 legitimate_pic_operand_p (rtx x)
4095 {
4096 if (pic_address_needs_scratch (x))
4097 return false;
4098 if (sparc_tls_referenced_p (x))
4099 return false;
4100 return true;
4101 }
4102
4103 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4104 (CONST_INT_P (X) \
4105 && INTVAL (X) >= -0x1000 \
4106 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4107
4108 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4109 (CONST_INT_P (X) \
4110 && INTVAL (X) >= -0x1000 \
4111 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
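
/* Both macros check that the constant fits in the 13-bit signed immediate
field of a load/store, adjusted so that the last byte of the access is
still addressable: e.g. for an 8-byte DImode access, RTX_OK_FOR_OFFSET_P
accepts [-4096, 4088]. RTX_OK_FOR_OLO10_P is tighter because the %lo()
part of an OLO10 address can itself contribute up to 0x3ff, which leaves
0xc00 - size for the explicit offset (a sketch of the reasoning; the
macros above are authoritative). */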
4112
4113 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4114
4115 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4116 ordinarily. This changes a bit when generating PIC. */
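
/* As an illustrative summary (the code below is authoritative), the forms
accepted are roughly: a single register, reg + reg, reg + simm13, a
LO_SUM such as reg + %lo(sym), and, on 64-bit without the medmid code
model, a LO_SUM plus a small extra offset via OLO10 relocations, plus
the special cases for the PIC register. */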
4117
4118 static bool
4119 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4120 {
4121 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4122
4123 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4124 rs1 = addr;
4125 else if (GET_CODE (addr) == PLUS)
4126 {
4127 rs1 = XEXP (addr, 0);
4128 rs2 = XEXP (addr, 1);
4129
4130 /* Canonicalize: REG comes first; if there are no regs,
4131 LO_SUM comes first. */
4132 if (!REG_P (rs1)
4133 && GET_CODE (rs1) != SUBREG
4134 && (REG_P (rs2)
4135 || GET_CODE (rs2) == SUBREG
4136 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4137 {
4138 rs1 = XEXP (addr, 1);
4139 rs2 = XEXP (addr, 0);
4140 }
4141
4142 if ((flag_pic == 1
4143 && rs1 == pic_offset_table_rtx
4144 && !REG_P (rs2)
4145 && GET_CODE (rs2) != SUBREG
4146 && GET_CODE (rs2) != LO_SUM
4147 && GET_CODE (rs2) != MEM
4148 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4149 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4150 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4151 || ((REG_P (rs1)
4152 || GET_CODE (rs1) == SUBREG)
4153 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4154 {
4155 imm1 = rs2;
4156 rs2 = NULL;
4157 }
4158 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4159 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4160 {
4161 /* We prohibit REG + REG for TFmode when there are no quad move insns
4162 and we consequently need to split. We do this because REG+REG
4163 is not an offsettable address. If we get the situation in reload
4164 where source and destination of a movtf pattern are both MEMs with
4165 REG+REG address, then only one of them gets converted to an
4166 offsettable address. */
4167 if (mode == TFmode
4168 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4169 return 0;
4170
4171 /* Likewise for TImode, but in all cases. */
4172 if (mode == TImode)
4173 return 0;
4174
4175 /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
4176 optimizing, because mem_min_alignment is then likely to be zero
4177 after reload and the forced split would lack a matching splitter
4178 pattern. */
4179 if (TARGET_ARCH32 && !optimize
4180 && (mode == DFmode || mode == DImode))
4181 return 0;
4182 }
4183 else if (USE_AS_OFFSETABLE_LO10
4184 && GET_CODE (rs1) == LO_SUM
4185 && TARGET_ARCH64
4186 && ! TARGET_CM_MEDMID
4187 && RTX_OK_FOR_OLO10_P (rs2, mode))
4188 {
4189 rs2 = NULL;
4190 imm1 = XEXP (rs1, 1);
4191 rs1 = XEXP (rs1, 0);
4192 if (!CONSTANT_P (imm1)
4193 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4194 return 0;
4195 }
4196 }
4197 else if (GET_CODE (addr) == LO_SUM)
4198 {
4199 rs1 = XEXP (addr, 0);
4200 imm1 = XEXP (addr, 1);
4201
4202 if (!CONSTANT_P (imm1)
4203 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4204 return 0;
4205
4206 /* We can't allow TFmode in 32-bit mode, because an offset greater
4207 than the alignment (8) may cause the LO_SUM to overflow. */
4208 if (mode == TFmode && TARGET_ARCH32)
4209 return 0;
4210
4211 /* During reload, accept the HIGH+LO_SUM construct generated by
4212 sparc_legitimize_reload_address. */
4213 if (reload_in_progress
4214 && GET_CODE (rs1) == HIGH
4215 && XEXP (rs1, 0) == imm1)
4216 return 1;
4217 }
4218 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4219 return 1;
4220 else
4221 return 0;
4222
4223 if (GET_CODE (rs1) == SUBREG)
4224 rs1 = SUBREG_REG (rs1);
4225 if (!REG_P (rs1))
4226 return 0;
4227
4228 if (rs2)
4229 {
4230 if (GET_CODE (rs2) == SUBREG)
4231 rs2 = SUBREG_REG (rs2);
4232 if (!REG_P (rs2))
4233 return 0;
4234 }
4235
4236 if (strict)
4237 {
4238 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4239 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4240 return 0;
4241 }
4242 else
4243 {
4244 if ((! SPARC_INT_REG_P (REGNO (rs1))
4245 && REGNO (rs1) != FRAME_POINTER_REGNUM
4246 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4247 || (rs2
4248 && (! SPARC_INT_REG_P (REGNO (rs2))
4249 && REGNO (rs2) != FRAME_POINTER_REGNUM
4250 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4251 return 0;
4252 }
4253 return 1;
4254 }
4255
4256 /* Return the SYMBOL_REF for the tls_get_addr function. */
4257
4258 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4259
4260 static rtx
4261 sparc_tls_get_addr (void)
4262 {
4263 if (!sparc_tls_symbol)
4264 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4265
4266 return sparc_tls_symbol;
4267 }
4268
4269 /* Return the Global Offset Table to be used in TLS mode. */
4270
4271 static rtx
4272 sparc_tls_got (void)
4273 {
4274 /* In PIC mode, this is just the PIC offset table. */
4275 if (flag_pic)
4276 {
4277 crtl->uses_pic_offset_table = 1;
4278 return pic_offset_table_rtx;
4279 }
4280
4281 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4282 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4283 if (TARGET_SUN_TLS && TARGET_ARCH32)
4284 {
4285 load_got_register ();
4286 return global_offset_table_rtx;
4287 }
4288
4289 /* In all other cases, we load a new pseudo with the GOT symbol. */
4290 return copy_to_reg (sparc_got ());
4291 }
4292
4293 /* Return true if X contains a thread-local symbol. */
4294
4295 static bool
4296 sparc_tls_referenced_p (rtx x)
4297 {
4298 if (!TARGET_HAVE_TLS)
4299 return false;
4300
4301 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4302 x = XEXP (XEXP (x, 0), 0);
4303
4304 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4305 return true;
4306
4307 /* That's all we handle in sparc_legitimize_tls_address for now. */
4308 return false;
4309 }
4310
4311 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4312 this (thread-local) address. */
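
/* As an illustration (a sketch only; the expanders used below define the
exact insns), the 32-bit global-dynamic case roughly expands to:

sethi %tgd_hi22(sym), %o1
add %o1, %tgd_lo10(sym), %o1
add %l7, %o1, %o0, %tgd_add(sym)
call __tls_get_addr, %tgd_call(sym)
nop ! delay slot

with the resulting address returned in %o0. */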
4313
4314 static rtx
4315 sparc_legitimize_tls_address (rtx addr)
4316 {
4317 rtx temp1, temp2, temp3, ret, o0, got;
4318 rtx_insn *insn;
4319
4320 gcc_assert (can_create_pseudo_p ());
4321
4322 if (GET_CODE (addr) == SYMBOL_REF)
4323 switch (SYMBOL_REF_TLS_MODEL (addr))
4324 {
4325 case TLS_MODEL_GLOBAL_DYNAMIC:
4326 start_sequence ();
4327 temp1 = gen_reg_rtx (SImode);
4328 temp2 = gen_reg_rtx (SImode);
4329 ret = gen_reg_rtx (Pmode);
4330 o0 = gen_rtx_REG (Pmode, 8);
4331 got = sparc_tls_got ();
4332 emit_insn (gen_tgd_hi22 (temp1, addr));
4333 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4334 if (TARGET_ARCH32)
4335 {
4336 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4337 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4338 addr, const1_rtx));
4339 }
4340 else
4341 {
4342 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4343 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4344 addr, const1_rtx));
4345 }
4346 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4347 insn = get_insns ();
4348 end_sequence ();
4349 emit_libcall_block (insn, ret, o0, addr);
4350 break;
4351
4352 case TLS_MODEL_LOCAL_DYNAMIC:
4353 start_sequence ();
4354 temp1 = gen_reg_rtx (SImode);
4355 temp2 = gen_reg_rtx (SImode);
4356 temp3 = gen_reg_rtx (Pmode);
4357 ret = gen_reg_rtx (Pmode);
4358 o0 = gen_rtx_REG (Pmode, 8);
4359 got = sparc_tls_got ();
4360 emit_insn (gen_tldm_hi22 (temp1));
4361 emit_insn (gen_tldm_lo10 (temp2, temp1));
4362 if (TARGET_ARCH32)
4363 {
4364 emit_insn (gen_tldm_add32 (o0, got, temp2));
4365 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4366 const1_rtx));
4367 }
4368 else
4369 {
4370 emit_insn (gen_tldm_add64 (o0, got, temp2));
4371 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4372 const1_rtx));
4373 }
4374 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4375 insn = get_insns ();
4376 end_sequence ();
4377 emit_libcall_block (insn, temp3, o0,
4378 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4379 UNSPEC_TLSLD_BASE));
4380 temp1 = gen_reg_rtx (SImode);
4381 temp2 = gen_reg_rtx (SImode);
4382 emit_insn (gen_tldo_hix22 (temp1, addr));
4383 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4384 if (TARGET_ARCH32)
4385 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4386 else
4387 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4388 break;
4389
4390 case TLS_MODEL_INITIAL_EXEC:
4391 temp1 = gen_reg_rtx (SImode);
4392 temp2 = gen_reg_rtx (SImode);
4393 temp3 = gen_reg_rtx (Pmode);
4394 got = sparc_tls_got ();
4395 emit_insn (gen_tie_hi22 (temp1, addr));
4396 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4397 if (TARGET_ARCH32)
4398 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4399 else
4400 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4401 if (TARGET_SUN_TLS)
4402 {
4403 ret = gen_reg_rtx (Pmode);
4404 if (TARGET_ARCH32)
4405 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4406 temp3, addr));
4407 else
4408 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4409 temp3, addr));
4410 }
4411 else
4412 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4413 break;
4414
4415 case TLS_MODEL_LOCAL_EXEC:
4416 temp1 = gen_reg_rtx (Pmode);
4417 temp2 = gen_reg_rtx (Pmode);
4418 if (TARGET_ARCH32)
4419 {
4420 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4421 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4422 }
4423 else
4424 {
4425 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4426 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4427 }
4428 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4429 break;
4430
4431 default:
4432 gcc_unreachable ();
4433 }
4434
4435 else if (GET_CODE (addr) == CONST)
4436 {
4437 rtx base, offset;
4438
4439 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4440
4441 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4442 offset = XEXP (XEXP (addr, 0), 1);
4443
4444 base = force_operand (base, NULL_RTX);
4445 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4446 offset = force_reg (Pmode, offset);
4447 ret = gen_rtx_PLUS (Pmode, base, offset);
4448 }
4449
4450 else
4451 gcc_unreachable (); /* for now ... */
4452
4453 return ret;
4454 }
4455
4456 /* Legitimize PIC addresses. If the address is already position-independent,
4457 we return ORIG. Newly generated position-independent addresses go into a
4458 reg. This is REG if nonzero, otherwise we allocate register(s) as
4459 necessary. */
4460
4461 static rtx
4462 sparc_legitimize_pic_address (rtx orig, rtx reg)
4463 {
4464 bool gotdata_op = false;
4465
4466 if (GET_CODE (orig) == SYMBOL_REF
4467 /* See the comment in sparc_expand_move. */
4468 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4469 {
4470 rtx pic_ref, address;
4471 rtx_insn *insn;
4472
4473 if (reg == 0)
4474 {
4475 gcc_assert (can_create_pseudo_p ());
4476 reg = gen_reg_rtx (Pmode);
4477 }
4478
4479 if (flag_pic == 2)
4480 {
4481 /* If not during reload, allocate another temp reg here for loading
4482 in the address, so that these instructions can be optimized
4483 properly. */
4484 rtx temp_reg = (! can_create_pseudo_p ()
4485 ? reg : gen_reg_rtx (Pmode));
4486
4487 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4488 won't get confused into thinking that these two instructions
4489 are loading in the true address of the symbol. If in the
4490 future a PIC rtx exists, that should be used instead. */
4491 if (TARGET_ARCH64)
4492 {
4493 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4494 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4495 }
4496 else
4497 {
4498 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4499 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4500 }
4501 address = temp_reg;
4502 gotdata_op = true;
4503 }
4504 else
4505 address = orig;
4506
4507 crtl->uses_pic_offset_table = 1;
4508 if (gotdata_op)
4509 {
4510 if (TARGET_ARCH64)
4511 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4512 pic_offset_table_rtx,
4513 address, orig));
4514 else
4515 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4516 pic_offset_table_rtx,
4517 address, orig));
4518 }
4519 else
4520 {
4521 pic_ref
4522 = gen_const_mem (Pmode,
4523 gen_rtx_PLUS (Pmode,
4524 pic_offset_table_rtx, address));
4525 insn = emit_move_insn (reg, pic_ref);
4526 }
4527
4528 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4529 by the loop optimizer. */
4530 set_unique_reg_note (insn, REG_EQUAL, orig);
4531 return reg;
4532 }
4533 else if (GET_CODE (orig) == CONST)
4534 {
4535 rtx base, offset;
4536
4537 if (GET_CODE (XEXP (orig, 0)) == PLUS
4538 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4539 return orig;
4540
4541 if (reg == 0)
4542 {
4543 gcc_assert (can_create_pseudo_p ());
4544 reg = gen_reg_rtx (Pmode);
4545 }
4546
4547 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4548 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4549 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4550 base == reg ? NULL_RTX : reg);
4551
4552 if (GET_CODE (offset) == CONST_INT)
4553 {
4554 if (SMALL_INT (offset))
4555 return plus_constant (Pmode, base, INTVAL (offset));
4556 else if (can_create_pseudo_p ())
4557 offset = force_reg (Pmode, offset);
4558 else
4559 /* If we reach here, then something is seriously wrong. */
4560 gcc_unreachable ();
4561 }
4562 return gen_rtx_PLUS (Pmode, base, offset);
4563 }
4564 else if (GET_CODE (orig) == LABEL_REF)
4565 /* ??? We ought to be checking that the register is live instead, in case
4566 it is eliminated. */
4567 crtl->uses_pic_offset_table = 1;
4568
4569 return orig;
4570 }
4571
4572 /* Try machine-dependent ways of modifying an illegitimate address X
4573 to be legitimate. If we find one, return the new, valid address.
4574
4575 OLDX is the address as it was before break_out_memory_refs was called.
4576 In some cases it is useful to look at this to decide what needs to be done.
4577
4578 MODE is the mode of the operand pointed to by X.
4579
4580 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4581
4582 static rtx
4583 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4584 machine_mode mode)
4585 {
4586 rtx orig_x = x;
4587
4588 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4589 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4590 force_operand (XEXP (x, 0), NULL_RTX));
4591 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4592 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4593 force_operand (XEXP (x, 1), NULL_RTX));
4594 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4595 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4596 XEXP (x, 1));
4597 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4598 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4599 force_operand (XEXP (x, 1), NULL_RTX));
4600
4601 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4602 return x;
4603
4604 if (sparc_tls_referenced_p (x))
4605 x = sparc_legitimize_tls_address (x);
4606 else if (flag_pic)
4607 x = sparc_legitimize_pic_address (x, NULL_RTX);
4608 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4609 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4610 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4611 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4612 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4613 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4614 else if (GET_CODE (x) == SYMBOL_REF
4615 || GET_CODE (x) == CONST
4616 || GET_CODE (x) == LABEL_REF)
4617 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4618
4619 return x;
4620 }
4621
4622 /* Delegitimize an address that was legitimized by the above function. */
4623
4624 static rtx
4625 sparc_delegitimize_address (rtx x)
4626 {
4627 x = delegitimize_mem_from_attrs (x);
4628
4629 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4630 switch (XINT (XEXP (x, 1), 1))
4631 {
4632 case UNSPEC_MOVE_PIC:
4633 case UNSPEC_TLSLE:
4634 x = XVECEXP (XEXP (x, 1), 0, 0);
4635 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4636 break;
4637 default:
4638 break;
4639 }
4640
4641 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4642 if (GET_CODE (x) == MINUS
4643 && REG_P (XEXP (x, 0))
4644 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4645 && GET_CODE (XEXP (x, 1)) == LO_SUM
4646 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4647 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4648 {
4649 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4650 gcc_assert (GET_CODE (x) == LABEL_REF);
4651 }
4652
4653 return x;
4654 }
4655
4656 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4657 replace the input X, or the original X if no replacement is called for.
4658 The output parameter *WIN is 1 if the calling macro should goto WIN,
4659 0 if it should not.
4660
4661 For SPARC, we wish to handle addresses by splitting them into
4662 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4663 This cuts the number of extra insns by one.
4664
4665 Do nothing when generating PIC code and the address is a symbolic
4666 operand or requires a scratch register. */
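
/* For example, a load from a SImode constant address "sym" can be
reloaded as (a sketch, assuming a non-PIC symbol):

sethi %hi(sym), %g1 ! reload of the HIGH part
ld [%g1+%lo(sym)], %o0 ! the LO_SUM stays in the memory reference

instead of needing a separate add to materialize the full address. */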
4667
4668 rtx
4669 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4670 int opnum, int type,
4671 int ind_levels ATTRIBUTE_UNUSED, int *win)
4672 {
4673 /* Decompose SImode constants into HIGH+LO_SUM. */
4674 if (CONSTANT_P (x)
4675 && (mode != TFmode || TARGET_ARCH64)
4676 && GET_MODE (x) == SImode
4677 && GET_CODE (x) != LO_SUM
4678 && GET_CODE (x) != HIGH
4679 && sparc_cmodel <= CM_MEDLOW
4680 && !(flag_pic
4681 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4682 {
4683 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4684 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4685 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4686 opnum, (enum reload_type)type);
4687 *win = 1;
4688 return x;
4689 }
4690
4691 /* We have to recognize what we have already generated above. */
4692 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4693 {
4694 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4695 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4696 opnum, (enum reload_type)type);
4697 *win = 1;
4698 return x;
4699 }
4700
4701 *win = 0;
4702 return x;
4703 }
4704
4705 /* Return true if ADDR (a legitimate address expression)
4706 has an effect that depends on the machine mode it is used for.
4707
4708 In PIC mode,
4709
4710 (mem:HI [%l7+a])
4711
4712 is not equivalent to
4713
4714 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4715
4716 because [%l7+a+1] is interpreted as the address of (a+1). */
4717
4718
4719 static bool
4720 sparc_mode_dependent_address_p (const_rtx addr,
4721 addr_space_t as ATTRIBUTE_UNUSED)
4722 {
4723 if (flag_pic && GET_CODE (addr) == PLUS)
4724 {
4725 rtx op0 = XEXP (addr, 0);
4726 rtx op1 = XEXP (addr, 1);
4727 if (op0 == pic_offset_table_rtx
4728 && symbolic_operand (op1, VOIDmode))
4729 return true;
4730 }
4731
4732 return false;
4733 }
4734
4735 #ifdef HAVE_GAS_HIDDEN
4736 # define USE_HIDDEN_LINKONCE 1
4737 #else
4738 # define USE_HIDDEN_LINKONCE 0
4739 #endif
4740
4741 static void
4742 get_pc_thunk_name (char name[32], unsigned int regno)
4743 {
4744 const char *reg_name = reg_names[regno];
4745
4746 /* Skip the leading '%' as that cannot be used in a
4747 symbol name. */
4748 reg_name += 1;
4749
4750 if (USE_HIDDEN_LINKONCE)
4751 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4752 else
4753 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4754 }
4755
4756 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4757
4758 static rtx
4759 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4760 {
4761 int orig_flag_pic = flag_pic;
4762 rtx insn;
4763
4764 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4765 flag_pic = 0;
4766 if (TARGET_ARCH64)
4767 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4768 else
4769 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4770 flag_pic = orig_flag_pic;
4771
4772 return insn;
4773 }
4774
4775 /* Emit code to load the GOT register. */
4776
4777 void
4778 load_got_register (void)
4779 {
4780 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4781 if (!global_offset_table_rtx)
4782 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4783
4784 if (TARGET_VXWORKS_RTP)
4785 emit_insn (gen_vxworks_load_got ());
4786 else
4787 {
4788 /* The GOT symbol is subject to a PC-relative relocation so we need a
4789 helper function to add the PC value and thus get the final value. */
4790 if (!got_helper_rtx)
4791 {
4792 char name[32];
4793 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4794 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4795 }
4796
4797 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4798 got_helper_rtx,
4799 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4800 }
4801
4802 /* We need to emit this use whether or not we obey regdecls, since
4803 setjmp/longjmp can cause the liveness info to be wrong.
4804 ??? In the case where we don't obey regdecls, this is not sufficient
4805 since we may not fall out of the bottom of the function. */
4806 emit_use (global_offset_table_rtx);
4807 }
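
/* For reference, on 32-bit ELF the sequence emitted above looks roughly
like (a sketch; the load_pcrel_sym patterns are authoritative):

sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
call __sparc_get_pc_thunk.l7
add %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

where the thunk adds the return address in %o7 into %l7. */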
4808
4809 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4810 address of the call target. */
4811
4812 void
4813 sparc_emit_call_insn (rtx pat, rtx addr)
4814 {
4815 rtx_insn *insn;
4816
4817 insn = emit_call_insn (pat);
4818
4819 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4820 if (TARGET_VXWORKS_RTP
4821 && flag_pic
4822 && GET_CODE (addr) == SYMBOL_REF
4823 && (SYMBOL_REF_DECL (addr)
4824 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4825 : !SYMBOL_REF_LOCAL_P (addr)))
4826 {
4827 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4828 crtl->uses_pic_offset_table = 1;
4829 }
4830 }
4831 \f
4832 /* Return 1 if RTX is a MEM which is known to be aligned to at
4833 least a DESIRED byte boundary. */
4834
4835 int
4836 mem_min_alignment (rtx mem, int desired)
4837 {
4838 rtx addr, base, offset;
4839
4840 /* If it's not a MEM we can't accept it. */
4841 if (GET_CODE (mem) != MEM)
4842 return 0;
4843
4844 /* Obviously... */
4845 if (!TARGET_UNALIGNED_DOUBLES
4846 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4847 return 1;
4848
4849 /* ??? The rest of the function predates MEM_ALIGN so
4850 there is probably a bit of redundancy. */
4851 addr = XEXP (mem, 0);
4852 base = offset = NULL_RTX;
4853 if (GET_CODE (addr) == PLUS)
4854 {
4855 if (GET_CODE (XEXP (addr, 0)) == REG)
4856 {
4857 base = XEXP (addr, 0);
4858
4859 /* What we are saying here is that if the base
4860 REG is aligned properly, the compiler will make
4861 sure any REG-based index upon it will be
4862 properly aligned as well. */
4863 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4864 offset = XEXP (addr, 1);
4865 else
4866 offset = const0_rtx;
4867 }
4868 }
4869 else if (GET_CODE (addr) == REG)
4870 {
4871 base = addr;
4872 offset = const0_rtx;
4873 }
4874
4875 if (base != NULL_RTX)
4876 {
4877 int regno = REGNO (base);
4878
4879 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4880 {
4881 /* Check if the compiler has recorded some information
4882 about the alignment of the base REG. If reload has
4883 completed, we already matched with proper alignments.
4884 If not running global_alloc, reload might give us an
4885 unaligned pointer to the local stack, though. */
4886 if (((cfun != 0
4887 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4888 || (optimize && reload_completed))
4889 && (INTVAL (offset) & (desired - 1)) == 0)
4890 return 1;
4891 }
4892 else
4893 {
4894 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4895 return 1;
4896 }
4897 }
4898 else if (! TARGET_UNALIGNED_DOUBLES
4899 || CONSTANT_P (addr)
4900 || GET_CODE (addr) == LO_SUM)
4901 {
4902 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4903 is true, in which case we can only assume that an access is aligned if
4904 it is to a constant address, or the address involves a LO_SUM. */
4905 return 1;
4906 }
4907
4908 /* An obviously unaligned address. */
4909 return 0;
4910 }
4911
4912 \f
4913 /* Vectors to keep interesting information about registers where it can
4914 easily be looked up. We used to use the actual mode value as the bit
4915 number, but there are more than 32 modes now. Instead we use two tables:
4916 one indexed by hard register number, and one indexed by mode. */
4917
4918 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4919 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4920 mapped into one sparc_mode_class mode. */
4921
4922 enum sparc_mode_class {
4923 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4924 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4925 CC_MODE, CCFP_MODE
4926 };
4927
4928 /* Modes for single-word and smaller quantities. */
4929 #define S_MODES \
4930 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4931
4932 /* Modes for double-word and smaller quantities. */
4933 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4934
4935 /* Modes for quad-word and smaller quantities. */
4936 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4937
4938 /* Modes for 8-word and smaller quantities. */
4939 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4940
4941 /* Modes for single-float quantities. */
4942 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4943
4944 /* Modes for double-float and smaller quantities. */
4945 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4946
4947 /* Modes for quad-float and smaller quantities. */
4948 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4949
4950 /* Modes for quad-float pairs and smaller quantities. */
4951 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4952
4953 /* Modes for double-float only quantities. */
4954 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4955
4956 /* Modes for quad-float and double-float only quantities. */
4957 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4958
4959 /* Modes for quad-float pairs and double-float only quantities. */
4960 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4961
4962 /* Modes for condition codes. */
4963 #define CC_MODES (1 << (int) CC_MODE)
4964 #define CCFP_MODES (1 << (int) CCFP_MODE)
4965
4966 /* Value is 1 if register/mode pair is acceptable on sparc.
4967
4968 The funny mixture of D and T modes is because integer operations
4969 do not specially operate on tetra quantities, so non-quad-aligned
4970 registers can hold quadword quantities (except %o4 and %i4 because
4971 they cross fixed registers).
4972
4973 ??? Note that, despite the settings, non-double-aligned parameter
4974 registers can hold double-word quantities in 32-bit mode. */
4975
4976 /* This points to either the 32-bit or the 64-bit version. */
4977 const int *hard_regno_mode_classes;
4978
4979 static const int hard_32bit_mode_classes[] = {
4980 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4981 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4982 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4983 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4984
4985 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4986 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4987 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4988 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4989
4990 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4991 and none can hold SFmode/SImode values. */
4992 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4993 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4994 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4995 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4996
4997 /* %fcc[0123] */
4998 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4999
5000 /* %icc, %sfp, %gsr */
5001 CC_MODES, 0, D_MODES
5002 };
5003
5004 static const int hard_64bit_mode_classes[] = {
5005 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5006 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5007 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5008 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5009
5010 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5011 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5012 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5013 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5014
5015 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5016 and none can hold SFmode/SImode values. */
5017 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5018 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5019 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5020 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5021
5022 /* %fcc[0123] */
5023 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5024
5025 /* %icc, %sfp, %gsr */
5026 CC_MODES, 0, D_MODES
5027 };
5028
5029 int sparc_mode_class [NUM_MACHINE_MODES];
5030
5031 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5032
5033 static void
5034 sparc_init_modes (void)
5035 {
5036 int i;
5037
5038 for (i = 0; i < NUM_MACHINE_MODES; i++)
5039 {
5040 machine_mode m = (machine_mode) i;
5041 unsigned int size = GET_MODE_SIZE (m);
5042
5043 switch (GET_MODE_CLASS (m))
5044 {
5045 case MODE_INT:
5046 case MODE_PARTIAL_INT:
5047 case MODE_COMPLEX_INT:
5048 if (size < 4)
5049 sparc_mode_class[i] = 1 << (int) H_MODE;
5050 else if (size == 4)
5051 sparc_mode_class[i] = 1 << (int) S_MODE;
5052 else if (size == 8)
5053 sparc_mode_class[i] = 1 << (int) D_MODE;
5054 else if (size == 16)
5055 sparc_mode_class[i] = 1 << (int) T_MODE;
5056 else if (size == 32)
5057 sparc_mode_class[i] = 1 << (int) O_MODE;
5058 else
5059 sparc_mode_class[i] = 0;
5060 break;
5061 case MODE_VECTOR_INT:
5062 if (size == 4)
5063 sparc_mode_class[i] = 1 << (int) SF_MODE;
5064 else if (size == 8)
5065 sparc_mode_class[i] = 1 << (int) DF_MODE;
5066 else
5067 sparc_mode_class[i] = 0;
5068 break;
5069 case MODE_FLOAT:
5070 case MODE_COMPLEX_FLOAT:
5071 if (size == 4)
5072 sparc_mode_class[i] = 1 << (int) SF_MODE;
5073 else if (size == 8)
5074 sparc_mode_class[i] = 1 << (int) DF_MODE;
5075 else if (size == 16)
5076 sparc_mode_class[i] = 1 << (int) TF_MODE;
5077 else if (size == 32)
5078 sparc_mode_class[i] = 1 << (int) OF_MODE;
5079 else
5080 sparc_mode_class[i] = 0;
5081 break;
5082 case MODE_CC:
5083 if (m == CCFPmode || m == CCFPEmode)
5084 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5085 else
5086 sparc_mode_class[i] = 1 << (int) CC_MODE;
5087 break;
5088 default:
5089 sparc_mode_class[i] = 0;
5090 break;
5091 }
5092 }
5093
5094 if (TARGET_ARCH64)
5095 hard_regno_mode_classes = hard_64bit_mode_classes;
5096 else
5097 hard_regno_mode_classes = hard_32bit_mode_classes;
5098
5099 /* Initialize the array used by REGNO_REG_CLASS. */
5100 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5101 {
5102 if (i < 16 && TARGET_V8PLUS)
5103 sparc_regno_reg_class[i] = I64_REGS;
5104 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5105 sparc_regno_reg_class[i] = GENERAL_REGS;
5106 else if (i < 64)
5107 sparc_regno_reg_class[i] = FP_REGS;
5108 else if (i < 96)
5109 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5110 else if (i < 100)
5111 sparc_regno_reg_class[i] = FPCC_REGS;
5112 else
5113 sparc_regno_reg_class[i] = NO_REGS;
5114 }
5115 }
5116 \f
5117 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5118
5119 static inline bool
5120 save_global_or_fp_reg_p (unsigned int regno,
5121 int leaf_function ATTRIBUTE_UNUSED)
5122 {
5123 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5124 }
5125
5126 /* Return whether the return address register (%i7) is needed. */
5127
5128 static inline bool
5129 return_addr_reg_needed_p (int leaf_function)
5130 {
5131 /* If it is live, for example because of __builtin_return_address (0). */
5132 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5133 return true;
5134
5135 /* Otherwise, it is needed as a save register if %o7 is clobbered. */
5136 if (!leaf_function
5137 /* Loading the GOT register clobbers %o7. */
5138 || crtl->uses_pic_offset_table
5139 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5140 return true;
5141
5142 return false;
5143 }
5144
5145 /* Return whether REGNO, a local or in register, must be saved/restored. */
5146
5147 static bool
5148 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5149 {
5150 /* General case: call-saved registers live at some point. */
5151 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5152 return true;
5153
5154 /* Frame pointer register (%fp) if needed. */
5155 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5156 return true;
5157
5158 /* Return address register (%i7) if needed. */
5159 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5160 return true;
5161
5162 /* GOT register (%l7) if needed. */
5163 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5164 return true;
5165
5166 /* If the function accesses prior frames, the frame pointer and the return
5167 address of the previous frame must be saved on the stack. */
5168 if (crtl->accesses_prior_frames
5169 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5170 return true;
5171
5172 return false;
5173 }
5174
5175 /* Compute the frame size required by the function. This function is called
5176 during the reload pass and also by sparc_expand_prologue. */
5177
5178 HOST_WIDE_INT
5179 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5180 {
5181 HOST_WIDE_INT frame_size, apparent_frame_size;
5182 int args_size, n_global_fp_regs = 0;
5183 bool save_local_in_regs_p = false;
5184 unsigned int i;
5185
5186 /* If the function allocates dynamic stack space, the dynamic offset is
5187 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5188 if (leaf_function && !cfun->calls_alloca)
5189 args_size = 0;
5190 else
5191 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5192
5193 /* Calculate space needed for global registers. */
5194 if (TARGET_ARCH64)
5195 {
5196 for (i = 0; i < 8; i++)
5197 if (save_global_or_fp_reg_p (i, 0))
5198 n_global_fp_regs += 2;
5199 }
5200 else
5201 {
5202 for (i = 0; i < 8; i += 2)
5203 if (save_global_or_fp_reg_p (i, 0)
5204 || save_global_or_fp_reg_p (i + 1, 0))
5205 n_global_fp_regs += 2;
5206 }
5207
5208 /* In the flat window model, find out which local and in registers need to
5209 be saved. We don't reserve space in the current frame for them as they
5210 will be spilled into the register window save area of the caller's frame.
5211 However, as soon as we use this register window save area, we must create
5212 that of the current frame to make it the live one. */
5213 if (TARGET_FLAT)
5214 for (i = 16; i < 32; i++)
5215 if (save_local_or_in_reg_p (i, leaf_function))
5216 {
5217 save_local_in_regs_p = true;
5218 break;
5219 }
5220
5221 /* Calculate space needed for FP registers. */
5222 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5223 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5224 n_global_fp_regs += 2;
5225
5226 if (size == 0
5227 && n_global_fp_regs == 0
5228 && args_size == 0
5229 && !save_local_in_regs_p)
5230 frame_size = apparent_frame_size = 0;
5231 else
5232 {
5233 /* We subtract STARTING_FRAME_OFFSET; remember that it's negative. */
5234 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5235 apparent_frame_size += n_global_fp_regs * 4;
5236
5237 /* We need to add the size of the outgoing argument area. */
5238 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5239
5240 /* And that of the register window save area. */
5241 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5242
5243 /* Finally, bump to the appropriate alignment. */
5244 frame_size = SPARC_STACK_ALIGN (frame_size);
5245 }
5246
5247 /* Set up values for use in prologue and epilogue. */
5248 sparc_frame_size = frame_size;
5249 sparc_apparent_frame_size = apparent_frame_size;
5250 sparc_n_global_fp_regs = n_global_fp_regs;
5251 sparc_save_local_in_regs_p = save_local_in_regs_p;
5252
5253 return frame_size;
5254 }
5255
5256 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5257
5258 int
5259 sparc_initial_elimination_offset (int to)
5260 {
5261 int offset;
5262
5263 if (to == STACK_POINTER_REGNUM)
5264 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5265 else
5266 offset = 0;
5267
5268 offset += SPARC_STACK_BIAS;
5269 return offset;
5270 }
5271
5272 /* Output any necessary .register pseudo-ops. */
5273
5274 void
5275 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5276 {
5277 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5278 int i;
5279
5280 if (TARGET_ARCH32)
5281 return;
5282
5283 /* Check whether %g[2367] were used without a .register
5284 directive having been printed for them already. */
5285 for (i = 2; i < 8; i++)
5286 {
5287 if (df_regs_ever_live_p (i)
5288 && ! sparc_hard_reg_printed [i])
5289 {
5290 sparc_hard_reg_printed [i] = 1;
5291 /* %g7 is used as TLS base register, use #ignore
5292 for it instead of #scratch. */
5293 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5294 i == 7 ? "ignore" : "scratch");
5295 }
5296 if (i == 3) i = 5;
5297 }
5298 #endif
5299 }
5300
5301 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5302
5303 #if PROBE_INTERVAL > 4096
5304 #error Cannot use indexed addressing mode for stack probing
5305 #endif
5306
5307 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5308 inclusive. These are offsets from the current stack pointer.
5309
5310 Note that we don't use the REG+REG addressing mode for the probes
5311 because of the stack bias in 64-bit mode. And it doesn't really buy us
5312 anything, so the advantage of having a single code path wins here. */
5313
5314 static void
5315 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5316 {
5317 rtx g1 = gen_rtx_REG (Pmode, 1);
5318
5319 /* See if we have a small constant number of probes to generate. If so,
5320 that's the easy case. */
5321 if (size <= PROBE_INTERVAL)
5322 {
5323 emit_move_insn (g1, GEN_INT (first));
5324 emit_insn (gen_rtx_SET (g1,
5325 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5326 emit_stack_probe (plus_constant (Pmode, g1, -size));
5327 }
5328
5329 /* The run-time loop is made up of 9 insns in the generic case while the
5330 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
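/* For instance, at the 4 * PROBE_INTERVAL boundary below, the unrolled
form costs 4 + 2*2 = 8 insns, still under the 9 insns of the run-time
loop, whereas 5 intervals would cost 10, hence the cutoff. */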
5331 else if (size <= 4 * PROBE_INTERVAL)
5332 {
5333 HOST_WIDE_INT i;
5334
5335 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5336 emit_insn (gen_rtx_SET (g1,
5337 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5338 emit_stack_probe (g1);
5339
5340 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5341 it exceeds SIZE. If only two probes are needed, this will not
5342 generate any code. Then probe at FIRST + SIZE. */
5343 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5344 {
5345 emit_insn (gen_rtx_SET (g1,
5346 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5347 emit_stack_probe (g1);
5348 }
5349
5350 emit_stack_probe (plus_constant (Pmode, g1,
5351 (i - PROBE_INTERVAL) - size));
5352 }
5353
5354 /* Otherwise, do the same as above, but in a loop. Note that we must be
5355 extra careful with variables wrapping around because we might be at
5356 the very top (or the very bottom) of the address space and we have
5357 to be able to handle this case properly; in particular, we use an
5358 equality test for the loop condition. */
5359 else
5360 {
5361 HOST_WIDE_INT rounded_size;
5362 rtx g4 = gen_rtx_REG (Pmode, 4);
5363
5364 emit_move_insn (g1, GEN_INT (first));
5365
5366
5367 /* Step 1: round SIZE to the previous multiple of the interval. */
5368
5369 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5370 emit_move_insn (g4, GEN_INT (rounded_size));
5371
5372
5373 /* Step 2: compute initial and final value of the loop counter. */
5374
5375 /* TEST_ADDR = SP + FIRST. */
5376 emit_insn (gen_rtx_SET (g1,
5377 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5378
5379 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5380 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5381
5382
5383 /* Step 3: the loop
5384
5385 while (TEST_ADDR != LAST_ADDR)
5386 {
5387 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5388 probe at TEST_ADDR
5389 }
5390
5391 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5392 until it is equal to ROUNDED_SIZE. */
5393
5394 if (TARGET_ARCH64)
5395 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5396 else
5397 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5398
5399
5400 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5401 that SIZE is equal to ROUNDED_SIZE. */
5402
5403 if (size != rounded_size)
5404 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5405 }
5406
5407 /* Make sure nothing is scheduled before we are done. */
5408 emit_insn (gen_blockage ());
5409 }
5410
5411 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5412 absolute addresses. */
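
/* The emitted loop looks roughly like this (a sketch of the strings
below, with REG1 and REG2 being %g1 and %g4 as in the caller above):

.LPSRL0:
add %g1, -PROBE_INTERVAL, %g1
cmp %g1, %g4
bne .LPSRL0 ! bne,pt %xcc on 64-bit
st %g0, [%g1+SPARC_STACK_BIAS] ! probe in the delay slot
*/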
5413
5414 const char *
5415 output_probe_stack_range (rtx reg1, rtx reg2)
5416 {
5417 static int labelno = 0;
5418 char loop_lab[32];
5419 rtx xops[2];
5420
5421 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5422
5423 /* Loop. */
5424 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5425
5426 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5427 xops[0] = reg1;
5428 xops[1] = GEN_INT (-PROBE_INTERVAL);
5429 output_asm_insn ("add\t%0, %1, %0", xops);
5430
5431 /* Test if TEST_ADDR == LAST_ADDR. */
5432 xops[1] = reg2;
5433 output_asm_insn ("cmp\t%0, %1", xops);
5434
5435 /* Probe at TEST_ADDR and branch. */
5436 if (TARGET_ARCH64)
5437 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5438 else
5439 fputs ("\tbne\t", asm_out_file);
5440 assemble_name_raw (asm_out_file, loop_lab);
5441 fputc ('\n', asm_out_file);
5442 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5443 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5444
5445 return "";
5446 }
5447
5448 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5449 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5450 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5451 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5452 the action to be performed if it returns false. Return the new offset. */
5453
5454 typedef bool (*sorr_pred_t) (unsigned int, int);
5455 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5456
5457 static int
5458 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5459 int offset, int leaf_function, sorr_pred_t save_p,
5460 sorr_act_t action_true, sorr_act_t action_false)
5461 {
5462 unsigned int i;
5463 rtx mem;
5464 rtx_insn *insn;
5465
5466 if (TARGET_ARCH64 && high <= 32)
5467 {
5468 int fp_offset = -1;
5469
5470 for (i = low; i < high; i++)
5471 {
5472 if (save_p (i, leaf_function))
5473 {
5474 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5475 base, offset));
5476 if (action_true == SORR_SAVE)
5477 {
5478 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5479 RTX_FRAME_RELATED_P (insn) = 1;
5480 }
5481 else /* action_true == SORR_RESTORE */
5482 {
5483 /* The frame pointer must be restored last since its old
5484 value may be used as base address for the frame. This
5485 is problematic in 64-bit mode only because of the lack
5486 of double-word load instruction. */
5487 if (i == HARD_FRAME_POINTER_REGNUM)
5488 fp_offset = offset;
5489 else
5490 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5491 }
5492 offset += 8;
5493 }
5494 else if (action_false == SORR_ADVANCE)
5495 offset += 8;
5496 }
5497
5498 if (fp_offset >= 0)
5499 {
5500 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5501 emit_move_insn (hard_frame_pointer_rtx, mem);
5502 }
5503 }
5504 else
5505 {
5506 for (i = low; i < high; i += 2)
5507 {
5508 bool reg0 = save_p (i, leaf_function);
5509 bool reg1 = save_p (i + 1, leaf_function);
5510 machine_mode mode;
5511 int regno;
5512
5513 if (reg0 && reg1)
5514 {
5515 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5516 regno = i;
5517 }
5518 else if (reg0)
5519 {
5520 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5521 regno = i;
5522 }
5523 else if (reg1)
5524 {
5525 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5526 regno = i + 1;
5527 offset += 4;
5528 }
5529 else
5530 {
5531 if (action_false == SORR_ADVANCE)
5532 offset += 8;
5533 continue;
5534 }
5535
5536 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5537 if (action_true == SORR_SAVE)
5538 {
5539 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5540 RTX_FRAME_RELATED_P (insn) = 1;
5541 if (mode == DImode)
5542 {
5543 rtx set1, set2;
5544 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5545 offset));
5546 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5547 RTX_FRAME_RELATED_P (set1) = 1;
5548 mem
5549 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5550 offset + 4));
5551 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5552 RTX_FRAME_RELATED_P (set2) = 1;
5553 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5554 gen_rtx_PARALLEL (VOIDmode,
5555 gen_rtvec (2, set1, set2)));
5556 }
5557 }
5558 else /* action_true == SORR_RESTORE */
5559 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5560
5561 /* Bump and round down to double word
5562 in case we already bumped by 4. */
5563 offset = ROUND_DOWN (offset + 8, 8);
5564 }
5565 }
5566
5567 return offset;
5568 }
5569
5570 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5571
5572 static rtx
5573 emit_adjust_base_to_offset (rtx base, int offset)
5574 {
5575 /* ??? This might be optimized a little as %g1 might already have a
5576 value close enough that a single add insn will do. */
5577 /* ??? Although, all of this is probably only a temporary fix because
5578 if %g1 can hold a function result, then sparc_expand_epilogue will
5579 lose (the result will be clobbered). */
5580 rtx new_base = gen_rtx_REG (Pmode, 1);
5581 emit_move_insn (new_base, GEN_INT (offset));
5582 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5583 return new_base;
5584 }
5585
5586 /* Emit code to save/restore call-saved global and FP registers. */
5587
5588 static void
5589 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5590 {
5591 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5592 {
5593 base = emit_adjust_base_to_offset (base, offset);
5594 offset = 0;
5595 }
5596
5597 offset
5598 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5599 save_global_or_fp_reg_p, action, SORR_NONE);
5600 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5601 save_global_or_fp_reg_p, action, SORR_NONE);
5602 }
5603
5604 /* Emit code to save/restore call-saved local and in registers. */
5605
5606 static void
5607 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5608 {
5609 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5610 {
5611 base = emit_adjust_base_to_offset (base, offset);
5612 offset = 0;
5613 }
5614
5615 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5616 save_local_or_in_reg_p, action, SORR_ADVANCE);
5617 }
5618
5619 /* Emit a window_save insn. */
5620
5621 static rtx_insn *
5622 emit_window_save (rtx increment)
5623 {
5624 rtx_insn *insn = emit_insn (gen_window_save (increment));
5625 RTX_FRAME_RELATED_P (insn) = 1;
5626
5627 /* The incoming return address (%o7) is saved in %i7. */
5628 add_reg_note (insn, REG_CFA_REGISTER,
5629 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5630 gen_rtx_REG (Pmode,
5631 INCOMING_RETURN_ADDR_REGNUM)));
5632
5633 /* The window save event. */
5634 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5635
5636 /* The CFA is %fp, the hard frame pointer. */
5637 add_reg_note (insn, REG_CFA_DEF_CFA,
5638 plus_constant (Pmode, hard_frame_pointer_rtx,
5639 INCOMING_FRAME_SP_OFFSET));
5640
5641 return insn;
5642 }
5643
5644 /* Generate an increment for the stack pointer. */
5645
5646 static rtx
5647 gen_stack_pointer_inc (rtx increment)
5648 {
5649 return gen_rtx_SET (stack_pointer_rtx,
5650 gen_rtx_PLUS (Pmode,
5651 stack_pointer_rtx,
5652 increment));
5653 }
5654
5655 /* Expand the function prologue. The prologue is responsible for reserving
5656 storage for the frame, saving the call-saved registers and loading the
5657 GOT register if needed. */
5658
5659 void
5660 sparc_expand_prologue (void)
5661 {
5662 HOST_WIDE_INT size;
5663 rtx_insn *insn;
5664
5665 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5666 on the final value of the flag means deferring the prologue/epilogue
5667 expansion until just before the second scheduling pass, which is too
5668 late to emit multiple epilogues or return insns.
5669
5670 Of course we are making the assumption that the value of the flag
5671 will not change between now and its final value. Of the three parts
5672 of the formula, only the last one can reasonably vary. Let's take a
5673 closer look, after assuming that the first two are true (otherwise
5674 the last one is irrelevant).
5675
5676 If only_leaf_regs_used returns false, the global predicate will also
5677 be false so the actual frame size calculated below will be positive.
5678 As a consequence, the save_register_window insn will be emitted in
5679 the instruction stream; now this insn explicitly references %fp
5680 which is not a leaf register so only_leaf_regs_used will always
5681 return false subsequently.
5682
5683 If only_leaf_regs_used returns true, we hope that the subsequent
5684 optimization passes won't cause non-leaf registers to pop up. For
5685 example, the regrename pass has special provisions to not rename to
5686 non-leaf registers in a leaf function. */
5687 sparc_leaf_function_p
5688 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5689
5690 size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
5691
5692 if (flag_stack_usage_info)
5693 current_function_static_stack_size = size;
5694
5695 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5696 {
5697 if (crtl->is_leaf && !cfun->calls_alloca)
5698 {
5699 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5700 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5701 size - STACK_CHECK_PROTECT);
5702 }
5703 else if (size > 0)
5704 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5705 }
5706
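/* The 4096/8192 thresholds below come from the 13-bit signed immediate
field of SPARC arithmetic instructions (simm13, range -4096..4095):
a frame of up to 4096 bytes is allocated with a single
"add %sp, -SIZE, %sp", up to 8192 bytes with two such adds, and
anything larger needs the size in a scratch register first. */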
5707 if (size == 0)
5708 ; /* do nothing. */
5709 else if (sparc_leaf_function_p)
5710 {
5711 rtx size_int_rtx = GEN_INT (-size);
5712
5713 if (size <= 4096)
5714 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5715 else if (size <= 8192)
5716 {
5717 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5718 RTX_FRAME_RELATED_P (insn) = 1;
5719
5720 /* %sp is still the CFA register. */
5721 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5722 }
5723 else
5724 {
5725 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5726 emit_move_insn (size_rtx, size_int_rtx);
5727 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5728 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5729 gen_stack_pointer_inc (size_int_rtx));
5730 }
5731
5732 RTX_FRAME_RELATED_P (insn) = 1;
5733 }
5734 else
5735 {
5736 rtx size_int_rtx = GEN_INT (-size);
5737
5738 if (size <= 4096)
5739 emit_window_save (size_int_rtx);
5740 else if (size <= 8192)
5741 {
5742 emit_window_save (GEN_INT (-4096));
5743
5744 /* %sp is not the CFA register anymore. */
5745 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5746
5747 /* Make sure no %fp-based store is issued until after the frame is
5748 established. The offset between the frame pointer and the stack
5749 pointer is calculated relative to the value of the stack pointer
5750 at the end of the function prologue, and moving instructions that
5751 access the stack via the frame pointer between the instructions
5752 that decrement the stack pointer could result in accessing the
5753 register window save area, which is volatile. */
5754 emit_insn (gen_frame_blockage ());
5755 }
5756 else
5757 {
5758 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5759 emit_move_insn (size_rtx, size_int_rtx);
5760 emit_window_save (size_rtx);
5761 }
5762 }
5763
5764 if (sparc_leaf_function_p)
5765 {
5766 sparc_frame_base_reg = stack_pointer_rtx;
5767 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5768 }
5769 else
5770 {
5771 sparc_frame_base_reg = hard_frame_pointer_rtx;
5772 sparc_frame_base_offset = SPARC_STACK_BIAS;
5773 }
5774
5775 if (sparc_n_global_fp_regs > 0)
5776 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5777 sparc_frame_base_offset
5778 - sparc_apparent_frame_size,
5779 SORR_SAVE);
5780
5781 /* Load the GOT register if needed. */
5782 if (crtl->uses_pic_offset_table)
5783 load_got_register ();
5784
5785 /* Advertise that the data calculated just above are now valid. */
5786 sparc_prologue_data_valid_p = true;
5787 }
5788
5789 /* Expand the function prologue in the flat window model. As above, the
5790 prologue is responsible for reserving storage for the frame, saving the
5791 call-saved registers and loading the GOT register if needed. */
5792
5793 void
5794 sparc_flat_expand_prologue (void)
5795 {
5796 HOST_WIDE_INT size;
5797 rtx_insn *insn;
5798
5799 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5800
5801 size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
5802
5803 if (flag_stack_usage_info)
5804 current_function_static_stack_size = size;
5805
5806 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5807 {
5808 if (crtl->is_leaf && !cfun->calls_alloca)
5809 {
5810 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5811 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5812 size - STACK_CHECK_PROTECT);
5813 }
5814 else if (size > 0)
5815 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5816 }
5817
5818 if (sparc_save_local_in_regs_p)
5819 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5820 SORR_SAVE);
5821
5822 if (size == 0)
5823 ; /* do nothing. */
5824 else
5825 {
5826 rtx size_int_rtx, size_rtx;
5827
5828 size_rtx = size_int_rtx = GEN_INT (-size);
5829
5830 /* We establish the frame (i.e. decrement the stack pointer) first, even
5831 if we use a frame pointer, because we cannot clobber any call-saved
5832 registers, including the frame pointer, if we haven't created a new
5833 register save area, for the sake of compatibility with the ABI. */
5834 if (size <= 4096)
5835 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5836 else if (size <= 8192 && !frame_pointer_needed)
5837 {
5838 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5839 RTX_FRAME_RELATED_P (insn) = 1;
5840 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5841 }
5842 else
5843 {
5844 size_rtx = gen_rtx_REG (Pmode, 1);
5845 emit_move_insn (size_rtx, size_int_rtx);
5846 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5847 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5848 gen_stack_pointer_inc (size_int_rtx));
5849 }
5850 RTX_FRAME_RELATED_P (insn) = 1;
5851
5852 /* Ensure nothing is scheduled until after the frame is established. */
5853 emit_insn (gen_blockage ());
5854
5855 if (frame_pointer_needed)
5856 {
5857 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5858 gen_rtx_MINUS (Pmode,
5859 stack_pointer_rtx,
5860 size_rtx)));
5861 RTX_FRAME_RELATED_P (insn) = 1;
5862
5863 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5864 gen_rtx_SET (hard_frame_pointer_rtx,
5865 plus_constant (Pmode, stack_pointer_rtx,
5866 size)));
5867 }
5868
5869 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5870 {
5871 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5872 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5873
5874 insn = emit_move_insn (i7, o7);
5875 RTX_FRAME_RELATED_P (insn) = 1;
5876
5877 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5878
5879 /* Prevent this instruction from ever being considered dead,
5880 even if this function has no epilogue. */
5881 emit_use (i7);
5882 }
5883 }
5884
5885 if (frame_pointer_needed)
5886 {
5887 sparc_frame_base_reg = hard_frame_pointer_rtx;
5888 sparc_frame_base_offset = SPARC_STACK_BIAS;
5889 }
5890 else
5891 {
5892 sparc_frame_base_reg = stack_pointer_rtx;
5893 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5894 }
5895
5896 if (sparc_n_global_fp_regs > 0)
5897 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5898 sparc_frame_base_offset
5899 - sparc_apparent_frame_size,
5900 SORR_SAVE);
5901
5902 /* Load the GOT register if needed. */
5903 if (crtl->uses_pic_offset_table)
5904 load_got_register ();
5905
5906 /* Advertise that the data calculated just above are now valid. */
5907 sparc_prologue_data_valid_p = true;
5908 }
5909
5910 /* This function generates the assembly code for function entry, which boils
5911 down to emitting the necessary .register directives. */
5912
5913 static void
5914 sparc_asm_function_prologue (FILE *file)
5915 {
5916 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5917 if (!TARGET_FLAT)
5918 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5919
5920 sparc_output_scratch_registers (file);
5921 }
5922
5923 /* Expand the function epilogue, either normal or part of a sibcall.
5924 We emit all the instructions except the return or the call. */
5925
5926 void
5927 sparc_expand_epilogue (bool for_eh)
5928 {
5929 HOST_WIDE_INT size = sparc_frame_size;
5930
5931 if (cfun->calls_alloca)
5932 emit_insn (gen_frame_blockage ());
5933
5934 if (sparc_n_global_fp_regs > 0)
5935 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5936 sparc_frame_base_offset
5937 - sparc_apparent_frame_size,
5938 SORR_RESTORE);
5939
5940 if (size == 0 || for_eh)
5941 ; /* do nothing. */
5942 else if (sparc_leaf_function_p)
5943 {
5944 if (size <= 4096)
5945 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5946 else if (size <= 8192)
5947 {
5948 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5949 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5950 }
5951 else
5952 {
5953 rtx reg = gen_rtx_REG (Pmode, 1);
5954 emit_move_insn (reg, GEN_INT (size));
5955 emit_insn (gen_stack_pointer_inc (reg));
5956 }
5957 }
5958 }
5959
5960 /* Expand the function epilogue in the flat window model, either normal or
5961 part of a sibcall. We emit all the instructions except the return or the call. */
5962
5963 void
5964 sparc_flat_expand_epilogue (bool for_eh)
5965 {
5966 HOST_WIDE_INT size = sparc_frame_size;
5967
5968 if (sparc_n_global_fp_regs > 0)
5969 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5970 sparc_frame_base_offset
5971 - sparc_apparent_frame_size,
5972 SORR_RESTORE);
5973
5974 /* If we have a frame pointer, we'll need both to restore it before the
5975 frame is destroyed and to use its current value in destroying the frame.
5976 Since we don't have an atomic way to do that in the flat window model,
5977 we save the current value into a temporary register (%g1). */
5978 if (frame_pointer_needed && !for_eh)
5979 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5980
5981 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5982 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5983 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5984
5985 if (sparc_save_local_in_regs_p)
5986 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5987 sparc_frame_base_offset,
5988 SORR_RESTORE);
5989
5990 if (size == 0 || for_eh)
5991 ; /* do nothing. */
5992 else if (frame_pointer_needed)
5993 {
5994 /* Make sure the frame is destroyed after everything else is done. */
5995 emit_insn (gen_blockage ());
5996
5997 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5998 }
5999 else
6000 {
6001 /* Likewise. */
6002 emit_insn (gen_blockage ());
6003
6004 if (size <= 4096)
6005 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6006 else if (size <= 8192)
6007 {
6008 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6009 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6010 }
6011 else
6012 {
6013 rtx reg = gen_rtx_REG (Pmode, 1);
6014 emit_move_insn (reg, GEN_INT (size));
6015 emit_insn (gen_stack_pointer_inc (reg));
6016 }
6017 }
6018 }
6019
6020 /* Return true if it is appropriate to emit `return' instructions in the
6021 body of a function. */
6022
6023 bool
6024 sparc_can_use_return_insn_p (void)
6025 {
6026 return sparc_prologue_data_valid_p
6027 && sparc_n_global_fp_regs == 0
6028 && (TARGET_FLAT
6029 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6030 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
6031 }
6032
6033 /* This function generates the assembly code for function exit. */
6034
6035 static void
6036 sparc_asm_function_epilogue (FILE *file)
6037 {
6038 /* If the last two instructions of a function are "call foo; dslot;"
6039 the return address might point to the first instruction in the next
6040 function and we have to output a dummy nop for the sake of sane
6041 backtraces in such cases. This is pointless for sibling calls since
6042 the return address is explicitly adjusted. */
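/* For instance, with a hypothetical end of function

call foo
 nop ! delay slot, last insn of the function

the return address %o7+8 computed for the call would point at the
first instruction of the next function; the dummy nop emitted below
keeps it inside the current function. */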
6043
6044 rtx_insn *insn = get_last_insn ();
6045
6046 rtx last_real_insn = prev_real_insn (insn);
6047 if (last_real_insn
6048 && NONJUMP_INSN_P (last_real_insn)
6049 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6050 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6051
6052 if (last_real_insn
6053 && CALL_P (last_real_insn)
6054 && !SIBLING_CALL_P (last_real_insn))
6055 fputs ("\tnop\n", file);
6056
6057 sparc_output_deferred_case_vectors ();
6058 }
6059
6060 /* Output a 'restore' instruction. */
6061
6062 static void
6063 output_restore (rtx pat)
6064 {
6065 rtx operands[3];
6066
6067 if (! pat)
6068 {
6069 fputs ("\t restore\n", asm_out_file);
6070 return;
6071 }
6072
6073 gcc_assert (GET_CODE (pat) == SET);
6074
6075 operands[0] = SET_DEST (pat);
6076 pat = SET_SRC (pat);
6077
6078 switch (GET_CODE (pat))
6079 {
6080 case PLUS:
6081 operands[1] = XEXP (pat, 0);
6082 operands[2] = XEXP (pat, 1);
6083 output_asm_insn (" restore %r1, %2, %Y0", operands);
6084 break;
6085 case LO_SUM:
6086 operands[1] = XEXP (pat, 0);
6087 operands[2] = XEXP (pat, 1);
6088 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6089 break;
6090 case ASHIFT:
6091 operands[1] = XEXP (pat, 0);
6092 gcc_assert (XEXP (pat, 1) == const1_rtx);
6093 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6094 break;
6095 default:
6096 operands[1] = pat;
6097 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6098 break;
6099 }
6100 }
6101
6102 /* Output a return. */
6103
6104 const char *
6105 output_return (rtx_insn *insn)
6106 {
6107 if (crtl->calls_eh_return)
6108 {
6109 /* If the function uses __builtin_eh_return, the eh_return
6110 machinery occupies the delay slot. */
6111 gcc_assert (!final_sequence);
6112
6113 if (flag_delayed_branch)
6114 {
6115 if (!TARGET_FLAT && TARGET_V9)
6116 fputs ("\treturn\t%i7+8\n", asm_out_file);
6117 else
6118 {
6119 if (!TARGET_FLAT)
6120 fputs ("\trestore\n", asm_out_file);
6121
6122 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6123 }
6124
6125 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6126 }
6127 else
6128 {
6129 if (!TARGET_FLAT)
6130 fputs ("\trestore\n", asm_out_file);
6131
6132 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6133 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6134 }
6135 }
6136 else if (sparc_leaf_function_p || TARGET_FLAT)
6137 {
6138 /* This is a leaf or flat function so we don't have to bother restoring
6139 the register window, which frees us from dealing with the convoluted
6140 semantics of restore/return. We simply output the jump to the
6141 return address and the insn in the delay slot (if any). */
6142
6143 return "jmp\t%%o7+%)%#";
6144 }
6145 else
6146 {
6147 /* This is a regular function so we have to restore the register window.
6148 We may have a pending insn for the delay slot, which will be either
6149 combined with the 'restore' instruction or put in the delay slot of
6150 the 'return' instruction. */
6151
6152 if (final_sequence)
6153 {
6154 rtx delay, pat;
6155
6156 delay = NEXT_INSN (insn);
6157 gcc_assert (delay);
6158
6159 pat = PATTERN (delay);
6160
6161 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6162 {
6163 epilogue_renumber (&pat, 0);
6164 return "return\t%%i7+%)%#";
6165 }
6166 else
6167 {
6168 output_asm_insn ("jmp\t%%i7+%)", NULL);
6169 output_restore (pat);
6170 PATTERN (delay) = gen_blockage ();
6171 INSN_CODE (delay) = -1;
6172 }
6173 }
6174 else
6175 {
6176 /* The delay slot is empty. */
6177 if (TARGET_V9)
6178 return "return\t%%i7+%)\n\t nop";
6179 else if (flag_delayed_branch)
6180 return "jmp\t%%i7+%)\n\t restore";
6181 else
6182 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6183 }
6184 }
6185
6186 return "";
6187 }
6188
6189 /* Output a sibling call. */
6190
6191 const char *
6192 output_sibcall (rtx_insn *insn, rtx call_operand)
6193 {
6194 rtx operands[1];
6195
6196 gcc_assert (flag_delayed_branch);
6197
6198 operands[0] = call_operand;
6199
6200 if (sparc_leaf_function_p || TARGET_FLAT)
6201 {
6202 /* This is a leaf or flat function so we don't have to bother restoring
6203 the register window. We simply output the jump to the function and
6204 the insn in the delay slot (if any). */
6205
6206 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6207
6208 if (final_sequence)
6209 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6210 operands);
6211 else
6212 /* Use `or' with rs2 %%g0 instead of `mov', so that as/ld can optimize
6213 it into a branch if possible. */
6214 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6215 operands);
6216 }
6217 else
6218 {
6219 /* This is a regular function so we have to restore the register window.
6220 We may have a pending insn for the delay slot, which will be combined
6221 with the 'restore' instruction. */
6222
6223 output_asm_insn ("call\t%a0, 0", operands);
6224
6225 if (final_sequence)
6226 {
6227 rtx_insn *delay = NEXT_INSN (insn);
6228 gcc_assert (delay);
6229
6230 output_restore (PATTERN (delay));
6231
6232 PATTERN (delay) = gen_blockage ();
6233 INSN_CODE (delay) = -1;
6234 }
6235 else
6236 output_restore (NULL_RTX);
6237 }
6238
6239 return "";
6240 }
6241 \f
6242 /* Functions for handling argument passing.
6243
6244 For 32-bit, the first 6 args are normally in registers and the rest are
6245 pushed. Any arg that starts within the first 6 words is at least
6246 partially passed in a register unless its data type forbids.
6247
6248 For 64-bit, the argument registers are laid out as an array of 16 elements
6249 and arguments are added sequentially. The first 6 int args and up to the
6250 first 16 fp args (depending on size) are passed in regs.
6251
6252 Slot Stack Integral Float Float in structure Double Long Double
6253 ---- ----- -------- ----- ------------------ ------ -----------
6254 15 [SP+248] %f31 %f30,%f31 %d30
6255 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6256 13 [SP+232] %f27 %f26,%f27 %d26
6257 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6258 11 [SP+216] %f23 %f22,%f23 %d22
6259 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6260 9 [SP+200] %f19 %f18,%f19 %d18
6261 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6262 7 [SP+184] %f15 %f14,%f15 %d14
6263 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6264 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6265 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6266 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6267 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6268 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6269 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6270
6271 Here SP is %sp with -mno-stack-bias, and %sp + stack_bias otherwise.
6272
6273 Integral arguments are always passed as 64-bit quantities appropriately
6274 extended.
6275
6276 Passing of floating point values is handled as follows.
6277 If a prototype is in scope:
6278 If the value is a named argument (i.e. the function is not stdarg, or
6279 the value is not part of the `...'), then the value is passed in the
6280 appropriate fp reg.
6281 If the value is part of the `...' and is passed in one of the first 6
6282 slots then the value is passed in the appropriate int reg.
6283 If the value is part of the `...' and is not passed in one of the first 6
6284 slots then the value is passed in memory.
6285 If a prototype is not in scope:
6286 If the value is one of the first 6 arguments the value is passed in the
6287 appropriate integer reg and the appropriate fp reg.
6288 If the value is not one of the first 6 arguments the value is passed in
6289 the appropriate fp reg and in memory.
6290
6291
6292 Summary of the calling conventions implemented by GCC on the SPARC:
6293
6294 32-bit ABI:
6295 size argument return value
6296
6297 small integer <4 int. reg. int. reg.
6298 word 4 int. reg. int. reg.
6299 double word 8 int. reg. int. reg.
6300
6301 _Complex small integer <8 int. reg. int. reg.
6302 _Complex word 8 int. reg. int. reg.
6303 _Complex double word 16 memory int. reg.
6304
6305 vector integer <=8 int. reg. FP reg.
6306 vector integer >8 memory memory
6307
6308 float 4 int. reg. FP reg.
6309 double 8 int. reg. FP reg.
6310 long double 16 memory memory
6311
6312 _Complex float 8 memory FP reg.
6313 _Complex double 16 memory FP reg.
6314 _Complex long double 32 memory FP reg.
6315
6316 vector float any memory memory
6317
6318 aggregate any memory memory
6319
6320
6321
6322 64-bit ABI:
6323 size argument return value
6324
6325 small integer <8 int. reg. int. reg.
6326 word 8 int. reg. int. reg.
6327 double word 16 int. reg. int. reg.
6328
6329 _Complex small integer <16 int. reg. int. reg.
6330 _Complex word 16 int. reg. int. reg.
6331 _Complex double word 32 memory int. reg.
6332
6333 vector integer <=16 FP reg. FP reg.
6334 vector integer 16<s<=32 memory FP reg.
6335 vector integer >32 memory memory
6336
6337 float 4 FP reg. FP reg.
6338 double 8 FP reg. FP reg.
6339 long double 16 FP reg. FP reg.
6340
6341 _Complex float 8 FP reg. FP reg.
6342 _Complex double 16 FP reg. FP reg.
6343 _Complex long double 32 memory FP reg.
6344
6345 vector float <=16 FP reg. FP reg.
6346 vector float 16<s<=32 memory FP reg.
6347 vector float >32 memory memory
6348
6349 aggregate <=16 reg. reg.
6350 aggregate 16<s<=32 memory reg.
6351 aggregate >32 memory memory
6352
6353
6354
6355 Note #1: complex floating-point types follow the extended SPARC ABIs as
6356 implemented by the Sun compiler.
6357
6358 Note #2: integral vector types follow the scalar floating-point types
6359 conventions to match what is implemented by the Sun VIS SDK.
6360
6361 Note #3: floating-point vector types follow the aggregate types
6362 conventions. */
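/* For illustration, a hypothetical 64-bit ABI example:

struct sf { float x; float y; };
void f (int a, double b, struct sf s);

A is passed in %o0 (slot 0), B in %d2 (slot 1) and S in %f4/%f5
(slot 2), per the table above. */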
6363
6364
6365 /* Maximum number of int regs for args. */
6366 #define SPARC_INT_ARG_MAX 6
6367 /* Maximum number of fp regs for args. */
6368 #define SPARC_FP_ARG_MAX 16
6369 /* Number of words (partially) occupied for a given size in units. */
6370 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
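/* E.g. CEIL_NWORDS (12) is 2 when UNITS_PER_WORD is 8 and 3 when it is 4. */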
6371
6372 /* Handle the INIT_CUMULATIVE_ARGS macro.
6373 Initialize a variable CUM of type CUMULATIVE_ARGS
6374 for a call to a function whose data type is FNTYPE.
6375 For a library call, FNTYPE is 0. */
6376
6377 void
6378 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6379 {
6380 cum->words = 0;
6381 cum->prototype_p = fntype && prototype_p (fntype);
6382 cum->libcall_p = !fntype;
6383 }
6384
6385 /* Handle promotion of pointer and integer arguments. */
6386
6387 static machine_mode
6388 sparc_promote_function_mode (const_tree type, machine_mode mode,
6389 int *punsignedp, const_tree, int)
6390 {
6391 if (type && POINTER_TYPE_P (type))
6392 {
6393 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6394 return Pmode;
6395 }
6396
6397 /* Integral arguments are passed as full words, as per the ABI. */
6398 if (GET_MODE_CLASS (mode) == MODE_INT
6399 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6400 return word_mode;
6401
6402 return mode;
6403 }
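/* E.g. under the 64-bit ABI, an `int' argument (SImode, 4 bytes) is
promoted to DImode, the word_mode of the target. */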
6404
6405 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6406
6407 static bool
6408 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6409 {
6410 return TARGET_ARCH64;
6411 }
6412
6413 /* Traverse the record TYPE recursively and call FUNC on its fields.
6414 NAMED is true if this is for a named parameter. DATA is passed
6415 to FUNC for each field. OFFSET is the starting position and
6416 PACKED is true if we are inside a packed record. */
6417
6418 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6419 static void
6420 traverse_record_type (const_tree type, bool named, T *data,
6421 HOST_WIDE_INT offset = 0, bool packed = false)
6422 {
6423 /* The ABI obviously doesn't specify how packed structures are passed.
6424 These are passed in integer regs if possible, otherwise memory. */
6425 if (!packed)
6426 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6427 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6428 {
6429 packed = true;
6430 break;
6431 }
6432
6433 /* Walk the real fields, but skip those with no size or a zero size.
6434 ??? Fields with variable offset are handled as having zero offset. */
6435 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6436 if (TREE_CODE (field) == FIELD_DECL)
6437 {
6438 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6439 continue;
6440
6441 HOST_WIDE_INT bitpos = offset;
6442 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6443 bitpos += int_bit_position (field);
6444
6445 tree field_type = TREE_TYPE (field);
6446 if (TREE_CODE (field_type) == RECORD_TYPE)
6447 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6448 packed);
6449 else
6450 {
6451 const bool fp_type
6452 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6453 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6454 data);
6455 }
6456 }
6457 }
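/* For example, given a hypothetical

struct S { int i; float f; };

the traversal calls Func twice: for I with FP false, and for F with
FP true, provided the argument is named, the record is not packed and
TARGET_FPU is set. */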
6458
6459 /* Handle recursive register classifying for structure layout. */
6460
6461 typedef struct
6462 {
6463 bool fp_regs; /* true if field eligible to FP registers. */
6464 bool fp_regs_in_first_word; /* true if such field in first word. */
6465 } classify_data_t;
6466
6467 /* A subroutine of function_arg_slotno. Classify the field. */
6468
6469 inline void
6470 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6471 classify_data_t *data)
6472 {
6473 if (fp)
6474 {
6475 data->fp_regs = true;
6476 if (bitpos < BITS_PER_WORD)
6477 data->fp_regs_in_first_word = true;
6478 }
6479 }
6480
6481 /* Compute the slot number to pass an argument in.
6482 Return the slot number or -1 if passing on the stack.
6483
6484 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6485 the preceding args and about the function being called.
6486 MODE is the argument's machine mode.
6487 TYPE is the data type of the argument (as a tree).
6488 This is null for libcalls where that information may
6489 not be available.
6490 NAMED is nonzero if this argument is a named parameter
6491 (otherwise it is an extra parameter matching an ellipsis).
6492 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6493 *PREGNO records the register number to use if scalar type.
6494 *PPADDING records the amount of padding needed in words. */
6495
6496 static int
6497 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6498 const_tree type, bool named, bool incoming,
6499 int *pregno, int *ppadding)
6500 {
6501 int regbase = (incoming
6502 ? SPARC_INCOMING_INT_ARG_FIRST
6503 : SPARC_OUTGOING_INT_ARG_FIRST);
6504 int slotno = cum->words;
6505 enum mode_class mclass;
6506 int regno;
6507
6508 *ppadding = 0;
6509
6510 if (type && TREE_ADDRESSABLE (type))
6511 return -1;
6512
6513 if (TARGET_ARCH32
6514 && mode == BLKmode
6515 && type
6516 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6517 return -1;
6518
6519 /* For SPARC64, objects requiring 16-byte alignment get it. */
6520 if (TARGET_ARCH64
6521 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6522 && (slotno & 1) != 0)
6523 slotno++, *ppadding = 1;
6524
6525 mclass = GET_MODE_CLASS (mode);
6526 if (type && TREE_CODE (type) == VECTOR_TYPE)
6527 {
6528 /* Vector types deserve special treatment because they are
6529 polymorphic wrt their mode, depending upon whether VIS
6530 instructions are enabled. */
6531 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6532 {
6533 /* The SPARC port defines no floating-point vector modes. */
6534 gcc_assert (mode == BLKmode);
6535 }
6536 else
6537 {
6538 /* Integral vector types should either have a vector
6539 mode or an integral mode, because we are guaranteed
6540 by pass_by_reference that their size is not greater
6541 than 16 bytes and TImode is 16-byte wide. */
6542 gcc_assert (mode != BLKmode);
6543
6544 /* Vector integers are handled like floats according to
6545 the Sun VIS SDK. */
6546 mclass = MODE_FLOAT;
6547 }
6548 }
6549
6550 switch (mclass)
6551 {
6552 case MODE_FLOAT:
6553 case MODE_COMPLEX_FLOAT:
6554 case MODE_VECTOR_INT:
6555 if (TARGET_ARCH64 && TARGET_FPU && named)
6556 {
6557 /* If all arg slots are filled, then must pass on stack. */
6558 if (slotno >= SPARC_FP_ARG_MAX)
6559 return -1;
6560
6561 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6562 /* Arguments filling only a single FP register are
6563 right-justified in the enclosing double FP register. */
6564 if (GET_MODE_SIZE (mode) <= 4)
6565 regno++;
6566 break;
6567 }
6568 /* fallthrough */
6569
6570 case MODE_INT:
6571 case MODE_COMPLEX_INT:
6572 /* If all arg slots are filled, then must pass on stack. */
6573 if (slotno >= SPARC_INT_ARG_MAX)
6574 return -1;
6575
6576 regno = regbase + slotno;
6577 break;
6578
6579 case MODE_RANDOM:
6580 if (mode == VOIDmode)
6581 /* MODE is VOIDmode when generating the actual call. */
6582 return -1;
6583
6584 gcc_assert (mode == BLKmode);
6585
6586 if (TARGET_ARCH32
6587 || !type
6588 || (TREE_CODE (type) != RECORD_TYPE
6589 && TREE_CODE (type) != VECTOR_TYPE))
6590 {
6591 /* If all arg slots are filled, then must pass on stack. */
6592 if (slotno >= SPARC_INT_ARG_MAX)
6593 return -1;
6594
6595 regno = regbase + slotno;
6596 }
6597 else /* TARGET_ARCH64 && type */
6598 {
6599 /* If all arg slots are filled, then must pass on stack. */
6600 if (slotno >= SPARC_FP_ARG_MAX)
6601 return -1;
6602
6603 if (TREE_CODE (type) == RECORD_TYPE)
6604 {
6605 classify_data_t data = { false, false };
6606 traverse_record_type<classify_data_t, classify_registers>
6607 (type, named, &data);
6608
6609 if (data.fp_regs)
6610 {
6611 /* If all FP slots are filled except for the last one and
6612 there is no FP field in the first word, then must pass
6613 on stack. */
6614 if (slotno >= SPARC_FP_ARG_MAX - 1
6615 && !data.fp_regs_in_first_word)
6616 return -1;
6617 }
6618 else
6619 {
6620 /* If all int slots are filled, then must pass on stack. */
6621 if (slotno >= SPARC_INT_ARG_MAX)
6622 return -1;
6623 }
6624 }
6625
6626 /* PREGNO isn't set since both int and FP regs can be used. */
6627 return slotno;
6628 }
6629 break;
6630
6631 default:
6632 gcc_unreachable ();
6633 }
6634
6635 *pregno = regno;
6636 return slotno;
6637 }
6638
6639 /* Handle recursive register counting/assigning for structure layout. */
6640
6641 typedef struct
6642 {
6643 int slotno; /* slot number of the argument. */
6644 int regbase; /* regno of the base register. */
6645 int intoffset; /* offset of the first pending integer field. */
6646 int nregs; /* number of words passed in registers. */
6647 bool stack; /* true if part of the argument is on the stack. */
6648 rtx ret; /* return expression being built. */
6649 } assign_data_t;
6650
6651 /* A subroutine of function_arg_record_value. Compute the number of integer
6652 registers to be assigned between PARMS->intoffset and BITPOS. Return
6653 true if at least one integer register is assigned or false otherwise. */
6654
6655 static bool
6656 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6657 {
6658 if (data->intoffset < 0)
6659 return false;
6660
6661 const int intoffset = data->intoffset;
6662 data->intoffset = -1;
6663
6664 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6665 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6666 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6667 int nregs = (endbit - startbit) / BITS_PER_WORD;
6668
6669 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6670 {
6671 nregs = SPARC_INT_ARG_MAX - this_slotno;
6672
6673 /* We need to pass this field (partly) on the stack. */
6674 data->stack = true;
6675 }
6676
6677 if (nregs <= 0)
6678 return false;
6679
6680 *pnregs = nregs;
6681 return true;
6682 }
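/* For instance, with BITS_PER_WORD == 64, a pending INTOFFSET of 32 and
a BITPOS of 128: STARTBIT is 0, ENDBIT is 128, so two integer registers
are assigned (provided enough arg slots remain). */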
6683
6684 /* A subroutine of function_arg_record_value. Compute the number and the mode
6685 of the FP registers to be assigned for FIELD. Return true if at least one
6686 FP register is assigned or false otherwise. */
6687
6688 static bool
6689 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6690 assign_data_t *data,
6691 int *pnregs, machine_mode *pmode)
6692 {
6693 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6694 machine_mode mode = DECL_MODE (field);
6695 int nregs, nslots;
6696
6697 /* Slots are counted as words while regs are counted as having the size of
6698 the (inner) mode. */
6699 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6700 {
6701 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6702 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6703 }
6704 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6705 {
6706 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6707 nregs = 2;
6708 }
6709 else
6710 nregs = 1;
6711
6712 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6713
6714 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6715 {
6716 nslots = SPARC_FP_ARG_MAX - this_slotno;
6717 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6718
6719 /* We need to pass this field (partly) on the stack. */
6720 data->stack = true;
6721
6722 if (nregs <= 0)
6723 return false;
6724 }
6725
6726 *pnregs = nregs;
6727 *pmode = mode;
6728 return true;
6729 }
6730
6731 /* A subroutine of function_arg_record_value. Count the number of registers
6732 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6733
6734 inline void
6735 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6736 assign_data_t *data)
6737 {
6738 if (fp)
6739 {
6740 int nregs;
6741 machine_mode mode;
6742
6743 if (compute_int_layout (bitpos, data, &nregs))
6744 data->nregs += nregs;
6745
6746 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6747 data->nregs += nregs;
6748 }
6749 else
6750 {
6751 if (data->intoffset < 0)
6752 data->intoffset = bitpos;
6753 }
6754 }
6755
6756 /* A subroutine of function_arg_record_value. Assign the bits of the
6757 structure between PARMS->intoffset and BITPOS to integer registers. */
6758
6759 static void
6760 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6761 {
6762 int intoffset = data->intoffset;
6763 machine_mode mode;
6764 int nregs;
6765
6766 if (!compute_int_layout (bitpos, data, &nregs))
6767 return;
6768
6769 /* If this is the trailing part of a word, only load that much into
6770 the register. Otherwise load the whole register. Note that in
6771 the latter case we may pick up unwanted bits. That's not a problem
6772 at the moment, but we may wish to revisit it. */
6773 if (intoffset % BITS_PER_WORD != 0)
6774 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6775 MODE_INT);
6776 else
6777 mode = word_mode;
6778
6779 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6780 unsigned int regno = data->regbase + this_slotno;
6781 intoffset /= BITS_PER_UNIT;
6782
6783 do
6784 {
6785 rtx reg = gen_rtx_REG (mode, regno);
6786 XVECEXP (data->ret, 0, data->stack + data->nregs)
6787 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6788 data->nregs += 1;
6789 mode = word_mode;
6790 regno += 1;
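/* Round INTOFFSET up to the next word boundary, e.g. 4 -> 8 when
UNITS_PER_WORD is 8. */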
6791 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6792 }
6793 while (--nregs > 0);
6794 }
6795
6796 /* A subroutine of function_arg_record_value. Assign FIELD at position
6797 BITPOS to FP registers. */
6798
6799 static void
6800 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6801 assign_data_t *data)
6802 {
6803 int nregs;
6804 machine_mode mode;
6805
6806 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6807 return;
6808
6809 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6810 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6811 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6812 regno++;
6813 int pos = bitpos / BITS_PER_UNIT;
6814
6815 do
6816 {
6817 rtx reg = gen_rtx_REG (mode, regno);
6818 XVECEXP (data->ret, 0, data->stack + data->nregs)
6819 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6820 data->nregs += 1;
6821 regno += GET_MODE_SIZE (mode) / 4;
6822 pos += GET_MODE_SIZE (mode);
6823 }
6824 while (--nregs > 0);
6825 }
6826
6827 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6828 the structure between PARMS->intoffset and BITPOS to registers. */
6829
6830 inline void
6831 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6832 assign_data_t *data)
6833 {
6834 if (fp)
6835 {
6836 assign_int_registers (bitpos, data);
6837
6838 assign_fp_registers (field, bitpos, data);
6839 }
6840 else
6841 {
6842 if (data->intoffset < 0)
6843 data->intoffset = bitpos;
6844 }
6845 }
6846
6847 /* Used by function_arg and sparc_function_value_1 to implement the complex
6848 conventions of the 64-bit ABI for passing and returning structures.
6849 Return an expression valid as a return value for the FUNCTION_ARG
6850 and TARGET_FUNCTION_VALUE.
6851
6852 TYPE is the data type of the argument (as a tree).
6853 This is null for libcalls where that information may
6854 not be available.
6855 MODE is the argument's machine mode.
6856 SLOTNO is the index number of the argument's slot in the parameter array.
6857 NAMED is true if this argument is a named parameter
6858 (otherwise it is an extra parameter matching an ellipsis).
6859 REGBASE is the regno of the base register for the parameter array. */
6860
6861 static rtx
6862 function_arg_record_value (const_tree type, machine_mode mode,
6863 int slotno, bool named, int regbase)
6864 {
6865 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6866 assign_data_t data;
6867 int nregs;
6868
6869 data.slotno = slotno;
6870 data.regbase = regbase;
6871
6872 /* Count how many registers we need. */
6873 data.nregs = 0;
6874 data.intoffset = 0;
6875 data.stack = false;
6876 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6877
6878 /* Take into account pending integer fields. */
6879 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6880 data.nregs += nregs;
6881
6882 /* Allocate the vector and handle some annoying special cases. */
6883 nregs = data.nregs;
6884
6885 if (nregs == 0)
6886 {
6887 /* ??? Empty structure has no value? Duh? */
6888 if (typesize <= 0)
6889 {
6890 /* Though there's nothing really to store, return a word register
6891 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6892 leads to breakage due to the fact that there are zero bytes to
6893 load. */
6894 return gen_rtx_REG (mode, regbase);
6895 }
6896
6897 /* ??? C++ has structures with no fields, and yet a size. Give up
6898 for now and pass everything back in integer registers. */
6899 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6900 if (nregs + slotno > SPARC_INT_ARG_MAX)
6901 nregs = SPARC_INT_ARG_MAX - slotno;
6902 }
6903
6904 gcc_assert (nregs > 0);
6905
6906 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6907
6908 /* If at least one field must be passed on the stack, generate
6909 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6910 also be passed on the stack. We can't do much better because the
6911 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6912 of structures for which the fields passed exclusively in registers
6913 are not at the beginning of the structure. */
6914 if (data.stack)
6915 XVECEXP (data.ret, 0, 0)
6916 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6917
6918 /* Assign the registers. */
6919 data.nregs = 0;
6920 data.intoffset = 0;
6921 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6922
6923 /* Assign pending integer fields. */
6924 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6925
6926 gcc_assert (data.nregs == nregs);
6927
6928 return data.ret;
6929 }
6930
6931 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6932 of the 64-bit ABI for passing and returning unions.
6933 Return an expression valid as a return value for the FUNCTION_ARG
6934 and TARGET_FUNCTION_VALUE.
6935
6936 SIZE is the size in bytes of the union.
6937 MODE is the argument's machine mode.
6938 SLOTNO is the slot index and REGNO the hard register the union will be passed in. */
6939
6940 static rtx
6941 function_arg_union_value (int size, machine_mode mode, int slotno,
6942 int regno)
6943 {
6944 int nwords = CEIL_NWORDS (size), i;
6945 rtx regs;
6946
6947 /* See comment in previous function for empty structures. */
6948 if (nwords == 0)
6949 return gen_rtx_REG (mode, regno);
6950
6951 if (slotno == SPARC_INT_ARG_MAX - 1)
6952 nwords = 1;
6953
6954 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6955
6956 for (i = 0; i < nwords; i++)
6957 {
6958 /* Unions are passed left-justified. */
6959 XVECEXP (regs, 0, i)
6960 = gen_rtx_EXPR_LIST (VOIDmode,
6961 gen_rtx_REG (word_mode, regno),
6962 GEN_INT (UNITS_PER_WORD * i));
6963 regno++;
6964 }
6965
6966 return regs;
6967 }
6968
6969 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6970 for passing and returning BLKmode vectors.
6971 Return an expression valid as a return value for the FUNCTION_ARG
6972 and TARGET_FUNCTION_VALUE.
6973
6974 SIZE is the size in bytes of the vector.
6975 REGNO is the FP hard register the vector will be passed in. */
6976
6977 static rtx
6978 function_arg_vector_value (int size, int regno)
6979 {
6980 const int nregs = MAX (1, size / 8);
6981 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6982
6983 if (size < 8)
6984 XVECEXP (regs, 0, 0)
6985 = gen_rtx_EXPR_LIST (VOIDmode,
6986 gen_rtx_REG (SImode, regno),
6987 const0_rtx);
6988 else
6989 for (int i = 0; i < nregs; i++)
6990 XVECEXP (regs, 0, i)
6991 = gen_rtx_EXPR_LIST (VOIDmode,
6992 gen_rtx_REG (DImode, regno + 2*i),
6993 GEN_INT (i*8));
6994
6995 return regs;
6996 }
6997
6998 /* Determine where to put an argument to a function.
6999 Value is zero to push the argument on the stack,
7000 or a hard register in which to store the argument.
7001
7002 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7003 the preceding args and about the function being called.
7004 MODE is the argument's machine mode.
7005 TYPE is the data type of the argument (as a tree).
7006 This is null for libcalls where that information may
7007 not be available.
7008 NAMED is true if this argument is a named parameter
7009 (otherwise it is an extra parameter matching an ellipsis).
7010 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7011 TARGET_FUNCTION_INCOMING_ARG. */
7012
7013 static rtx
7014 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7015 const_tree type, bool named, bool incoming)
7016 {
7017 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7018
7019 int regbase = (incoming
7020 ? SPARC_INCOMING_INT_ARG_FIRST
7021 : SPARC_OUTGOING_INT_ARG_FIRST);
7022 int slotno, regno, padding;
7023 enum mode_class mclass = GET_MODE_CLASS (mode);
7024
7025 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7026 &regno, &padding);
7027 if (slotno == -1)
7028 return 0;
7029
7030 /* Vector types deserve special treatment because they are polymorphic wrt
7031 their mode, depending upon whether VIS instructions are enabled. */
7032 if (type && TREE_CODE (type) == VECTOR_TYPE)
7033 {
7034 HOST_WIDE_INT size = int_size_in_bytes (type);
7035 gcc_assert ((TARGET_ARCH32 && size <= 8)
7036 || (TARGET_ARCH64 && size <= 16));
7037
7038 if (mode == BLKmode)
7039 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7040
7041 mclass = MODE_FLOAT;
7042 }
7043
7044 if (TARGET_ARCH32)
7045 return gen_rtx_REG (mode, regno);
7046
7047 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7048 and are promoted to registers if possible. */
7049 if (type && TREE_CODE (type) == RECORD_TYPE)
7050 {
7051 HOST_WIDE_INT size = int_size_in_bytes (type);
7052 gcc_assert (size <= 16);
7053
7054 return function_arg_record_value (type, mode, slotno, named, regbase);
7055 }
7056
7057 /* Unions up to 16 bytes in size are passed in integer registers. */
7058 else if (type && TREE_CODE (type) == UNION_TYPE)
7059 {
7060 HOST_WIDE_INT size = int_size_in_bytes (type);
7061 gcc_assert (size <= 16);
7062
7063 return function_arg_union_value (size, mode, slotno, regno);
7064 }
7065
7066 /* On V9, FP args in reg slots beyond the int reg slots are passed in
7067 registers, but the stack slot is still allocated for them.
7068 If no prototype is in scope, FP values in register slots are passed
7069 in two places: either FP regs and int regs, or FP regs and memory. */
7070 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7071 && SPARC_FP_REG_P (regno))
7072 {
7073 rtx reg = gen_rtx_REG (mode, regno);
7074 if (cum->prototype_p || cum->libcall_p)
7075 return reg;
7076 else
7077 {
7078 rtx v0, v1;
7079
7080 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7081 {
7082 int intreg;
7083
7084 /* On incoming, we don't need to know that the value
7085 is passed in %f0 and %i0; that knowledge confuses other
7086 parts and causes needless spillage even in the simplest cases. */
7087 if (incoming)
7088 return reg;
7089
7090 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7091 + (regno - SPARC_FP_ARG_FIRST) / 2);
7092
7093 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7094 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7095 const0_rtx);
7096 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7097 }
7098 else
7099 {
7100 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7101 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7102 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7103 }
7104 }
7105 }
7106
7107 /* All other aggregate types are passed in an integer register in a mode
7108 corresponding to the size of the type. */
7109 else if (type && AGGREGATE_TYPE_P (type))
7110 {
7111 HOST_WIDE_INT size = int_size_in_bytes (type);
7112 gcc_assert (size <= 16);
7113
7114 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7115 }
7116
7117 return gen_rtx_REG (mode, regno);
7118 }
7119
7120 /* Handle the TARGET_FUNCTION_ARG target hook. */
7121
7122 static rtx
7123 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7124 const_tree type, bool named)
7125 {
7126 return sparc_function_arg_1 (cum, mode, type, named, false);
7127 }
7128
7129 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7130
7131 static rtx
7132 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7133 const_tree type, bool named)
7134 {
7135 return sparc_function_arg_1 (cum, mode, type, named, true);
7136 }
7137
7138 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7139
7140 static unsigned int
7141 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7142 {
7143 return ((TARGET_ARCH64
7144 && (GET_MODE_ALIGNMENT (mode) == 128
7145 || (type && TYPE_ALIGN (type) == 128)))
7146 ? 128
7147 : PARM_BOUNDARY);
7148 }
7149
7150 /* For an arg passed partly in registers and partly in memory,
7151 this is the number of bytes of registers used.
7152 For args passed entirely in registers or entirely in memory, zero.
7153
7154 Any arg that starts in the first 6 regs but won't entirely fit in them
7155 needs partial registers on v8. On v9, structures with integer
7156 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7157 values that begin in the last fp reg [where "last fp reg" varies with the
7158 mode] will be split between that reg and memory. */
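/* For example (hypothetical), on v9 a 16-byte structure allotted slot
SPARC_INT_ARG_MAX - 1 has its first word passed in %o5 and its second
word in memory, so UNITS_PER_WORD is returned. */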
7159
7160 static int
7161 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7162 tree type, bool named)
7163 {
7164 int slotno, regno, padding;
7165
7166 /* We pass false for incoming here; it doesn't matter. */
7167 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7168 false, &regno, &padding);
7169
7170 if (slotno == -1)
7171 return 0;
7172
7173 if (TARGET_ARCH32)
7174 {
7175 if ((slotno + (mode == BLKmode
7176 ? CEIL_NWORDS (int_size_in_bytes (type))
7177 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7178 > SPARC_INT_ARG_MAX)
7179 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7180 }
7181 else
7182 {
7183 /* We are guaranteed by pass_by_reference that the size of the
7184 argument is not greater than 16 bytes, so we only need to return
7185 one word if the argument is partially passed in registers. */
7186
7187 if (type && AGGREGATE_TYPE_P (type))
7188 {
7189 int size = int_size_in_bytes (type);
7190
7191 if (size > UNITS_PER_WORD
7192 && (slotno == SPARC_INT_ARG_MAX - 1
7193 || slotno == SPARC_FP_ARG_MAX - 1))
7194 return UNITS_PER_WORD;
7195 }
7196 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7197 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7198 && ! (TARGET_FPU && named)))
7199 {
7200 /* The complex types are passed as packed types. */
7201 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7202 && slotno == SPARC_INT_ARG_MAX - 1)
7203 return UNITS_PER_WORD;
7204 }
7205 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7206 {
7207 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7208 > SPARC_FP_ARG_MAX)
7209 return UNITS_PER_WORD;
7210 }
7211 }
7212
7213 return 0;
7214 }
7215
7216 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7217 Specify whether to pass the argument by reference. */
7218
7219 static bool
7220 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7221 machine_mode mode, const_tree type,
7222 bool named ATTRIBUTE_UNUSED)
7223 {
7224 if (TARGET_ARCH32)
7225 /* Original SPARC 32-bit ABI says that structures and unions,
7226 and quad-precision floats are passed by reference. For Pascal,
7227 also pass arrays by reference. All other base types are passed
7228 in registers.
7229
7230 Extended ABI (as implemented by the Sun compiler) says that all
7231 complex floats are passed by reference. Pass complex integers
7232 in registers up to 8 bytes. More generally, enforce the 2-word
7233 cap for passing arguments in registers.
7234
7235 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7236 integers are passed like floats of the same size, that is in
7237 registers up to 8 bytes. Pass all vector floats by reference
7238 like structure and unions. */
7239 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7240 || mode == SCmode
7241 /* Catch CDImode, TFmode, DCmode and TCmode. */
7242 || GET_MODE_SIZE (mode) > 8
7243 || (type
7244 && TREE_CODE (type) == VECTOR_TYPE
7245 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7246 else
7247 /* Original SPARC 64-bit ABI says that structures and unions
7248 smaller than 16 bytes are passed in registers, as well as
7249 all other base types.
7250
7251 Extended ABI (as implemented by the Sun compiler) says that
7252 complex floats are passed in registers up to 16 bytes. Pass
7253 all complex integers in registers up to 16 bytes. More generally,
7254 enforce the 2-word cap for passing arguments in registers.
7255
7256 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7257 integers are passed like floats of the same size, that is in
7258 registers (up to 16 bytes). Pass all vector floats like structure
7259 and unions. */
7260 return ((type
7261 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7262 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7263 /* Catch CTImode and TCmode. */
7264 || GET_MODE_SIZE (mode) > 16);
7265 }
7266
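/* For illustration, with the rules above and 128-bit long double:

     struct S { char c[20]; };   // 20-byte aggregate
     long double q;              // TFmode, 16 bytes
     _Complex float c;           // SCmode, 8 bytes

   In 32-bit mode all three are passed by reference; in 64-bit mode only
   the 20-byte struct is, since q and c fit under the 16-byte cap.  */
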
7267 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7268 Update the data in CUM to advance over an argument
7269 of mode MODE and data type TYPE.
7270 TYPE is null for libcalls where that information may not be available. */
7271
7272 static void
7273 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7274 const_tree type, bool named)
7275 {
7276 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7277 int regno, padding;
7278
7279 /* We pass false for incoming here; it doesn't matter.  */
7280 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7281
7282 /* If argument requires leading padding, add it. */
7283 cum->words += padding;
7284
7285 if (TARGET_ARCH32)
7286 cum->words += (mode == BLKmode
7287 ? CEIL_NWORDS (int_size_in_bytes (type))
7288 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7289 else
7290 {
7291 if (type && AGGREGATE_TYPE_P (type))
7292 {
7293 int size = int_size_in_bytes (type);
7294
7295 if (size <= 8)
7296 ++cum->words;
7297 else if (size <= 16)
7298 cum->words += 2;
7299 else /* passed by reference */
7300 ++cum->words;
7301 }
7302 else
7303 cum->words += (mode == BLKmode
7304 ? CEIL_NWORDS (int_size_in_bytes (type))
7305 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7306 }
7307 }
7308
7309 /* Handle the FUNCTION_ARG_PADDING macro.
7310 For the 64-bit ABI structs are always stored left shifted in their
7311 argument slot. */
7312
7313 enum direction
7314 function_arg_padding (machine_mode mode, const_tree type)
7315 {
7316 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7317 return upward;
7318
7319 /* Fall back to the default. */
7320 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7321 }
7322
7323 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7324 Specify whether to return the return value in memory. */
7325
7326 static bool
7327 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7328 {
7329 if (TARGET_ARCH32)
7330 /* Original SPARC 32-bit ABI says that structures and unions,
7331 and quad-precision floats are returned in memory. All other
7332 base types are returned in registers.
7333
7334 Extended ABI (as implemented by the Sun compiler) says that
7335 all complex floats are returned in registers (8 FP registers
7336 at most for '_Complex long double'). Return all complex integers
7337 in registers (4 at most for '_Complex long long').
7338
7339 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7340 integers are returned like floats of the same size, that is in
7341 registers up to 8 bytes and in memory otherwise. Return all
7342 vector floats in memory like structure and unions; note that
7343 they always have BLKmode like the latter. */
7344 return (TYPE_MODE (type) == BLKmode
7345 || TYPE_MODE (type) == TFmode
7346 || (TREE_CODE (type) == VECTOR_TYPE
7347 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7348 else
7349 /* Original SPARC 64-bit ABI says that structures and unions
7350 smaller than 32 bytes are returned in registers, as well as
7351 all other base types.
7352
7353 Extended ABI (as implemented by the Sun compiler) says that all
7354 complex floats are returned in registers (8 FP registers at most
7355 for '_Complex long double'). Return all complex integers in
7356 registers (4 at most for '_Complex TItype').
7357
7358 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7359 integers are returned like floats of the same size, that is in
7360 registers. Return all vector floats like structure and unions;
7361 note that they always have BLKmode like the latter. */
7362 return (TYPE_MODE (type) == BLKmode
7363 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7364 }
7365
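/* For example, a 24-byte structure (BLKmode) is returned in memory in
   32-bit mode but in registers in 64-bit mode, while a 40-byte one is
   returned in memory in both modes.  */
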
7366 /* Handle the TARGET_STRUCT_VALUE target hook.
7367 Return where to find the structure return value address. */
7368
7369 static rtx
7370 sparc_struct_value_rtx (tree fndecl, int incoming)
7371 {
7372 if (TARGET_ARCH64)
7373 return 0;
7374 else
7375 {
7376 rtx mem;
7377
7378 if (incoming)
7379 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7380 STRUCT_VALUE_OFFSET));
7381 else
7382 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7383 STRUCT_VALUE_OFFSET));
7384
7385 /* Only follow the SPARC ABI for fixed-size structure returns.
7386 Variable-size structure returns are handled by the normal
7387 procedures in GCC.  This is enabled by -mstd-struct-return.  */
7388 if (incoming == 2
7389 && sparc_std_struct_return
7390 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7391 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7392 {
7393 /* We must check and adjust the return address, as it is optional
7394 as to whether the return object is really provided. */
7395 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7396 rtx scratch = gen_reg_rtx (SImode);
7397 rtx_code_label *endlab = gen_label_rtx ();
7398
7399 /* Calculate the return object size. */
7400 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7401 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7402 /* Construct a temporary return value. */
7403 rtx temp_val
7404 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7405
7406 /* Implement SPARC 32-bit psABI callee return struct checking:
7407
7408 Fetch the instruction where we will return to and see if
7409 it's an unimp instruction (the most significant 10 bits
7410 will be zero). */
7411 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7412 plus_constant (Pmode,
7413 ret_reg, 8)));
7414 /* Assume the size is valid and pre-adjust. */
7415 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7416 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7417 0, endlab);
7418 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7419 /* Write the address of the memory pointed to by temp_val into
7420 the memory pointed to by mem. */
7421 emit_move_insn (mem, XEXP (temp_val, 0));
7422 emit_label (endlab);
7423 }
7424
7425 return mem;
7426 }
7427 }
7428
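/* For reference, the caller side of this convention looks like this
   (a sketch):

     call  foo
      nop              ! delay slot
     unimp 12          ! low bits hold the size of the returned struct

   The code above compares the word at the return address + 8 with the
   expected size; on a match the return address is pre-adjusted by 4 so
   that execution resumes past the unimp, otherwise the adjustment is
   undone and the struct-value slot is redirected to a local temporary.  */
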
7429 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7430 For v9, function return values are subject to the same rules as arguments,
7431 except that up to 32 bytes may be returned in registers. */
7432
7433 static rtx
7434 sparc_function_value_1 (const_tree type, machine_mode mode,
7435 bool outgoing)
7436 {
7437 /* Beware that the two values are swapped here wrt function_arg. */
7438 int regbase = (outgoing
7439 ? SPARC_INCOMING_INT_ARG_FIRST
7440 : SPARC_OUTGOING_INT_ARG_FIRST);
7441 enum mode_class mclass = GET_MODE_CLASS (mode);
7442 int regno;
7443
7444 /* Vector types deserve special treatment because they are polymorphic wrt
7445 their mode, depending upon whether VIS instructions are enabled. */
7446 if (type && TREE_CODE (type) == VECTOR_TYPE)
7447 {
7448 HOST_WIDE_INT size = int_size_in_bytes (type);
7449 gcc_assert ((TARGET_ARCH32 && size <= 8)
7450 || (TARGET_ARCH64 && size <= 32));
7451
7452 if (mode == BLKmode)
7453 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7454
7455 mclass = MODE_FLOAT;
7456 }
7457
7458 if (TARGET_ARCH64 && type)
7459 {
7460 /* Structures up to 32 bytes in size are returned in registers. */
7461 if (TREE_CODE (type) == RECORD_TYPE)
7462 {
7463 HOST_WIDE_INT size = int_size_in_bytes (type);
7464 gcc_assert (size <= 32);
7465
7466 return function_arg_record_value (type, mode, 0, 1, regbase);
7467 }
7468
7469 /* Unions up to 32 bytes in size are returned in integer registers. */
7470 else if (TREE_CODE (type) == UNION_TYPE)
7471 {
7472 HOST_WIDE_INT size = int_size_in_bytes (type);
7473 gcc_assert (size <= 32);
7474
7475 return function_arg_union_value (size, mode, 0, regbase);
7476 }
7477
7478 /* Objects that require it are returned in FP registers. */
7479 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7480 ;
7481
7482 /* All other aggregate types are returned in an integer register in a
7483 mode corresponding to the size of the type. */
7484 else if (AGGREGATE_TYPE_P (type))
7485 {
7488 HOST_WIDE_INT size = int_size_in_bytes (type);
7489 gcc_assert (size <= 32);
7490
7491 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7492
7493 /* ??? We probably should have made the same ABI change in
7494 3.4.0 as the one we made for unions. The latter was
7495 required by the SCD though, while the former is not
7496 specified, so we favored compatibility and efficiency.
7497
7498 Now we're stuck for aggregates larger than 16 bytes,
7499 because OImode vanished in the meantime. Let's not
7500 try to be unduly clever, and simply follow the ABI
7501 for unions in that case. */
7502 if (mode == BLKmode)
7503 return function_arg_union_value (size, mode, 0, regbase);
7504 else
7505 mclass = MODE_INT;
7506 }
7507
7508 /* We should only have pointer and integer types at this point. This
7509 must match sparc_promote_function_mode. */
7510 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7511 mode = word_mode;
7512 }
7513
7514 /* We should only have pointer and integer types at this point, except with
7515 -freg-struct-return. This must match sparc_promote_function_mode. */
7516 else if (TARGET_ARCH32
7517 && !(type && AGGREGATE_TYPE_P (type))
7518 && mclass == MODE_INT
7519 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7520 mode = word_mode;
7521
7522 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7523 regno = SPARC_FP_ARG_FIRST;
7524 else
7525 regno = regbase;
7526
7527 return gen_rtx_REG (mode, regno);
7528 }
7529
7530 /* Handle TARGET_FUNCTION_VALUE.
7531 On the SPARC, the value is found in the first "output" register, but the
7532 called function leaves it in the first "input" register. */
7533
7534 static rtx
7535 sparc_function_value (const_tree valtype,
7536 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7537 bool outgoing)
7538 {
7539 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7540 }
7541
7542 /* Handle TARGET_LIBCALL_VALUE. */
7543
7544 static rtx
7545 sparc_libcall_value (machine_mode mode,
7546 const_rtx fun ATTRIBUTE_UNUSED)
7547 {
7548 return sparc_function_value_1 (NULL_TREE, mode, false);
7549 }
7550
7551 /* Handle FUNCTION_VALUE_REGNO_P.
7552 On the SPARC, the first "output" reg is used for integer values, and the
7553 first floating point register is used for floating point values. */
7554
7555 static bool
7556 sparc_function_value_regno_p (const unsigned int regno)
7557 {
7558 return (regno == 8 || (TARGET_FPU && regno == 32));
7559 }
7560
7561 /* Do what is necessary for `va_start'. We look at the current function
7562 to determine if stdarg or varargs is used and return the address of
7563 the first unnamed parameter. */
7564
7565 static rtx
7566 sparc_builtin_saveregs (void)
7567 {
7568 int first_reg = crtl->args.info.words;
7569 rtx address;
7570 int regno;
7571
7572 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7573 emit_move_insn (gen_rtx_MEM (word_mode,
7574 gen_rtx_PLUS (Pmode,
7575 frame_pointer_rtx,
7576 GEN_INT (FIRST_PARM_OFFSET (0)
7577 + (UNITS_PER_WORD
7578 * regno)))),
7579 gen_rtx_REG (word_mode,
7580 SPARC_INCOMING_INT_ARG_FIRST + regno));
7581
7582 address = gen_rtx_PLUS (Pmode,
7583 frame_pointer_rtx,
7584 GEN_INT (FIRST_PARM_OFFSET (0)
7585 + UNITS_PER_WORD * first_reg));
7586
7587 return address;
7588 }
7589
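/* For example, in a 32-bit varargs function declared as
   "int f (int a, ...)", first_reg is 1, so %i1 through %i5 are dumped
   into the parameter save area and the returned address points at the
   slot of the first unnamed argument.  */
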
7590 /* Implement `va_start' for stdarg. */
7591
7592 static void
7593 sparc_va_start (tree valist, rtx nextarg)
7594 {
7595 nextarg = expand_builtin_saveregs ();
7596 std_expand_builtin_va_start (valist, nextarg);
7597 }
7598
7599 /* Implement `va_arg' for stdarg. */
7600
7601 static tree
7602 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7603 gimple_seq *post_p)
7604 {
7605 HOST_WIDE_INT size, rsize, align;
7606 tree addr, incr;
7607 bool indirect;
7608 tree ptrtype = build_pointer_type (type);
7609
7610 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7611 {
7612 indirect = true;
7613 size = rsize = UNITS_PER_WORD;
7614 align = 0;
7615 }
7616 else
7617 {
7618 indirect = false;
7619 size = int_size_in_bytes (type);
7620 rsize = ROUND_UP (size, UNITS_PER_WORD);
7621 align = 0;
7622
7623 if (TARGET_ARCH64)
7624 {
7625 /* For SPARC64, objects requiring 16-byte alignment get it. */
7626 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7627 align = 2 * UNITS_PER_WORD;
7628
7629 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7630 are left-justified in their slots. */
7631 if (AGGREGATE_TYPE_P (type))
7632 {
7633 if (size == 0)
7634 size = rsize = UNITS_PER_WORD;
7635 else
7636 size = rsize;
7637 }
7638 }
7639 }
7640
7641 incr = valist;
7642 if (align)
7643 {
7644 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7645 incr = fold_convert (sizetype, incr);
7646 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7647 size_int (-align));
7648 incr = fold_convert (ptr_type_node, incr);
7649 }
7650
7651 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7652 addr = incr;
7653
7654 if (BYTES_BIG_ENDIAN && size < rsize)
7655 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7656
7657 if (indirect)
7658 {
7659 addr = fold_convert (build_pointer_type (ptrtype), addr);
7660 addr = build_va_arg_indirect_ref (addr);
7661 }
7662
7663 /* If the address isn't aligned properly for the type, we need a temporary.
7664 FIXME: This is inefficient, usually we can do this in registers. */
7665 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7666 {
7667 tree tmp = create_tmp_var (type, "va_arg_tmp");
7668 tree dest_addr = build_fold_addr_expr (tmp);
7669 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7670 3, dest_addr, addr, size_int (rsize));
7671 TREE_ADDRESSABLE (tmp) = 1;
7672 gimplify_and_add (copy, pre_p);
7673 addr = dest_addr;
7674 }
7675
7676 else
7677 addr = fold_convert (ptrtype, addr);
7678
7679 incr = fold_build_pointer_plus_hwi (incr, rsize);
7680 gimplify_assign (valist, incr, post_p);
7681
7682 return build_va_arg_indirect_ref (addr);
7683 }
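
/* In outline, the lowering above computes (a C-like sketch):

     addr = align ? (valist + align - 1) & -align : valist;
     next = addr + rsize;
     if (BYTES_BIG_ENDIAN && size < rsize)
       addr += rsize - size;              // right-justify a small arg
     result = indirect ? **(type **) addr : *(type *) addr;
     valist = next;                       // post-incremented afterwards
*/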
7684 \f
7685 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7686 Specify whether the vector mode is supported by the hardware. */
7687
7688 static bool
7689 sparc_vector_mode_supported_p (machine_mode mode)
7690 {
7691 return TARGET_VIS && VECTOR_MODE_P (mode);
7692 }
7693 \f
7694 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7695
7696 static machine_mode
7697 sparc_preferred_simd_mode (machine_mode mode)
7698 {
7699 if (TARGET_VIS)
7700 switch (mode)
7701 {
7702 case SImode:
7703 return V2SImode;
7704 case HImode:
7705 return V4HImode;
7706 case QImode:
7707 return V8QImode;
7708
7709 default:;
7710 }
7711
7712 return word_mode;
7713 }
7714 \f
7715 /* Return the string to output an unconditional branch to LABEL, which is
7716 the operand number of the label.
7717
7718 DEST is the destination insn (i.e. the label), INSN is the source. */
7719
7720 const char *
7721 output_ubranch (rtx dest, rtx_insn *insn)
7722 {
7723 static char string[64];
7724 bool v9_form = false;
7725 int delta;
7726 char *p;
7727
7728 /* Even if we are trying to use cbcond for this, evaluate
7729 whether we can use V9 branches as our backup plan. */
7730
7731 delta = 5000000;
7732 if (INSN_ADDRESSES_SET_P ())
7733 delta = (INSN_ADDRESSES (INSN_UID (dest))
7734 - INSN_ADDRESSES (INSN_UID (insn)));
7735
7736 /* Leave some instructions for "slop". */
7737 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7738 v9_form = true;
7739
7740 if (TARGET_CBCOND)
7741 {
7742 bool emit_nop = emit_cbcond_nop (insn);
7743 bool far = false;
7744 const char *rval;
7745
7746 if (delta < -500 || delta > 500)
7747 far = true;
7748
7749 if (far)
7750 {
7751 if (v9_form)
7752 rval = "ba,a,pt\t%%xcc, %l0";
7753 else
7754 rval = "b,a\t%l0";
7755 }
7756 else
7757 {
7758 if (emit_nop)
7759 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7760 else
7761 rval = "cwbe\t%%g0, %%g0, %l0";
7762 }
7763 return rval;
7764 }
7765
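  /* The %* and %( sequences in the templates below are punctuation
     codes expanded by sparc_print_operand: %* emits the annul flag and
     %( emits a delay-slot nop when one is needed.  */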
7766 if (v9_form)
7767 strcpy (string, "ba%*,pt\t%%xcc, ");
7768 else
7769 strcpy (string, "b%*\t");
7770
7771 p = strchr (string, '\0');
7772 *p++ = '%';
7773 *p++ = 'l';
7774 *p++ = '0';
7775 *p++ = '%';
7776 *p++ = '(';
7777 *p = '\0';
7778
7779 return string;
7780 }
7781
7782 /* Return the string to output a conditional branch to LABEL, which is
7783 the operand number of the label. OP is the conditional expression.
7784 XEXP (OP, 0) is assumed to be a condition code register (integer or
7785 floating point) and its mode specifies what kind of comparison we made.
7786
7787 DEST is the destination insn (i.e. the label), INSN is the source.
7788
7789 REVERSED is nonzero if we should reverse the sense of the comparison.
7790
7791 ANNUL is nonzero if we should generate an annulling branch. */
7792
7793 const char *
7794 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7795 rtx_insn *insn)
7796 {
7797 static char string[64];
7798 enum rtx_code code = GET_CODE (op);
7799 rtx cc_reg = XEXP (op, 0);
7800 machine_mode mode = GET_MODE (cc_reg);
7801 const char *labelno, *branch;
7802 int spaces = 8, far;
7803 char *p;
7804
7805 /* v9 branches are limited to +-1MB. If it is too far away,
7806 change
7807
7808 bne,pt %xcc, .LC30
7809
7810 to
7811
7812 be,pn %xcc, .+12
7813 nop
7814 ba .LC30
7815
7816 and
7817
7818 fbne,a,pn %fcc2, .LC29
7819
7820 to
7821
7822 fbe,pt %fcc2, .+16
7823 nop
7824 ba .LC29 */
7825
7826 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7827 if (reversed ^ far)
7828 {
7829 /* Reversal of FP compares takes care -- an ordered compare
7830 becomes an unordered compare and vice versa. */
7831 if (mode == CCFPmode || mode == CCFPEmode)
7832 code = reverse_condition_maybe_unordered (code);
7833 else
7834 code = reverse_condition (code);
7835 }
7836
7837 /* Start by writing the branch condition. */
7838 if (mode == CCFPmode || mode == CCFPEmode)
7839 {
7840 switch (code)
7841 {
7842 case NE:
7843 branch = "fbne";
7844 break;
7845 case EQ:
7846 branch = "fbe";
7847 break;
7848 case GE:
7849 branch = "fbge";
7850 break;
7851 case GT:
7852 branch = "fbg";
7853 break;
7854 case LE:
7855 branch = "fble";
7856 break;
7857 case LT:
7858 branch = "fbl";
7859 break;
7860 case UNORDERED:
7861 branch = "fbu";
7862 break;
7863 case ORDERED:
7864 branch = "fbo";
7865 break;
7866 case UNGT:
7867 branch = "fbug";
7868 break;
7869 case UNLT:
7870 branch = "fbul";
7871 break;
7872 case UNEQ:
7873 branch = "fbue";
7874 break;
7875 case UNGE:
7876 branch = "fbuge";
7877 break;
7878 case UNLE:
7879 branch = "fbule";
7880 break;
7881 case LTGT:
7882 branch = "fblg";
7883 break;
7884 default:
7885 gcc_unreachable ();
7886 }
7887
7888 /* ??? !v9: FP branches cannot be preceded by another floating point
7889 insn. Because there is currently no concept of pre-delay slots,
7890 we can fix this only by always emitting a nop before a floating
7891 point branch. */
7892
7893 string[0] = '\0';
7894 if (! TARGET_V9)
7895 strcpy (string, "nop\n\t");
7896 strcat (string, branch);
7897 }
7898 else
7899 {
7900 switch (code)
7901 {
7902 case NE:
7903 if (mode == CCVmode || mode == CCXVmode)
7904 branch = "bvs";
7905 else
7906 branch = "bne";
7907 break;
7908 case EQ:
7909 if (mode == CCVmode || mode == CCXVmode)
7910 branch = "bvc";
7911 else
7912 branch = "be";
7913 break;
7914 case GE:
7915 if (mode == CCNZmode || mode == CCXNZmode)
7916 branch = "bpos";
7917 else
7918 branch = "bge";
7919 break;
7920 case GT:
7921 branch = "bg";
7922 break;
7923 case LE:
7924 branch = "ble";
7925 break;
7926 case LT:
7927 if (mode == CCNZmode || mode == CCXNZmode)
7928 branch = "bneg";
7929 else
7930 branch = "bl";
7931 break;
7932 case GEU:
7933 branch = "bgeu";
7934 break;
7935 case GTU:
7936 branch = "bgu";
7937 break;
7938 case LEU:
7939 branch = "bleu";
7940 break;
7941 case LTU:
7942 branch = "blu";
7943 break;
7944 default:
7945 gcc_unreachable ();
7946 }
7947 strcpy (string, branch);
7948 }
7949 spaces -= strlen (branch);
7950 p = strchr (string, '\0');
7951
7952 /* Now add the annulling, the label, and a possible noop. */
7953 if (annul && ! far)
7954 {
7955 strcpy (p, ",a");
7956 p += 2;
7957 spaces -= 2;
7958 }
7959
7960 if (TARGET_V9)
7961 {
7962 rtx note;
7963 int v8 = 0;
7964
7965 if (! far && insn && INSN_ADDRESSES_SET_P ())
7966 {
7967 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7968 - INSN_ADDRESSES (INSN_UID (insn)));
7969 /* Leave some instructions for "slop". */
7970 if (delta < -260000 || delta >= 260000)
7971 v8 = 1;
7972 }
7973
7974 switch (mode)
7975 {
7976 case CCmode:
7977 case CCNZmode:
7978 case CCCmode:
7979 case CCVmode:
7980 labelno = "%%icc, ";
7981 if (v8)
7982 labelno = "";
7983 break;
7984 case CCXmode:
7985 case CCXNZmode:
7986 case CCXCmode:
7987 case CCXVmode:
7988 labelno = "%%xcc, ";
7989 gcc_assert (!v8);
7990 break;
7991 case CCFPmode:
7992 case CCFPEmode:
7993 {
7994 static char v9_fcc_labelno[] = "%%fccX, ";
7995 /* Set the char indicating the number of the fcc reg to use. */
7996 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7997 labelno = v9_fcc_labelno;
7998 if (v8)
7999 {
8000 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8001 labelno = "";
8002 }
8003 }
8004 break;
8005 default:
8006 gcc_unreachable ();
8007 }
8008
8009 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8010 {
8011 strcpy (p,
8012 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8013 >= profile_probability::even ()) ^ far)
8014 ? ",pt" : ",pn");
8015 p += 3;
8016 spaces -= 3;
8017 }
8018 }
8019 else
8020 labelno = "";
8021
8022 if (spaces > 0)
8023 *p++ = '\t';
8024 else
8025 *p++ = ' ';
8026 strcpy (p, labelno);
8027 p = strchr (p, '\0');
8028 if (far)
8029 {
8030 strcpy (p, ".+12\n\t nop\n\tb\t");
8031 /* Turn the ".+12" just stored into ".+16", skipping the delay slot
8032 insn as well, if annulling was requested or it will be a nop. */
8033 if (annul || ! final_sequence)
8034 p[3] = '6';
8035 p += 14;
8036 }
8037 *p++ = '%';
8038 *p++ = 'l';
8039 *p++ = label + '0';
8040 *p++ = '%';
8041 *p++ = '#';
8042 *p = '\0';
8043
8044 return string;
8045 }
8046
8047 /* Emit a library call comparison between floating point X and Y.
8048 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8049 Return the new operator to be used in the comparison sequence.
8050
8051 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8052 values as arguments instead of the TFmode registers themselves,
8053 that's why we cannot call emit_float_lib_cmp. */
8054
8055 rtx
8056 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8057 {
8058 const char *qpfunc;
8059 rtx slot0, slot1, result, tem, tem2, libfunc;
8060 machine_mode mode;
8061 enum rtx_code new_comparison;
8062
8063 switch (comparison)
8064 {
8065 case EQ:
8066 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8067 break;
8068
8069 case NE:
8070 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8071 break;
8072
8073 case GT:
8074 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8075 break;
8076
8077 case GE:
8078 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8079 break;
8080
8081 case LT:
8082 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8083 break;
8084
8085 case LE:
8086 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8087 break;
8088
8089 case ORDERED:
8090 case UNORDERED:
8091 case UNGT:
8092 case UNLT:
8093 case UNEQ:
8094 case UNGE:
8095 case UNLE:
8096 case LTGT:
8097 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8098 break;
8099
8100 default:
8101 gcc_unreachable ();
8102 }
8103
8104 if (TARGET_ARCH64)
8105 {
8106 if (MEM_P (x))
8107 {
8108 tree expr = MEM_EXPR (x);
8109 if (expr)
8110 mark_addressable (expr);
8111 slot0 = x;
8112 }
8113 else
8114 {
8115 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8116 emit_move_insn (slot0, x);
8117 }
8118
8119 if (MEM_P (y))
8120 {
8121 tree expr = MEM_EXPR (y);
8122 if (expr)
8123 mark_addressable (expr);
8124 slot1 = y;
8125 }
8126 else
8127 {
8128 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8129 emit_move_insn (slot1, y);
8130 }
8131
8132 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8133 emit_library_call (libfunc, LCT_NORMAL,
8134 DImode, 2,
8135 XEXP (slot0, 0), Pmode,
8136 XEXP (slot1, 0), Pmode);
8137 mode = DImode;
8138 }
8139 else
8140 {
8141 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8142 emit_library_call (libfunc, LCT_NORMAL,
8143 SImode, 2,
8144 x, TFmode, y, TFmode);
8145 mode = SImode;
8146 }
8147
8148
8149 /* Immediately move the result of the libcall into a pseudo
8150 register so reload doesn't clobber the value if it needs
8151 the return register for a spill reg. */
8152 result = gen_reg_rtx (mode);
8153 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8154
8155 switch (comparison)
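  /* For the _Q_cmp/_Qp_cmp calls used by the unordered comparisons, the
     result encodes the relation: 0 for equal, 1 for less, 2 for greater
     and 3 for unordered, which is what the tests below rely on; the
     other functions simply return a boolean.  */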
8156 {
8157 default:
8158 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8159 case ORDERED:
8160 case UNORDERED:
8161 new_comparison = (comparison == UNORDERED ? EQ : NE);
8162 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8163 case UNGT:
8164 case UNGE:
8165 new_comparison = (comparison == UNGT ? GT : NE);
8166 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8167 case UNLE:
8168 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8169 case UNLT:
8170 tem = gen_reg_rtx (mode);
8171 if (TARGET_ARCH32)
8172 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8173 else
8174 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8175 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8176 case UNEQ:
8177 case LTGT:
8178 tem = gen_reg_rtx (mode);
8179 if (TARGET_ARCH32)
8180 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8181 else
8182 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8183 tem2 = gen_reg_rtx (mode);
8184 if (TARGET_ARCH32)
8185 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8186 else
8187 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8188 new_comparison = (comparison == UNEQ ? EQ : NE);
8189 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8190 }
8191
8192 gcc_unreachable ();
8193 }
8194
8195 /* Generate an unsigned DImode to FP conversion. This is the same code
8196 optabs would emit if we didn't have TFmode patterns. */
8197
8198 void
8199 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8200 {
8201 rtx i0, i1, f0, in, out;
8202
8203 out = operands[0];
8204 in = force_reg (DImode, operands[1]);
8205 rtx_code_label *neglab = gen_label_rtx ();
8206 rtx_code_label *donelab = gen_label_rtx ();
8207 i0 = gen_reg_rtx (DImode);
8208 i1 = gen_reg_rtx (DImode);
8209 f0 = gen_reg_rtx (mode);
8210
8211 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8212
8213 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8214 emit_jump_insn (gen_jump (donelab));
8215 emit_barrier ();
8216
8217 emit_label (neglab);
8218
8219 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8220 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8221 emit_insn (gen_iordi3 (i0, i0, i1));
8222 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8223 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8224
8225 emit_label (donelab);
8226 }
8227
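/* The emitted sequence amounts to this C sketch (illustration only,
   shown for a double result):

     double u64_to_fp (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       // Halve the value, keeping the low bit as a sticky bit so that
       // the final rounding is still correct, then double the result.
       unsigned long long h = (x >> 1) | (x & 1);
       return 2.0 * (double) (long long) h;
     }
*/
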
8228 /* Generate an FP to unsigned DImode conversion. This is the same code
8229 optabs would emit if we didn't have TFmode patterns. */
8230
8231 void
8232 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8233 {
8234 rtx i0, i1, f0, in, out, limit;
8235
8236 out = operands[0];
8237 in = force_reg (mode, operands[1]);
8238 rtx_code_label *neglab = gen_label_rtx ();
8239 rtx_code_label *donelab = gen_label_rtx ();
8240 i0 = gen_reg_rtx (DImode);
8241 i1 = gen_reg_rtx (DImode);
8242 limit = gen_reg_rtx (mode);
8243 f0 = gen_reg_rtx (mode);
8244
8245 emit_move_insn (limit,
8246 const_double_from_real_value (
8247 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8248 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8249
8250 emit_insn (gen_rtx_SET (out,
8251 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8252 emit_jump_insn (gen_jump (donelab));
8253 emit_barrier ();
8254
8255 emit_label (neglab);
8256
8257 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8258 emit_insn (gen_rtx_SET (i0,
8259 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8260 emit_insn (gen_movdi (i1, const1_rtx));
8261 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8262 emit_insn (gen_xordi3 (out, i0, i1));
8263
8264 emit_label (donelab);
8265 }
8266
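/* Likewise, the emitted sequence corresponds to (illustration only):

     unsigned long long fp_to_u64 (double f)
     {
       if (f < 0x1p63)       // below 2^63: direct signed conversion
         return (long long) f;
       // Subtract 2^63 first, then put the top bit back with xor.
       return (unsigned long long) (long long) (f - 0x1p63)
              ^ (1ULL << 63);
     }
*/
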
8267 /* Return the string to output a compare and branch instruction to DEST.
8268 DEST is the destination insn (i.e. the label), INSN is the source,
8269 and OP is the conditional expression. */
8270
8271 const char *
8272 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8273 {
8274 machine_mode mode = GET_MODE (XEXP (op, 0));
8275 enum rtx_code code = GET_CODE (op);
8276 const char *cond_str, *tmpl;
8277 int far, emit_nop, len;
8278 static char string[64];
8279 char size_char;
8280
8281 /* Compare and Branch is limited to +-2KB. If it is too far away,
8282 change
8283
8284 cxbne X, Y, .LC30
8285
8286 to
8287
8288 cxbe X, Y, .+16
8289 nop
8290 ba,pt xcc, .LC30
8291 nop */
8292
8293 len = get_attr_length (insn);
8294
8295 far = len == 4;
8296 emit_nop = len == 2;
8297
8298 if (far)
8299 code = reverse_condition (code);
8300
8301 size_char = ((mode == SImode) ? 'w' : 'x');
8302
8303 switch (code)
8304 {
8305 case NE:
8306 cond_str = "ne";
8307 break;
8308
8309 case EQ:
8310 cond_str = "e";
8311 break;
8312
8313 case GE:
8314 cond_str = "ge";
8315 break;
8316
8317 case GT:
8318 cond_str = "g";
8319 break;
8320
8321 case LE:
8322 cond_str = "le";
8323 break;
8324
8325 case LT:
8326 cond_str = "l";
8327 break;
8328
8329 case GEU:
8330 cond_str = "cc";
8331 break;
8332
8333 case GTU:
8334 cond_str = "gu";
8335 break;
8336
8337 case LEU:
8338 cond_str = "leu";
8339 break;
8340
8341 case LTU:
8342 cond_str = "cs";
8343 break;
8344
8345 default:
8346 gcc_unreachable ();
8347 }
8348
8349 if (far)
8350 {
8351 int veryfar = 1, delta;
8352
8353 if (INSN_ADDRESSES_SET_P ())
8354 {
8355 delta = (INSN_ADDRESSES (INSN_UID (dest))
8356 - INSN_ADDRESSES (INSN_UID (insn)));
8357 /* Leave some instructions for "slop". */
8358 if (delta >= -260000 && delta < 260000)
8359 veryfar = 0;
8360 }
8361
8362 if (veryfar)
8363 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8364 else
8365 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8366 }
8367 else
8368 {
8369 if (emit_nop)
8370 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8371 else
8372 tmpl = "c%cb%s\t%%1, %%2, %%3";
8373 }
8374
8375 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8376
8377 return string;
8378 }
8379
8380 /* Return the string to output a conditional branch to LABEL, testing
8381 register REG. LABEL is the operand number of the label; REG is the
8382 operand number of the reg. OP is the conditional expression. The mode
8383 of REG says what kind of comparison we made.
8384
8385 DEST is the destination insn (i.e. the label), INSN is the source.
8386
8387 REVERSED is nonzero if we should reverse the sense of the comparison.
8388
8389 ANNUL is nonzero if we should generate an annulling branch. */
8390
8391 const char *
8392 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8393 int annul, rtx_insn *insn)
8394 {
8395 static char string[64];
8396 enum rtx_code code = GET_CODE (op);
8397 machine_mode mode = GET_MODE (XEXP (op, 0));
8398 rtx note;
8399 int far;
8400 char *p;
8401
8402 /* Branches on a register are limited to +-128KB.  If it is too far away,
8403 change
8404
8405 brnz,pt %g1, .LC30
8406
8407 to
8408
8409 brz,pn %g1, .+12
8410 nop
8411 ba,pt %xcc, .LC30
8412
8413 and
8414
8415 brgez,a,pn %o1, .LC29
8416
8417 to
8418
8419 brlz,pt %o1, .+16
8420 nop
8421 ba,pt %xcc, .LC29 */
8422
8423 far = get_attr_length (insn) >= 3;
8424
8425 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8426 if (reversed ^ far)
8427 code = reverse_condition (code);
8428
8429 /* Only 64-bit versions of these instructions exist. */
8430 gcc_assert (mode == DImode);
8431
8432 /* Start by writing the branch condition. */
8433
8434 switch (code)
8435 {
8436 case NE:
8437 strcpy (string, "brnz");
8438 break;
8439
8440 case EQ:
8441 strcpy (string, "brz");
8442 break;
8443
8444 case GE:
8445 strcpy (string, "brgez");
8446 break;
8447
8448 case LT:
8449 strcpy (string, "brlz");
8450 break;
8451
8452 case LE:
8453 strcpy (string, "brlez");
8454 break;
8455
8456 case GT:
8457 strcpy (string, "brgz");
8458 break;
8459
8460 default:
8461 gcc_unreachable ();
8462 }
8463
8464 p = strchr (string, '\0');
8465
8466 /* Now add the annulling, reg, label, and nop. */
8467 if (annul && ! far)
8468 {
8469 strcpy (p, ",a");
8470 p += 2;
8471 }
8472
8473 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8474 {
8475 strcpy (p,
8476 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8477 >= profile_probability::even ()) ^ far)
8478 ? ",pt" : ",pn");
8479 p += 3;
8480 }
8481
8482 *p = p < string + 8 ? '\t' : ' ';
8483 p++;
8484 *p++ = '%';
8485 *p++ = '0' + reg;
8486 *p++ = ',';
8487 *p++ = ' ';
8488 if (far)
8489 {
8490 int veryfar = 1, delta;
8491
8492 if (INSN_ADDRESSES_SET_P ())
8493 {
8494 delta = (INSN_ADDRESSES (INSN_UID (dest))
8495 - INSN_ADDRESSES (INSN_UID (insn)));
8496 /* Leave some instructions for "slop". */
8497 if (delta >= -260000 && delta < 260000)
8498 veryfar = 0;
8499 }
8500
8501 strcpy (p, ".+12\n\t nop\n\t");
8502 /* Turn the ".+12" just stored into ".+16", skipping the delay slot
8503 insn as well, if annulling was requested or it will be a nop. */
8504 if (annul || ! final_sequence)
8505 p[3] = '6';
8506 p += 12;
8507 if (veryfar)
8508 {
8509 strcpy (p, "b\t");
8510 p += 2;
8511 }
8512 else
8513 {
8514 strcpy (p, "ba,pt\t%%xcc, ");
8515 p += 13;
8516 }
8517 }
8518 *p++ = '%';
8519 *p++ = 'l';
8520 *p++ = '0' + label;
8521 *p++ = '%';
8522 *p++ = '#';
8523 *p = '\0';
8524
8525 return string;
8526 }
8527
8528 /* Return 1 if any of the registers of the instruction are %l[0-7] or
8529 %o[0-7].  Such instructions cannot be used in the delay slot of a
8530 return insn on V9.  If TEST is 0, also rename all %i[0-7] registers
8531 to their %o[0-7] counterparts.  */
8532
8533 static int
8534 epilogue_renumber (register rtx *where, int test)
8535 {
8536 register const char *fmt;
8537 register int i;
8538 register enum rtx_code code;
8539
8540 if (*where == 0)
8541 return 0;
8542
8543 code = GET_CODE (*where);
8544
8545 switch (code)
8546 {
8547 case REG:
8548 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8549 return 1;
8550 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8551 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8552 /* fallthrough */
8553 case SCRATCH:
8554 case CC0:
8555 case PC:
8556 case CONST_INT:
8557 case CONST_WIDE_INT:
8558 case CONST_DOUBLE:
8559 return 0;
8560
8561 /* Do not replace the frame pointer with the stack pointer because
8562 it can cause the delayed instruction to load below the stack.
8563 This occurs when instructions like:
8564
8565 (set (reg/i:SI 24 %i0)
8566 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8567 (const_int -20 [0xffffffec])) 0))
8568
8569 are in the return delay slot.  */
8570 case PLUS:
8571 if (GET_CODE (XEXP (*where, 0)) == REG
8572 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8573 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8574 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8575 return 1;
8576 break;
8577
8578 case MEM:
8579 if (SPARC_STACK_BIAS
8580 && GET_CODE (XEXP (*where, 0)) == REG
8581 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8582 return 1;
8583 break;
8584
8585 default:
8586 break;
8587 }
8588
8589 fmt = GET_RTX_FORMAT (code);
8590
8591 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8592 {
8593 if (fmt[i] == 'E')
8594 {
8595 register int j;
8596 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8597 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8598 return 1;
8599 }
8600 else if (fmt[i] == 'e'
8601 && epilogue_renumber (&(XEXP (*where, i)), test))
8602 return 1;
8603 }
8604 return 0;
8605 }
8606 \f
8607 /* Leaf functions and non-leaf functions have different needs. */
8608
8609 static const int
8610 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8611
8612 static const int
8613 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8614
8615 static const int *const reg_alloc_orders[] = {
8616 reg_leaf_alloc_order,
8617 reg_nonleaf_alloc_order};
8618
8619 void
8620 order_regs_for_local_alloc (void)
8621 {
8622 static int last_order_nonleaf = 1;
8623
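  /* Hard register 15 is %o7, which is clobbered by call instructions,
     so it is live exactly in non-leaf functions.  */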
8624 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8625 {
8626 last_order_nonleaf = !last_order_nonleaf;
8627 memcpy ((char *) reg_alloc_order,
8628 (const char *) reg_alloc_orders[last_order_nonleaf],
8629 FIRST_PSEUDO_REGISTER * sizeof (int));
8630 }
8631 }
8632 \f
8633 /* Return 1 if REG and MEM are legitimate enough to allow the various
8634 MEM<-->REG splits to be run. */
8635
8636 int
8637 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8638 {
8639 /* Punt if we are here by mistake. */
8640 gcc_assert (reload_completed);
8641
8642 /* We must have an offsettable memory reference. */
8643 if (!offsettable_memref_p (mem))
8644 return 0;
8645
8646 /* If we have legitimate args for ldd/std, we do not want
8647 the split to happen. */
8648 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8649 return 0;
8650
8651 /* Success. */
8652 return 1;
8653 }
8654
8655 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8656
8657 void
8658 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8659 {
8660 rtx high_part = gen_highpart (mode, dest);
8661 rtx low_part = gen_lowpart (mode, dest);
8662 rtx word0 = adjust_address (src, mode, 0);
8663 rtx word1 = adjust_address (src, mode, 4);
8664
8665 if (reg_overlap_mentioned_p (high_part, word1))
8666 {
8667 emit_move_insn_1 (low_part, word1);
8668 emit_move_insn_1 (high_part, word0);
8669 }
8670 else
8671 {
8672 emit_move_insn_1 (high_part, word0);
8673 emit_move_insn_1 (low_part, word1);
8674 }
8675 }
8676
8677 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8678
8679 void
8680 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8681 {
8682 rtx word0 = adjust_address (dest, mode, 0);
8683 rtx word1 = adjust_address (dest, mode, 4);
8684 rtx high_part = gen_highpart (mode, src);
8685 rtx low_part = gen_lowpart (mode, src);
8686
8687 emit_move_insn_1 (word0, high_part);
8688 emit_move_insn_1 (word1, low_part);
8689 }
8690
8691 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8692
8693 int
8694 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8695 {
8696 /* Punt if we are here by mistake. */
8697 gcc_assert (reload_completed);
8698
8699 if (GET_CODE (reg1) == SUBREG)
8700 reg1 = SUBREG_REG (reg1);
8701 if (GET_CODE (reg1) != REG)
8702 return 0;
8703 const int regno1 = REGNO (reg1);
8704
8705 if (GET_CODE (reg2) == SUBREG)
8706 reg2 = SUBREG_REG (reg2);
8707 if (GET_CODE (reg2) != REG)
8708 return 0;
8709 const int regno2 = REGNO (reg2);
8710
8711 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8712 return 1;
8713
8714 if (TARGET_VIS3)
8715 {
8716 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8717 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8718 return 1;
8719 }
8720
8721 return 0;
8722 }
8723
8724 /* Split a REG <--> REG move into a pair of moves in MODE. */
8725
8726 void
8727 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8728 {
8729 rtx dest1 = gen_highpart (mode, dest);
8730 rtx dest2 = gen_lowpart (mode, dest);
8731 rtx src1 = gen_highpart (mode, src);
8732 rtx src2 = gen_lowpart (mode, src);
8733
8734 /* Now emit using the real source and destination we found, swapping
8735 the order if we detect overlap. */
8736 if (reg_overlap_mentioned_p (dest1, src2))
8737 {
8738 emit_move_insn_1 (dest2, src2);
8739 emit_move_insn_1 (dest1, src1);
8740 }
8741 else
8742 {
8743 emit_move_insn_1 (dest1, src1);
8744 emit_move_insn_1 (dest2, src2);
8745 }
8746 }
8747
8748 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8749 This makes them candidates for using ldd and std insns.
8750
8751 Note reg1 and reg2 *must* be hard registers. */
8752
8753 int
8754 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8755 {
8756 /* We might have been passed a SUBREG. */
8757 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8758 return 0;
8759
8760 if (REGNO (reg1) % 2 != 0)
8761 return 0;
8762
8763 /* Integer ldd is deprecated in SPARC V9.  */
8764 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8765 return 0;
8766
8767 return (REGNO (reg1) == REGNO (reg2) - 1);
8768 }
8769
8770 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8771 an ldd or std insn.
8772
8773 This can only happen when addr1 and addr2, the addresses in mem1
8774 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8775 addr1 must also be aligned on a 64-bit boundary.
8776
8777 Also, if dependent_reg_rtx is not null, it must not be used to
8778 compute the address for mem1, i.e. we cannot optimize a sequence
8779 like:
8780 ld [%o0], %o0
8781 ld [%o0 + 4], %o1
8782 to
8783 ldd [%o0], %o0
8784 nor:
8785 ld [%g3 + 4], %g3
8786 ld [%g3], %g2
8787 to
8788 ldd [%g3], %g2
8789
8790 But, note that the transformation from:
8791 ld [%g2 + 4], %g3
8792 ld [%g2], %g2
8793 to
8794 ldd [%g2], %g2
8795 is perfectly fine. Thus, the peephole2 patterns always pass us
8796 the destination register of the first load, never the second one.
8797
8798 For stores we don't have a similar problem, so dependent_reg_rtx is
8799 NULL_RTX. */
8800
8801 int
8802 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8803 {
8804 rtx addr1, addr2;
8805 unsigned int reg1;
8806 HOST_WIDE_INT offset1;
8807
8808 /* The mems cannot be volatile. */
8809 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8810 return 0;
8811
8812 /* MEM1 should be aligned on a 64-bit boundary. */
8813 if (MEM_ALIGN (mem1) < 64)
8814 return 0;
8815
8816 addr1 = XEXP (mem1, 0);
8817 addr2 = XEXP (mem2, 0);
8818
8819 /* Extract a register number and offset (if used) from the first addr. */
8820 if (GET_CODE (addr1) == PLUS)
8821 {
8822 /* If not a REG, return zero. */
8823 if (GET_CODE (XEXP (addr1, 0)) != REG)
8824 return 0;
8825 else
8826 {
8827 reg1 = REGNO (XEXP (addr1, 0));
8828 /* The offset must be constant! */
8829 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8830 return 0;
8831 offset1 = INTVAL (XEXP (addr1, 1));
8832 }
8833 }
8834 else if (GET_CODE (addr1) != REG)
8835 return 0;
8836 else
8837 {
8838 reg1 = REGNO (addr1);
8839 /* This was a simple (mem (reg)) expression. Offset is 0. */
8840 offset1 = 0;
8841 }
8842
8843 /* Make sure the second address is of the form (plus (reg) (const_int)).  */
8844 if (GET_CODE (addr2) != PLUS)
8845 return 0;
8846
8847 if (GET_CODE (XEXP (addr2, 0)) != REG
8848 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8849 return 0;
8850
8851 if (reg1 != REGNO (XEXP (addr2, 0)))
8852 return 0;
8853
8854 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8855 return 0;
8856
8857 /* The first offset must be evenly divisible by 8 to ensure the
8858 address is 64-bit aligned. */
8859 if (offset1 % 8 != 0)
8860 return 0;
8861
8862 /* The offset for the second addr must be 4 more than the first addr. */
8863 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8864 return 0;
8865
8866 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8867 instructions. */
8868 return 1;
8869 }
8870
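/* For example (pre-V9, where integer ldd is still accepted), with %o0
   known to be 8-byte aligned:

     ld  [%o0 + 8],  %o2
     ld  [%o0 + 12], %o3

   passes all the checks above and may be combined into

     ldd [%o0 + 8],  %o2  */
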
8871 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8872
8873 rtx
8874 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8875 {
8876 rtx x = widen_memory_access (mem1, mode, 0);
8877 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8878 return x;
8879 }
8880
8881 /* Return 1 if reg is a pseudo, or is the first register in
8882 a hard register pair. This makes it suitable for use in
8883 ldd and std insns. */
8884
8885 int
8886 register_ok_for_ldd (rtx reg)
8887 {
8888 /* We might have been passed a SUBREG. */
8889 if (!REG_P (reg))
8890 return 0;
8891
8892 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8893 return (REGNO (reg) % 2 == 0);
8894
8895 return 1;
8896 }
8897
8898 /* Return 1 if OP, a MEM, has an address which is known to be
8899 aligned to an 8-byte boundary. */
8900
8901 int
8902 memory_ok_for_ldd (rtx op)
8903 {
8904 /* In 64-bit mode, we assume that the address is word-aligned. */
8905 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8906 return 0;
8907
8908 if (! can_create_pseudo_p ()
8909 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8910 return 0;
8911
8912 return 1;
8913 }
8914 \f
8915 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8916
8917 static bool
8918 sparc_print_operand_punct_valid_p (unsigned char code)
8919 {
8920 if (code == '#'
8921 || code == '*'
8922 || code == '('
8923 || code == ')'
8924 || code == '_'
8925 || code == '&')
8926 return true;
8927
8928 return false;
8929 }
8930
8931 /* Implement TARGET_PRINT_OPERAND.
8932 Print operand X (an rtx) in assembler syntax to file FILE.
8933 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8934 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8935
8936 static void
8937 sparc_print_operand (FILE *file, rtx x, int code)
8938 {
8939 const char *s;
8940
8941 switch (code)
8942 {
8943 case '#':
8944 /* Output an insn in a delay slot. */
8945 if (final_sequence)
8946 sparc_indent_opcode = 1;
8947 else
8948 fputs ("\n\t nop", file);
8949 return;
8950 case '*':
8951 /* Output an annul flag if there's nothing for the delay slot and we
8952 are optimizing. This is always used with '(' below.
8953 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8954 this is a dbx bug. So, we only do this when optimizing.
8955 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8956 Always emit a nop in case the next instruction is a branch. */
8957 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8958 fputs (",a", file);
8959 return;
8960 case '(':
8961 /* Output a 'nop' if there's nothing for the delay slot and we are
8962 not optimizing. This is always used with '*' above. */
8963 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8964 fputs ("\n\t nop", file);
8965 else if (final_sequence)
8966 sparc_indent_opcode = 1;
8967 return;
8968 case ')':
8969 /* Output the right displacement from the saved PC on function return.
8970 The caller may have placed an "unimp" insn immediately after the call
8971 so we have to account for it. This insn is used in the 32-bit ABI
8972 when calling a function that returns a non zero-sized structure. The
8973 64-bit ABI doesn't have it. Be careful to have this test be the same
8974 as that for the call. The exception is when sparc_std_struct_return
8975 is enabled, the psABI is followed exactly and the adjustment is made
8976 by the code in sparc_struct_value_rtx. The call emitted is the same
8977 when sparc_std_struct_return is enabled. */
8978 if (!TARGET_ARCH64
8979 && cfun->returns_struct
8980 && !sparc_std_struct_return
8981 && DECL_SIZE (DECL_RESULT (current_function_decl))
8982 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8983 == INTEGER_CST
8984 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8985 fputs ("12", file);
8986 else
8987 fputc ('8', file);
8988 return;
8989 case '_':
8990 /* Output the Embedded Medium/Anywhere code model base register. */
8991 fputs (EMBMEDANY_BASE_REG, file);
8992 return;
8993 case '&':
8994 /* Print some local dynamic TLS name. */
8995 if (const char *name = get_some_local_dynamic_name ())
8996 assemble_name (file, name);
8997 else
8998 output_operand_lossage ("'%%&' used without any "
8999 "local dynamic TLS references");
9000 return;
9001
9002 case 'Y':
9003 /* Adjust the operand to take into account a RESTORE operation. */
9004 if (GET_CODE (x) == CONST_INT)
9005 break;
9006 else if (GET_CODE (x) != REG)
9007 output_operand_lossage ("invalid %%Y operand");
9008 else if (REGNO (x) < 8)
9009 fputs (reg_names[REGNO (x)], file);
9010 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9011 fputs (reg_names[REGNO (x)-16], file);
9012 else
9013 output_operand_lossage ("invalid %%Y operand");
9014 return;
9015 case 'L':
9016 /* Print out the low order register name of a register pair. */
9017 if (WORDS_BIG_ENDIAN)
9018 fputs (reg_names[REGNO (x)+1], file);
9019 else
9020 fputs (reg_names[REGNO (x)], file);
9021 return;
9022 case 'H':
9023 /* Print out the high order register name of a register pair. */
9024 if (WORDS_BIG_ENDIAN)
9025 fputs (reg_names[REGNO (x)], file);
9026 else
9027 fputs (reg_names[REGNO (x)+1], file);
9028 return;
9029 case 'R':
9030 /* Print out the second register name of a register pair or quad.
9031 I.e., R (%o0) => %o1. */
9032 fputs (reg_names[REGNO (x)+1], file);
9033 return;
9034 case 'S':
9035 /* Print out the third register name of a register quad.
9036 I.e., S (%o0) => %o2. */
9037 fputs (reg_names[REGNO (x)+2], file);
9038 return;
9039 case 'T':
9040 /* Print out the fourth register name of a register quad.
9041 I.e., T (%o0) => %o3. */
9042 fputs (reg_names[REGNO (x)+3], file);
9043 return;
9044 case 'x':
9045 /* Print a condition code register. */
9046 if (REGNO (x) == SPARC_ICC_REG)
9047 {
9048 switch (GET_MODE (x))
9049 {
9050 case CCmode:
9051 case CCNZmode:
9052 case CCCmode:
9053 case CCVmode:
9054 s = "%icc";
9055 break;
9056 case CCXmode:
9057 case CCXNZmode:
9058 case CCXCmode:
9059 case CCXVmode:
9060 s = "%xcc";
9061 break;
9062 default:
9063 gcc_unreachable ();
9064 }
9065 fputs (s, file);
9066 }
9067 else
9068 /* %fccN register */
9069 fputs (reg_names[REGNO (x)], file);
9070 return;
9071 case 'm':
9072 /* Print the operand's address only. */
9073 output_address (GET_MODE (x), XEXP (x, 0));
9074 return;
9075 case 'r':
9076 /* In this case we need a register. Use %g0 if the
9077 operand is const0_rtx. */
9078 if (x == const0_rtx
9079 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9080 {
9081 fputs ("%g0", file);
9082 return;
9083 }
9084 else
9085 break;
9086
9087 case 'A':
9088 switch (GET_CODE (x))
9089 {
9090 case IOR:
9091 s = "or";
9092 break;
9093 case AND:
9094 s = "and";
9095 break;
9096 case XOR:
9097 s = "xor";
9098 break;
9099 default:
9100 output_operand_lossage ("invalid %%A operand");
9101 s = "";
9102 break;
9103 }
9104 fputs (s, file);
9105 return;
9106
9107 case 'B':
9108 switch (GET_CODE (x))
9109 {
9110 case IOR:
9111 s = "orn";
9112 break;
9113 case AND:
9114 s = "andn";
9115 break;
9116 case XOR:
9117 s = "xnor";
9118 break;
9119 default:
9120 output_operand_lossage ("invalid %%B operand");
9121 s = "";
9122 break;
9123 }
9124 fputs (s, file);
9125 return;
9126
9127 /* This is used by the conditional move instructions. */
9128 case 'C':
9129 {
9130 machine_mode mode = GET_MODE (XEXP (x, 0));
9131 switch (GET_CODE (x))
9132 {
9133 case NE:
9134 if (mode == CCVmode || mode == CCXVmode)
9135 s = "vs";
9136 else
9137 s = "ne";
9138 break;
9139 case EQ:
9140 if (mode == CCVmode || mode == CCXVmode)
9141 s = "vc";
9142 else
9143 s = "e";
9144 break;
9145 case GE:
9146 if (mode == CCNZmode || mode == CCXNZmode)
9147 s = "pos";
9148 else
9149 s = "ge";
9150 break;
9151 case GT:
9152 s = "g";
9153 break;
9154 case LE:
9155 s = "le";
9156 break;
9157 case LT:
9158 if (mode == CCNZmode || mode == CCXNZmode)
9159 s = "neg";
9160 else
9161 s = "l";
9162 break;
9163 case GEU:
9164 s = "geu";
9165 break;
9166 case GTU:
9167 s = "gu";
9168 break;
9169 case LEU:
9170 s = "leu";
9171 break;
9172 case LTU:
9173 s = "lu";
9174 break;
9175 case LTGT:
9176 s = "lg";
9177 break;
9178 case UNORDERED:
9179 s = "u";
9180 break;
9181 case ORDERED:
9182 s = "o";
9183 break;
9184 case UNLT:
9185 s = "ul";
9186 break;
9187 case UNLE:
9188 s = "ule";
9189 break;
9190 case UNGT:
9191 s = "ug";
9192 break;
9193 case UNGE:
9194 s = "uge";
9195 break;
9196 case UNEQ:
9197 s = "ue";
9198 break;
9199 default:
9200 output_operand_lossage ("invalid %%C operand");
9201 s = "";
9202 break;
9203 }
9204 fputs (s, file);
9205 return;
9206 }
9207
9208 /* This is used by the movr instruction pattern.  */
9209 case 'D':
9210 {
9211 switch (GET_CODE (x))
9212 {
9213 case NE:
9214 s = "ne";
9215 break;
9216 case EQ:
9217 s = "e";
9218 break;
9219 case GE:
9220 s = "gez";
9221 break;
9222 case LT:
9223 s = "lz";
9224 break;
9225 case LE:
9226 s = "lez";
9227 break;
9228 case GT:
9229 s = "gz";
9230 break;
9231 default:
9232 output_operand_lossage ("invalid %%D operand");
9233 s = "";
9234 break;
9235 }
9236 fputs (s, file);
9237 return;
9238 }
9239
9240 case 'b':
9241 {
9242 /* Print a sign-extended character. */
9243 int i = trunc_int_for_mode (INTVAL (x), QImode);
9244 fprintf (file, "%d", i);
9245 return;
9246 }
9247
9248 case 'f':
9249 /* Operand must be a MEM; write its address. */
9250 if (GET_CODE (x) != MEM)
9251 output_operand_lossage ("invalid %%f operand");
9252 output_address (GET_MODE (x), XEXP (x, 0));
9253 return;
9254
9255 case 's':
9256 {
9257 /* Print a sign-extended 32-bit value. */
9258 HOST_WIDE_INT i;
9259 if (GET_CODE(x) == CONST_INT)
9260 i = INTVAL (x);
9261 else
9262 {
9263 output_operand_lossage ("invalid %%s operand");
9264 return;
9265 }
9266 i = trunc_int_for_mode (i, SImode);
9267 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9268 return;
9269 }
9270
9271 case 0:
9272 /* Do nothing special. */
9273 break;
9274
9275 default:
9276 /* Undocumented flag. */
9277 output_operand_lossage ("invalid operand output code");
9278 }
9279
9280 if (GET_CODE (x) == REG)
9281 fputs (reg_names[REGNO (x)], file);
9282 else if (GET_CODE (x) == MEM)
9283 {
9284 fputc ('[', file);
9285 /* Poor Sun assembler doesn't understand absolute addressing. */
9286 if (CONSTANT_P (XEXP (x, 0)))
9287 fputs ("%g0+", file);
9288 output_address (GET_MODE (x), XEXP (x, 0));
9289 fputc (']', file);
9290 }
9291 else if (GET_CODE (x) == HIGH)
9292 {
9293 fputs ("%hi(", file);
9294 output_addr_const (file, XEXP (x, 0));
9295 fputc (')', file);
9296 }
9297 else if (GET_CODE (x) == LO_SUM)
9298 {
9299 sparc_print_operand (file, XEXP (x, 0), 0);
9300 if (TARGET_CM_MEDMID)
9301 fputs ("+%l44(", file);
9302 else
9303 fputs ("+%lo(", file);
9304 output_addr_const (file, XEXP (x, 1));
9305 fputc (')', file);
9306 }
9307 else if (GET_CODE (x) == CONST_DOUBLE)
9308 output_operand_lossage ("floating-point constant not a valid immediate operand");
9309 else
9310 output_addr_const (file, x);
9311 }
9312
9313 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9314
9315 static void
9316 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9317 {
9318 register rtx base, index = 0;
9319 int offset = 0;
9320 register rtx addr = x;
9321
9322 if (REG_P (addr))
9323 fputs (reg_names[REGNO (addr)], file);
9324 else if (GET_CODE (addr) == PLUS)
9325 {
9326 if (CONST_INT_P (XEXP (addr, 0)))
9327 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9328 else if (CONST_INT_P (XEXP (addr, 1)))
9329 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9330 else
9331 base = XEXP (addr, 0), index = XEXP (addr, 1);
9332 if (GET_CODE (base) == LO_SUM)
9333 {
9334 gcc_assert (USE_AS_OFFSETABLE_LO10
9335 && TARGET_ARCH64
9336 && ! TARGET_CM_MEDMID);
9337 output_operand (XEXP (base, 0), 0);
9338 fputs ("+%lo(", file);
9339 output_address (VOIDmode, XEXP (base, 1));
9340 fprintf (file, ")+%d", offset);
9341 }
9342 else
9343 {
9344 fputs (reg_names[REGNO (base)], file);
9345 if (index == 0)
9346 fprintf (file, "%+d", offset);
9347 else if (REG_P (index))
9348 fprintf (file, "+%s", reg_names[REGNO (index)]);
9349 else if (GET_CODE (index) == SYMBOL_REF
9350 || GET_CODE (index) == LABEL_REF
9351 || GET_CODE (index) == CONST)
9352 fputc ('+', file), output_addr_const (file, index);
9353 else gcc_unreachable ();
9354 }
9355 }
9356 else if (GET_CODE (addr) == MINUS
9357 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9358 {
9359 output_addr_const (file, XEXP (addr, 0));
9360 fputs ("-(", file);
9361 output_addr_const (file, XEXP (addr, 1));
9362 fputs ("-.)", file);
9363 }
9364 else if (GET_CODE (addr) == LO_SUM)
9365 {
9366 output_operand (XEXP (addr, 0), 0);
9367 if (TARGET_CM_MEDMID)
9368 fputs ("+%l44(", file);
9369 else
9370 fputs ("+%lo(", file);
9371 output_address (VOIDmode, XEXP (addr, 1));
9372 fputc (')', file);
9373 }
9374 else if (flag_pic
9375 && GET_CODE (addr) == CONST
9376 && GET_CODE (XEXP (addr, 0)) == MINUS
9377 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9378 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9379 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9380 {
9381 addr = XEXP (addr, 0);
9382 output_addr_const (file, XEXP (addr, 0));
9383 /* Group the args of the second CONST in parentheses. */
9384 fputs ("-(", file);
9385 /* Skip past the second CONST--it does nothing for us. */
9386 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9387 /* Close the parenthesis. */
9388 fputc (')', file);
9389 }
9390 else
9391 {
9392 output_addr_const (file, addr);
9393 }
9394 }
9395 \f
9396 /* Target hook for assembling integer objects. The sparc version has
9397 special handling for aligned DI-mode objects. */
9398
9399 static bool
9400 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9401 {
9402 /* ??? We only output .xword's for symbols and only then in environments
9403 where the assembler can handle them. */
9404 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9405 {
9406 if (TARGET_V9)
9407 {
9408 assemble_integer_with_op ("\t.xword\t", x);
9409 return true;
9410 }
9411 else
9412 {
9413 assemble_aligned_integer (4, const0_rtx);
9414 assemble_aligned_integer (4, x);
9415 return true;
9416 }
9417 }
9418 return default_assemble_integer (x, size, aligned_p);
9419 }
9420 \f
9421 /* Return the value of a code used in the .proc pseudo-op that says
9422 what kind of result this function returns. For non-C types, we pick
9423 the closest C type. */
9424
9425 #ifndef SHORT_TYPE_SIZE
9426 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9427 #endif
9428
9429 #ifndef INT_TYPE_SIZE
9430 #define INT_TYPE_SIZE BITS_PER_WORD
9431 #endif
9432
9433 #ifndef LONG_TYPE_SIZE
9434 #define LONG_TYPE_SIZE BITS_PER_WORD
9435 #endif
9436
9437 #ifndef LONG_LONG_TYPE_SIZE
9438 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9439 #endif
9440
9441 #ifndef FLOAT_TYPE_SIZE
9442 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9443 #endif
9444
9445 #ifndef DOUBLE_TYPE_SIZE
9446 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9447 #endif
9448
9449 #ifndef LONG_DOUBLE_TYPE_SIZE
9450 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9451 #endif
9452
9453 unsigned long
9454 sparc_type_code (register tree type)
9455 {
9456 register unsigned long qualifiers = 0;
9457 register unsigned shift;
9458
9459 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9460 setting more, since some assemblers will give an error for this. Also,
9461 we must be careful to avoid shifts of 32 bits or more to avoid getting
9462 unpredictable results. */
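/* Worked example (illustrative): for a type like "int **", the two
   POINTER_TYPE levels contribute (1 << 6) and (1 << 8), and the final
   INTEGER_TYPE (signed, precision == INT_TYPE_SIZE) returns code 4,
   so the result is 4 | 0x40 | 0x100 = 0x144.  */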
9463
9464 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9465 {
9466 switch (TREE_CODE (type))
9467 {
9468 case ERROR_MARK:
9469 return qualifiers;
9470
9471 case ARRAY_TYPE:
9472 qualifiers |= (3 << shift);
9473 break;
9474
9475 case FUNCTION_TYPE:
9476 case METHOD_TYPE:
9477 qualifiers |= (2 << shift);
9478 break;
9479
9480 case POINTER_TYPE:
9481 case REFERENCE_TYPE:
9482 case OFFSET_TYPE:
9483 qualifiers |= (1 << shift);
9484 break;
9485
9486 case RECORD_TYPE:
9487 return (qualifiers | 8);
9488
9489 case UNION_TYPE:
9490 case QUAL_UNION_TYPE:
9491 return (qualifiers | 9);
9492
9493 case ENUMERAL_TYPE:
9494 return (qualifiers | 10);
9495
9496 case VOID_TYPE:
9497 return (qualifiers | 16);
9498
9499 case INTEGER_TYPE:
9500 /* If this is a range type, consider it to be the underlying
9501 type. */
9502 if (TREE_TYPE (type) != 0)
9503 break;
9504
9505 /* Carefully distinguish all the standard types of C,
9506 without messing up if the language is not C. We do this by
9507 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9508 look at both the names and the above fields, but that's redundant.
9509 Any type whose size is between two C types will be considered
9510 to be the wider of the two types. Also, we do not have a
9511 special code to use for "long long", so anything wider than
9512 long is treated the same. Note that we can't distinguish
9513 between "int" and "long" in this code if they are the same
9514 size, but that's fine, since neither can the assembler. */
9515
9516 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9517 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9518
9519 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9520 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9521
9522 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9523 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9524
9525 else
9526 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9527
9528 case REAL_TYPE:
9529 /* If this is a range type, consider it to be the underlying
9530 type. */
9531 if (TREE_TYPE (type) != 0)
9532 break;
9533
9534 /* Carefully distinguish all the standard types of C,
9535 without messing up if the language is not C. */
9536
9537 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9538 return (qualifiers | 6);
9539
9540 else
9541 return (qualifiers | 7);
9542
9543 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9544 /* ??? We need to distinguish between double and float complex types,
9545 but I don't know how yet because I can't reach this code from
9546 existing front-ends. */
9547 return (qualifiers | 7); /* Who knows? */
9548
9549 case VECTOR_TYPE:
9550 case BOOLEAN_TYPE: /* Boolean truth value type. */
9551 case LANG_TYPE:
9552 case NULLPTR_TYPE:
9553 return qualifiers;
9554
9555 default:
9556 gcc_unreachable (); /* Not a type! */
9557 }
9558 }
9559
9560 return qualifiers;
9561 }
9562 \f
9563 /* Nested function support. */
9564
9565 /* Emit RTL insns to initialize the variable parts of a trampoline.
9566 FNADDR is an RTX for the address of the function's pure code.
9567 CXT is an RTX for the static chain value for the function.
9568
9569 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9570 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9571 (to store insns). This is a bit excessive. Perhaps a different
9572 mechanism would be better here.
9573
9574 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9575
9576 static void
9577 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9578 {
9579 /* SPARC 32-bit trampoline:
9580
9581 sethi %hi(fn), %g1
9582 sethi %hi(static), %g2
9583 jmp %g1+%lo(fn)
9584 or %g2, %lo(static), %g2
9585
9586 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9587 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9588 */
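/* Illustrative encoding with a hypothetical fn address of 0x40001234:
   word 0 becomes 0x03000000 | (0x40001234 >> 10) = 0x03100004
   ("sethi %hi(fn), %g1"), and word 8 becomes
   0x81c06000 | (0x40001234 & 0x3ff) = 0x81c06234 ("jmp %g1+%lo(fn)").  */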
9589
9590 emit_move_insn
9591 (adjust_address (m_tramp, SImode, 0),
9592 expand_binop (SImode, ior_optab,
9593 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9594 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9595 NULL_RTX, 1, OPTAB_DIRECT));
9596
9597 emit_move_insn
9598 (adjust_address (m_tramp, SImode, 4),
9599 expand_binop (SImode, ior_optab,
9600 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9601 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9602 NULL_RTX, 1, OPTAB_DIRECT));
9603
9604 emit_move_insn
9605 (adjust_address (m_tramp, SImode, 8),
9606 expand_binop (SImode, ior_optab,
9607 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9608 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9609 NULL_RTX, 1, OPTAB_DIRECT));
9610
9611 emit_move_insn
9612 (adjust_address (m_tramp, SImode, 12),
9613 expand_binop (SImode, ior_optab,
9614 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9615 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9616 NULL_RTX, 1, OPTAB_DIRECT));
9617
9618 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9619 aligned on a 16 byte boundary so one flush clears it all. */
9620 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9621 if (sparc_cpu != PROCESSOR_ULTRASPARC
9622 && sparc_cpu != PROCESSOR_ULTRASPARC3
9623 && sparc_cpu != PROCESSOR_NIAGARA
9624 && sparc_cpu != PROCESSOR_NIAGARA2
9625 && sparc_cpu != PROCESSOR_NIAGARA3
9626 && sparc_cpu != PROCESSOR_NIAGARA4
9627 && sparc_cpu != PROCESSOR_NIAGARA7
9628 && sparc_cpu != PROCESSOR_M8)
9629 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9630
9631 /* Call __enable_execute_stack after writing onto the stack to make sure
9632 the stack address is accessible. */
9633 #ifdef HAVE_ENABLE_EXECUTE_STACK
9634 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9635 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9636 #endif
9637
9638 }
9639
9640 /* The 64-bit version is simpler because it makes more sense to load the
9641 values as "immediate" data out of the trampoline. It's also easier since
9642 we can read the PC without clobbering a register. */
9643
9644 static void
9645 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9646 {
9647 /* SPARC 64-bit trampoline:
9648
9649 rd %pc, %g1
9650 ldx [%g1+24], %g5
9651 jmp %g5
9652 ldx [%g1+16], %g5
9653 +16 bytes data
9654 */
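/* The four SImode words stored below are the fixed instructions above,
   pre-encoded: 0x83414000 is "rd %pc, %g1", 0xca586018 is
   "ldx [%g1+24], %g5", 0x81c14000 is "jmp %g5" and 0xca586010 is
   "ldx [%g1+16], %g5".  */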
9655
9656 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9657 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9658 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9659 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9660 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9661 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9662 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9663 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9664 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9665 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9666 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9667
9668 if (sparc_cpu != PROCESSOR_ULTRASPARC
9669 && sparc_cpu != PROCESSOR_ULTRASPARC3
9670 && sparc_cpu != PROCESSOR_NIAGARA
9671 && sparc_cpu != PROCESSOR_NIAGARA2
9672 && sparc_cpu != PROCESSOR_NIAGARA3
9673 && sparc_cpu != PROCESSOR_NIAGARA4
9674 && sparc_cpu != PROCESSOR_NIAGARA7
9675 && sparc_cpu != PROCESSOR_M8)
9676 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9677
9678 /* Call __enable_execute_stack after writing onto the stack to make sure
9679 the stack address is accessible. */
9680 #ifdef HAVE_ENABLE_EXECUTE_STACK
9681 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9682 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9683 #endif
9684 }
9685
9686 /* Worker for TARGET_TRAMPOLINE_INIT. */
9687
9688 static void
9689 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9690 {
9691 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9692 cxt = force_reg (Pmode, cxt);
9693 if (TARGET_ARCH64)
9694 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9695 else
9696 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9697 }
9698 \f
9699 /* Adjust the cost of a scheduling dependency. Return the new cost of
9700 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9701
9702 static int
9703 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9704 int cost)
9705 {
9706 enum attr_type insn_type;
9707
9708 if (recog_memoized (insn) < 0)
9709 return cost;
9710
9711 insn_type = get_attr_type (insn);
9712
9713 if (dep_type == 0)
9714 {
9715 /* Data dependency; DEP_INSN writes a register that INSN reads some
9716 cycles later. */
9717
9718 /* If a load, then the dependence must be on the memory address;
9719 add an extra "cycle". Note that the cost could be two cycles
9720 if the reg was written late in an instruction group; we cannot tell
9721 here. */
9722 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9723 return cost + 3;
9724
9725 /* Get the delay only if the address of the store is the dependence. */
9726 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9727 {
9728 rtx pat = PATTERN (insn);
9729 rtx dep_pat = PATTERN (dep_insn);
9730
9731 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9732 return cost; /* This should not happen! */
9733
9734 /* The dependency between the two instructions was on the data that
9735 is being stored. Assume that this implies that the address of the
9736 store is not dependent. */
9737 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9738 return cost;
9739
9740 return cost + 3; /* An approximation. */
9741 }
9742
9743 /* A shift instruction cannot receive its data from an instruction
9744 in the same cycle; add a one cycle penalty. */
9745 if (insn_type == TYPE_SHIFT)
9746 return cost + 3; /* Split before cascade into shift. */
9747 }
9748 else
9749 {
9750 /* Anti- or output-dependency; DEP_INSN reads/writes a register that
9751 INSN writes some cycles later. */
9752
9753 /* These are only significant for the FPU; writing an FP reg before
9754 the FPU has finished with it stalls the processor. */
9755
9756 /* Reusing an integer register causes no problems. */
9757 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9758 return 0;
9759 }
9760
9761 return cost;
9762 }
9763
9764 static int
9765 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9766 int cost)
9767 {
9768 enum attr_type insn_type, dep_type;
9769 rtx pat = PATTERN (insn);
9770 rtx dep_pat = PATTERN (dep_insn);
9771
9772 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9773 return cost;
9774
9775 insn_type = get_attr_type (insn);
9776 dep_type = get_attr_type (dep_insn);
9777
9778 switch (dtype)
9779 {
9780 case 0:
9781 /* Data dependency; DEP_INSN writes a register that INSN reads some
9782 cycles later. */
9783
9784 switch (insn_type)
9785 {
9786 case TYPE_STORE:
9787 case TYPE_FPSTORE:
9788 /* Get the delay iff the address of the store is the dependence. */
9789 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9790 return cost;
9791
9792 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9793 return cost;
9794 return cost + 3;
9795
9796 case TYPE_LOAD:
9797 case TYPE_SLOAD:
9798 case TYPE_FPLOAD:
9799 /* If a load, then the dependence must be on the memory address. If
9800 the addresses aren't equal, then it might be a false dependency. */
9801 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9802 {
9803 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9804 || GET_CODE (SET_DEST (dep_pat)) != MEM
9805 || GET_CODE (SET_SRC (pat)) != MEM
9806 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9807 XEXP (SET_SRC (pat), 0)))
9808 return cost + 2;
9809
9810 return cost + 8;
9811 }
9812 break;
9813
9814 case TYPE_BRANCH:
9815 /* Compare to branch latency is 0. There is no benefit from
9816 separating compare and branch. */
9817 if (dep_type == TYPE_COMPARE)
9818 return 0;
9819 /* Floating point compare to branch latency is less than
9820 compare to conditional move. */
9821 if (dep_type == TYPE_FPCMP)
9822 return cost - 1;
9823 break;
9824 default:
9825 break;
9826 }
9827 break;
9828
9829 case REG_DEP_ANTI:
9830 /* Anti-dependencies only penalize the FPU. */
9831 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9832 return 0;
9833 break;
9834
9835 default:
9836 break;
9837 }
9838
9839 return cost;
9840 }
9841
9842 static int
9843 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9844 unsigned int)
9845 {
9846 switch (sparc_cpu)
9847 {
9848 case PROCESSOR_SUPERSPARC:
9849 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9850 break;
9851 case PROCESSOR_HYPERSPARC:
9852 case PROCESSOR_SPARCLITE86X:
9853 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9854 break;
9855 default:
9856 break;
9857 }
9858 return cost;
9859 }
9860
9861 static void
9862 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9863 int sched_verbose ATTRIBUTE_UNUSED,
9864 int max_ready ATTRIBUTE_UNUSED)
9865 {}
9866
9867 static int
9868 sparc_use_sched_lookahead (void)
9869 {
9870 if (sparc_cpu == PROCESSOR_NIAGARA
9871 || sparc_cpu == PROCESSOR_NIAGARA2
9872 || sparc_cpu == PROCESSOR_NIAGARA3)
9873 return 0;
9874 if (sparc_cpu == PROCESSOR_NIAGARA4
9875 || sparc_cpu == PROCESSOR_NIAGARA7
9876 || sparc_cpu == PROCESSOR_M8)
9877 return 2;
9878 if (sparc_cpu == PROCESSOR_ULTRASPARC
9879 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9880 return 4;
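/* Set-membership test: the remaining three-issue CPUs are collected
   into a bitmask and sparc_cpu's bit is checked against it.  */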
9881 if ((1 << sparc_cpu) &
9882 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9883 (1 << PROCESSOR_SPARCLITE86X)))
9884 return 3;
9885 return 0;
9886 }
9887
9888 static int
9889 sparc_issue_rate (void)
9890 {
9891 switch (sparc_cpu)
9892 {
9893 case PROCESSOR_NIAGARA:
9894 case PROCESSOR_NIAGARA2:
9895 case PROCESSOR_NIAGARA3:
9896 default:
9897 return 1;
9898 case PROCESSOR_NIAGARA4:
9899 case PROCESSOR_NIAGARA7:
9900 case PROCESSOR_V9:
9901 /* Assume V9 processors are capable of at least dual-issue. */
9902 return 2;
9903 case PROCESSOR_SUPERSPARC:
9904 return 3;
9905 case PROCESSOR_HYPERSPARC:
9906 case PROCESSOR_SPARCLITE86X:
9907 return 2;
9908 case PROCESSOR_ULTRASPARC:
9909 case PROCESSOR_ULTRASPARC3:
9910 case PROCESSOR_M8:
9911 return 4;
9912 }
9913 }
9914
9915 static int
9916 set_extends (rtx_insn *insn)
9917 {
9918 register rtx pat = PATTERN (insn);
9919
9920 switch (GET_CODE (SET_SRC (pat)))
9921 {
9922 /* Load and some shift instructions zero extend. */
9923 case MEM:
9924 case ZERO_EXTEND:
9925 /* sethi clears the high bits. */
9926 case HIGH:
9927 /* LO_SUM is used with sethi; sethi clears the high
9928 bits and the values used with lo_sum are positive. */
9929 case LO_SUM:
9930 /* Store-flag instructions store 0 or 1. */
9931 case LT: case LTU:
9932 case GT: case GTU:
9933 case LE: case LEU:
9934 case GE: case GEU:
9935 case EQ:
9936 case NE:
9937 return 1;
9938 case AND:
9939 {
9940 rtx op0 = XEXP (SET_SRC (pat), 0);
9941 rtx op1 = XEXP (SET_SRC (pat), 1);
9942 if (GET_CODE (op1) == CONST_INT)
9943 return INTVAL (op1) >= 0;
9944 if (GET_CODE (op0) != REG)
9945 return 0;
9946 if (sparc_check_64 (op0, insn) == 1)
9947 return 1;
9948 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9949 }
9950 case IOR:
9951 case XOR:
9952 {
9953 rtx op0 = XEXP (SET_SRC (pat), 0);
9954 rtx op1 = XEXP (SET_SRC (pat), 1);
9955 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9956 return 0;
9957 if (GET_CODE (op1) == CONST_INT)
9958 return INTVAL (op1) >= 0;
9959 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9960 }
9961 case LSHIFTRT:
9962 return GET_MODE (SET_SRC (pat)) == SImode;
9963 /* Positive integers leave the high bits zero. */
9964 case CONST_INT:
9965 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9966 case ASHIFTRT:
9967 case SIGN_EXTEND:
9968 return - (GET_MODE (SET_SRC (pat)) == SImode);
9969 case REG:
9970 return sparc_check_64 (SET_SRC (pat), insn);
9971 default:
9972 return 0;
9973 }
9974 }
9975
9976 /* We _ought_ to have only one kind per function, but... */
9977 static GTY(()) rtx sparc_addr_diff_list;
9978 static GTY(()) rtx sparc_addr_list;
9979
9980 void
9981 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9982 {
9983 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9984 if (diff)
9985 sparc_addr_diff_list
9986 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9987 else
9988 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9989 }
9990
9991 static void
9992 sparc_output_addr_vec (rtx vec)
9993 {
9994 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9995 int idx, vlen = XVECLEN (body, 0);
9996
9997 #ifdef ASM_OUTPUT_ADDR_VEC_START
9998 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9999 #endif
10000
10001 #ifdef ASM_OUTPUT_CASE_LABEL
10002 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10003 NEXT_INSN (lab));
10004 #else
10005 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10006 #endif
10007
10008 for (idx = 0; idx < vlen; idx++)
10009 {
10010 ASM_OUTPUT_ADDR_VEC_ELT
10011 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10012 }
10013
10014 #ifdef ASM_OUTPUT_ADDR_VEC_END
10015 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10016 #endif
10017 }
10018
10019 static void
10020 sparc_output_addr_diff_vec (rtx vec)
10021 {
10022 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10023 rtx base = XEXP (XEXP (body, 0), 0);
10024 int idx, vlen = XVECLEN (body, 1);
10025
10026 #ifdef ASM_OUTPUT_ADDR_VEC_START
10027 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10028 #endif
10029
10030 #ifdef ASM_OUTPUT_CASE_LABEL
10031 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10032 NEXT_INSN (lab));
10033 #else
10034 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10035 #endif
10036
10037 for (idx = 0; idx < vlen; idx++)
10038 {
10039 ASM_OUTPUT_ADDR_DIFF_ELT
10040 (asm_out_file,
10041 body,
10042 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10043 CODE_LABEL_NUMBER (base));
10044 }
10045
10046 #ifdef ASM_OUTPUT_ADDR_VEC_END
10047 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10048 #endif
10049 }
10050
10051 static void
10052 sparc_output_deferred_case_vectors (void)
10053 {
10054 rtx t;
10055 int align;
10056
10057 if (sparc_addr_list == NULL_RTX
10058 && sparc_addr_diff_list == NULL_RTX)
10059 return;
10060
10061 /* Align to cache line in the function's code section. */
10062 switch_to_section (current_function_section ());
10063
10064 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10065 if (align > 0)
10066 ASM_OUTPUT_ALIGN (asm_out_file, align);
10067
10068 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10069 sparc_output_addr_vec (XEXP (t, 0));
10070 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10071 sparc_output_addr_diff_vec (XEXP (t, 0));
10072
10073 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10074 }
10075
10076 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10077 unknown. Return 1 if the high bits are zero, -1 if the register is
10078 sign extended. */
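/* For instance (illustrative): after an SImode "srl %x, 0, %y" the high
   32 bits of %y are known zero, so this returns 1 for %y (see
   set_extends above for the recognized cases).  */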
10079 int
10080 sparc_check_64 (rtx x, rtx_insn *insn)
10081 {
10082 /* If a register is set only once it is safe to ignore insns this
10083 code does not know how to handle. The loop will either recognize
10084 the single set and return the correct value or fail to recognize
10085 it and return 0. */
10086 int set_once = 0;
10087 rtx y = x;
10088
10089 gcc_assert (GET_CODE (x) == REG);
10090
10091 if (GET_MODE (x) == DImode)
10092 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10093
10094 if (flag_expensive_optimizations
10095 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10096 set_once = 1;
10097
10098 if (insn == 0)
10099 {
10100 if (set_once)
10101 insn = get_last_insn_anywhere ();
10102 else
10103 return 0;
10104 }
10105
10106 while ((insn = PREV_INSN (insn)))
10107 {
10108 switch (GET_CODE (insn))
10109 {
10110 case JUMP_INSN:
10111 case NOTE:
10112 break;
10113 case CODE_LABEL:
10114 case CALL_INSN:
10115 default:
10116 if (! set_once)
10117 return 0;
10118 break;
10119 case INSN:
10120 {
10121 rtx pat = PATTERN (insn);
10122 if (GET_CODE (pat) != SET)
10123 return 0;
10124 if (rtx_equal_p (x, SET_DEST (pat)))
10125 return set_extends (insn);
10126 if (y && rtx_equal_p (y, SET_DEST (pat)))
10127 return set_extends (insn);
10128 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10129 return 0;
10130 }
10131 }
10132 }
10133 return 0;
10134 }
10135
10136 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10137 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
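/* Illustrative output (first alternative, register input whose low word
   is not known to be zero-extended):

	sllx	%H1, 32, %0
	srl	%L1, 0, %L1
	or	%L1, %0, %0
	<opcode>	%0, %2, %L0
	srlx	%L0, 32, %H0  */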
10138
10139 const char *
10140 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10141 {
10142 static char asm_code[60];
10143
10144 /* The scratch register is only required when the destination
10145 register is not a 64-bit global or out register. */
10146 if (which_alternative != 2)
10147 operands[3] = operands[0];
10148
10149 /* We can only shift by constants <= 63. */
10150 if (GET_CODE (operands[2]) == CONST_INT)
10151 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10152
10153 if (GET_CODE (operands[1]) == CONST_INT)
10154 {
10155 output_asm_insn ("mov\t%1, %3", operands);
10156 }
10157 else
10158 {
10159 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10160 if (sparc_check_64 (operands[1], insn) <= 0)
10161 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10162 output_asm_insn ("or\t%L1, %3, %3", operands);
10163 }
10164
10165 strcpy (asm_code, opcode);
10166
10167 if (which_alternative != 2)
10168 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10169 else
10170 return
10171 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10172 }
10173 \f
10174 /* Output rtl to increment the profiler label LABELNO
10175 for profiling a function entry. */
10176
10177 void
10178 sparc_profile_hook (int labelno)
10179 {
10180 char buf[32];
10181 rtx lab, fun;
10182
10183 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10184 if (NO_PROFILE_COUNTERS)
10185 {
10186 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
10187 }
10188 else
10189 {
10190 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10191 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10192 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
10193 }
10194 }
10195 \f
10196 #ifdef TARGET_SOLARIS
10197 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10198
10199 static void
10200 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10201 tree decl ATTRIBUTE_UNUSED)
10202 {
10203 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10204 {
10205 solaris_elf_asm_comdat_section (name, flags, decl);
10206 return;
10207 }
10208
10209 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10210
10211 if (!(flags & SECTION_DEBUG))
10212 fputs (",#alloc", asm_out_file);
10213 if (flags & SECTION_WRITE)
10214 fputs (",#write", asm_out_file);
10215 if (flags & SECTION_TLS)
10216 fputs (",#tls", asm_out_file);
10217 if (flags & SECTION_CODE)
10218 fputs (",#execinstr", asm_out_file);
10219
10220 if (flags & SECTION_NOTYPE)
10221 ;
10222 else if (flags & SECTION_BSS)
10223 fputs (",#nobits", asm_out_file);
10224 else
10225 fputs (",#progbits", asm_out_file);
10226
10227 fputc ('\n', asm_out_file);
10228 }
10229 #endif /* TARGET_SOLARIS */
10230
10231 /* We do not allow indirect calls to be optimized into sibling calls.
10232
10233 We cannot use sibling calls when delayed branches are disabled
10234 because they will likely require the call delay slot to be filled.
10235
10236 Also, on SPARC 32-bit we cannot emit a sibling call when the
10237 current function returns a structure. This is because the "unimp
10238 after call" convention would cause the callee to return to the
10239 wrong place. The generic code already disallows cases where the
10240 function being called returns a structure.
10241
10242 It may seem strange that this last case could occur. Usually there
10243 is code after the call which jumps to epilogue code which dumps the
10244 return value into the struct return area. That ought to invalidate
10245 the sibling call, right? Well, in the C++ case we can end up passing
10246 the pointer to the struct return area to a constructor (which returns
10247 void) and then nothing else happens. Such a sibling call would look
10248 valid without the added check here.
10249
10250 VxWorks PIC PLT entries require the global pointer to be initialized
10251 on entry. We therefore can't emit sibling calls to them. */
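/* A hypothetical C++ sketch of the constructor case described above:

	struct S { S (); };
	S f (void) { return S (); }

   The constructor receives the pointer to f's struct return area and is
   the last call in f, so without the check here it would look like a
   valid sibling call.  */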
10252 static bool
10253 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10254 {
10255 return (decl
10256 && flag_delayed_branch
10257 && (TARGET_ARCH64 || ! cfun->returns_struct)
10258 && !(TARGET_VXWORKS_RTP
10259 && flag_pic
10260 && !targetm.binds_local_p (decl)));
10261 }
10262 \f
10263 /* libfunc renaming. */
10264
10265 static void
10266 sparc_init_libfuncs (void)
10267 {
10268 if (TARGET_ARCH32)
10269 {
10270 /* Use the subroutines that Sun's library provides for integer
10271 multiply and divide. The `*' prevents an underscore from
10272 being prepended by the compiler. .umul is a little faster
10273 than .mul. */
10274 set_optab_libfunc (smul_optab, SImode, "*.umul");
10275 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10276 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10277 set_optab_libfunc (smod_optab, SImode, "*.rem");
10278 set_optab_libfunc (umod_optab, SImode, "*.urem");
10279
10280 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10281 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10282 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10283 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10284 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10285 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10286
10287 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10288 is because with soft-float, the SFmode and DFmode sqrt
10289 instructions will be absent, and the compiler will notice and
10290 try to use the TFmode sqrt instruction for calls to the
10291 builtin function sqrt, but this fails. */
10292 if (TARGET_FPU)
10293 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10294
10295 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10296 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10297 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10298 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10299 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10300 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10301
10302 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10303 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10304 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10305 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10306
10307 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10308 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10309 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10310 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10311
10312 if (DITF_CONVERSION_LIBFUNCS)
10313 {
10314 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10315 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10316 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10317 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10318 }
10319
10320 if (SUN_CONVERSION_LIBFUNCS)
10321 {
10322 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10323 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10324 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10325 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10326 }
10327 }
10328 if (TARGET_ARCH64)
10329 {
10330 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10331 do not exist in the library. Make sure the compiler does not
10332 emit calls to them by accident. (It should always use the
10333 hardware instructions.) */
10334 set_optab_libfunc (smul_optab, SImode, 0);
10335 set_optab_libfunc (sdiv_optab, SImode, 0);
10336 set_optab_libfunc (udiv_optab, SImode, 0);
10337 set_optab_libfunc (smod_optab, SImode, 0);
10338 set_optab_libfunc (umod_optab, SImode, 0);
10339
10340 if (SUN_INTEGER_MULTIPLY_64)
10341 {
10342 set_optab_libfunc (smul_optab, DImode, "__mul64");
10343 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10344 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10345 set_optab_libfunc (smod_optab, DImode, "__rem64");
10346 set_optab_libfunc (umod_optab, DImode, "__urem64");
10347 }
10348
10349 if (SUN_CONVERSION_LIBFUNCS)
10350 {
10351 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10352 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10353 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10354 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10355 }
10356 }
10357 }
10358 \f
10359 /* SPARC builtins. */
10360 enum sparc_builtins
10361 {
10362 /* FPU builtins. */
10363 SPARC_BUILTIN_LDFSR,
10364 SPARC_BUILTIN_STFSR,
10365
10366 /* VIS 1.0 builtins. */
10367 SPARC_BUILTIN_FPACK16,
10368 SPARC_BUILTIN_FPACK32,
10369 SPARC_BUILTIN_FPACKFIX,
10370 SPARC_BUILTIN_FEXPAND,
10371 SPARC_BUILTIN_FPMERGE,
10372 SPARC_BUILTIN_FMUL8X16,
10373 SPARC_BUILTIN_FMUL8X16AU,
10374 SPARC_BUILTIN_FMUL8X16AL,
10375 SPARC_BUILTIN_FMUL8SUX16,
10376 SPARC_BUILTIN_FMUL8ULX16,
10377 SPARC_BUILTIN_FMULD8SUX16,
10378 SPARC_BUILTIN_FMULD8ULX16,
10379 SPARC_BUILTIN_FALIGNDATAV4HI,
10380 SPARC_BUILTIN_FALIGNDATAV8QI,
10381 SPARC_BUILTIN_FALIGNDATAV2SI,
10382 SPARC_BUILTIN_FALIGNDATADI,
10383 SPARC_BUILTIN_WRGSR,
10384 SPARC_BUILTIN_RDGSR,
10385 SPARC_BUILTIN_ALIGNADDR,
10386 SPARC_BUILTIN_ALIGNADDRL,
10387 SPARC_BUILTIN_PDIST,
10388 SPARC_BUILTIN_EDGE8,
10389 SPARC_BUILTIN_EDGE8L,
10390 SPARC_BUILTIN_EDGE16,
10391 SPARC_BUILTIN_EDGE16L,
10392 SPARC_BUILTIN_EDGE32,
10393 SPARC_BUILTIN_EDGE32L,
10394 SPARC_BUILTIN_FCMPLE16,
10395 SPARC_BUILTIN_FCMPLE32,
10396 SPARC_BUILTIN_FCMPNE16,
10397 SPARC_BUILTIN_FCMPNE32,
10398 SPARC_BUILTIN_FCMPGT16,
10399 SPARC_BUILTIN_FCMPGT32,
10400 SPARC_BUILTIN_FCMPEQ16,
10401 SPARC_BUILTIN_FCMPEQ32,
10402 SPARC_BUILTIN_FPADD16,
10403 SPARC_BUILTIN_FPADD16S,
10404 SPARC_BUILTIN_FPADD32,
10405 SPARC_BUILTIN_FPADD32S,
10406 SPARC_BUILTIN_FPSUB16,
10407 SPARC_BUILTIN_FPSUB16S,
10408 SPARC_BUILTIN_FPSUB32,
10409 SPARC_BUILTIN_FPSUB32S,
10410 SPARC_BUILTIN_ARRAY8,
10411 SPARC_BUILTIN_ARRAY16,
10412 SPARC_BUILTIN_ARRAY32,
10413
10414 /* VIS 2.0 builtins. */
10415 SPARC_BUILTIN_EDGE8N,
10416 SPARC_BUILTIN_EDGE8LN,
10417 SPARC_BUILTIN_EDGE16N,
10418 SPARC_BUILTIN_EDGE16LN,
10419 SPARC_BUILTIN_EDGE32N,
10420 SPARC_BUILTIN_EDGE32LN,
10421 SPARC_BUILTIN_BMASK,
10422 SPARC_BUILTIN_BSHUFFLEV4HI,
10423 SPARC_BUILTIN_BSHUFFLEV8QI,
10424 SPARC_BUILTIN_BSHUFFLEV2SI,
10425 SPARC_BUILTIN_BSHUFFLEDI,
10426
10427 /* VIS 3.0 builtins. */
10428 SPARC_BUILTIN_CMASK8,
10429 SPARC_BUILTIN_CMASK16,
10430 SPARC_BUILTIN_CMASK32,
10431 SPARC_BUILTIN_FCHKSM16,
10432 SPARC_BUILTIN_FSLL16,
10433 SPARC_BUILTIN_FSLAS16,
10434 SPARC_BUILTIN_FSRL16,
10435 SPARC_BUILTIN_FSRA16,
10436 SPARC_BUILTIN_FSLL32,
10437 SPARC_BUILTIN_FSLAS32,
10438 SPARC_BUILTIN_FSRL32,
10439 SPARC_BUILTIN_FSRA32,
10440 SPARC_BUILTIN_PDISTN,
10441 SPARC_BUILTIN_FMEAN16,
10442 SPARC_BUILTIN_FPADD64,
10443 SPARC_BUILTIN_FPSUB64,
10444 SPARC_BUILTIN_FPADDS16,
10445 SPARC_BUILTIN_FPADDS16S,
10446 SPARC_BUILTIN_FPSUBS16,
10447 SPARC_BUILTIN_FPSUBS16S,
10448 SPARC_BUILTIN_FPADDS32,
10449 SPARC_BUILTIN_FPADDS32S,
10450 SPARC_BUILTIN_FPSUBS32,
10451 SPARC_BUILTIN_FPSUBS32S,
10452 SPARC_BUILTIN_FUCMPLE8,
10453 SPARC_BUILTIN_FUCMPNE8,
10454 SPARC_BUILTIN_FUCMPGT8,
10455 SPARC_BUILTIN_FUCMPEQ8,
10456 SPARC_BUILTIN_FHADDS,
10457 SPARC_BUILTIN_FHADDD,
10458 SPARC_BUILTIN_FHSUBS,
10459 SPARC_BUILTIN_FHSUBD,
10460 SPARC_BUILTIN_FNHADDS,
10461 SPARC_BUILTIN_FNHADDD,
10462 SPARC_BUILTIN_UMULXHI,
10463 SPARC_BUILTIN_XMULX,
10464 SPARC_BUILTIN_XMULXHI,
10465
10466 /* VIS 4.0 builtins. */
10467 SPARC_BUILTIN_FPADD8,
10468 SPARC_BUILTIN_FPADDS8,
10469 SPARC_BUILTIN_FPADDUS8,
10470 SPARC_BUILTIN_FPADDUS16,
10471 SPARC_BUILTIN_FPCMPLE8,
10472 SPARC_BUILTIN_FPCMPGT8,
10473 SPARC_BUILTIN_FPCMPULE16,
10474 SPARC_BUILTIN_FPCMPUGT16,
10475 SPARC_BUILTIN_FPCMPULE32,
10476 SPARC_BUILTIN_FPCMPUGT32,
10477 SPARC_BUILTIN_FPMAX8,
10478 SPARC_BUILTIN_FPMAX16,
10479 SPARC_BUILTIN_FPMAX32,
10480 SPARC_BUILTIN_FPMAXU8,
10481 SPARC_BUILTIN_FPMAXU16,
10482 SPARC_BUILTIN_FPMAXU32,
10483 SPARC_BUILTIN_FPMIN8,
10484 SPARC_BUILTIN_FPMIN16,
10485 SPARC_BUILTIN_FPMIN32,
10486 SPARC_BUILTIN_FPMINU8,
10487 SPARC_BUILTIN_FPMINU16,
10488 SPARC_BUILTIN_FPMINU32,
10489 SPARC_BUILTIN_FPSUB8,
10490 SPARC_BUILTIN_FPSUBS8,
10491 SPARC_BUILTIN_FPSUBUS8,
10492 SPARC_BUILTIN_FPSUBUS16,
10493
10494 /* VIS 4.0B builtins. */
10495
10496 /* Note that all the DICTUNPACK* entries should be kept
10497 contiguous. */
10498 SPARC_BUILTIN_FIRST_DICTUNPACK,
10499 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10500 SPARC_BUILTIN_DICTUNPACK16,
10501 SPARC_BUILTIN_DICTUNPACK32,
10502 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10503
10504 /* Note that all the FPCMP*SHL entries should be kept
10505 contiguous. */
10506 SPARC_BUILTIN_FIRST_FPCMPSHL,
10507 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10508 SPARC_BUILTIN_FPCMPGT8SHL,
10509 SPARC_BUILTIN_FPCMPEQ8SHL,
10510 SPARC_BUILTIN_FPCMPNE8SHL,
10511 SPARC_BUILTIN_FPCMPLE16SHL,
10512 SPARC_BUILTIN_FPCMPGT16SHL,
10513 SPARC_BUILTIN_FPCMPEQ16SHL,
10514 SPARC_BUILTIN_FPCMPNE16SHL,
10515 SPARC_BUILTIN_FPCMPLE32SHL,
10516 SPARC_BUILTIN_FPCMPGT32SHL,
10517 SPARC_BUILTIN_FPCMPEQ32SHL,
10518 SPARC_BUILTIN_FPCMPNE32SHL,
10519 SPARC_BUILTIN_FPCMPULE8SHL,
10520 SPARC_BUILTIN_FPCMPUGT8SHL,
10521 SPARC_BUILTIN_FPCMPULE16SHL,
10522 SPARC_BUILTIN_FPCMPUGT16SHL,
10523 SPARC_BUILTIN_FPCMPULE32SHL,
10524 SPARC_BUILTIN_FPCMPUGT32SHL,
10525 SPARC_BUILTIN_FPCMPDE8SHL,
10526 SPARC_BUILTIN_FPCMPDE16SHL,
10527 SPARC_BUILTIN_FPCMPDE32SHL,
10528 SPARC_BUILTIN_FPCMPUR8SHL,
10529 SPARC_BUILTIN_FPCMPUR16SHL,
10530 SPARC_BUILTIN_FPCMPUR32SHL,
10531 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10532
10533 SPARC_BUILTIN_MAX
10534 };
10535
10536 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10537 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10538
10539 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10540 The instruction should require a constant operand of some sort. The
10541 function prints an error if OPVAL is not valid. */
10542
10543 static int
10544 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10545 {
10546 if (GET_CODE (opval) != CONST_INT)
10547 {
10548 error ("%qs expects a constant argument", insn_data[icode].name);
10549 return false;
10550 }
10551
10552 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10553 {
10554 error ("constant argument out of range for %qs", insn_data[icode].name);
10555 return false;
10556 }
10557 return true;
10558 }
10559
10560 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10561 function decl or NULL_TREE if the builtin was not added. */
10562
10563 static tree
10564 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10565 tree type)
10566 {
10567 tree t
10568 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10569
10570 if (t)
10571 {
10572 sparc_builtins[code] = t;
10573 sparc_builtins_icode[code] = icode;
10574 }
10575
10576 return t;
10577 }
10578
10579 /* Likewise, but also marks the function as "const". */
10580
10581 static tree
10582 def_builtin_const (const char *name, enum insn_code icode,
10583 enum sparc_builtins code, tree type)
10584 {
10585 tree t = def_builtin (name, icode, code, type);
10586
10587 if (t)
10588 TREE_READONLY (t) = 1;
10589
10590 return t;
10591 }
10592
10593 /* Implement the TARGET_INIT_BUILTINS target hook.
10594 Create builtin functions for special SPARC instructions. */
10595
10596 static void
10597 sparc_init_builtins (void)
10598 {
10599 if (TARGET_FPU)
10600 sparc_fpu_init_builtins ();
10601
10602 if (TARGET_VIS)
10603 sparc_vis_init_builtins ();
10604 }
10605
10606 /* Create builtin functions for FPU instructions. */
10607
10608 static void
10609 sparc_fpu_init_builtins (void)
10610 {
10611 tree ftype
10612 = build_function_type_list (void_type_node,
10613 build_pointer_type (unsigned_type_node), 0);
10614 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10615 SPARC_BUILTIN_LDFSR, ftype);
10616 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10617 SPARC_BUILTIN_STFSR, ftype);
10618 }
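/* Illustrative user-level usage (an assumption, not part of this file):

	unsigned int fsr;
	__builtin_store_fsr (&fsr);    stfsr: copy %fsr to memory
	__builtin_load_fsr (&fsr);     ldfsr: load %fsr from memory  */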
10619
10620 /* Create builtin functions for VIS instructions. */
10621
10622 static void
10623 sparc_vis_init_builtins (void)
10624 {
10625 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10626 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10627 tree v4hi = build_vector_type (intHI_type_node, 4);
10628 tree v2hi = build_vector_type (intHI_type_node, 2);
10629 tree v2si = build_vector_type (intSI_type_node, 2);
10630 tree v1si = build_vector_type (intSI_type_node, 1);
10631
10632 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10633 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10634 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10635 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10636 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10637 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10638 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10639 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10640 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10641 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10642 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10643 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10644 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10645 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10646 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10647 v8qi, v8qi,
10648 intDI_type_node, 0);
10649 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10650 v8qi, v8qi, 0);
10651 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10652 v8qi, v8qi, 0);
10653 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10654 intSI_type_node, 0);
10655 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10656 intSI_type_node, 0);
10657 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10658 intSI_type_node, 0);
10659 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10660 intDI_type_node,
10661 intDI_type_node, 0);
10662 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10663 intSI_type_node,
10664 intSI_type_node, 0);
10665 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10666 ptr_type_node,
10667 intSI_type_node, 0);
10668 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10669 ptr_type_node,
10670 intDI_type_node, 0);
10671 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10672 ptr_type_node,
10673 ptr_type_node, 0);
10674 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10675 ptr_type_node,
10676 ptr_type_node, 0);
10677 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10678 v4hi, v4hi, 0);
10679 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10680 v2si, v2si, 0);
10681 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10682 v4hi, v4hi, 0);
10683 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10684 v2si, v2si, 0);
10685 tree void_ftype_di = build_function_type_list (void_type_node,
10686 intDI_type_node, 0);
10687 tree di_ftype_void = build_function_type_list (intDI_type_node,
10688 void_type_node, 0);
10689 tree void_ftype_si = build_function_type_list (void_type_node,
10690 intSI_type_node, 0);
10691 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10692 float_type_node,
10693 float_type_node, 0);
10694 tree df_ftype_df_df = build_function_type_list (double_type_node,
10695 double_type_node,
10696 double_type_node, 0);
10697
10698 /* Packing and expanding vectors. */
10699 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10700 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10701 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10702 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10703 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10704 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10705 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10706 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10707 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10708 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10709
10710 /* Multiplications. */
10711 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10712 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10713 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10714 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10715 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10716 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10717 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10718 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10719 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10720 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10721 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10722 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10723 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10724 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10725
10726 /* Data aligning. */
10727 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10728 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10729 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10730 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10731 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10732 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10733 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10734 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10735
10736 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10737 SPARC_BUILTIN_WRGSR, void_ftype_di);
10738 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10739 SPARC_BUILTIN_RDGSR, di_ftype_void);
10740
10741 if (TARGET_ARCH64)
10742 {
10743 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10744 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10745 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10746 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10747 }
10748 else
10749 {
10750 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10751 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10752 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10753 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10754 }
10755
10756 /* Pixel distance. */
10757 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10758 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10759
10760 /* Edge handling. */
10761 if (TARGET_ARCH64)
10762 {
10763 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10764 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10765 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10766 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10767 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10768 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10769 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10770 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10771 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10772 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10773 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10774 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10775 }
10776 else
10777 {
10778 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10779 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10780 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10781 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10782 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10783 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10784 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10785 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10786 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10787 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10788 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10789 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10790 }
10791
10792 /* Pixel compare. */
10793 if (TARGET_ARCH64)
10794 {
10795 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10796 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10797 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10798 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10799 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10800 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10801 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10802 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10803 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10804 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10805 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10806 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10807 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10808 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10809 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10810 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10811 }
10812 else
10813 {
10814 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10815 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10816 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10817 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10818 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10819 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10820 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10821 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10822 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10823 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10824 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10825 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10826 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10827 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10828 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10829 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10830 }
10831
10832 /* Addition and subtraction. */
10833 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10834 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10835 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10836 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10837 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10838 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10839 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10840 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10841 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10842 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10843 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10844 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10845 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10846 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10847 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10848 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10849
10850 /* Three-dimensional array addressing. */
10851 if (TARGET_ARCH64)
10852 {
10853 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10854 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10855 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10856 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10857 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10858 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10859 }
10860 else
10861 {
10862 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10863 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10864 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10865 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10866 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10867 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10868 }
10869
10870 if (TARGET_VIS2)
10871 {
10872 /* Edge handling. */
10873 if (TARGET_ARCH64)
10874 {
10875 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10876 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10877 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10878 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10879 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10880 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10881 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10882 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10883 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10884 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10885 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10886 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10887 }
10888 else
10889 {
10890 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10891 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10892 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10893 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10894 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10895 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10896 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10897 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10898 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10899 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10900 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10901 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10902 }
10903
10904 /* Byte mask and shuffle. */
10905 if (TARGET_ARCH64)
10906 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10907 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10908 else
10909 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10910 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10911 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10912 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10913 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10914 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10915 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10916 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10917 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10918 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10919 }
10920
10921 if (TARGET_VIS3)
10922 {
10923 if (TARGET_ARCH64)
10924 {
10925 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10926 SPARC_BUILTIN_CMASK8, void_ftype_di);
10927 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10928 SPARC_BUILTIN_CMASK16, void_ftype_di);
10929 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10930 SPARC_BUILTIN_CMASK32, void_ftype_di);
10931 }
10932 else
10933 {
10934 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10935 SPARC_BUILTIN_CMASK8, void_ftype_si);
10936 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10937 SPARC_BUILTIN_CMASK16, void_ftype_si);
10938 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10939 SPARC_BUILTIN_CMASK32, void_ftype_si);
10940 }
10941
10942 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10943 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10944
10945 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10946 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10947 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10948 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10949 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10950 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10951 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10952 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10953 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10954 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10955 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10956 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10957 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10958 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10959 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10960 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10961
10962 if (TARGET_ARCH64)
10963 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10964 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10965 else
10966 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10967 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10968
10969 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10970 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10971 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10972 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10973 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10974 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10975
10976 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10977 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10978 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10979 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10980 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10981 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10982 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10983 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10984 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10985 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10986 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10987 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10988 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10989 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10990 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10991 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10992
10993 if (TARGET_ARCH64)
10994 {
10995 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10996 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10997 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10998 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10999 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11000 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11001 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11002 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11003 }
11004 else
11005 {
11006 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11007 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11008 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11009 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11010 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11011 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11012 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11013 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11014 }
11015
11016 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11017 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11018 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11019 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11020 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11021 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11022 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11023 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11024 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11025 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11026 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11027 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11028
11029 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11030 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11031 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11032 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11033 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11034 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11035 }
11036
11037 if (TARGET_VIS4)
11038 {
11039 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11040 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11041 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11042 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11043 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11044 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11045 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11046 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11047
11048
11049 if (TARGET_ARCH64)
11050 {
11051 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11052 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11053 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11054 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11055 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11056 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11057 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11058 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11059 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11060 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11061 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11062 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11063 }
11064 else
11065 {
11066 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11067 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11068 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11069 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11070 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11071 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11072 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11073 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11074 	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11075 			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11076 	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11077 			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11078 }
11079
11080 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11081 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11082 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11083 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11084 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11085 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11086 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11087 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11088 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11089 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11090 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11091 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11092 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11093 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11094 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11095 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11096 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11097 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11098 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11099 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11100 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11101 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11102 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11103 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11104 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11105 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11106 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11107 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11108 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11109 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11110 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11111 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11112 }
11113
11114 if (TARGET_VIS4B)
11115 {
11116 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11117 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11118 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11119 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11120 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11121 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11122
11123 if (TARGET_ARCH64)
11124 {
11125 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11126 v8qi, v8qi,
11127 intSI_type_node, 0);
11128 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11129 v4hi, v4hi,
11130 intSI_type_node, 0);
11131 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11132 v2si, v2si,
11133 intSI_type_node, 0);
11134
11135 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11136 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11137 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11138 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11139 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11140 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11141 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11142 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11143
11144 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11145 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11146 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11147 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11148 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11149 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11150 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11151 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11152
11153 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11154 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11155 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11156 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11157 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11158 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11159 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11160 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11161
11162
11163 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11164 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11165 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11166 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11167
11168 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11169 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11170 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11171 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11172
11173 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11174 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11175 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11176 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11177
11178 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11179 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11180 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11181 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11182 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11183 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11184
11185 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11186 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11187 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11188 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11189 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11190 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11191
11192 }
11193 else
11194 {
11195 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11196 v8qi, v8qi,
11197 intSI_type_node, 0);
11198 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11199 v4hi, v4hi,
11200 intSI_type_node, 0);
11201 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11202 v2si, v2si,
11203 intSI_type_node, 0);
11204
11205 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11206 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11207 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11208 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11209 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11210 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11211 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11212 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11213
11214 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11215 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11216 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11217 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11218 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11219 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11220 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11221 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11222
11223 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11224 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11225 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11226 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11227 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11228 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11229 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11230 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11231
11232
11233 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11234 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11235 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11236 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11237
11238 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11239 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11240 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11241 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11242
11243 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11244 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11245 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11246 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11247
11248 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11249 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11250 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11251 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11252 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11253 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11254
11255 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11256 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11257 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11258 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11259 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11260 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11261 }
11262 }
11263 }
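
/* Purely illustrative note (not compiler code): once registered above, these
   builtins are directly callable from user code compiled with the matching
   -mvis* option (here -mvis3 for fpadds16), using 8-byte vector typedefs:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi sat_add (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadds16 (a, b);   // expands via CODE_FOR_ssaddv4hi3
     }

   Builtins registered with def_builtin_const are marked as having no side
   effects, so repeated calls with identical arguments can be CSEd and calls
   with constant arguments are candidates for the folding further below.  */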
11264
11265 /* Implement TARGET_BUILTIN_DECL hook. */
11266
11267 static tree
11268 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11269 {
11270 if (code >= SPARC_BUILTIN_MAX)
11271 return error_mark_node;
11272
11273 return sparc_builtins[code];
11274 }
11275
11276 /* Implement TARGET_EXPAND_BUILTIN hook. */
11277
11278 static rtx
11279 sparc_expand_builtin (tree exp, rtx target,
11280 rtx subtarget ATTRIBUTE_UNUSED,
11281 machine_mode tmode ATTRIBUTE_UNUSED,
11282 int ignore ATTRIBUTE_UNUSED)
11283 {
11284 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11285 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11286 enum insn_code icode = sparc_builtins_icode[code];
11287 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11288 call_expr_arg_iterator iter;
11289 int arg_count = 0;
11290 rtx pat, op[4];
11291 tree arg;
11292
11293 if (nonvoid)
11294 {
11295 machine_mode tmode = insn_data[icode].operand[0].mode;
11296 if (!target
11297 || GET_MODE (target) != tmode
11298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11299 op[0] = gen_reg_rtx (tmode);
11300 else
11301 op[0] = target;
11302 }
11303
11304 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11305 {
11306 const struct insn_operand_data *insn_op;
11307 int idx;
11308
11309 if (arg == error_mark_node)
11310 return NULL_RTX;
11311
11312 arg_count++;
11313 idx = arg_count - !nonvoid;
11314 insn_op = &insn_data[icode].operand[idx];
11315 op[arg_count] = expand_normal (arg);
11316
11317 /* Some of the builtins require constant arguments. We check
11318 for this here. */
11319 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11320 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11321 && arg_count == 3)
11322 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11323 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11324 && arg_count == 2))
11325 {
11326 if (!check_constant_argument (icode, idx, op[arg_count]))
11327 return const0_rtx;
11328 }
11329
11330 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11331 {
11332 if (!address_operand (op[arg_count], SImode))
11333 {
11334 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11335 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11336 }
11337 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11338 }
11339
11340 else if (insn_op->mode == V1DImode
11341 && GET_MODE (op[arg_count]) == DImode)
11342 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11343
11344 else if (insn_op->mode == V1SImode
11345 && GET_MODE (op[arg_count]) == SImode)
11346 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11347
11348 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11349 insn_op->mode))
11350 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11351 }
11352
11353 switch (arg_count)
11354 {
11355 case 0:
11356 pat = GEN_FCN (icode) (op[0]);
11357 break;
11358 case 1:
11359 if (nonvoid)
11360 pat = GEN_FCN (icode) (op[0], op[1]);
11361 else
11362 pat = GEN_FCN (icode) (op[1]);
11363 break;
11364 case 2:
11365 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11366 break;
11367 case 3:
11368 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11369 break;
11370 default:
11371 gcc_unreachable ();
11372 }
11373
11374 if (!pat)
11375 return NULL_RTX;
11376
11377 emit_insn (pat);
11378
11379 return (nonvoid ? op[0] : const0_rtx);
11380 }
11381
11382 /* Return the upper 16 bits of the 8x16 multiplication. */
11383
11384 static int
11385 sparc_vis_mul8x16 (int e8, int e16)
11386 {
11387 return (e8 * e16 + 128) / 256;
11388 }
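
/* Worked example (illustrative): sparc_vis_mul8x16 (200, 100) computes
   (200 * 100 + 128) / 256 = 20128 / 256 = 78, i.e. the 24-bit product
   scaled down by 256 with round-to-nearest, matching the semantics of
   the fmul8x16 family of instructions folded below.  */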
11389
11390 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11391 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11392
11393 static void
11394 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
11395 tree inner_type, tree cst0, tree cst1)
11396 {
11397 unsigned i, num = VECTOR_CST_NELTS (cst0);
11398 int scale;
11399
11400 switch (fncode)
11401 {
11402 case SPARC_BUILTIN_FMUL8X16:
11403 for (i = 0; i < num; ++i)
11404 {
11405 int val
11406 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11407 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11408 n_elts[i] = build_int_cst (inner_type, val);
11409 }
11410 break;
11411
11412 case SPARC_BUILTIN_FMUL8X16AU:
11413 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11414
11415 for (i = 0; i < num; ++i)
11416 {
11417 int val
11418 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11419 scale);
11420 n_elts[i] = build_int_cst (inner_type, val);
11421 }
11422 break;
11423
11424 case SPARC_BUILTIN_FMUL8X16AL:
11425 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11426
11427 for (i = 0; i < num; ++i)
11428 {
11429 int val
11430 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11431 scale);
11432 n_elts[i] = build_int_cst (inner_type, val);
11433 }
11434 break;
11435
11436 default:
11437 gcc_unreachable ();
11438 }
11439 }
11440
11441 /* Implement TARGET_FOLD_BUILTIN hook.
11442
11443 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11444 result of the function call is ignored. NULL_TREE is returned if the
11445 function could not be folded. */
11446
11447 static tree
11448 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11449 tree *args, bool ignore)
11450 {
11451 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11452 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11453 tree arg0, arg1, arg2;
11454
11455 if (ignore)
11456 switch (code)
11457 {
11458 case SPARC_BUILTIN_LDFSR:
11459 case SPARC_BUILTIN_STFSR:
11460 case SPARC_BUILTIN_ALIGNADDR:
11461 case SPARC_BUILTIN_WRGSR:
11462 case SPARC_BUILTIN_BMASK:
11463 case SPARC_BUILTIN_CMASK8:
11464 case SPARC_BUILTIN_CMASK16:
11465 case SPARC_BUILTIN_CMASK32:
11466 break;
11467
11468 default:
11469 return build_zero_cst (rtype);
11470 }
11471
11472 switch (code)
11473 {
11474 case SPARC_BUILTIN_FEXPAND:
11475 arg0 = args[0];
11476 STRIP_NOPS (arg0);
11477
11478 if (TREE_CODE (arg0) == VECTOR_CST)
11479 {
11480 tree inner_type = TREE_TYPE (rtype);
11481 tree *n_elts;
11482 unsigned i;
11483
11484 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11485 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11486 n_elts[i] = build_int_cst (inner_type,
11487 TREE_INT_CST_LOW
11488 (VECTOR_CST_ELT (arg0, i)) << 4);
11489 return build_vector (rtype, n_elts);
11490 }
11491 break;
11492
11493 case SPARC_BUILTIN_FMUL8X16:
11494 case SPARC_BUILTIN_FMUL8X16AU:
11495 case SPARC_BUILTIN_FMUL8X16AL:
11496 arg0 = args[0];
11497 arg1 = args[1];
11498 STRIP_NOPS (arg0);
11499 STRIP_NOPS (arg1);
11500
11501 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11502 {
11503 tree inner_type = TREE_TYPE (rtype);
11504 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11505 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11506 return build_vector (rtype, n_elts);
11507 }
11508 break;
11509
11510 case SPARC_BUILTIN_FPMERGE:
11511 arg0 = args[0];
11512 arg1 = args[1];
11513 STRIP_NOPS (arg0);
11514 STRIP_NOPS (arg1);
11515
11516 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11517 {
11518 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11519 unsigned i;
11520 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11521 {
11522 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11523 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11524 }
11525
11526 return build_vector (rtype, n_elts);
11527 }
11528 break;
11529
11530 case SPARC_BUILTIN_PDIST:
11531 case SPARC_BUILTIN_PDISTN:
11532 arg0 = args[0];
11533 arg1 = args[1];
11534 STRIP_NOPS (arg0);
11535 STRIP_NOPS (arg1);
11536 if (code == SPARC_BUILTIN_PDIST)
11537 {
11538 arg2 = args[2];
11539 STRIP_NOPS (arg2);
11540 }
11541 else
11542 arg2 = integer_zero_node;
11543
11544 if (TREE_CODE (arg0) == VECTOR_CST
11545 && TREE_CODE (arg1) == VECTOR_CST
11546 && TREE_CODE (arg2) == INTEGER_CST)
11547 {
11548 bool overflow = false;
11549 widest_int result = wi::to_widest (arg2);
11550 widest_int tmp;
11551 unsigned i;
11552
11553 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11554 {
11555 tree e0 = VECTOR_CST_ELT (arg0, i);
11556 tree e1 = VECTOR_CST_ELT (arg1, i);
11557
11558 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11559
11560 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11561 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11562 if (wi::neg_p (tmp))
11563 tmp = wi::neg (tmp, &neg2_ovf);
11564 else
11565 neg2_ovf = false;
11566 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11567 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11568 }
11569
11570 gcc_assert (!overflow);
11571
11572 return wide_int_to_tree (rtype, result);
11573 }
11574
11575 default:
11576 break;
11577 }
11578
11579 return NULL_TREE;
11580 }
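
/* Illustrative example (not compiler code): with both operands constant,
   the FPMERGE case above folds a call such as

     __builtin_vis_fpmerge ((v4qi) {1, 2, 3, 4}, (v4qi) {5, 6, 7, 8})

   directly into the interleaved vector constant {1,5, 2,6, 3,7, 4,8}
   (assuming a v4qi vector typedef), so no fpmerge instruction is emitted
   at all.  */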
11581 \f
11582 /* ??? This duplicates information provided to the compiler by the
11583 ??? scheduler description. Some day, teach genautomata to output
11584 ??? the latencies and then CSE will just use that. */
11585
11586 static bool
11587 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11588 int opno ATTRIBUTE_UNUSED,
11589 int *total, bool speed ATTRIBUTE_UNUSED)
11590 {
11591 int code = GET_CODE (x);
11592 bool float_mode_p = FLOAT_MODE_P (mode);
11593
11594 switch (code)
11595 {
11596 case CONST_INT:
11597 if (SMALL_INT (x))
11598 *total = 0;
11599 else
11600 *total = 2;
11601 return true;
11602
11603 case CONST_WIDE_INT:
11604 *total = 0;
11605 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11606 *total += 2;
11607 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11608 *total += 2;
11609 return true;
11610
11611 case HIGH:
11612 *total = 2;
11613 return true;
11614
11615 case CONST:
11616 case LABEL_REF:
11617 case SYMBOL_REF:
11618 *total = 4;
11619 return true;
11620
11621 case CONST_DOUBLE:
11622 *total = 8;
11623 return true;
11624
11625 case MEM:
11626 /* If outer-code was a sign or zero extension, a cost
11627 of COSTS_N_INSNS (1) was already added in. This is
11628 why we are subtracting it back out. */
11629 if (outer_code == ZERO_EXTEND)
11630 {
11631 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11632 }
11633 else if (outer_code == SIGN_EXTEND)
11634 {
11635 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11636 }
11637 else if (float_mode_p)
11638 {
11639 *total = sparc_costs->float_load;
11640 }
11641 else
11642 {
11643 *total = sparc_costs->int_load;
11644 }
11645
11646 return true;
11647
11648 case PLUS:
11649 case MINUS:
11650 if (float_mode_p)
11651 *total = sparc_costs->float_plusminus;
11652 else
11653 *total = COSTS_N_INSNS (1);
11654 return false;
11655
11656 case FMA:
11657 {
11658 rtx sub;
11659
11660 gcc_assert (float_mode_p);
11661 *total = sparc_costs->float_mul;
11662
11663 sub = XEXP (x, 0);
11664 if (GET_CODE (sub) == NEG)
11665 sub = XEXP (sub, 0);
11666 *total += rtx_cost (sub, mode, FMA, 0, speed);
11667
11668 sub = XEXP (x, 2);
11669 if (GET_CODE (sub) == NEG)
11670 sub = XEXP (sub, 0);
11671 *total += rtx_cost (sub, mode, FMA, 2, speed);
11672 return true;
11673 }
11674
11675 case MULT:
11676 if (float_mode_p)
11677 *total = sparc_costs->float_mul;
11678 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11679 *total = COSTS_N_INSNS (25);
11680 else
11681 {
11682 int bit_cost;
11683
11684 bit_cost = 0;
11685 if (sparc_costs->int_mul_bit_factor)
11686 {
11687 int nbits;
11688
11689 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11690 {
11691 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11692 for (nbits = 0; value != 0; value &= value - 1)
11693 nbits++;
11694 }
11695 else
11696 nbits = 7;
11697
11698 if (nbits < 3)
11699 nbits = 3;
11700 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11701 bit_cost = COSTS_N_INSNS (bit_cost);
11702 }
11703
11704 if (mode == DImode || !TARGET_HARD_MUL)
11705 *total = sparc_costs->int_mulX + bit_cost;
11706 else
11707 *total = sparc_costs->int_mul + bit_cost;
11708 }
11709 return false;
11710
11711 case ASHIFT:
11712 case ASHIFTRT:
11713 case LSHIFTRT:
11714 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11715 return false;
11716
11717 case DIV:
11718 case UDIV:
11719 case MOD:
11720 case UMOD:
11721 if (float_mode_p)
11722 {
11723 if (mode == DFmode)
11724 *total = sparc_costs->float_div_df;
11725 else
11726 *total = sparc_costs->float_div_sf;
11727 }
11728 else
11729 {
11730 if (mode == DImode)
11731 *total = sparc_costs->int_divX;
11732 else
11733 *total = sparc_costs->int_div;
11734 }
11735 return false;
11736
11737 case NEG:
11738 if (! float_mode_p)
11739 {
11740 *total = COSTS_N_INSNS (1);
11741 return false;
11742 }
11743 /* FALLTHRU */
11744
11745 case ABS:
11746 case FLOAT:
11747 case UNSIGNED_FLOAT:
11748 case FIX:
11749 case UNSIGNED_FIX:
11750 case FLOAT_EXTEND:
11751 case FLOAT_TRUNCATE:
11752 *total = sparc_costs->float_move;
11753 return false;
11754
11755 case SQRT:
11756 if (mode == DFmode)
11757 *total = sparc_costs->float_sqrt_df;
11758 else
11759 *total = sparc_costs->float_sqrt_sf;
11760 return false;
11761
11762 case COMPARE:
11763 if (float_mode_p)
11764 *total = sparc_costs->float_cmp;
11765 else
11766 *total = COSTS_N_INSNS (1);
11767 return false;
11768
11769 case IF_THEN_ELSE:
11770 if (float_mode_p)
11771 *total = sparc_costs->float_cmove;
11772 else
11773 *total = sparc_costs->int_cmove;
11774 return false;
11775
11776 case IOR:
11777 /* Handle the NAND vector patterns. */
11778 if (sparc_vector_mode_supported_p (mode)
11779 && GET_CODE (XEXP (x, 0)) == NOT
11780 && GET_CODE (XEXP (x, 1)) == NOT)
11781 {
11782 *total = COSTS_N_INSNS (1);
11783 return true;
11784 }
11785 else
11786 return false;
11787
11788 default:
11789 return false;
11790 }
11791 }
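
/* Worked example (illustrative): for a MULT by the constant 0xff on a
   processor whose cost table has int_mul_bit_factor == 2, the constant
   has 8 bits set, so the code above adds bit_cost
   = COSTS_N_INSNS ((8 - 3) / 2) = COSTS_N_INSNS (2) on top of int_mul.
   Constants with 3 or fewer bits set add nothing, and a non-constant
   multiplier is charged as if 7 bits were set.  */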
11792
11793 /* Return true if RCLASS is either GENERAL_REGS or I64_REGS. */
11794
11795 static inline bool
11796 general_or_i64_p (reg_class_t rclass)
11797 {
11798 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11799 }
11800
11801 /* Implement TARGET_REGISTER_MOVE_COST. */
11802
11803 static int
11804 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11805 reg_class_t from, reg_class_t to)
11806 {
11807 bool need_memory = false;
11808
11809 /* This helps postreload CSE to eliminate redundant comparisons. */
11810 if (from == NO_REGS || to == NO_REGS)
11811 return 100;
11812
11813 if (from == FPCC_REGS || to == FPCC_REGS)
11814 need_memory = true;
11815 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11816 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11817 {
11818 if (TARGET_VIS3)
11819 {
11820 int size = GET_MODE_SIZE (mode);
11821 if (size == 8 || size == 4)
11822 {
11823 if (! TARGET_ARCH32 || size == 4)
11824 return 4;
11825 else
11826 return 6;
11827 }
11828 }
11829 need_memory = true;
11830 }
11831
11832 if (need_memory)
11833 {
11834 if (sparc_cpu == PROCESSOR_ULTRASPARC
11835 || sparc_cpu == PROCESSOR_ULTRASPARC3
11836 || sparc_cpu == PROCESSOR_NIAGARA
11837 || sparc_cpu == PROCESSOR_NIAGARA2
11838 || sparc_cpu == PROCESSOR_NIAGARA3
11839 || sparc_cpu == PROCESSOR_NIAGARA4
11840 || sparc_cpu == PROCESSOR_NIAGARA7
11841 || sparc_cpu == PROCESSOR_M8)
11842 return 12;
11843
11844 return 6;
11845 }
11846
11847 return 2;
11848 }
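
/* Worked example (illustrative): with VIS3, moving a 64-bit value between
   the integer and FP register files costs 4 in 64-bit mode but 6 in 32-bit
   mode, where the value lives in an integer register pair.  Without VIS3
   such moves bounce through memory, costing 12 on the UltraSPARC, Niagara
   and M8 processors listed above and 6 elsewhere.  */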
11849
11850 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11851 This is achieved by means of a manual dynamic stack space allocation in
11852 the current frame. We make the assumption that SEQ doesn't contain any
11853 function calls, with the possible exception of calls to the GOT helper. */
11854
11855 static void
11856 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11857 {
11858 /* We must preserve the lowest 16 words for the register save area. */
11859 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11860 /* We really need only 2 words of fresh stack space. */
11861 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11862
11863 rtx slot
11864 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11865 SPARC_STACK_BIAS + offset));
11866
11867 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11868 emit_insn (gen_rtx_SET (slot, reg));
11869 if (reg2)
11870 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11871 reg2));
11872 emit_insn (seq);
11873 if (reg2)
11874 emit_insn (gen_rtx_SET (reg2,
11875 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11876 emit_insn (gen_rtx_SET (reg, slot));
11877 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11878 }
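
/* Shape of the emitted sequence (illustrative, 64-bit mnemonics):

     sub  %sp, size, %sp                  ! allocate the scratch area
     stx  reg,  [%sp + SPARC_STACK_BIAS + offset]
     stx  reg2, [%sp + SPARC_STACK_BIAS + offset + 8]
     ... SEQ ...
     ldx  [%sp + SPARC_STACK_BIAS + offset + 8], reg2
     ldx  [%sp + SPARC_STACK_BIAS + offset], reg
     add  %sp, size, %sp                  ! release the scratch area  */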
11879
11880 /* Output the assembler code for a thunk function. THUNK_DECL is the
11881 declaration for the thunk function itself, FUNCTION is the decl for
11882 the target function. DELTA is an immediate constant offset to be
11883 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11884 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11885
11886 static void
11887 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11888 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11889 tree function)
11890 {
11891 rtx this_rtx, funexp;
11892 rtx_insn *insn;
11893 unsigned int int_arg_first;
11894
11895 reload_completed = 1;
11896 epilogue_completed = 1;
11897
11898 emit_note (NOTE_INSN_PROLOGUE_END);
11899
11900 if (TARGET_FLAT)
11901 {
11902 sparc_leaf_function_p = 1;
11903
11904 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11905 }
11906 else if (flag_delayed_branch)
11907 {
11908 /* We will emit a regular sibcall below, so we need to instruct
11909 output_sibcall that we are in a leaf function. */
11910 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11911
11912 /* This will cause final.c to invoke leaf_renumber_regs so we
11913 must behave as if we were in a not-yet-leafified function. */
11914 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11915 }
11916 else
11917 {
11918 /* We will emit the sibcall manually below, so we will need to
11919 manually spill non-leaf registers. */
11920 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11921
11922 /* We really are in a leaf function. */
11923 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11924 }
11925
11926 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11927 returns a structure, the structure return pointer is there instead. */
11928 if (TARGET_ARCH64
11929 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11930 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11931 else
11932 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11933
11934 /* Add DELTA. When possible use a plain add, otherwise load it into
11935 a register first. */
11936 if (delta)
11937 {
11938 rtx delta_rtx = GEN_INT (delta);
11939
11940 if (! SPARC_SIMM13_P (delta))
11941 {
11942 rtx scratch = gen_rtx_REG (Pmode, 1);
11943 emit_move_insn (scratch, delta_rtx);
11944 delta_rtx = scratch;
11945 }
11946
11947 /* THIS_RTX += DELTA. */
11948 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11949 }
11950
11951 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11952 if (vcall_offset)
11953 {
11954 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11955 rtx scratch = gen_rtx_REG (Pmode, 1);
11956
11957 gcc_assert (vcall_offset < 0);
11958
11959 /* SCRATCH = *THIS_RTX. */
11960 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11961
11962 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11963 may not have any available scratch register at this point. */
11964 if (SPARC_SIMM13_P (vcall_offset))
11965 ;
11966 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11967 else if (! fixed_regs[5]
11968 /* The below sequence is made up of at least 2 insns,
11969 while the default method may need only one. */
11970 && vcall_offset < -8192)
11971 {
11972 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11973 emit_move_insn (scratch2, vcall_offset_rtx);
11974 vcall_offset_rtx = scratch2;
11975 }
11976 else
11977 {
11978 rtx increment = GEN_INT (-4096);
11979
11980 /* VCALL_OFFSET is a negative number whose typical range can be
11981 estimated as -32768..0 in 32-bit mode. In almost all cases
11982 it is therefore cheaper to emit multiple add insns than
11983 spilling and loading the constant into a register (at least
11984 6 insns). */
11985 while (! SPARC_SIMM13_P (vcall_offset))
11986 {
11987 emit_insn (gen_add2_insn (scratch, increment));
11988 vcall_offset += 4096;
11989 }
11990 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11991 }
11992
11993 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11994 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11995 gen_rtx_PLUS (Pmode,
11996 scratch,
11997 vcall_offset_rtx)));
11998
11999 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12000 emit_insn (gen_add2_insn (this_rtx, scratch));
12001 }
12002
12003 /* Generate a tail call to the target function. */
12004 if (! TREE_USED (function))
12005 {
12006 assemble_external (function);
12007 TREE_USED (function) = 1;
12008 }
12009 funexp = XEXP (DECL_RTL (function), 0);
12010
12011 if (flag_delayed_branch)
12012 {
12013 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12014 insn = emit_call_insn (gen_sibcall (funexp));
12015 SIBLING_CALL_P (insn) = 1;
12016 }
12017 else
12018 {
12019 /* The hoops we have to jump through in order to generate a sibcall
12020 without using delay slots... */
12021 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12022
12023 if (flag_pic)
12024 {
12025 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12026 start_sequence ();
12027 load_got_register (); /* clobbers %o7 */
12028 scratch = sparc_legitimize_pic_address (funexp, scratch);
12029 seq = get_insns ();
12030 end_sequence ();
12031 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12032 }
12033 else if (TARGET_ARCH32)
12034 {
12035 emit_insn (gen_rtx_SET (scratch,
12036 gen_rtx_HIGH (SImode, funexp)));
12037 emit_insn (gen_rtx_SET (scratch,
12038 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12039 }
12040 else /* TARGET_ARCH64 */
12041 {
12042 switch (sparc_cmodel)
12043 {
12044 case CM_MEDLOW:
12045 case CM_MEDMID:
12046 /* The destination can serve as a temporary. */
12047 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12048 break;
12049
12050 case CM_MEDANY:
12051 case CM_EMBMEDANY:
12052 /* The destination cannot serve as a temporary. */
12053 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12054 start_sequence ();
12055 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12056 seq = get_insns ();
12057 end_sequence ();
12058 emit_and_preserve (seq, spill_reg, 0);
12059 break;
12060
12061 default:
12062 gcc_unreachable ();
12063 }
12064 }
12065
12066 emit_jump_insn (gen_indirect_jump (scratch));
12067 }
12068
12069 emit_barrier ();
12070
12071 /* Run just enough of rest_of_compilation to get the insns emitted.
12072 There's not really enough bulk here to make other passes such as
12073 instruction scheduling worthwhile. Note that use_thunk calls
12074 assemble_start_function and assemble_end_function. */
12075 insn = get_insns ();
12076 shorten_branches (insn);
12077 final_start_function (insn, file, 1);
12078 final (insn, file, 1);
12079 final_end_function ();
12080
12081 reload_completed = 0;
12082 epilogue_completed = 0;
12083 }
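
/* Illustrative shape of a simple thunk (delta = 8, vcall_offset = 0,
   delayed branches enabled): the "this" adjustment followed by the
   sibcall, roughly

     add  %o0, 8, %o0
     <sibcall to FUNCTION>

   A delta that does not fit in a signed 13-bit immediate is first loaded
   into the scratch register %g1.  */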
12084
12085 /* Return true if sparc_output_mi_thunk would be able to output the
12086 assembler code for the thunk function specified by the arguments
12087 it is passed, and false otherwise. */
12088 static bool
12089 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12090 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12091 HOST_WIDE_INT vcall_offset,
12092 const_tree function ATTRIBUTE_UNUSED)
12093 {
12094 /* Bound the loop used in the default method above. */
12095 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12096 }
12097
12098 /* How to allocate a 'struct machine_function'. */
12099
12100 static struct machine_function *
12101 sparc_init_machine_status (void)
12102 {
12103 return ggc_cleared_alloc<machine_function> ();
12104 }
12105
12106 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12107 We need to emit DTP-relative relocations. */
12108
12109 static void
12110 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12111 {
12112 switch (size)
12113 {
12114 case 4:
12115 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12116 break;
12117 case 8:
12118 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12119 break;
12120 default:
12121 gcc_unreachable ();
12122 }
12123 output_addr_const (file, x);
12124 fputs (")", file);
12125 }
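
/* For example (illustrative), a 4-byte DTP-relative reference to a TLS
   symbol foo is emitted as:

     .word %r_tls_dtpoff32(foo)  */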
12126
12127 /* Do whatever processing is required at the end of a file. */
12128
12129 static void
12130 sparc_file_end (void)
12131 {
12132 /* If we need to emit the special GOT helper function, do so now. */
12133 if (got_helper_rtx)
12134 {
12135 const char *name = XSTR (got_helper_rtx, 0);
12136 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12137 #ifdef DWARF2_UNWIND_INFO
12138 bool do_cfi;
12139 #endif
12140
12141 if (USE_HIDDEN_LINKONCE)
12142 {
12143 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12144 get_identifier (name),
12145 build_function_type_list (void_type_node,
12146 NULL_TREE));
12147 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12148 NULL_TREE, void_type_node);
12149 TREE_PUBLIC (decl) = 1;
12150 TREE_STATIC (decl) = 1;
12151 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12152 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12153 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12154 resolve_unique_section (decl, 0, flag_function_sections);
12155 allocate_struct_function (decl, true);
12156 cfun->is_thunk = 1;
12157 current_function_decl = decl;
12158 init_varasm_status ();
12159 assemble_start_function (decl, name);
12160 }
12161 else
12162 {
12163 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12164 switch_to_section (text_section);
12165 if (align > 0)
12166 ASM_OUTPUT_ALIGN (asm_out_file, align);
12167 ASM_OUTPUT_LABEL (asm_out_file, name);
12168 }
12169
12170 #ifdef DWARF2_UNWIND_INFO
12171 do_cfi = dwarf2out_do_cfi_asm ();
12172 if (do_cfi)
12173 fprintf (asm_out_file, "\t.cfi_startproc\n");
12174 #endif
12175 if (flag_delayed_branch)
12176 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12177 reg_name, reg_name);
12178 else
12179 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12180 reg_name, reg_name);
12181 #ifdef DWARF2_UNWIND_INFO
12182 if (do_cfi)
12183 fprintf (asm_out_file, "\t.cfi_endproc\n");
12184 #endif
12185 }
12186
12187 if (NEED_INDICATE_EXEC_STACK)
12188 file_end_indicate_exec_stack ();
12189
12190 #ifdef TARGET_SOLARIS
12191 solaris_file_end ();
12192 #endif
12193 }
12194
12195 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12196 /* Implement TARGET_MANGLE_TYPE. */
12197
12198 static const char *
12199 sparc_mangle_type (const_tree type)
12200 {
12201 if (TARGET_ARCH32
12202 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12203 && TARGET_LONG_DOUBLE_128)
12204 return "g";
12205
12206 /* For all other types, use normal C++ mangling. */
12207 return NULL;
12208 }
12209 #endif
12210
12211 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12212 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12213 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12214
12215 void
12216 sparc_emit_membar_for_model (enum memmodel model,
12217 int load_store, int before_after)
12218 {
12219 /* Bits for the MEMBAR mmask field. */
12220 const int LoadLoad = 1;
12221 const int StoreLoad = 2;
12222 const int LoadStore = 4;
12223 const int StoreStore = 8;
12224
12225 int mm = 0, implied = 0;
12226
12227 switch (sparc_memory_model)
12228 {
12229 case SMM_SC:
12230 /* Sequential Consistency. All memory transactions are immediately
12231 visible in sequential execution order. No barriers needed. */
12232 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12233 break;
12234
12235 case SMM_TSO:
12236 /* Total Store Ordering: all memory transactions with store semantics
12237 are followed by an implied StoreStore. */
12238 implied |= StoreStore;
12239
12240 /* If we're not looking for a raw barrier (before+after), then atomic
12241 operations get the benefit of being both load and store. */
12242 if (load_store == 3 && before_after == 1)
12243 implied |= StoreLoad;
12244 /* FALLTHRU */
12245
12246 case SMM_PSO:
12247 /* Partial Store Ordering: all memory transactions with load semantics
12248 are followed by an implied LoadLoad | LoadStore. */
12249 implied |= LoadLoad | LoadStore;
12250
12251 /* If we're not looking for a raw barrier (before+after), then atomic
12252 operations get the benefit of being both load and store. */
12253 if (load_store == 3 && before_after == 2)
12254 implied |= StoreLoad | StoreStore;
12255 /* FALLTHRU */
12256
12257 case SMM_RMO:
12258 /* Relaxed Memory Ordering: no implicit bits. */
12259 break;
12260
12261 default:
12262 gcc_unreachable ();
12263 }
12264
12265 if (before_after & 1)
12266 {
12267 if (is_mm_release (model) || is_mm_acq_rel (model)
12268 || is_mm_seq_cst (model))
12269 {
12270 if (load_store & 1)
12271 mm |= LoadLoad | StoreLoad;
12272 if (load_store & 2)
12273 mm |= LoadStore | StoreStore;
12274 }
12275 }
12276 if (before_after & 2)
12277 {
12278 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12279 || is_mm_seq_cst (model))
12280 {
12281 if (load_store & 1)
12282 mm |= LoadLoad | LoadStore;
12283 if (load_store & 2)
12284 mm |= StoreLoad | StoreStore;
12285 }
12286 }
12287
12288 /* Remove the bits implied by the system memory model. */
12289 mm &= ~implied;
12290
12291 /* For raw barriers (before+after), always emit a barrier.
12292 This will become a compile-time barrier if needed. */
12293 if (mm || before_after == 3)
12294 emit_insn (gen_membar (GEN_INT (mm)));
12295 }
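
/* Worked example (illustrative): under the default TSO memory model, a
   full sequentially-consistent fence arrives here as load_store == 3,
   before_after == 3.  Both the acquire and release sides request all four
   bits, but TSO already implies LoadLoad, LoadStore and StoreStore, so
   after the masking only StoreLoad survives and a single StoreLoad membar
   is emitted.  Raw barriers are emitted even with an empty mask so that
   they still act as compile-time barriers.  */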
12296
12297 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing
12298 a 32-bit compare and swap on the word containing the byte or half-word. */
12299
12300 static void
12301 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12302 rtx oldval, rtx newval)
12303 {
12304 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12305 rtx addr = gen_reg_rtx (Pmode);
12306 rtx off = gen_reg_rtx (SImode);
12307 rtx oldv = gen_reg_rtx (SImode);
12308 rtx newv = gen_reg_rtx (SImode);
12309 rtx oldvalue = gen_reg_rtx (SImode);
12310 rtx newvalue = gen_reg_rtx (SImode);
12311 rtx res = gen_reg_rtx (SImode);
12312 rtx resv = gen_reg_rtx (SImode);
12313 rtx memsi, val, mask, cc;
12314
12315 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12316
12317 if (Pmode != SImode)
12318 addr1 = gen_lowpart (SImode, addr1);
12319 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12320
12321 memsi = gen_rtx_MEM (SImode, addr);
12322 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12323 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12324
12325 val = copy_to_reg (memsi);
12326
12327 emit_insn (gen_rtx_SET (off,
12328 gen_rtx_XOR (SImode, off,
12329 GEN_INT (GET_MODE (mem) == QImode
12330 ? 3 : 2))));
12331
12332 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12333
12334 if (GET_MODE (mem) == QImode)
12335 mask = force_reg (SImode, GEN_INT (0xff));
12336 else
12337 mask = force_reg (SImode, GEN_INT (0xffff));
12338
12339 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12340
12341 emit_insn (gen_rtx_SET (val,
12342 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12343 val)));
12344
12345 oldval = gen_lowpart (SImode, oldval);
12346 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12347
12348 newval = gen_lowpart_common (SImode, newval);
12349 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12350
12351 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12352
12353 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12354
12355 rtx_code_label *end_label = gen_label_rtx ();
12356 rtx_code_label *loop_label = gen_label_rtx ();
12357 emit_label (loop_label);
12358
12359 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12360
12361 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12362
12363 emit_move_insn (bool_result, const1_rtx);
12364
12365 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12366
12367 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12368
12369 emit_insn (gen_rtx_SET (resv,
12370 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12371 res)));
12372
12373 emit_move_insn (bool_result, const0_rtx);
12374
12375 cc = gen_compare_reg_1 (NE, resv, val);
12376 emit_insn (gen_rtx_SET (val, resv));
12377
12378 /* Use cbranchcc4 to separate the compare and branch! */
12379 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12380 cc, const0_rtx, loop_label));
12381
12382 emit_label (end_label);
12383
12384 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12385
12386 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12387
12388 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12389 }
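
/* C-level sketch of the loop built above (illustrative only):

     uint32_t *wp   = word containing the byte or half-word;
     uint32_t  mask = 0xff or 0xffff, shifted into position;
     uint32_t  oldv = (oldval << off) & mask;
     uint32_t  newv = (newval << off) & mask;
     uint32_t  val  = *wp & ~mask;            // the surrounding bytes
     for (;;)
       {
         uint32_t res = cas32 (wp, oldv | val, newv | val);
         if (res == (oldv | val))
           break;                             // swap succeeded
         if ((res & ~mask) == val)
           break;                             // sub-word mismatch: failure
         val = res & ~mask;                   // neighbors changed: retry
       }
     result = (res & mask) >> off;

   Note that off is adjusted for big-endian byte order by the XOR with 3
   (QImode) or 2 (HImode) above.  */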
12390
12391 /* Expand code to perform a compare-and-swap. */
12392
12393 void
12394 sparc_expand_compare_and_swap (rtx operands[])
12395 {
12396 rtx bval, retval, mem, oldval, newval;
12397 machine_mode mode;
12398 enum memmodel model;
12399
12400 bval = operands[0];
12401 retval = operands[1];
12402 mem = operands[2];
12403 oldval = operands[3];
12404 newval = operands[4];
12405 model = (enum memmodel) INTVAL (operands[6]);
12406 mode = GET_MODE (mem);
12407
12408 sparc_emit_membar_for_model (model, 3, 1);
12409
12410 if (reg_overlap_mentioned_p (retval, oldval))
12411 oldval = copy_to_reg (oldval);
12412
12413 if (mode == QImode || mode == HImode)
12414 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12415 else
12416 {
12417 rtx (*gen) (rtx, rtx, rtx, rtx);
12418 rtx x;
12419
12420 if (mode == SImode)
12421 gen = gen_atomic_compare_and_swapsi_1;
12422 else
12423 gen = gen_atomic_compare_and_swapdi_1;
12424 emit_insn (gen (retval, mem, oldval, newval));
12425
12426 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12427 if (x != bval)
12428 convert_move (bval, x, 1);
12429 }
12430
12431 sparc_emit_membar_for_model (model, 3, 2);
12432 }
12433
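/* Compute the 32-bit GSR.bmask byte selector corresponding to the element
   permutation SEL for vector mode VMODE, and load it by emitting a bmask
   instruction.  */
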
12434 void
12435 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12436 {
12437 rtx t_1, t_2, t_3;
12438
12439 sel = gen_lowpart (DImode, sel);
12440 switch (vmode)
12441 {
12442 case V2SImode:
12443 /* inp = xxxxxxxAxxxxxxxB */
12444 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12445 NULL_RTX, 1, OPTAB_DIRECT);
12446 /* t_1 = ....xxxxxxxAxxx. */
12447 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12448 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12449 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12450 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12451 /* sel = .......B */
12452 /* t_1 = ...A.... */
12453 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12454 /* sel = ...A...B */
12455 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12456 /* sel = AAAABBBB * 4 */
12457 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12458 /* sel = { A*4, A*4+1, A*4+2, ... } */
12459 break;
12460
12461 case V4HImode:
12462 /* inp = xxxAxxxBxxxCxxxD */
12463 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12464 NULL_RTX, 1, OPTAB_DIRECT);
12465 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12466 NULL_RTX, 1, OPTAB_DIRECT);
12467 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12468 NULL_RTX, 1, OPTAB_DIRECT);
12469 /* t_1 = ..xxxAxxxBxxxCxx */
12470 /* t_2 = ....xxxAxxxBxxxC */
12471 /* t_3 = ......xxxAxxxBxx */
12472 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12473 GEN_INT (0x07),
12474 NULL_RTX, 1, OPTAB_DIRECT);
12475 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12476 GEN_INT (0x0700),
12477 NULL_RTX, 1, OPTAB_DIRECT);
12478 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12479 GEN_INT (0x070000),
12480 NULL_RTX, 1, OPTAB_DIRECT);
12481 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12482 GEN_INT (0x07000000),
12483 NULL_RTX, 1, OPTAB_DIRECT);
12484 /* sel = .......D */
12485 /* t_1 = .....C.. */
12486 /* t_2 = ...B.... */
12487 /* t_3 = .A...... */
12488 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12489 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12490 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12491 /* sel = .A.B.C.D */
12492 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12493 /* sel = AABBCCDD * 2 */
12494 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12495 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12496 break;
12497
12498 case V8QImode:
12499 /* input = xAxBxCxDxExFxGxH */
12500 sel = expand_simple_binop (DImode, AND, sel,
12501 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12502 | 0x0f0f0f0f),
12503 NULL_RTX, 1, OPTAB_DIRECT);
12504 /* sel = .A.B.C.D.E.F.G.H */
12505 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12506 NULL_RTX, 1, OPTAB_DIRECT);
12507 /* t_1 = ..A.B.C.D.E.F.G. */
12508 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12509 NULL_RTX, 1, OPTAB_DIRECT);
12510 /* sel = .AABBCCDDEEFFGGH */
12511 sel = expand_simple_binop (DImode, AND, sel,
12512 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12513 | 0xff00ff),
12514 NULL_RTX, 1, OPTAB_DIRECT);
12515 /* sel = ..AB..CD..EF..GH */
12516 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12517 NULL_RTX, 1, OPTAB_DIRECT);
12518 /* t_1 = ....AB..CD..EF.. */
12519 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12520 NULL_RTX, 1, OPTAB_DIRECT);
12521 /* sel = ..ABABCDCDEFEFGH */
12522 sel = expand_simple_binop (DImode, AND, sel,
12523 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12524 NULL_RTX, 1, OPTAB_DIRECT);
12525 /* sel = ....ABCD....EFGH */
12526 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12527 NULL_RTX, 1, OPTAB_DIRECT);
12528 /* t_1 = ........ABCD.... */
12529 sel = gen_lowpart (SImode, sel);
12530 t_1 = gen_lowpart (SImode, t_1);
12531 break;
12532
12533 default:
12534 gcc_unreachable ();
12535 }
12536
12537 /* Always perform the final addition/merge within the bmask insn. */
12538 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12539 }
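
/* Worked example (illustrative): for V4HImode with element selector
   {3, 0, 2, 1}, the code above packs the 3-bit indices into one byte each
   (giving 0x03000201), multiplies by 0x22 to duplicate and double them
   (0x66004422), and leaves the addition of 0x01010101 to the bmask
   instruction itself, yielding the nibble-per-byte selector
   6,7, 0,1, 4,5, 2,3 for a following bshuffle.  */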
12540
12541 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12542
12543 static bool
12544 sparc_frame_pointer_required (void)
12545 {
12546 /* If the stack pointer is dynamically modified in the function, it cannot
12547 serve as the frame pointer. */
12548 if (cfun->calls_alloca)
12549 return true;
12550
12551 /* If the function receives nonlocal gotos, it needs to save the frame
12552 pointer in the nonlocal_goto_save_area object. */
12553 if (cfun->has_nonlocal_label)
12554 return true;
12555
12556 /* In flat mode, that's it. */
12557 if (TARGET_FLAT)
12558 return false;
12559
12560 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12561 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12562 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12563 }
12564
12565 /* The way this is structured, we can't eliminate SFP in favor of SP
12566 if the frame pointer is required: we want to use the SFP->HFP elimination
12567 in that case. But the test in update_eliminables doesn't know we are
12568 assuming below that we only do the former elimination. */
12569
12570 static bool
12571 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12572 {
12573 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12574 }
12575
12576 /* Return the hard frame pointer directly to bypass the stack bias. */
12577
12578 static rtx
12579 sparc_builtin_setjmp_frame_value (void)
12580 {
12581 return hard_frame_pointer_rtx;
12582 }
12583
12584 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12585 they won't be allocated. */
12586
12587 static void
12588 sparc_conditional_register_usage (void)
12589 {
12590 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12591 {
12592 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12593 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12594 }
12595 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12596 then honor it. */
12597 if (TARGET_ARCH32 && fixed_regs[5])
12598 fixed_regs[5] = 1;
12599 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12600 fixed_regs[5] = 0;
12601 if (! TARGET_V9)
12602 {
12603 int regno;
12604 for (regno = SPARC_FIRST_V9_FP_REG;
12605 regno <= SPARC_LAST_V9_FP_REG;
12606 regno++)
12607 fixed_regs[regno] = 1;
12608 /* %fcc0 is used by v8 and v9. */
12609 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12610 regno <= SPARC_LAST_V9_FCC_REG;
12611 regno++)
12612 fixed_regs[regno] = 1;
12613 }
12614 if (! TARGET_FPU)
12615 {
12616 int regno;
12617 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12618 fixed_regs[regno] = 1;
12619 }
12620 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12621 then honor it. Likewise with g3 and g4. */
12622 if (fixed_regs[2] == 2)
12623 fixed_regs[2] = ! TARGET_APP_REGS;
12624 if (fixed_regs[3] == 2)
12625 fixed_regs[3] = ! TARGET_APP_REGS;
12626 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12627 fixed_regs[4] = ! TARGET_APP_REGS;
12628 else if (TARGET_CM_EMBMEDANY)
12629 fixed_regs[4] = 1;
12630 else if (fixed_regs[4] == 2)
12631 fixed_regs[4] = 0;
12632 if (TARGET_FLAT)
12633 {
12634 int regno;
12635 /* Disable leaf functions. */
12636 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12637 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12638 leaf_reg_remap [regno] = regno;
12639 }
12640 if (TARGET_VIS)
12641 global_regs[SPARC_GSR_REG] = 1;
12642 }
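
/* For illustration (command-line spellings assumed): -ffixed-g2 sets
   fixed_regs[2] to 1 before this hook runs and is left alone above,
   while the value 2 coming from the target's register tables means
   roughly "decided here", in which case %g2/%g3/%g4 only become
   allocatable as application registers under -mapp-regs
   (TARGET_APP_REGS).  */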
12643
12644 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12645
12646 - We can't load constants into FP registers.
12647 - We can't load FP constants into integer registers when soft-float,
12648 because there is no soft-float pattern with a r/F constraint.
12649 - We can't load FP constants into integer registers for TFmode unless
12650 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12651 - Try to reload integer constants (symbolic or otherwise) back into
12652 registers directly, rather than having them dumped to memory. */
12653
12654 static reg_class_t
12655 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12656 {
12657 machine_mode mode = GET_MODE (x);
12658 if (CONSTANT_P (x))
12659 {
12660 if (FP_REG_CLASS_P (rclass)
12661 || rclass == GENERAL_OR_FP_REGS
12662 || rclass == GENERAL_OR_EXTRA_FP_REGS
12663 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12664 || (mode == TFmode && ! const_zero_operand (x, mode)))
12665 return NO_REGS;
12666
12667 if (GET_MODE_CLASS (mode) == MODE_INT)
12668 return GENERAL_REGS;
12669
12670 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12671 {
12672 if (! FP_REG_CLASS_P (rclass)
12673 || !(const_zero_operand (x, mode)
12674 || const_all_ones_operand (x, mode)))
12675 return NO_REGS;
12676 }
12677 }
12678
12679 if (TARGET_VIS3
12680 && ! TARGET_ARCH64
12681 && (rclass == EXTRA_FP_REGS
12682 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12683 {
12684 int regno = true_regnum (x);
12685
12686 if (SPARC_INT_REG_P (regno))
12687 return (rclass == EXTRA_FP_REGS
12688 ? FP_REGS : GENERAL_OR_FP_REGS);
12689 }
12690
12691 return rclass;
12692 }
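
/* Concrete instance of the rules above (our example): with rclass ==
   GENERAL_REGS, the TFmode constant 0.0L keeps GENERAL_REGS, since
   const_zero_operand accepts it, while any other TFmode constant
   returns NO_REGS above and is therefore reloaded from memory.  */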
12693
12694 /* Return true if we use LRA instead of reload pass. */
12695
12696 static bool
12697 sparc_lra_p (void)
12698 {
12699 return TARGET_LRA;
12700 }
12701
12702 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12703 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12704
12705 const char *
12706 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12707 {
12708 char mulstr[32];
12709
12710 gcc_assert (! TARGET_ARCH64);
12711
12712 if (sparc_check_64 (operands[1], insn) <= 0)
12713 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12714 if (which_alternative == 1)
12715 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12716 if (GET_CODE (operands[2]) == CONST_INT)
12717 {
12718 if (which_alternative == 1)
12719 {
12720 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12721 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12722 output_asm_insn (mulstr, operands);
12723 return "srlx\t%L0, 32, %H0";
12724 }
12725 else
12726 {
12727 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12728 output_asm_insn ("or\t%L1, %3, %3", operands);
12729 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12730 output_asm_insn (mulstr, operands);
12731 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12732 return "mov\t%3, %L0";
12733 }
12734 }
12735 else if (rtx_equal_p (operands[1], operands[2]))
12736 {
12737 if (which_alternative == 1)
12738 {
12739 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12740 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12741 output_asm_insn (mulstr, operands);
12742 return "srlx\t%L0, 32, %H0";
12743 }
12744 else
12745 {
12746 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12747 output_asm_insn ("or\t%L1, %3, %3", operands);
12748 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12749 output_asm_insn (mulstr, operands);
12750 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12751 return "mov\t%3, %L0";
12752 }
12753 }
12754 if (sparc_check_64 (operands[2], insn) <= 0)
12755 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12756 if (which_alternative == 1)
12757 {
12758 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12759 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12760 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12761 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12762 output_asm_insn (mulstr, operands);
12763 return "srlx\t%L0, 32, %H0";
12764 }
12765 else
12766 {
12767 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12768 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12769 output_asm_insn ("or\t%L1, %3, %3", operands);
12770 output_asm_insn ("or\t%L2, %4, %4", operands);
12771 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12772 output_asm_insn (mulstr, operands);
12773 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12774 return "mov\t%3, %L0";
12775 }
12776 }
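
/* Illustrative output (register and temporary choices assumed) for
   the general case of the alternative that provides the scratch
   operands %3/%4, with opcode "mulx":

	sllx	%i1, 32, %g2	! glue high/low words of operand 1
	sllx	%i3, 32, %g3	! glue high/low words of operand 2
	or	%i2, %g2, %g2
	or	%i4, %g3, %g3
	mulx	%g2, %g3, %g2	! the actual 64-bit multiply
	srlx	%g2, 32, %i0	! high word of the product
	mov	%g2, %i1	! low word of the product

   preceded by "srl %reg, 0, %reg" zero-extensions whenever
   sparc_check_64 cannot prove the low words are already clean.  */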
12777
12778 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12779 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12780 and INNER_MODE are the modes describing TARGET. */
12781
12782 static void
12783 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12784 machine_mode inner_mode)
12785 {
12786 rtx t1, final_insn, sel;
12787 int bmask;
12788
12789 t1 = gen_reg_rtx (mode);
12790
12791 elt = convert_modes (SImode, inner_mode, elt, true);
12792 emit_move_insn (gen_lowpart (SImode, t1), elt);
12793
12794 switch (mode)
12795 {
12796 case V2SImode:
12797 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12798 bmask = 0x45674567;
12799 break;
12800 case V4HImode:
12801 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12802 bmask = 0x67676767;
12803 break;
12804 case V8QImode:
12805 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12806 bmask = 0x77777777;
12807 break;
12808 default:
12809 gcc_unreachable ();
12810 }
12811
12812 sel = force_reg (SImode, GEN_INT (bmask));
12813 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12814 emit_insn (final_insn);
12815 }
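
/* Informal reading of the bmask constants above: each nibble of the
   BMASK value selects one byte of the 16-byte concatenation of the
   two BSHUFFLE sources.  Since ELT was stored in the low SImode half
   of T1, 0x77777777 replicates byte 7 into all eight V8QI lanes,
   0x67676767 replicates the halfword in bytes 6-7 into all four
   V4HI lanes, and 0x45674567 replicates the word in bytes 4-7 into
   both V2SI lanes.  */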
12816
12817 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12818 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12819
12820 static void
12821 vector_init_fpmerge (rtx target, rtx elt)
12822 {
12823 rtx t1, t2, t2_low, t3, t3_low;
12824
12825 t1 = gen_reg_rtx (V4QImode);
12826 elt = convert_modes (SImode, QImode, elt, true);
12827 emit_move_insn (gen_lowpart (SImode, t1), elt);
12828
12829 t2 = gen_reg_rtx (V8QImode);
12830 t2_low = gen_lowpart (V4QImode, t2);
12831 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12832
12833 t3 = gen_reg_rtx (V8QImode);
12834 t3_low = gen_lowpart (V4QImode, t3);
12835 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12836
12837 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12838 }
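
/* How the three merges work (informal sketch): FPMERGE interleaves
   the bytes of its two 4-byte inputs into an 8-byte result, so each
   step doubles the number of copies of the original byte:

     t1     = 0 0 0 x		 (ELT in the low byte)
     t2     = 0 0 0 0 0 0 x x	 (t2_low = 0 0 x x)
     t3     = 0 0 0 0 x x x x	 (t3_low = x x x x)
     target = x x x x x x x x

   leaving all eight V8QI lanes equal to ELT.  */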
12839
12840 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12841 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12842
12843 static void
12844 vector_init_faligndata (rtx target, rtx elt)
12845 {
12846 rtx t1 = gen_reg_rtx (V4HImode);
12847 int i;
12848
12849 elt = convert_modes (SImode, HImode, elt, true);
12850 emit_move_insn (gen_lowpart (SImode, t1), elt);
12851
12852 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12853 force_reg (SImode, GEN_INT (6)),
12854 const0_rtx));
12855
12856 for (i = 0; i < 4; i++)
12857 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12858 }
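
/* Informal reading of the loop above: ALIGNADDR with address 6 sets
   the GSR alignment field to 6, so each FALIGNDATA extracts the 8
   bytes starting at offset 6 of the 16-byte value T1:TARGET.  That
   prepends the halfword element (bytes 6-7 of T1) and shifts the old
   contents of TARGET down one lane; after four iterations all four
   V4HI lanes contain ELT.  */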
12859
12860 /* Emit code to initialize TARGET to values for individual fields VALS. */
12861
12862 void
12863 sparc_expand_vector_init (rtx target, rtx vals)
12864 {
12865 const machine_mode mode = GET_MODE (target);
12866 const machine_mode inner_mode = GET_MODE_INNER (mode);
12867 const int n_elts = GET_MODE_NUNITS (mode);
12868 int i, n_var = 0;
12869 bool all_same = true;
12870 rtx mem;
12871
12872 for (i = 0; i < n_elts; i++)
12873 {
12874 rtx x = XVECEXP (vals, 0, i);
12875 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12876 n_var++;
12877
12878 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12879 all_same = false;
12880 }
12881
12882 if (n_var == 0)
12883 {
12884 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12885 return;
12886 }
12887
12888 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12889 {
12890 if (GET_MODE_SIZE (inner_mode) == 4)
12891 {
12892 emit_move_insn (gen_lowpart (SImode, target),
12893 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12894 return;
12895 }
12896 else if (GET_MODE_SIZE (inner_mode) == 8)
12897 {
12898 emit_move_insn (gen_lowpart (DImode, target),
12899 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12900 return;
12901 }
12902 }
12903 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12904 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12905 {
12906 emit_move_insn (gen_highpart (word_mode, target),
12907 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12908 emit_move_insn (gen_lowpart (word_mode, target),
12909 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12910 return;
12911 }
12912
12913 if (all_same && GET_MODE_SIZE (mode) == 8)
12914 {
12915 if (TARGET_VIS2)
12916 {
12917 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12918 return;
12919 }
12920 if (mode == V8QImode)
12921 {
12922 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12923 return;
12924 }
12925 if (mode == V4HImode)
12926 {
12927 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12928 return;
12929 }
12930 }
12931
12932 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12933 for (i = 0; i < n_elts; i++)
12934 emit_move_insn (adjust_address_nv (mem, inner_mode,
12935 i * GET_MODE_SIZE (inner_mode)),
12936 XVECEXP (vals, 0, i));
12937 emit_move_insn (target, mem);
12938 }
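
/* C-level illustration (type name is ours) of a case that reaches
   the all_same paths above, using GCC's generic vector extension:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi splat (short x)
     {
       return (v4hi) { x, x, x, x };
     }

   With -mvis2 this goes through vector_init_bshuffle, with plain
   -mvis through vector_init_faligndata, and a constructor with
   distinct variable elements falls through to the stack temporary
   at the end of the function.  */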
12939
12940 /* Implement TARGET_SECONDARY_RELOAD. */
12941
12942 static reg_class_t
12943 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12944 machine_mode mode, secondary_reload_info *sri)
12945 {
12946 enum reg_class rclass = (enum reg_class) rclass_i;
12947
12948 sri->icode = CODE_FOR_nothing;
12949 sri->extra_cost = 0;
12950
12951 /* We need a temporary when loading/storing a HImode/QImode value
12952 between memory and the FPU registers. This can happen when combine puts
12953 a paradoxical subreg in a float/fix conversion insn. */
12954 if (FP_REG_CLASS_P (rclass)
12955 && (mode == HImode || mode == QImode)
12956 && (GET_CODE (x) == MEM
12957 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12958 && true_regnum (x) == -1)))
12959 return GENERAL_REGS;
12960
12961 /* On 32-bit we need a temporary when loading/storing a DFmode value
12962 between unaligned memory and the upper FPU registers. */
12963 if (TARGET_ARCH32
12964 && rclass == EXTRA_FP_REGS
12965 && mode == DFmode
12966 && GET_CODE (x) == MEM
12967 && ! mem_min_alignment (x, 8))
12968 return FP_REGS;
12969
12970 if (((TARGET_CM_MEDANY
12971 && symbolic_operand (x, mode))
12972 || (TARGET_CM_EMBMEDANY
12973 && text_segment_operand (x, mode)))
12974 && ! flag_pic)
12975 {
12976 if (in_p)
12977 sri->icode = direct_optab_handler (reload_in_optab, mode);
12978 else
12979 sri->icode = direct_optab_handler (reload_out_optab, mode);
12980 return NO_REGS;
12981 }
12982
12983 if (TARGET_VIS3 && TARGET_ARCH32)
12984 {
12985 int regno = true_regnum (x);
12986
12987 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12988 to move 8-byte values in 4-byte pieces. This only works via
12989 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12990 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12991 an FP_REGS intermediate move. */
12992 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12993 || ((general_or_i64_p (rclass)
12994 || rclass == GENERAL_OR_FP_REGS)
12995 && SPARC_FP_REG_P (regno)))
12996 {
12997 sri->extra_cost = 2;
12998 return FP_REGS;
12999 }
13000 }
13001
13002 return NO_REGS;
13003 }
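
/* Concrete case for the VIS3 test above (register choices ours): on
   32-bit with -mvis3, moving a DImode value between %o0/%o1 and an
   upper FP register such as %f32 cannot be done directly, because
   the 4-byte int<->fp moves only address the lower FP registers;
   reload therefore inserts an FP_REGS intermediate, whose two extra
   moves the extra_cost of 2 accounts for.  */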
13004
13005 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13006 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13007
13008 bool
13009 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13010 {
13011 enum rtx_code rc = GET_CODE (operands[1]);
13012 machine_mode cmp_mode;
13013 rtx cc_reg, dst, cmp;
13014
13015 cmp = operands[1];
13016 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13017 return false;
13018
13019 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13020 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13021
13022 cmp_mode = GET_MODE (XEXP (cmp, 0));
13023 rc = GET_CODE (cmp);
13024
13025 dst = operands[0];
13026 if (! rtx_equal_p (operands[2], dst)
13027 && ! rtx_equal_p (operands[3], dst))
13028 {
13029 if (reg_overlap_mentioned_p (dst, cmp))
13030 dst = gen_reg_rtx (mode);
13031
13032 emit_move_insn (dst, operands[3]);
13033 }
13034 else if (operands[2] == dst)
13035 {
13036 operands[2] = operands[3];
13037
13038 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13039 rc = reverse_condition_maybe_unordered (rc);
13040 else
13041 rc = reverse_condition (rc);
13042 }
13043
13044 if (XEXP (cmp, 1) == const0_rtx
13045 && GET_CODE (XEXP (cmp, 0)) == REG
13046 && cmp_mode == DImode
13047 && v9_regcmp_p (rc))
13048 cc_reg = XEXP (cmp, 0);
13049 else
13050 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13051
13052 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13053
13054 emit_insn (gen_rtx_SET (dst,
13055 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13056
13057 if (dst != operands[0])
13058 emit_move_insn (operands[0], dst);
13059
13060 return true;
13061 }
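
/* Illustrative effect (assumed registers) for "x = a < b ? c : d" in
   SImode on V9, taking the branch of the code above in which the
   destination matches neither arm:

	mov	%o4, %o0	! x = d
	cmp	%o1, %o2	! set %icc from a and b
	movl	%icc, %o3, %o0	! if (a < b) x = c

   When the destination already holds the true arm, the condition is
   reversed instead of emitting the initial move.  */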
13062
13063 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13064 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13065 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13066 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13067 code to be used for the condition mask. */
13068
13069 void
13070 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13071 {
13072 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13073 enum rtx_code code = GET_CODE (operands[3]);
13074
13075 mask = gen_reg_rtx (Pmode);
13076 cop0 = operands[4];
13077 cop1 = operands[5];
13078 if (code == LT || code == GE)
13079 {
13080 rtx t;
13081
13082 code = swap_condition (code);
13083 t = cop0; cop0 = cop1; cop1 = t;
13084 }
13085
13086 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13087
13088 fcmp = gen_rtx_UNSPEC (Pmode,
13089 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13090 fcode);
13091
13092 cmask = gen_rtx_UNSPEC (DImode,
13093 gen_rtvec (2, mask, gsr),
13094 ccode);
13095
13096 bshuf = gen_rtx_UNSPEC (mode,
13097 gen_rtvec (3, operands[1], operands[2], gsr),
13098 UNSPEC_BSHUFFLE);
13099
13100 emit_insn (gen_rtx_SET (mask, fcmp));
13101 emit_insn (gen_rtx_SET (gsr, cmask));
13102
13103 emit_insn (gen_rtx_SET (operands[0], bshuf));
13104 }
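
/* Schematic of the three SETs above (instruction names given only as
   an illustration): the fcmp unspec computes a per-element bit mask
   in MASK with a VIS compare such as fcmpgt16; the cmask unspec
   expands those bits into a byte-selection pattern in the GSR (VIS3
   cmask16 and friends); the final BSHUFFLE then picks each element
   of the result from OPERANDS[1] or OPERANDS[2] under control of
   that GSR mask.  */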
13105
13106 /* On sparc, any mode which naturally allocates into the float
13107 registers should return 4 here. */
13108
13109 unsigned int
13110 sparc_regmode_natural_size (machine_mode mode)
13111 {
13112 int size = UNITS_PER_WORD;
13113
13114 if (TARGET_ARCH64)
13115 {
13116 enum mode_class mclass = GET_MODE_CLASS (mode);
13117
13118 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13119 size = 4;
13120 }
13121
13122 return size;
13123 }
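
/* So on 64-bit (illustrative reading), a DFmode value is tracked in
   two 4-byte pieces, matching the fact that the FP registers can be
   addressed as 32-bit halves, while integer modes keep the full
   8-byte UNITS_PER_WORD granularity.  */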
13124
13125 /* Return TRUE if it is a good idea to tie two pseudo registers
13126 when one has mode MODE1 and one has mode MODE2.
13127 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
13128 for any hard reg, then this must be FALSE for correct output.
13129
13130 For V9 we have to deal with the fact that only the lower 32 floating
13131 point registers are 32-bit addressable. */
13132
13133 bool
13134 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13135 {
13136 enum mode_class mclass1, mclass2;
13137 unsigned short size1, size2;
13138
13139 if (mode1 == mode2)
13140 return true;
13141
13142 mclass1 = GET_MODE_CLASS (mode1);
13143 mclass2 = GET_MODE_CLASS (mode2);
13144 if (mclass1 != mclass2)
13145 return false;
13146
13147 if (! TARGET_V9)
13148 return true;
13149
13150 /* Classes are the same and we are V9 so we have to deal with upper
13151 vs. lower floating point registers. If one of the modes is a
13152 4-byte mode, and the other is not, we have to mark them as not
13153 tieable because only the lower 32 floating point registers are
13154 addressable 32 bits at a time.
13155
13156 We can't just test explicitly for SFmode, otherwise we won't
13157 cover the vector mode cases properly. */
13158
13159 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13160 return true;
13161
13162 size1 = GET_MODE_SIZE (mode1);
13163 size2 = GET_MODE_SIZE (mode2);
13164 if ((size1 > 4 && size2 == 4)
13165 || (size2 > 4 && size1 == 4))
13166 return false;
13167
13168 return true;
13169 }
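
/* Example of the size test above: on V9, SFmode (4 bytes) and DFmode
   (8 bytes) are not tieable, because a DFmode pseudo may land in an
   upper FP register that has no 32-bit (SFmode) access; two integer
   modes such as SImode and DImode remain tieable since the integer
   registers carry no such restriction.  */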
13170
13171 /* Implement TARGET_CSTORE_MODE. */
13172
13173 static machine_mode
13174 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13175 {
13176 return (TARGET_ARCH64 ? DImode : SImode);
13177 }
13178
13179 /* Return the compound expression made of T1 and T2. */
13180
13181 static inline tree
13182 compound_expr (tree t1, tree t2)
13183 {
13184 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13185 }
13186
13187 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13188
13189 static void
13190 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13191 {
13192 if (!TARGET_FPU)
13193 return;
13194
13195 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13196 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13197
13198 /* We generate the equivalent of feholdexcept (&fenv_var):
13199
13200 unsigned int fenv_var;
13201 __builtin_store_fsr (&fenv_var);
13202
13203 unsigned int tmp1_var;
13204 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13205
13206 __builtin_load_fsr (&tmp1_var); */
13207
13208 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13209 TREE_ADDRESSABLE (fenv_var) = 1;
13210 tree fenv_addr = build_fold_addr_expr (fenv_var);
13211 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13212 tree hold_stfsr
13213 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13214 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13215
13216 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13217 TREE_ADDRESSABLE (tmp1_var) = 1;
13218 tree masked_fenv_var
13219 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13220 build_int_cst (unsigned_type_node,
13221 ~(accrued_exception_mask | trap_enable_mask)));
13222 tree hold_mask
13223 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13224 NULL_TREE, NULL_TREE);
13225
13226 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13227 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13228 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13229
13230 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13231
13232 /* We reload the value of tmp1_var to clear the exceptions:
13233
13234 __builtin_load_fsr (&tmp1_var); */
13235
13236 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13237
13238 /* We generate the equivalent of feupdateenv (&fenv_var):
13239
13240 unsigned int tmp2_var;
13241 __builtin_store_fsr (&tmp2_var);
13242
13243 __builtin_load_fsr (&fenv_var);
13244
13245 if (SPARC_LOW_FE_EXCEPT_VALUES)
13246 tmp2_var >>= 5;
13247 __atomic_feraiseexcept ((int) tmp2_var); */
13248
13249 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13250 TREE_ADDRESSABLE (tmp2_var) = 1;
13251 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13252 tree update_stfsr
13253 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13254 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13255
13256 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13257
13258 tree atomic_feraiseexcept
13259 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13260 tree update_call
13261 = build_call_expr (atomic_feraiseexcept, 1,
13262 fold_convert (integer_type_node, tmp2_var));
13263
13264 if (SPARC_LOW_FE_EXCEPT_VALUES)
13265 {
13266 tree shifted_tmp2_var
13267 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13268 build_int_cst (unsigned_type_node, 5));
13269 tree update_shift
13270 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13271 update_call = compound_expr (update_shift, update_call);
13272 }
13273
13274 *update
13275 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13276 }
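
/* Illustration (our example) of where the hook above matters: for a
   C11 atomic floating-point compound assignment such as

     _Atomic float f;
     ...
     f += 1.0f;

   the middle end brackets the compare-and-exchange loop with the
   *hold sequence on entry, re-issues *clear when an iteration fails
   and must retry, and runs *update once the exchange succeeds, so
   only the final iteration's FP exceptions are ultimately raised.  */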
13277
13278 #include "gt-sparc.h"