1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2019 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "params.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "tree-vector-builder.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
 127          A value of zero indicates that the multiply cost is fixed,
 128          and not variable.  */
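      /* Worked example (hypothetical numbers, not taken from any of the
	 cost tables below): with int_mul = COSTS_N_INSNS (4) and
	 int_mul_bit_factor = 2, a multiply whose rs1 operand has its
	 highest set bit at position 11 is costed as COSTS_N_INSNS (4)
	 plus (11 - 3) / 2 = 4 extra cost units.  */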
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs sparclet_costs = {
274 COSTS_N_INSNS (3), /* int load */
275 COSTS_N_INSNS (3), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (1), /* fdivs */
284 COSTS_N_INSNS (1), /* fdivd */
285 COSTS_N_INSNS (1), /* fsqrts */
286 COSTS_N_INSNS (1), /* fsqrtd */
287 COSTS_N_INSNS (5), /* imul */
288 COSTS_N_INSNS (5), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (5), /* idiv */
291 COSTS_N_INSNS (5), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs ultrasparc_costs = {
299 COSTS_N_INSNS (2), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (2), /* int zeroed load */
302 COSTS_N_INSNS (2), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (4), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (2), /* fmov, fmovr */
307 COSTS_N_INSNS (4), /* fmul */
308 COSTS_N_INSNS (13), /* fdivs */
309 COSTS_N_INSNS (23), /* fdivd */
310 COSTS_N_INSNS (13), /* fsqrts */
311 COSTS_N_INSNS (23), /* fsqrtd */
312 COSTS_N_INSNS (4), /* imul */
313 COSTS_N_INSNS (4), /* imulX */
314 2, /* imul bit factor */
315 COSTS_N_INSNS (37), /* idiv */
316 COSTS_N_INSNS (68), /* idivX */
317 COSTS_N_INSNS (2), /* movcc/movr */
318 2, /* shift penalty */
319 2 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc3_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (3), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (5), /* fcmp */
331 COSTS_N_INSNS (3), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (17), /* fdivs */
334 COSTS_N_INSNS (20), /* fdivd */
335 COSTS_N_INSNS (20), /* fsqrts */
336 COSTS_N_INSNS (29), /* fsqrtd */
337 COSTS_N_INSNS (6), /* imul */
338 COSTS_N_INSNS (6), /* imulX */
339 0, /* imul bit factor */
340 COSTS_N_INSNS (40), /* idiv */
341 COSTS_N_INSNS (71), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 0, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs niagara_costs = {
349 COSTS_N_INSNS (3), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (9), /* float load */
353 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (8), /* fadd, fsub */
355 COSTS_N_INSNS (26), /* fcmp */
356 COSTS_N_INSNS (8), /* fmov, fmovr */
357 COSTS_N_INSNS (29), /* fmul */
358 COSTS_N_INSNS (54), /* fdivs */
359 COSTS_N_INSNS (83), /* fdivd */
360 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
361 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
362 COSTS_N_INSNS (11), /* imul */
363 COSTS_N_INSNS (11), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (72), /* idiv */
366 COSTS_N_INSNS (72), /* idivX */
367 COSTS_N_INSNS (1), /* movcc/movr */
368 0, /* shift penalty */
369 4 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara2_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (3), /* float load */
378 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (6), /* fadd, fsub */
380 COSTS_N_INSNS (6), /* fcmp */
381 COSTS_N_INSNS (6), /* fmov, fmovr */
382 COSTS_N_INSNS (6), /* fmul */
383 COSTS_N_INSNS (19), /* fdivs */
384 COSTS_N_INSNS (33), /* fdivd */
385 COSTS_N_INSNS (19), /* fsqrts */
386 COSTS_N_INSNS (33), /* fsqrtd */
387 COSTS_N_INSNS (5), /* imul */
388 COSTS_N_INSNS (5), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
391 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 5 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara3_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (9), /* fadd, fsub */
405 COSTS_N_INSNS (9), /* fcmp */
406 COSTS_N_INSNS (9), /* fmov, fmovr */
407 COSTS_N_INSNS (9), /* fmul */
408 COSTS_N_INSNS (23), /* fdivs */
409 COSTS_N_INSNS (37), /* fdivd */
410 COSTS_N_INSNS (23), /* fsqrts */
411 COSTS_N_INSNS (37), /* fsqrtd */
412 COSTS_N_INSNS (9), /* imul */
413 COSTS_N_INSNS (9), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
416 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara4_costs = {
424 COSTS_N_INSNS (5), /* int load */
425 COSTS_N_INSNS (5), /* int signed load */
426 COSTS_N_INSNS (5), /* int zeroed load */
427 COSTS_N_INSNS (5), /* float load */
428 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (11), /* fadd, fsub */
430 COSTS_N_INSNS (11), /* fcmp */
431 COSTS_N_INSNS (11), /* fmov, fmovr */
432 COSTS_N_INSNS (11), /* fmul */
433 COSTS_N_INSNS (24), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (24), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (12), /* imul */
438 COSTS_N_INSNS (12), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
441 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 2 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara7_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 1 /* branch cost */
470 };
471
472 static const
473 struct processor_costs m8_costs = {
474 COSTS_N_INSNS (3), /* int load */
475 COSTS_N_INSNS (3), /* int signed load */
476 COSTS_N_INSNS (3), /* int zeroed load */
477 COSTS_N_INSNS (3), /* float load */
478 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (9), /* fadd, fsub */
480 COSTS_N_INSNS (9), /* fcmp */
481 COSTS_N_INSNS (9), /* fmov, fmovr */
482 COSTS_N_INSNS (9), /* fmul */
483 COSTS_N_INSNS (26), /* fdivs */
484 COSTS_N_INSNS (30), /* fdivd */
485 COSTS_N_INSNS (33), /* fsqrts */
486 COSTS_N_INSNS (41), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (10), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (57), /* udiv/sdiv */
491 COSTS_N_INSNS (30), /* udivx/sdivx */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const struct processor_costs *sparc_costs = &cypress_costs;
498
499 #ifdef HAVE_AS_RELAX_OPTION
500 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
501 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
 502    With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
 503    anything branches between the sethi and the jmp.  */
504 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
505 #else
506 #define LEAF_SIBCALL_SLOT_RESERVED_P \
507 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
508 #endif
509
510 /* Vector to say how input registers are mapped to output registers.
511 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
512 eliminate it. You must use -fomit-frame-pointer to get that. */
513 char leaf_reg_remap[] =
514 { 0, 1, 2, 3, 4, 5, 6, 7,
515 -1, -1, -1, -1, -1, -1, 14, -1,
516 -1, -1, -1, -1, -1, -1, -1, -1,
517 8, 9, 10, 11, 12, 13, -1, 15,
518
519 32, 33, 34, 35, 36, 37, 38, 39,
520 40, 41, 42, 43, 44, 45, 46, 47,
521 48, 49, 50, 51, 52, 53, 54, 55,
522 56, 57, 58, 59, 60, 61, 62, 63,
523 64, 65, 66, 67, 68, 69, 70, 71,
524 72, 73, 74, 75, 76, 77, 78, 79,
525 80, 81, 82, 83, 84, 85, 86, 87,
526 88, 89, 90, 91, 92, 93, 94, 95,
527 96, 97, 98, 99, 100, 101, 102};
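/* Reading the table: leaf_reg_remap[24] == 8, so %i0 is renamed to %o0 when
   the leaf function optimization is applied, while entry 30 (%fp) is -1
   because the frame pointer cannot be remapped (see the comment above).  */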
528
529 /* Vector, indexed by hard register number, which contains 1
530 for a register that is allowable in a candidate for leaf
531 function treatment. */
532 char sparc_leaf_regs[] =
533 { 1, 1, 1, 1, 1, 1, 1, 1,
534 0, 0, 0, 0, 0, 0, 1, 0,
535 0, 0, 0, 0, 0, 0, 0, 0,
536 1, 1, 1, 1, 1, 1, 0, 1,
537 1, 1, 1, 1, 1, 1, 1, 1,
538 1, 1, 1, 1, 1, 1, 1, 1,
539 1, 1, 1, 1, 1, 1, 1, 1,
540 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1};
546
547 struct GTY(()) machine_function
548 {
549 /* Size of the frame of the function. */
550 HOST_WIDE_INT frame_size;
551
552 /* Size of the frame of the function minus the register window save area
553 and the outgoing argument area. */
554 HOST_WIDE_INT apparent_frame_size;
555
556 /* Register we pretend the frame pointer is allocated to. Normally, this
557 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
558 record "offset" separately as it may be too big for (reg + disp). */
559 rtx frame_base_reg;
560 HOST_WIDE_INT frame_base_offset;
561
562 /* Number of global or FP registers to be saved (as 4-byte quantities). */
563 int n_global_fp_regs;
564
565 /* True if the current function is leaf and uses only leaf regs,
566 so that the SPARC leaf function optimization can be applied.
567 Private version of crtl->uses_only_leaf_regs, see
568 sparc_expand_prologue for the rationale. */
569 int leaf_function_p;
570
571 /* True if the prologue saves local or in registers. */
572 bool save_local_in_regs_p;
573
574 /* True if the data calculated by sparc_expand_prologue are valid. */
575 bool prologue_data_valid_p;
576 };
577
578 #define sparc_frame_size cfun->machine->frame_size
579 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
580 #define sparc_frame_base_reg cfun->machine->frame_base_reg
581 #define sparc_frame_base_offset cfun->machine->frame_base_offset
582 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
583 #define sparc_leaf_function_p cfun->machine->leaf_function_p
584 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
585 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
586
587 /* 1 if the next opcode is to be specially indented. */
588 int sparc_indent_opcode = 0;
589
590 static void sparc_option_override (void);
591 static void sparc_init_modes (void);
592 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
593 const_tree, bool, bool, int *, int *);
594
595 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
596 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
597
598 static void sparc_emit_set_const32 (rtx, rtx);
599 static void sparc_emit_set_const64 (rtx, rtx);
600 static void sparc_output_addr_vec (rtx);
601 static void sparc_output_addr_diff_vec (rtx);
602 static void sparc_output_deferred_case_vectors (void);
603 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
604 static bool sparc_legitimate_constant_p (machine_mode, rtx);
605 static rtx sparc_builtin_saveregs (void);
606 static int epilogue_renumber (rtx *, int);
607 static bool sparc_assemble_integer (rtx, unsigned int, int);
608 static int set_extends (rtx_insn *);
609 static void sparc_asm_function_prologue (FILE *);
610 static void sparc_asm_function_epilogue (FILE *);
611 #ifdef TARGET_SOLARIS
612 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
613 tree) ATTRIBUTE_UNUSED;
614 #endif
615 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
616 static int sparc_issue_rate (void);
617 static void sparc_sched_init (FILE *, int, int);
618 static int sparc_use_sched_lookahead (void);
619
620 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
621 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
622 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
623 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
624 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
625
626 static bool sparc_function_ok_for_sibcall (tree, tree);
627 static void sparc_init_libfuncs (void);
628 static void sparc_init_builtins (void);
629 static void sparc_fpu_init_builtins (void);
630 static void sparc_vis_init_builtins (void);
631 static tree sparc_builtin_decl (unsigned, bool);
632 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
633 static tree sparc_fold_builtin (tree, int, tree *, bool);
634 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
635 HOST_WIDE_INT, tree);
636 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
637 HOST_WIDE_INT, const_tree);
638 static struct machine_function * sparc_init_machine_status (void);
639 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
640 static rtx sparc_tls_get_addr (void);
641 static rtx sparc_tls_got (void);
642 static int sparc_register_move_cost (machine_mode,
643 reg_class_t, reg_class_t);
644 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
645 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
646 int *, const_tree, int);
647 static bool sparc_strict_argument_naming (cumulative_args_t);
648 static void sparc_va_start (tree, rtx);
649 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
650 static bool sparc_vector_mode_supported_p (machine_mode);
651 static bool sparc_tls_referenced_p (rtx);
652 static rtx sparc_legitimize_tls_address (rtx);
653 static rtx sparc_legitimize_pic_address (rtx, rtx);
654 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
655 static rtx sparc_delegitimize_address (rtx);
656 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
657 static bool sparc_pass_by_reference (cumulative_args_t,
658 machine_mode, const_tree, bool);
659 static void sparc_function_arg_advance (cumulative_args_t,
660 machine_mode, const_tree, bool);
661 static rtx sparc_function_arg_1 (cumulative_args_t,
662 machine_mode, const_tree, bool, bool);
663 static rtx sparc_function_arg (cumulative_args_t,
664 machine_mode, const_tree, bool);
665 static rtx sparc_function_incoming_arg (cumulative_args_t,
666 machine_mode, const_tree, bool);
667 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
668 static unsigned int sparc_function_arg_boundary (machine_mode,
669 const_tree);
670 static int sparc_arg_partial_bytes (cumulative_args_t,
671 machine_mode, tree, bool);
672 static bool sparc_return_in_memory (const_tree, const_tree);
673 static rtx sparc_struct_value_rtx (tree, int);
674 static rtx sparc_function_value (const_tree, const_tree, bool);
675 static rtx sparc_libcall_value (machine_mode, const_rtx);
676 static bool sparc_function_value_regno_p (const unsigned int);
677 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
678 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
679 static void sparc_file_end (void);
680 static bool sparc_frame_pointer_required (void);
681 static bool sparc_can_eliminate (const int, const int);
682 static rtx sparc_builtin_setjmp_frame_value (void);
683 static void sparc_conditional_register_usage (void);
684 static bool sparc_use_pseudo_pic_reg (void);
685 static void sparc_init_pic_reg (void);
686 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
687 static const char *sparc_mangle_type (const_tree);
688 #endif
689 static void sparc_trampoline_init (rtx, tree, rtx);
690 static machine_mode sparc_preferred_simd_mode (scalar_mode);
691 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
692 static bool sparc_lra_p (void);
693 static bool sparc_print_operand_punct_valid_p (unsigned char);
694 static void sparc_print_operand (FILE *, rtx, int);
695 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
696 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
697 machine_mode,
698 secondary_reload_info *);
699 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
700 reg_class_t);
701 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
702 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
703 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
704 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
705 static unsigned int sparc_min_arithmetic_precision (void);
706 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
707 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
708 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
709 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
710 reg_class_t);
711 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
712 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
713 const vec_perm_indices &);
714 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
715 \f
716 #ifdef SUBTARGET_ATTRIBUTE_TABLE
717 /* Table of valid machine attributes. */
718 static const struct attribute_spec sparc_attribute_table[] =
719 {
720 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
721 do_diagnostic, handler, exclude } */
722 SUBTARGET_ATTRIBUTE_TABLE,
723 { NULL, 0, 0, false, false, false, false, NULL, NULL }
724 };
725 #endif
726 \f
727 /* Option handling. */
728
729 /* Parsed value. */
730 enum cmodel sparc_cmodel;
731
732 char sparc_hard_reg_printed[8];
733
734 /* Initialize the GCC target structure. */
735
736 /* The default is to use .half rather than .short for aligned HI objects. */
737 #undef TARGET_ASM_ALIGNED_HI_OP
738 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
739
740 #undef TARGET_ASM_UNALIGNED_HI_OP
741 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
742 #undef TARGET_ASM_UNALIGNED_SI_OP
743 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
744 #undef TARGET_ASM_UNALIGNED_DI_OP
745 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
746
747 /* The target hook has to handle DI-mode values. */
748 #undef TARGET_ASM_INTEGER
749 #define TARGET_ASM_INTEGER sparc_assemble_integer
750
751 #undef TARGET_ASM_FUNCTION_PROLOGUE
752 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
753 #undef TARGET_ASM_FUNCTION_EPILOGUE
754 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
755
756 #undef TARGET_SCHED_ADJUST_COST
757 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
758 #undef TARGET_SCHED_ISSUE_RATE
759 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
760 #undef TARGET_SCHED_INIT
761 #define TARGET_SCHED_INIT sparc_sched_init
762 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
763 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
764
765 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
766 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
767
768 #undef TARGET_INIT_LIBFUNCS
769 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
770
771 #undef TARGET_LEGITIMIZE_ADDRESS
772 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
773 #undef TARGET_DELEGITIMIZE_ADDRESS
774 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
775 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
776 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
777
778 #undef TARGET_INIT_BUILTINS
779 #define TARGET_INIT_BUILTINS sparc_init_builtins
780 #undef TARGET_BUILTIN_DECL
781 #define TARGET_BUILTIN_DECL sparc_builtin_decl
782 #undef TARGET_EXPAND_BUILTIN
783 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
784 #undef TARGET_FOLD_BUILTIN
785 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
786
787 #if TARGET_TLS
788 #undef TARGET_HAVE_TLS
789 #define TARGET_HAVE_TLS true
790 #endif
791
792 #undef TARGET_CANNOT_FORCE_CONST_MEM
793 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
794
795 #undef TARGET_ASM_OUTPUT_MI_THUNK
796 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
797 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
798 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
799
800 #undef TARGET_RTX_COSTS
801 #define TARGET_RTX_COSTS sparc_rtx_costs
802 #undef TARGET_ADDRESS_COST
803 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
804 #undef TARGET_REGISTER_MOVE_COST
805 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
806
807 #undef TARGET_PROMOTE_FUNCTION_MODE
808 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
809 #undef TARGET_STRICT_ARGUMENT_NAMING
810 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
811
812 #undef TARGET_MUST_PASS_IN_STACK
813 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
814 #undef TARGET_PASS_BY_REFERENCE
815 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
816 #undef TARGET_ARG_PARTIAL_BYTES
817 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
818 #undef TARGET_FUNCTION_ARG_ADVANCE
819 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
820 #undef TARGET_FUNCTION_ARG
821 #define TARGET_FUNCTION_ARG sparc_function_arg
822 #undef TARGET_FUNCTION_INCOMING_ARG
823 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
824 #undef TARGET_FUNCTION_ARG_PADDING
825 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
826 #undef TARGET_FUNCTION_ARG_BOUNDARY
827 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
828
829 #undef TARGET_RETURN_IN_MEMORY
830 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
831 #undef TARGET_STRUCT_VALUE_RTX
832 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
833 #undef TARGET_FUNCTION_VALUE
834 #define TARGET_FUNCTION_VALUE sparc_function_value
835 #undef TARGET_LIBCALL_VALUE
836 #define TARGET_LIBCALL_VALUE sparc_libcall_value
837 #undef TARGET_FUNCTION_VALUE_REGNO_P
838 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
839
840 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
841 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
842
843 #undef TARGET_ASAN_SHADOW_OFFSET
844 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
845
846 #undef TARGET_EXPAND_BUILTIN_VA_START
847 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
848 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
849 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
850
851 #undef TARGET_VECTOR_MODE_SUPPORTED_P
852 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
853
854 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
855 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
856
857 #ifdef SUBTARGET_INSERT_ATTRIBUTES
858 #undef TARGET_INSERT_ATTRIBUTES
859 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
860 #endif
861
862 #ifdef SUBTARGET_ATTRIBUTE_TABLE
863 #undef TARGET_ATTRIBUTE_TABLE
864 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
865 #endif
866
867 #undef TARGET_OPTION_OVERRIDE
868 #define TARGET_OPTION_OVERRIDE sparc_option_override
869
870 #ifdef TARGET_THREAD_SSP_OFFSET
871 #undef TARGET_STACK_PROTECT_GUARD
872 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
873 #endif
874
875 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
876 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
877 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
878 #endif
879
880 #undef TARGET_ASM_FILE_END
881 #define TARGET_ASM_FILE_END sparc_file_end
882
883 #undef TARGET_FRAME_POINTER_REQUIRED
884 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
885
886 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
887 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
888
889 #undef TARGET_CAN_ELIMINATE
890 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
891
892 #undef TARGET_PREFERRED_RELOAD_CLASS
893 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
894
895 #undef TARGET_SECONDARY_RELOAD
896 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
897 #undef TARGET_SECONDARY_MEMORY_NEEDED
898 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
899 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
900 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
901
902 #undef TARGET_CONDITIONAL_REGISTER_USAGE
903 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
904
905 #undef TARGET_INIT_PIC_REG
906 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
907
908 #undef TARGET_USE_PSEUDO_PIC_REG
909 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
910
911 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
912 #undef TARGET_MANGLE_TYPE
913 #define TARGET_MANGLE_TYPE sparc_mangle_type
914 #endif
915
916 #undef TARGET_LRA_P
917 #define TARGET_LRA_P sparc_lra_p
918
919 #undef TARGET_LEGITIMATE_ADDRESS_P
920 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
921
922 #undef TARGET_LEGITIMATE_CONSTANT_P
923 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
924
925 #undef TARGET_TRAMPOLINE_INIT
926 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
927
928 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
929 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
930 #undef TARGET_PRINT_OPERAND
931 #define TARGET_PRINT_OPERAND sparc_print_operand
932 #undef TARGET_PRINT_OPERAND_ADDRESS
933 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
934
935 /* The value stored by LDSTUB. */
936 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
937 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
938
939 #undef TARGET_CSTORE_MODE
940 #define TARGET_CSTORE_MODE sparc_cstore_mode
941
942 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
943 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
944
945 #undef TARGET_FIXED_CONDITION_CODE_REGS
946 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
947
948 #undef TARGET_MIN_ARITHMETIC_PRECISION
949 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
950
951 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
952 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
953
954 #undef TARGET_HARD_REGNO_NREGS
955 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
956 #undef TARGET_HARD_REGNO_MODE_OK
957 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
958
959 #undef TARGET_MODES_TIEABLE_P
960 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
961
962 #undef TARGET_CAN_CHANGE_MODE_CLASS
963 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
964
965 #undef TARGET_CONSTANT_ALIGNMENT
966 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
967
968 #undef TARGET_VECTORIZE_VEC_PERM_CONST
969 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
970
971 #undef TARGET_CAN_FOLLOW_JUMP
972 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
973
974 struct gcc_target targetm = TARGET_INITIALIZER;
975
976 /* Return the memory reference contained in X if any, zero otherwise. */
977
978 static rtx
979 mem_ref (rtx x)
980 {
981 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
982 x = XEXP (x, 0);
983
984 if (MEM_P (x))
985 return x;
986
987 return NULL_RTX;
988 }
989
990 /* True if any of INSN's source register(s) is REG. */
991
992 static bool
993 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
994 {
995 extract_insn (insn);
996 return ((REG_P (recog_data.operand[1])
997 && REGNO (recog_data.operand[1]) == reg)
998 || (recog_data.n_operands == 3
999 && REG_P (recog_data.operand[2])
1000 && REGNO (recog_data.operand[2]) == reg));
1001 }
1002
1003 /* True if INSN is a floating-point division or square-root. */
1004
1005 static bool
1006 div_sqrt_insn_p (rtx_insn *insn)
1007 {
1008 if (GET_CODE (PATTERN (insn)) != SET)
1009 return false;
1010
1011 switch (get_attr_type (insn))
1012 {
1013 case TYPE_FPDIVS:
1014 case TYPE_FPSQRTS:
1015 case TYPE_FPDIVD:
1016 case TYPE_FPSQRTD:
1017 return true;
1018 default:
1019 return false;
1020 }
1021 }
1022
1023 /* True if INSN is a floating-point instruction. */
1024
1025 static bool
1026 fpop_insn_p (rtx_insn *insn)
1027 {
1028 if (GET_CODE (PATTERN (insn)) != SET)
1029 return false;
1030
1031 switch (get_attr_type (insn))
1032 {
1033 case TYPE_FPMOVE:
1034 case TYPE_FPCMOVE:
1035 case TYPE_FP:
1036 case TYPE_FPCMP:
1037 case TYPE_FPMUL:
1038 case TYPE_FPDIVS:
1039 case TYPE_FPSQRTS:
1040 case TYPE_FPDIVD:
1041 case TYPE_FPSQRTD:
1042 return true;
1043 default:
1044 return false;
1045 }
1046 }
1047
1048 /* True if INSN is an atomic instruction. */
1049
1050 static bool
1051 atomic_insn_for_leon3_p (rtx_insn *insn)
1052 {
1053 switch (INSN_CODE (insn))
1054 {
1055 case CODE_FOR_swapsi:
1056 case CODE_FOR_ldstub:
1057 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1058 return true;
1059 default:
1060 return false;
1061 }
1062 }
1063
1064 /* We use a machine specific pass to enable workarounds for errata.
1065
1066 We need to have the (essentially) final form of the insn stream in order
1067 to properly detect the various hazards. Therefore, this machine specific
1068 pass runs as late as possible. */
1069
 1070 /* True if INSN is an md pattern or asm statement.  */
1071 #define USEFUL_INSN_P(INSN) \
1072 (NONDEBUG_INSN_P (INSN) \
1073 && GET_CODE (PATTERN (INSN)) != USE \
1074 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1075
1076 static unsigned int
1077 sparc_do_work_around_errata (void)
1078 {
1079 rtx_insn *insn, *next;
1080
1081 /* Force all instructions to be split into their final form. */
1082 split_all_insns_noflow ();
1083
1084 /* Now look for specific patterns in the insn stream. */
1085 for (insn = get_insns (); insn; insn = next)
1086 {
1087 bool insert_nop = false;
1088 rtx set;
1089 rtx_insn *jump;
1090 rtx_sequence *seq;
1091
1092 /* Look into the instruction in a delay slot. */
1093 if (NONJUMP_INSN_P (insn)
1094 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1095 {
1096 jump = seq->insn (0);
1097 insn = seq->insn (1);
1098 }
1099 else if (JUMP_P (insn))
1100 jump = insn;
1101 else
1102 jump = NULL;
1103
1104 /* Place a NOP at the branch target of an integer branch if it is a
1105 floating-point operation or a floating-point branch. */
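	  /* Illustrative example of the hazard (hypothetical code):

	       be	.L1			! integer branch
	        nop
	       ...
	     .L1:
	       faddd	%f0, %f2, %f4	! FP operation at the branch target

	     A NOP is emitted immediately before the faddd.  */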
1106 if (sparc_fix_gr712rc
1107 && jump
1108 && jump_to_label_p (jump)
1109 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1110 {
1111 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1112 if (target
1113 && (fpop_insn_p (target)
1114 || (JUMP_P (target)
1115 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1116 emit_insn_before (gen_nop (), target);
1117 }
1118
 1119          /* Insert a NOP between a load instruction and an atomic instruction.  Also
 1120             insert a NOP at the branch target if there is a load in the delay slot
 1121             and an atomic instruction at the branch target.  */
1122 if (sparc_fix_ut700
1123 && NONJUMP_INSN_P (insn)
1124 && (set = single_set (insn)) != NULL_RTX
1125 && mem_ref (SET_SRC (set))
1126 && REG_P (SET_DEST (set)))
1127 {
1128 if (jump && jump_to_label_p (jump))
1129 {
1130 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1131 if (target && atomic_insn_for_leon3_p (target))
1132 emit_insn_before (gen_nop (), target);
1133 }
1134
1135 next = next_active_insn (insn);
1136 if (!next)
1137 break;
1138
1139 if (atomic_insn_for_leon3_p (next))
1140 insert_nop = true;
1141 }
1142
1143 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1144 ends with another fdiv or fsqrt instruction with no dependencies on
1145 the former, along with an appropriate pattern in between. */
1146 if (sparc_fix_lost_divsqrt
1147 && NONJUMP_INSN_P (insn)
1148 && div_sqrt_insn_p (insn))
1149 {
1150 int i;
1151 int fp_found = 0;
1152 rtx_insn *after;
1153
1154 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1155
1156 next = next_active_insn (insn);
1157 if (!next)
1158 break;
1159
1160 for (after = next, i = 0; i < 4; i++)
1161 {
1162 /* Count floating-point operations. */
1163 if (i != 3 && fpop_insn_p (after))
1164 {
1165 /* If the insn uses the destination register of
1166 the div/sqrt, then it cannot be problematic. */
1167 if (insn_uses_reg_p (after, dest_reg))
1168 break;
1169 fp_found++;
1170 }
1171
1172 /* Count floating-point loads. */
1173 if (i != 3
1174 && (set = single_set (after)) != NULL_RTX
1175 && REG_P (SET_DEST (set))
1176 && REGNO (SET_DEST (set)) > 31)
1177 {
1178 /* If the insn uses the destination register of
1179 the div/sqrt, then it cannot be problematic. */
1180 if (REGNO (SET_DEST (set)) == dest_reg)
1181 break;
1182 fp_found++;
1183 }
1184
1185 /* Check if this is a problematic sequence. */
1186 if (i > 1
1187 && fp_found >= 2
1188 && div_sqrt_insn_p (after))
1189 {
1190 /* If this is the short version of the problematic
1191 sequence we add two NOPs in a row to also prevent
1192 the long version. */
1193 if (i == 2)
1194 emit_insn_before (gen_nop (), next);
1195 insert_nop = true;
1196 break;
1197 }
1198
1199 /* No need to scan past a second div/sqrt. */
1200 if (div_sqrt_insn_p (after))
1201 break;
1202
1203 /* Insert NOP before branch. */
1204 if (i < 3
1205 && (!NONJUMP_INSN_P (after)
1206 || GET_CODE (PATTERN (after)) == SEQUENCE))
1207 {
1208 insert_nop = true;
1209 break;
1210 }
1211
1212 after = next_active_insn (after);
1213 if (!after)
1214 break;
1215 }
1216 }
1217
1218 /* Look for either of these two sequences:
1219
1220 Sequence A:
1221 1. store of word size or less (e.g. st / stb / sth / stf)
1222 2. any single instruction that is not a load or store
1223 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1224
1225 Sequence B:
1226 1. store of double word size (e.g. std / stdf)
1227 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
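	 /* A concrete instance of sequence A (illustrative only):

	      stb	%g1, [%o0]	! store of word size or less
	      add	%o1, 1, %o1	! neither a load nor a store
	      st	%o2, [%o0 + 4]	! any store instruction

	    The pass breaks it up by emitting a NOP right after the first
	    store, i.e. before the add.  */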
1228 if (sparc_fix_b2bst
1229 && NONJUMP_INSN_P (insn)
1230 && (set = single_set (insn)) != NULL_RTX
1231 && MEM_P (SET_DEST (set)))
1232 {
1233 /* Sequence B begins with a double-word store. */
1234 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1235 rtx_insn *after;
1236 int i;
1237
1238 next = next_active_insn (insn);
1239 if (!next)
1240 break;
1241
1242 for (after = next, i = 0; i < 2; i++)
1243 {
1244 /* Skip empty assembly statements. */
1245 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1246 || (USEFUL_INSN_P (after)
 1247                   || (USEFUL_INSN_P (after)
 1247                       && (asm_noperands (PATTERN (after)) >= 0)
1248 && !strcmp (decode_asm_operands (PATTERN (after),
1249 NULL, NULL, NULL,
1250 NULL, NULL), "")))
1251 after = next_active_insn (after);
1252 if (!after)
1253 break;
1254
1255 /* If the insn is a branch, then it cannot be problematic. */
1256 if (!NONJUMP_INSN_P (after)
1257 || GET_CODE (PATTERN (after)) == SEQUENCE)
1258 break;
1259
1260 /* Sequence B is only two instructions long. */
1261 if (seq_b)
1262 {
1263 /* Add NOP if followed by a store. */
1264 if ((set = single_set (after)) != NULL_RTX
1265 && MEM_P (SET_DEST (set)))
1266 insert_nop = true;
1267
1268 /* Otherwise it is ok. */
1269 break;
1270 }
1271
1272 /* If the second instruction is a load or a store,
1273 then the sequence cannot be problematic. */
1274 if (i == 0)
1275 {
1276 if ((set = single_set (after)) != NULL_RTX
1277 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1278 break;
1279
1280 after = next_active_insn (after);
1281 if (!after)
1282 break;
1283 }
1284
1285 /* Add NOP if third instruction is a store. */
1286 if (i == 1
1287 && (set = single_set (after)) != NULL_RTX
1288 && MEM_P (SET_DEST (set)))
1289 insert_nop = true;
1290 }
1291 }
1292
1293 /* Look for a single-word load into an odd-numbered FP register. */
1294 else if (sparc_fix_at697f
1295 && NONJUMP_INSN_P (insn)
1296 && (set = single_set (insn)) != NULL_RTX
1297 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1298 && mem_ref (SET_SRC (set))
1299 && REG_P (SET_DEST (set))
1300 && REGNO (SET_DEST (set)) > 31
1301 && REGNO (SET_DEST (set)) % 2 != 0)
1302 {
1303 /* The wrong dependency is on the enclosing double register. */
1304 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1305 unsigned int src1, src2, dest;
1306 int code;
1307
1308 next = next_active_insn (insn);
1309 if (!next)
1310 break;
1311 /* If the insn is a branch, then it cannot be problematic. */
1312 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1313 continue;
1314
1315 extract_insn (next);
1316 code = INSN_CODE (next);
1317
1318 switch (code)
1319 {
1320 case CODE_FOR_adddf3:
1321 case CODE_FOR_subdf3:
1322 case CODE_FOR_muldf3:
1323 case CODE_FOR_divdf3:
1324 dest = REGNO (recog_data.operand[0]);
1325 src1 = REGNO (recog_data.operand[1]);
1326 src2 = REGNO (recog_data.operand[2]);
1327 if (src1 != src2)
1328 {
1329 /* Case [1-4]:
1330 ld [address], %fx+1
1331 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1332 if ((src1 == x || src2 == x)
1333 && (dest == src1 || dest == src2))
1334 insert_nop = true;
1335 }
1336 else
1337 {
1338 /* Case 5:
1339 ld [address], %fx+1
1340 FPOPd %fx, %fx, %fx */
1341 if (src1 == x
1342 && dest == src1
1343 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1344 insert_nop = true;
1345 }
1346 break;
1347
1348 case CODE_FOR_sqrtdf2:
1349 dest = REGNO (recog_data.operand[0]);
1350 src1 = REGNO (recog_data.operand[1]);
1351 /* Case 6:
1352 ld [address], %fx+1
1353 fsqrtd %fx, %fx */
1354 if (src1 == x && dest == src1)
1355 insert_nop = true;
1356 break;
1357
1358 default:
1359 break;
1360 }
1361 }
1362
1363 /* Look for a single-word load into an integer register. */
1364 else if (sparc_fix_ut699
1365 && NONJUMP_INSN_P (insn)
1366 && (set = single_set (insn)) != NULL_RTX
1367 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1368 && (mem_ref (SET_SRC (set)) != NULL_RTX
1369 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1370 && REG_P (SET_DEST (set))
1371 && REGNO (SET_DEST (set)) < 32)
1372 {
1373 /* There is no problem if the second memory access has a data
1374 dependency on the first single-cycle load. */
1375 rtx x = SET_DEST (set);
1376
1377 next = next_active_insn (insn);
1378 if (!next)
1379 break;
1380 /* If the insn is a branch, then it cannot be problematic. */
1381 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1382 continue;
1383
1384 /* Look for a second memory access to/from an integer register. */
1385 if ((set = single_set (next)) != NULL_RTX)
1386 {
1387 rtx src = SET_SRC (set);
1388 rtx dest = SET_DEST (set);
1389 rtx mem;
1390
1391 /* LDD is affected. */
1392 if ((mem = mem_ref (src)) != NULL_RTX
1393 && REG_P (dest)
1394 && REGNO (dest) < 32
1395 && !reg_mentioned_p (x, XEXP (mem, 0)))
1396 insert_nop = true;
1397
1398 /* STD is *not* affected. */
1399 else if (MEM_P (dest)
1400 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1401 && (src == CONST0_RTX (GET_MODE (dest))
1402 || (REG_P (src)
1403 && REGNO (src) < 32
1404 && REGNO (src) != REGNO (x)))
1405 && !reg_mentioned_p (x, XEXP (dest, 0)))
1406 insert_nop = true;
1407
 1408              /* GOT accesses use LD.  */
1409 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1410 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1411 insert_nop = true;
1412 }
1413 }
1414
1415 /* Look for a single-word load/operation into an FP register. */
1416 else if (sparc_fix_ut699
1417 && NONJUMP_INSN_P (insn)
1418 && (set = single_set (insn)) != NULL_RTX
1419 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1420 && REG_P (SET_DEST (set))
1421 && REGNO (SET_DEST (set)) > 31)
1422 {
1423 /* Number of instructions in the problematic window. */
1424 const int n_insns = 4;
1425 /* The problematic combination is with the sibling FP register. */
1426 const unsigned int x = REGNO (SET_DEST (set));
1427 const unsigned int y = x ^ 1;
1428 rtx_insn *after;
1429 int i;
1430
1431 next = next_active_insn (insn);
1432 if (!next)
1433 break;
1434 /* If the insn is a branch, then it cannot be problematic. */
1435 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1436 continue;
1437
1438 /* Look for a second load/operation into the sibling FP register. */
1439 if (!((set = single_set (next)) != NULL_RTX
1440 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1441 && REG_P (SET_DEST (set))
1442 && REGNO (SET_DEST (set)) == y))
1443 continue;
1444
1445 /* Look for a (possible) store from the FP register in the next N
1446 instructions, but bail out if it is again modified or if there
1447 is a store from the sibling FP register before this store. */
1448 for (after = next, i = 0; i < n_insns; i++)
1449 {
1450 bool branch_p;
1451
1452 after = next_active_insn (after);
1453 if (!after)
1454 break;
1455
1456 /* This is a branch with an empty delay slot. */
1457 if (!NONJUMP_INSN_P (after))
1458 {
1459 if (++i == n_insns)
1460 break;
1461 branch_p = true;
1462 after = NULL;
1463 }
1464 /* This is a branch with a filled delay slot. */
1465 else if (rtx_sequence *seq =
1466 dyn_cast <rtx_sequence *> (PATTERN (after)))
1467 {
1468 if (++i == n_insns)
1469 break;
1470 branch_p = true;
1471 after = seq->insn (1);
1472 }
1473 /* This is a regular instruction. */
1474 else
1475 branch_p = false;
1476
1477 if (after && (set = single_set (after)) != NULL_RTX)
1478 {
1479 const rtx src = SET_SRC (set);
1480 const rtx dest = SET_DEST (set);
1481 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1482
1483 /* If the FP register is again modified before the store,
1484 then the store isn't affected. */
1485 if (REG_P (dest)
1486 && (REGNO (dest) == x
1487 || (REGNO (dest) == y && size == 8)))
1488 break;
1489
1490 if (MEM_P (dest) && REG_P (src))
1491 {
1492 /* If there is a store from the sibling FP register
1493 before the store, then the store is not affected. */
1494 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1495 break;
1496
1497 /* Otherwise, the store is affected. */
1498 if (REGNO (src) == x && size == 4)
1499 {
1500 insert_nop = true;
1501 break;
1502 }
1503 }
1504 }
1505
1506 /* If we have a branch in the first M instructions, then we
1507 cannot see the (M+2)th instruction so we play safe. */
1508 if (branch_p && i <= (n_insns - 2))
1509 {
1510 insert_nop = true;
1511 break;
1512 }
1513 }
1514 }
1515
1516 else
1517 next = NEXT_INSN (insn);
1518
1519 if (insert_nop)
1520 emit_insn_before (gen_nop (), next);
1521 }
1522
1523 return 0;
1524 }
1525
1526 namespace {
1527
1528 const pass_data pass_data_work_around_errata =
1529 {
1530 RTL_PASS, /* type */
1531 "errata", /* name */
1532 OPTGROUP_NONE, /* optinfo_flags */
1533 TV_MACH_DEP, /* tv_id */
1534 0, /* properties_required */
1535 0, /* properties_provided */
1536 0, /* properties_destroyed */
1537 0, /* todo_flags_start */
1538 0, /* todo_flags_finish */
1539 };
1540
1541 class pass_work_around_errata : public rtl_opt_pass
1542 {
1543 public:
1544 pass_work_around_errata(gcc::context *ctxt)
1545 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1546 {}
1547
1548 /* opt_pass methods: */
1549 virtual bool gate (function *)
1550 {
1551 return sparc_fix_at697f
1552 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1553 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1554 }
1555
1556 virtual unsigned int execute (function *)
1557 {
1558 return sparc_do_work_around_errata ();
1559 }
1560
1561 }; // class pass_work_around_errata
1562
1563 } // anon namespace
1564
1565 rtl_opt_pass *
1566 make_pass_work_around_errata (gcc::context *ctxt)
1567 {
1568 return new pass_work_around_errata (ctxt);
1569 }
1570
1571 /* Helpers for TARGET_DEBUG_OPTIONS. */
1572 static void
1573 dump_target_flag_bits (const int flags)
1574 {
1575 if (flags & MASK_64BIT)
1576 fprintf (stderr, "64BIT ");
1577 if (flags & MASK_APP_REGS)
1578 fprintf (stderr, "APP_REGS ");
1579 if (flags & MASK_FASTER_STRUCTS)
1580 fprintf (stderr, "FASTER_STRUCTS ");
1581 if (flags & MASK_FLAT)
1582 fprintf (stderr, "FLAT ");
1583 if (flags & MASK_FMAF)
1584 fprintf (stderr, "FMAF ");
1585 if (flags & MASK_FSMULD)
1586 fprintf (stderr, "FSMULD ");
1587 if (flags & MASK_FPU)
1588 fprintf (stderr, "FPU ");
1589 if (flags & MASK_HARD_QUAD)
1590 fprintf (stderr, "HARD_QUAD ");
1591 if (flags & MASK_POPC)
1592 fprintf (stderr, "POPC ");
1593 if (flags & MASK_PTR64)
1594 fprintf (stderr, "PTR64 ");
1595 if (flags & MASK_STACK_BIAS)
1596 fprintf (stderr, "STACK_BIAS ");
1597 if (flags & MASK_UNALIGNED_DOUBLES)
1598 fprintf (stderr, "UNALIGNED_DOUBLES ");
1599 if (flags & MASK_V8PLUS)
1600 fprintf (stderr, "V8PLUS ");
1601 if (flags & MASK_VIS)
1602 fprintf (stderr, "VIS ");
1603 if (flags & MASK_VIS2)
1604 fprintf (stderr, "VIS2 ");
1605 if (flags & MASK_VIS3)
1606 fprintf (stderr, "VIS3 ");
1607 if (flags & MASK_VIS4)
1608 fprintf (stderr, "VIS4 ");
1609 if (flags & MASK_VIS4B)
1610 fprintf (stderr, "VIS4B ");
1611 if (flags & MASK_CBCOND)
1612 fprintf (stderr, "CBCOND ");
1613 if (flags & MASK_DEPRECATED_V8_INSNS)
1614 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1615 if (flags & MASK_SPARCLET)
1616 fprintf (stderr, "SPARCLET ");
1617 if (flags & MASK_SPARCLITE)
1618 fprintf (stderr, "SPARCLITE ");
1619 if (flags & MASK_V8)
1620 fprintf (stderr, "V8 ");
1621 if (flags & MASK_V9)
1622 fprintf (stderr, "V9 ");
1623 }
1624
1625 static void
1626 dump_target_flags (const char *prefix, const int flags)
1627 {
1628 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1629 dump_target_flag_bits (flags);
 1630   fprintf (stderr, "]\n");
1631 }
1632
1633 /* Validate and override various options, and do some machine dependent
1634 initialization. */
1635
1636 static void
1637 sparc_option_override (void)
1638 {
1639 static struct code_model {
1640 const char *const name;
1641 const enum cmodel value;
1642 } const cmodels[] = {
1643 { "32", CM_32 },
1644 { "medlow", CM_MEDLOW },
1645 { "medmid", CM_MEDMID },
1646 { "medany", CM_MEDANY },
1647 { "embmedany", CM_EMBMEDANY },
1648 { NULL, (enum cmodel) 0 }
1649 };
1650 const struct code_model *cmodel;
1651 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1652 static struct cpu_default {
1653 const int cpu;
1654 const enum processor_type processor;
1655 } const cpu_default[] = {
1656 /* There must be one entry here for each TARGET_CPU value. */
1657 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1658 { TARGET_CPU_v8, PROCESSOR_V8 },
1659 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1660 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1661 { TARGET_CPU_leon, PROCESSOR_LEON },
1662 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1663 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1664 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1665 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1666 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1667 { TARGET_CPU_v9, PROCESSOR_V9 },
1668 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1669 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1670 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1671 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1672 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1673 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1674 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1675 { TARGET_CPU_m8, PROCESSOR_M8 },
1676 { -1, PROCESSOR_V7 }
1677 };
1678 const struct cpu_default *def;
1679 /* Table of values for -m{cpu,tune}=. This must match the order of
1680 the enum processor_type in sparc-opts.h. */
1681 static struct cpu_table {
1682 const char *const name;
1683 const int disable;
1684 const int enable;
1685 } const cpu_table[] = {
1686 { "v7", MASK_ISA, 0 },
1687 { "cypress", MASK_ISA, 0 },
1688 { "v8", MASK_ISA, MASK_V8 },
1689 /* TI TMS390Z55 supersparc */
1690 { "supersparc", MASK_ISA, MASK_V8 },
1691 { "hypersparc", MASK_ISA, MASK_V8 },
1692 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1693 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1694 { "leon3v7", MASK_ISA, MASK_LEON3 },
1695 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1696 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1697 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1698 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1699 { "f934", MASK_ISA, MASK_SPARCLITE },
1700 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1701 { "sparclet", MASK_ISA, MASK_SPARCLET },
1702 /* TEMIC sparclet */
1703 { "tsc701", MASK_ISA, MASK_SPARCLET },
1704 { "v9", MASK_ISA, MASK_V9 },
1705 /* UltraSPARC I, II, IIi */
1706 { "ultrasparc", MASK_ISA,
1707 /* Although insns using %y are deprecated, it is a clear win. */
1708 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1709 /* UltraSPARC III */
1710 /* ??? Check if %y issue still holds true. */
1711 { "ultrasparc3", MASK_ISA,
1712 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1713 /* UltraSPARC T1 */
1714 { "niagara", MASK_ISA,
1715 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1716 /* UltraSPARC T2 */
1717 { "niagara2", MASK_ISA,
1718 MASK_V9|MASK_POPC|MASK_VIS2 },
1719 /* UltraSPARC T3 */
1720 { "niagara3", MASK_ISA,
1721 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1722 /* UltraSPARC T4 */
1723 { "niagara4", MASK_ISA,
1724 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1725 /* UltraSPARC M7 */
1726 { "niagara7", MASK_ISA,
1727 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1728 /* UltraSPARC M8 */
1729 { "m8", MASK_ISA,
1730 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1731 };
1732 const struct cpu_table *cpu;
1733 unsigned int i;
1734
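/* Process the -mdebug= string: a comma-separated list of debug flags, where
   a flag prefixed with '!' is cleared instead of set, e.g. -mdebug=all,!options. */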
1735 if (sparc_debug_string != NULL)
1736 {
1737 const char *q;
1738 char *p;
1739
1740 p = ASTRDUP (sparc_debug_string);
1741 while ((q = strtok (p, ",")) != NULL)
1742 {
1743 bool invert;
1744 int mask;
1745
1746 p = NULL;
1747 if (*q == '!')
1748 {
1749 invert = true;
1750 q++;
1751 }
1752 else
1753 invert = false;
1754
1755 if (! strcmp (q, "all"))
1756 mask = MASK_DEBUG_ALL;
1757 else if (! strcmp (q, "options"))
1758 mask = MASK_DEBUG_OPTIONS;
1759 else
1760 error ("unknown -mdebug-%s switch", q);
1761
1762 if (invert)
1763 sparc_debug &= ~mask;
1764 else
1765 sparc_debug |= mask;
1766 }
1767 }
1768
1769 /* Enable the FsMULd instruction by default if not explicitly specified by
1770 the user. It may later be disabled by the selected CPU (explicitly or not). */
1771 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1772 target_flags |= MASK_FSMULD;
1773
1774 if (TARGET_DEBUG_OPTIONS)
1775 {
1776 dump_target_flags ("Initial target_flags", target_flags);
1777 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1778 }
1779
1780 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1781 SUBTARGET_OVERRIDE_OPTIONS;
1782 #endif
1783
1784 #ifndef SPARC_BI_ARCH
1785 /* Check for unsupported architecture size. */
1786 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1787 error ("%s is not supported by this configuration",
1788 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1789 #endif
1790
1791 /* We force all 64-bit archs to use a 128-bit long double. */
1792 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1793 {
1794 error ("-mlong-double-64 not allowed with -m64");
1795 target_flags |= MASK_LONG_DOUBLE_128;
1796 }
1797
1798 /* Code model selection. */
1799 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1800
1801 #ifdef SPARC_BI_ARCH
1802 if (TARGET_ARCH32)
1803 sparc_cmodel = CM_32;
1804 #endif
1805
1806 if (sparc_cmodel_string != NULL)
1807 {
1808 if (TARGET_ARCH64)
1809 {
1810 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1811 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1812 break;
1813 if (cmodel->name == NULL)
1814 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1815 else
1816 sparc_cmodel = cmodel->value;
1817 }
1818 else
1819 error ("-mcmodel= is not supported on 32-bit systems");
1820 }
1821
1822 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1823 for (i = 8; i < 16; i++)
1824 if (!call_used_regs [i])
1825 {
1826 error ("-fcall-saved-REG is not supported for out registers");
1827 call_used_regs [i] = 1;
1828 }
1829
1830 /* Set the default CPU if no -mcpu option was specified. */
1831 if (!global_options_set.x_sparc_cpu_and_features)
1832 {
1833 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1834 if (def->cpu == TARGET_CPU_DEFAULT)
1835 break;
1836 gcc_assert (def->cpu != -1);
1837 sparc_cpu_and_features = def->processor;
1838 }
1839
1840 /* Set the default CPU if no -mtune option was specified. */
1841 if (!global_options_set.x_sparc_cpu)
1842 sparc_cpu = sparc_cpu_and_features;
1843
1844 cpu = &cpu_table[(int) sparc_cpu_and_features];
1845
1846 if (TARGET_DEBUG_OPTIONS)
1847 {
1848 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1849 dump_target_flags ("cpu->disable", cpu->disable);
1850 dump_target_flags ("cpu->enable", cpu->enable);
1851 }
1852
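/* Clear the flags in the CPU's disable mask, then turn on its default
   features, leaving out anything the assembler cannot handle and any
   feature flag the user specified explicitly on the command line. */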
1853 target_flags &= ~cpu->disable;
1854 target_flags |= (cpu->enable
1855 #ifndef HAVE_AS_FMAF_HPC_VIS3
1856 & ~(MASK_FMAF | MASK_VIS3)
1857 #endif
1858 #ifndef HAVE_AS_SPARC4
1859 & ~MASK_CBCOND
1860 #endif
1861 #ifndef HAVE_AS_SPARC5_VIS4
1862 & ~(MASK_VIS4 | MASK_SUBXC)
1863 #endif
1864 #ifndef HAVE_AS_SPARC6
1865 & ~(MASK_VIS4B)
1866 #endif
1867 #ifndef HAVE_AS_LEON
1868 & ~(MASK_LEON | MASK_LEON3)
1869 #endif
1870 & ~(target_flags_explicit & MASK_FEATURES)
1871 );
1872
1873 /* FsMULd is a V8 instruction. */
1874 if (!TARGET_V8 && !TARGET_V9)
1875 target_flags &= ~MASK_FSMULD;
1876
1877 /* -mvis2 implies -mvis. */
1878 if (TARGET_VIS2)
1879 target_flags |= MASK_VIS;
1880
1881 /* -mvis3 implies -mvis2 and -mvis. */
1882 if (TARGET_VIS3)
1883 target_flags |= MASK_VIS2 | MASK_VIS;
1884
1885 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1886 if (TARGET_VIS4)
1887 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1888
1889 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis. */
1890 if (TARGET_VIS4B)
1891 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1892
1893 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1894 FPU is disabled. */
1895 if (!TARGET_FPU)
1896 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1897 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1898
1899 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1900 are available; -m64 also implies v9. */
1901 if (TARGET_VIS || TARGET_ARCH64)
1902 {
1903 target_flags |= MASK_V9;
1904 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1905 }
1906
1907 /* -mvis also implies -mv8plus on 32-bit. */
1908 if (TARGET_VIS && !TARGET_ARCH64)
1909 target_flags |= MASK_V8PLUS;
1910
1911 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1912 if (TARGET_V9 && TARGET_ARCH32)
1913 target_flags |= MASK_DEPRECATED_V8_INSNS;
1914
1915 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1916 if (!TARGET_V9 || TARGET_ARCH64)
1917 target_flags &= ~MASK_V8PLUS;
1918
1919 /* Don't use stack biasing in 32-bit mode. */
1920 if (TARGET_ARCH32)
1921 target_flags &= ~MASK_STACK_BIAS;
1922
1923 /* Use LRA instead of reload, unless otherwise instructed. */
1924 if (!(target_flags_explicit & MASK_LRA))
1925 target_flags |= MASK_LRA;
1926
1927 /* Enable applicable errata workarounds for LEON3FT. */
1928 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1929 {
1930 sparc_fix_b2bst = 1;
1931 sparc_fix_lost_divsqrt = 1;
1932 }
1933
1934 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1935 if (sparc_fix_ut699)
1936 target_flags &= ~MASK_FSMULD;
1937
1938 /* Supply a default value for align_functions. */
1939 if (flag_align_functions && !str_align_functions)
1940 {
1941 if (sparc_cpu == PROCESSOR_ULTRASPARC
1942 || sparc_cpu == PROCESSOR_ULTRASPARC3
1943 || sparc_cpu == PROCESSOR_NIAGARA
1944 || sparc_cpu == PROCESSOR_NIAGARA2
1945 || sparc_cpu == PROCESSOR_NIAGARA3
1946 || sparc_cpu == PROCESSOR_NIAGARA4)
1947 str_align_functions = "32";
1948 else if (sparc_cpu == PROCESSOR_NIAGARA7
1949 || sparc_cpu == PROCESSOR_M8)
1950 str_align_functions = "64";
1951 }
1952
1953 /* Validate PCC_STRUCT_RETURN. */
1954 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1955 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1956
1957 /* Only use .uaxword when compiling for a 64-bit target. */
1958 if (!TARGET_ARCH64)
1959 targetm.asm_out.unaligned_op.di = NULL;
1960
1961 /* Do various machine dependent initializations. */
1962 sparc_init_modes ();
1963
1964 /* Set up function hooks. */
1965 init_machine_status = sparc_init_machine_status;
1966
1967 switch (sparc_cpu)
1968 {
1969 case PROCESSOR_V7:
1970 case PROCESSOR_CYPRESS:
1971 sparc_costs = &cypress_costs;
1972 break;
1973 case PROCESSOR_V8:
1974 case PROCESSOR_SPARCLITE:
1975 case PROCESSOR_SUPERSPARC:
1976 sparc_costs = &supersparc_costs;
1977 break;
1978 case PROCESSOR_F930:
1979 case PROCESSOR_F934:
1980 case PROCESSOR_HYPERSPARC:
1981 case PROCESSOR_SPARCLITE86X:
1982 sparc_costs = &hypersparc_costs;
1983 break;
1984 case PROCESSOR_LEON:
1985 sparc_costs = &leon_costs;
1986 break;
1987 case PROCESSOR_LEON3:
1988 case PROCESSOR_LEON3V7:
1989 sparc_costs = &leon3_costs;
1990 break;
1991 case PROCESSOR_SPARCLET:
1992 case PROCESSOR_TSC701:
1993 sparc_costs = &sparclet_costs;
1994 break;
1995 case PROCESSOR_V9:
1996 case PROCESSOR_ULTRASPARC:
1997 sparc_costs = &ultrasparc_costs;
1998 break;
1999 case PROCESSOR_ULTRASPARC3:
2000 sparc_costs = &ultrasparc3_costs;
2001 break;
2002 case PROCESSOR_NIAGARA:
2003 sparc_costs = &niagara_costs;
2004 break;
2005 case PROCESSOR_NIAGARA2:
2006 sparc_costs = &niagara2_costs;
2007 break;
2008 case PROCESSOR_NIAGARA3:
2009 sparc_costs = &niagara3_costs;
2010 break;
2011 case PROCESSOR_NIAGARA4:
2012 sparc_costs = &niagara4_costs;
2013 break;
2014 case PROCESSOR_NIAGARA7:
2015 sparc_costs = &niagara7_costs;
2016 break;
2017 case PROCESSOR_M8:
2018 sparc_costs = &m8_costs;
2019 break;
2020 case PROCESSOR_NATIVE:
2021 gcc_unreachable ();
2022 };
2023
2024 if (sparc_memory_model == SMM_DEFAULT)
2025 {
2026 /* Choose the memory model for the operating system. */
2027 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
2028 if (os_default != SMM_DEFAULT)
2029 sparc_memory_model = os_default;
2030 /* Choose the most relaxed model for the processor. */
2031 else if (TARGET_V9)
2032 sparc_memory_model = SMM_RMO;
2033 else if (TARGET_LEON3)
2034 sparc_memory_model = SMM_TSO;
2035 else if (TARGET_LEON)
2036 sparc_memory_model = SMM_SC;
2037 else if (TARGET_V8)
2038 sparc_memory_model = SMM_PSO;
2039 else
2040 sparc_memory_model = SMM_SC;
2041 }
2042
2043 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
2044 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
2045 target_flags |= MASK_LONG_DOUBLE_128;
2046 #endif
2047
2048 if (TARGET_DEBUG_OPTIONS)
2049 dump_target_flags ("Final target_flags", target_flags);
2050
2051 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2052 can run at the same time. More important, it is the threshold
2053 defining when additional prefetches will be dropped by the
2054 hardware.
2055
2056 The UltraSPARC-III features a documented prefetch queue with a
2057 size of 8. Additional prefetches issued in the cpu are
2058 dropped.
2059
2060 Niagara processors are different. In these processors prefetches
2061 are handled much like regular loads. The L1 miss buffer is 32
2062 entries, but prefetches start getting affected when 30 entries
2063 become occupied. That occupation could be a mix of regular loads
2064 and prefetches though. And that buffer is shared by all threads.
2065 Once the threshold is reached, if the core is running a single
2066 thread the prefetch will retry. If more than one thread is
2067 running, the prefetch will be dropped.
2068
2069 All this makes it very difficult to determine how many
2070 prefetches can be issued simultaneously, even in a
2071 single-threaded program. Experimental results show that setting
2072 this parameter to 32 works well when the number of threads is not
2073 high. */
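/* In short: 2 prefetches for UltraSPARC-I/II and Niagara 1 through 4, 8 for
   UltraSPARC-III, 32 for the M7 and M8, and 3 for everything else. */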
2074 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2075 ((sparc_cpu == PROCESSOR_ULTRASPARC
2076 || sparc_cpu == PROCESSOR_NIAGARA
2077 || sparc_cpu == PROCESSOR_NIAGARA2
2078 || sparc_cpu == PROCESSOR_NIAGARA3
2079 || sparc_cpu == PROCESSOR_NIAGARA4)
2080 ? 2
2081 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2082 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2083 || sparc_cpu == PROCESSOR_M8)
2084 ? 32 : 3))),
2085 global_options.x_param_values,
2086 global_options_set.x_param_values);
2087
2088 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2089 bytes.
2090
2091 The Oracle SPARC Architecture (previously the UltraSPARC
2092 Architecture) specification states that when a PREFETCH[A]
2093 instruction is executed an implementation-specific amount of data
2094 is prefetched, and that it is at least 64 bytes long (aligned to
2095 at least 64 bytes).
2096
2097 However, this is not correct. The M7 (and implementations prior
2098 to that) does not guarantee a 64B prefetch into a cache if the
2099 line size is smaller. A single cache line is all that is ever
2100 prefetched. So for the M7, where the L1D$ has 32B lines and the
2101 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2102 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2103 is a read_n prefetch, which is the only type which allocates to
2104 the L1.) */
2105 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2106 (sparc_cpu == PROCESSOR_M8
2107 ? 64 : 32),
2108 global_options.x_param_values,
2109 global_options_set.x_param_values);
2110
2111 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2112 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2113 Niagara processors feature an L1D$ of 16KB. */
2114 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2115 ((sparc_cpu == PROCESSOR_ULTRASPARC
2116 || sparc_cpu == PROCESSOR_ULTRASPARC3
2117 || sparc_cpu == PROCESSOR_NIAGARA
2118 || sparc_cpu == PROCESSOR_NIAGARA2
2119 || sparc_cpu == PROCESSOR_NIAGARA3
2120 || sparc_cpu == PROCESSOR_NIAGARA4
2121 || sparc_cpu == PROCESSOR_NIAGARA7
2122 || sparc_cpu == PROCESSOR_M8)
2123 ? 16 : 64),
2124 global_options.x_param_values,
2125 global_options_set.x_param_values);
2126
2127
2128 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
2129 that 512 is the default in params.def. */
2130 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2131 ((sparc_cpu == PROCESSOR_NIAGARA4
2132 || sparc_cpu == PROCESSOR_M8)
2133 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2134 ? 256 : 512)),
2135 global_options.x_param_values,
2136 global_options_set.x_param_values);
2137
2138
2139 /* Disable save slot sharing for call-clobbered registers by default.
2140 The IRA sharing algorithm works on single registers only and this
2141 pessimizes for double floating-point registers. */
2142 if (!global_options_set.x_flag_ira_share_save_slots)
2143 flag_ira_share_save_slots = 0;
2144
2145 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2146 redundant 32-to-64-bit extensions. */
2147 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2148 flag_ree = 0;
2149 }
2150 \f
2151 /* Miscellaneous utilities. */
2152
2153 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2154 or branch on register contents instructions. */
2155
2156 int
2157 v9_regcmp_p (enum rtx_code code)
2158 {
2159 return (code == EQ || code == NE || code == GE || code == LT
2160 || code == LE || code == GT);
2161 }
2162
2163 /* Nonzero if OP is a floating point constant which can
2164 be loaded into an integer register using a single
2165 sethi instruction. */
2166
2167 int
2168 fp_sethi_p (rtx op)
2169 {
2170 if (GET_CODE (op) == CONST_DOUBLE)
2171 {
2172 long i;
2173
2174 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2175 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2176 }
2177
2178 return 0;
2179 }
2180
2181 /* Nonzero if OP is a floating point constant which can
2182 be loaded into an integer register using a single
2183 mov instruction. */
2184
2185 int
2186 fp_mov_p (rtx op)
2187 {
2188 if (GET_CODE (op) == CONST_DOUBLE)
2189 {
2190 long i;
2191
2192 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2193 return SPARC_SIMM13_P (i);
2194 }
2195
2196 return 0;
2197 }
2198
2199 /* Nonzero if OP is a floating point constant which can
2200 be loaded into an integer register using a high/losum
2201 instruction sequence. */
2202
2203 int
2204 fp_high_losum_p (rtx op)
2205 {
2206 /* The constraints calling this should only be in
2207 SFmode move insns, so any constant which cannot
2208 be moved using a single insn will do. */
2209 if (GET_CODE (op) == CONST_DOUBLE)
2210 {
2211 long i;
2212
2213 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2214 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2215 }
2216
2217 return 0;
2218 }
2219
2220 /* Return true if the address of LABEL can be loaded by means of the
2221 mov{si,di}_pic_label_ref patterns in PIC mode. */
2222
2223 static bool
2224 can_use_mov_pic_label_ref (rtx label)
2225 {
2226 /* VxWorks does not impose a fixed gap between segments; the run-time
2227 gap can be different from the object-file gap. We therefore can't
2228 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2229 are absolutely sure that X is in the same segment as the GOT.
2230 Unfortunately, the flexibility of linker scripts means that we
2231 can't be sure of that in general, so assume that GOT-relative
2232 accesses are never valid on VxWorks. */
2233 if (TARGET_VXWORKS_RTP)
2234 return false;
2235
2236 /* Similarly, if the label is non-local, it might end up being placed
2237 in a different section from the current one, whereas mov_pic_label_ref
2238 requires the label and the code to be in the same section. */
2239 if (LABEL_REF_NONLOCAL_P (label))
2240 return false;
2241
2242 /* Finally, if we are reordering basic blocks and partitioning into hot
2243 and cold sections, this might happen for any label. */
2244 if (flag_reorder_blocks_and_partition)
2245 return false;
2246
2247 return true;
2248 }
2249
2250 /* Expand a move instruction. Return true if all work is done. */
2251
2252 bool
2253 sparc_expand_move (machine_mode mode, rtx *operands)
2254 {
2255 /* Handle sets of MEM first. */
2256 if (GET_CODE (operands[0]) == MEM)
2257 {
2258 /* 0 is a register (or a pair of registers) on SPARC. */
2259 if (register_or_zero_operand (operands[1], mode))
2260 return false;
2261
2262 if (!reload_in_progress)
2263 {
2264 operands[0] = validize_mem (operands[0]);
2265 operands[1] = force_reg (mode, operands[1]);
2266 }
2267 }
2268
2269 /* Fix up TLS cases. */
2270 if (TARGET_HAVE_TLS
2271 && CONSTANT_P (operands[1])
2272 && sparc_tls_referenced_p (operands [1]))
2273 {
2274 operands[1] = sparc_legitimize_tls_address (operands[1]);
2275 return false;
2276 }
2277
2278 /* Fix up PIC cases. */
2279 if (flag_pic && CONSTANT_P (operands[1]))
2280 {
2281 if (pic_address_needs_scratch (operands[1]))
2282 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2283
2284 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2285 if ((GET_CODE (operands[1]) == LABEL_REF
2286 && can_use_mov_pic_label_ref (operands[1]))
2287 || (GET_CODE (operands[1]) == CONST
2288 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2289 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2290 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2291 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2292 {
2293 if (mode == SImode)
2294 {
2295 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2296 return true;
2297 }
2298
2299 if (mode == DImode)
2300 {
2301 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2302 return true;
2303 }
2304 }
2305
2306 if (symbolic_operand (operands[1], mode))
2307 {
2308 operands[1]
2309 = sparc_legitimize_pic_address (operands[1],
2310 reload_in_progress
2311 ? operands[0] : NULL_RTX);
2312 return false;
2313 }
2314 }
2315
2316 /* If we are trying to toss an integer constant into FP registers,
2317 or loading an FP or vector constant, force it into memory. */
2318 if (CONSTANT_P (operands[1])
2319 && REG_P (operands[0])
2320 && (SPARC_FP_REG_P (REGNO (operands[0]))
2321 || SCALAR_FLOAT_MODE_P (mode)
2322 || VECTOR_MODE_P (mode)))
2323 {
2324 /* emit_group_store will send such bogosity to us when it is
2325 not storing directly into memory. So fix this up to avoid
2326 crashes in output_constant_pool. */
2327 if (operands [1] == const0_rtx)
2328 operands[1] = CONST0_RTX (mode);
2329
2330 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
2331 and can always do so for other regs. */
2332 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2333 && (const_zero_operand (operands[1], mode)
2334 || const_all_ones_operand (operands[1], mode)))
2335 return false;
2336
2337 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2338 /* We are able to build any SF constant in integer registers
2339 with at most 2 instructions. */
2340 && (mode == SFmode
2341 /* And any DF constant in integer registers if needed. */
2342 || (mode == DFmode && !can_create_pseudo_p ())))
2343 return false;
2344
2345 operands[1] = force_const_mem (mode, operands[1]);
2346 if (!reload_in_progress)
2347 operands[1] = validize_mem (operands[1]);
2348 return false;
2349 }
2350
2351 /* Accept non-constants and valid constants unmodified. */
2352 if (!CONSTANT_P (operands[1])
2353 || GET_CODE (operands[1]) == HIGH
2354 || input_operand (operands[1], mode))
2355 return false;
2356
2357 switch (mode)
2358 {
2359 case E_QImode:
2360 /* All QImode constants require only one insn, so proceed. */
2361 break;
2362
2363 case E_HImode:
2364 case E_SImode:
2365 sparc_emit_set_const32 (operands[0], operands[1]);
2366 return true;
2367
2368 case E_DImode:
2369 /* input_operand should have filtered out 32-bit mode. */
2370 sparc_emit_set_const64 (operands[0], operands[1]);
2371 return true;
2372
2373 case E_TImode:
2374 {
2375 rtx high, low;
2376 /* TImode isn't available in 32-bit mode. */
2377 split_double (operands[1], &high, &low);
2378 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2379 high));
2380 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2381 low));
2382 }
2383 return true;
2384
2385 default:
2386 gcc_unreachable ();
2387 }
2388
2389 return false;
2390 }
2391
2392 /* Load OP1, a 32-bit constant, into OP0, a register.
2393 We know it can't be done in one insn when we get
2394 here; the move expander guarantees this. */
2395
2396 static void
2397 sparc_emit_set_const32 (rtx op0, rtx op1)
2398 {
2399 machine_mode mode = GET_MODE (op0);
2400 rtx temp = op0;
2401
2402 if (can_create_pseudo_p ())
2403 temp = gen_reg_rtx (mode);
2404
2405 if (GET_CODE (op1) == CONST_INT)
2406 {
2407 gcc_assert (!small_int_operand (op1, mode)
2408 && !const_high_operand (op1, mode));
2409
2410 /* Emit them as real moves instead of a HIGH/LO_SUM,
2411 this way CSE can see everything and reuse intermediate
2412 values if it wants. */
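/* For instance, 0x12345678 is built as temp = 0x12345400 followed by
   op0 = temp | 0x278. */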
2413 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2414 & ~(HOST_WIDE_INT) 0x3ff)));
2415
2416 emit_insn (gen_rtx_SET (op0,
2417 gen_rtx_IOR (mode, temp,
2418 GEN_INT (INTVAL (op1) & 0x3ff))));
2419 }
2420 else
2421 {
2422 /* A symbol, emit in the traditional way. */
2423 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2424 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2425 }
2426 }
2427
2428 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2429 If TEMP is nonzero, we are forbidden to use any other scratch
2430 registers. Otherwise, we are allowed to generate them as needed.
2431
2432 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2433 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2434
2435 void
2436 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2437 {
2438 rtx cst, temp1, temp2, temp3, temp4, temp5;
2439 rtx ti_temp = 0;
2440
2441 /* Deal with too large offsets. */
2442 if (GET_CODE (op1) == CONST
2443 && GET_CODE (XEXP (op1, 0)) == PLUS
2444 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2445 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2446 {
2447 gcc_assert (!temp);
2448 temp1 = gen_reg_rtx (DImode);
2449 temp2 = gen_reg_rtx (DImode);
2450 sparc_emit_set_const64 (temp2, cst);
2451 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2452 NULL_RTX);
2453 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2454 return;
2455 }
2456
2457 if (temp && GET_MODE (temp) == TImode)
2458 {
2459 ti_temp = temp;
2460 temp = gen_rtx_REG (DImode, REGNO (temp));
2461 }
2462
2463 /* SPARC-V9 code-model support. */
2464 switch (sparc_cmodel)
2465 {
2466 case CM_MEDLOW:
2467 /* The range spanned by all instructions in the object is less
2468 than 2^31 bytes (2GB) and the distance from any instruction
2469 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2470 than 2^31 bytes (2GB).
2471
2472 The executable must be in the low 4TB of the virtual address
2473 space.
2474
2475 sethi %hi(symbol), %temp1
2476 or %temp1, %lo(symbol), %reg */
2477 if (temp)
2478 temp1 = temp; /* op0 is allowed. */
2479 else
2480 temp1 = gen_reg_rtx (DImode);
2481
2482 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2483 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2484 break;
2485
2486 case CM_MEDMID:
2487 /* The range spanned by all instructions in the object is less
2488 than 2^31 bytes (2GB) and the distance from any instruction
2489 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2490 than 2^31 bytes (2GB).
2491
2492 The executable must be in the low 16TB of the virtual address
2493 space.
2494
2495 sethi %h44(symbol), %temp1
2496 or %temp1, %m44(symbol), %temp2
2497 sllx %temp2, 12, %temp3
2498 or %temp3, %l44(symbol), %reg */
2499 if (temp)
2500 {
2501 temp1 = op0;
2502 temp2 = op0;
2503 temp3 = temp; /* op0 is allowed. */
2504 }
2505 else
2506 {
2507 temp1 = gen_reg_rtx (DImode);
2508 temp2 = gen_reg_rtx (DImode);
2509 temp3 = gen_reg_rtx (DImode);
2510 }
2511
2512 emit_insn (gen_seth44 (temp1, op1));
2513 emit_insn (gen_setm44 (temp2, temp1, op1));
2514 emit_insn (gen_rtx_SET (temp3,
2515 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2516 emit_insn (gen_setl44 (op0, temp3, op1));
2517 break;
2518
2519 case CM_MEDANY:
2520 /* The range spanned by all instructions in the object is less
2521 than 2^31 bytes (2GB) and the distance from any instruction
2522 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2523 than 2^31 bytes (2GB).
2524
2525 The executable can be placed anywhere in the virtual address
2526 space.
2527
2528 sethi %hh(symbol), %temp1
2529 sethi %lm(symbol), %temp2
2530 or %temp1, %hm(symbol), %temp3
2531 sllx %temp3, 32, %temp4
2532 or %temp4, %temp2, %temp5
2533 or %temp5, %lo(symbol), %reg */
2534 if (temp)
2535 {
2536 /* It is possible that one of the registers we got for operands[2]
2537 might coincide with that of operands[0] (which is why we made
2538 it TImode). Pick the other one to use as our scratch. */
2539 if (rtx_equal_p (temp, op0))
2540 {
2541 gcc_assert (ti_temp);
2542 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2543 }
2544 temp1 = op0;
2545 temp2 = temp; /* op0 is _not_ allowed, see above. */
2546 temp3 = op0;
2547 temp4 = op0;
2548 temp5 = op0;
2549 }
2550 else
2551 {
2552 temp1 = gen_reg_rtx (DImode);
2553 temp2 = gen_reg_rtx (DImode);
2554 temp3 = gen_reg_rtx (DImode);
2555 temp4 = gen_reg_rtx (DImode);
2556 temp5 = gen_reg_rtx (DImode);
2557 }
2558
2559 emit_insn (gen_sethh (temp1, op1));
2560 emit_insn (gen_setlm (temp2, op1));
2561 emit_insn (gen_sethm (temp3, temp1, op1));
2562 emit_insn (gen_rtx_SET (temp4,
2563 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2564 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2565 emit_insn (gen_setlo (op0, temp5, op1));
2566 break;
2567
2568 case CM_EMBMEDANY:
2569 /* Old old old backwards compatibility cruft here.
2570 Essentially it is MEDLOW with a fixed 64-bit
2571 virtual base added to all data segment addresses.
2572 Text-segment stuff is computed like MEDANY, we can't
2573 reuse the code above because the relocation knobs
2574 look different.
2575
2576 Data segment: sethi %hi(symbol), %temp1
2577 add %temp1, EMBMEDANY_BASE_REG, %temp2
2578 or %temp2, %lo(symbol), %reg */
2579 if (data_segment_operand (op1, GET_MODE (op1)))
2580 {
2581 if (temp)
2582 {
2583 temp1 = temp; /* op0 is allowed. */
2584 temp2 = op0;
2585 }
2586 else
2587 {
2588 temp1 = gen_reg_rtx (DImode);
2589 temp2 = gen_reg_rtx (DImode);
2590 }
2591
2592 emit_insn (gen_embmedany_sethi (temp1, op1));
2593 emit_insn (gen_embmedany_brsum (temp2, temp1));
2594 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2595 }
2596
2597 /* Text segment: sethi %uhi(symbol), %temp1
2598 sethi %hi(symbol), %temp2
2599 or %temp1, %ulo(symbol), %temp3
2600 sllx %temp3, 32, %temp4
2601 or %temp4, %temp2, %temp5
2602 or %temp5, %lo(symbol), %reg */
2603 else
2604 {
2605 if (temp)
2606 {
2607 /* It is possible that one of the registers we got for operands[2]
2608 might coincide with that of operands[0] (which is why we made
2609 it TImode). Pick the other one to use as our scratch. */
2610 if (rtx_equal_p (temp, op0))
2611 {
2612 gcc_assert (ti_temp);
2613 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2614 }
2615 temp1 = op0;
2616 temp2 = temp; /* op0 is _not_ allowed, see above. */
2617 temp3 = op0;
2618 temp4 = op0;
2619 temp5 = op0;
2620 }
2621 else
2622 {
2623 temp1 = gen_reg_rtx (DImode);
2624 temp2 = gen_reg_rtx (DImode);
2625 temp3 = gen_reg_rtx (DImode);
2626 temp4 = gen_reg_rtx (DImode);
2627 temp5 = gen_reg_rtx (DImode);
2628 }
2629
2630 emit_insn (gen_embmedany_textuhi (temp1, op1));
2631 emit_insn (gen_embmedany_texthi (temp2, op1));
2632 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2633 emit_insn (gen_rtx_SET (temp4,
2634 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2635 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2636 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2637 }
2638 break;
2639
2640 default:
2641 gcc_unreachable ();
2642 }
2643 }
2644
2645 /* These avoid problems when cross compiling. If we do not
2646 go through all this hair then the optimizer will see
2647 invalid REG_EQUAL notes or in some cases none at all. */
2648 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2649 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2650 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2651 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2652
2653 /* The optimizer must not assume anything about exactly
2654 which bits are set for a HIGH; they are unspecified.
2655 Unfortunately this leads to many missed optimizations
2656 during CSE. We mask out the non-HIGH bits and match
2657 a plain movdi to alleviate this problem. */
2658 static rtx
2659 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2660 {
2661 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2662 }
2663
2664 static rtx
2665 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2666 {
2667 return gen_rtx_SET (dest, GEN_INT (val));
2668 }
2669
2670 static rtx
2671 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2672 {
2673 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2674 }
2675
2676 static rtx
2677 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2678 {
2679 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2680 }
2681
2682 /* Worker routines for 64-bit constant formation on arch64.
2683 A key goal in these emissions is to create as many
2684 temp REGs as possible. This makes it possible for
2685 half-built constants to be reused when similar
2686 values are needed later on.
2687 Without doing this, the optimizer cannot see such
2688 opportunities. */
2689
2690 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2691 unsigned HOST_WIDE_INT, int);
2692
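/* Load a 64-bit constant whose upper 32 bits are all zeros (IS_NEG == 0) or
   all ones (IS_NEG != 0). The positive case is a plain sethi/or. In the
   negative case TEMP receives ~LOW_BITS with its low 10 bits cleared, and
   XOR'ing that with the sign-extended immediate -0x400 | (LOW_BITS & 0x3ff)
   flips bits 10..63 and fills in the low 10 bits, yielding the sign-extended
   result. */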
2693 static void
2694 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2695 unsigned HOST_WIDE_INT low_bits, int is_neg)
2696 {
2697 unsigned HOST_WIDE_INT high_bits;
2698
2699 if (is_neg)
2700 high_bits = (~low_bits) & 0xffffffff;
2701 else
2702 high_bits = low_bits;
2703
2704 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2705 if (!is_neg)
2706 {
2707 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2708 }
2709 else
2710 {
2711 /* If we are XOR'ing with -1, then we should emit a one's complement
2712 instead. This way the combiner will notice logical operations
2713 such as ANDN later on and substitute. */
2714 if ((low_bits & 0x3ff) == 0x3ff)
2715 {
2716 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2717 }
2718 else
2719 {
2720 emit_insn (gen_rtx_SET (op0,
2721 gen_safe_XOR64 (temp,
2722 (-(HOST_WIDE_INT)0x400
2723 | (low_bits & 0x3ff)))));
2724 }
2725 }
2726 }
2727
2728 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2729 unsigned HOST_WIDE_INT, int);
2730
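/* Load (HIGH_BITS << SHIFT_COUNT) | LOW_IMMEDIATE into OP0. HIGH_BITS is
   built with sethi/or (or a single mov when it fits in 10 bits), shifted
   into place, and LOW_IMMEDIATE is OR'd in last if nonzero; roughly:

   sethi %hi(high_bits), %temp
   or %temp, %lo(high_bits), %op0
   sllx %op0, shift_count, %op0
   or %op0, low_immediate, %op0 */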
2731 static void
2732 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2733 unsigned HOST_WIDE_INT high_bits,
2734 unsigned HOST_WIDE_INT low_immediate,
2735 int shift_count)
2736 {
2737 rtx temp2 = op0;
2738
2739 if ((high_bits & 0xfffffc00) != 0)
2740 {
2741 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2742 if ((high_bits & ~0xfffffc00) != 0)
2743 emit_insn (gen_rtx_SET (op0,
2744 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2745 else
2746 temp2 = temp;
2747 }
2748 else
2749 {
2750 emit_insn (gen_safe_SET64 (temp, high_bits));
2751 temp2 = temp;
2752 }
2753
2754 /* Now shift it up into place. */
2755 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2756 GEN_INT (shift_count))));
2757
2758 /* If there is a low immediate part piece, finish up by
2759 putting that in as well. */
2760 if (low_immediate != 0)
2761 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2762 }
2763
2764 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2765 unsigned HOST_WIDE_INT);
2766
2767 /* Full 64-bit constant decomposition. Even though this is the
2768 'worst' case, we still optimize a few things away. */
2769 static void
2770 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2771 unsigned HOST_WIDE_INT high_bits,
2772 unsigned HOST_WIDE_INT low_bits)
2773 {
2774 rtx sub_temp = op0;
2775
2776 if (can_create_pseudo_p ())
2777 sub_temp = gen_reg_rtx (DImode);
2778
2779 if ((high_bits & 0xfffffc00) != 0)
2780 {
2781 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2782 if ((high_bits & ~0xfffffc00) != 0)
2783 emit_insn (gen_rtx_SET (sub_temp,
2784 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2785 else
2786 sub_temp = temp;
2787 }
2788 else
2789 {
2790 emit_insn (gen_safe_SET64 (temp, high_bits));
2791 sub_temp = temp;
2792 }
2793
2794 if (can_create_pseudo_p ())
2795 {
2796 rtx temp2 = gen_reg_rtx (DImode);
2797 rtx temp3 = gen_reg_rtx (DImode);
2798 rtx temp4 = gen_reg_rtx (DImode);
2799
2800 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2801 GEN_INT (32))));
2802
2803 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2804 if ((low_bits & ~0xfffffc00) != 0)
2805 {
2806 emit_insn (gen_rtx_SET (temp3,
2807 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2808 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2809 }
2810 else
2811 {
2812 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2813 }
2814 }
2815 else
2816 {
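/* Build the low 32 bits from three immediate chunks of 12, 12 and 8 bits
   (low1 = bits 20..31, low2 = bits 8..19, low3 = bits 0..7), shifting the
   partial result left as we go. */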
2817 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2818 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2819 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2820 int to_shift = 12;
2821
2822 /* We are in the middle of reload, so this is really
2823 painful. However we do still make an attempt to
2824 avoid emitting truly stupid code. */
2825 if (low1 != const0_rtx)
2826 {
2827 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2828 GEN_INT (to_shift))));
2829 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2830 sub_temp = op0;
2831 to_shift = 12;
2832 }
2833 else
2834 {
2835 to_shift += 12;
2836 }
2837 if (low2 != const0_rtx)
2838 {
2839 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2840 GEN_INT (to_shift))));
2841 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2842 sub_temp = op0;
2843 to_shift = 8;
2844 }
2845 else
2846 {
2847 to_shift += 8;
2848 }
2849 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2850 GEN_INT (to_shift))));
2851 if (low3 != const0_rtx)
2852 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2853 /* phew... */
2854 }
2855 }
2856
2857 /* Analyze a 64-bit constant: find its lowest and highest set bits and whether all bits in between are set. */
2858 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2859 unsigned HOST_WIDE_INT,
2860 int *, int *, int *);
2861
2862 static void
2863 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2864 unsigned HOST_WIDE_INT low_bits,
2865 int *hbsp, int *lbsp, int *abbasp)
2866 {
2867 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2868 int i;
2869
2870 lowest_bit_set = highest_bit_set = -1;
2871 i = 0;
2872 do
2873 {
2874 if ((lowest_bit_set == -1)
2875 && ((low_bits >> i) & 1))
2876 lowest_bit_set = i;
2877 if ((highest_bit_set == -1)
2878 && ((high_bits >> (32 - i - 1)) & 1))
2879 highest_bit_set = (64 - i - 1);
2880 }
2881 while (++i < 32
2882 && ((highest_bit_set == -1)
2883 || (lowest_bit_set == -1)));
2884 if (i == 32)
2885 {
2886 i = 0;
2887 do
2888 {
2889 if ((lowest_bit_set == -1)
2890 && ((high_bits >> i) & 1))
2891 lowest_bit_set = i + 32;
2892 if ((highest_bit_set == -1)
2893 && ((low_bits >> (32 - i - 1)) & 1))
2894 highest_bit_set = 32 - i - 1;
2895 }
2896 while (++i < 32
2897 && ((highest_bit_set == -1)
2898 || (lowest_bit_set == -1)));
2899 }
2900 /* If there are no bits set, this should have gone out
2901 as one instruction! */
2902 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2903 all_bits_between_are_set = 1;
2904 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2905 {
2906 if (i < 32)
2907 {
2908 if ((low_bits & (1 << i)) != 0)
2909 continue;
2910 }
2911 else
2912 {
2913 if ((high_bits & (1 << (i - 32))) != 0)
2914 continue;
2915 }
2916 all_bits_between_are_set = 0;
2917 break;
2918 }
2919 *hbsp = highest_bit_set;
2920 *lbsp = lowest_bit_set;
2921 *abbasp = all_bits_between_are_set;
2922 }
2923
2924 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2925
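/* Return nonzero if the 64-bit constant {HIGH_BITS, LOW_BITS} can be loaded
   in at most two instructions: its upper half is all zeros or all ones, its
   set bits form a contiguous run touching bit 0 or bit 63, or the set bits
   span a range narrow enough for a single sethi plus one shift. */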
2926 static int
2927 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2928 unsigned HOST_WIDE_INT low_bits)
2929 {
2930 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2931
2932 if (high_bits == 0
2933 || high_bits == 0xffffffff)
2934 return 1;
2935
2936 analyze_64bit_constant (high_bits, low_bits,
2937 &highest_bit_set, &lowest_bit_set,
2938 &all_bits_between_are_set);
2939
2940 if ((highest_bit_set == 63
2941 || lowest_bit_set == 0)
2942 && all_bits_between_are_set != 0)
2943 return 1;
2944
2945 if ((highest_bit_set - lowest_bit_set) < 21)
2946 return 1;
2947
2948 return 0;
2949 }
2950
2951 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2952 unsigned HOST_WIDE_INT,
2953 int, int);
2954
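/* Shift the set bits of the 64-bit constant {HIGH_BITS, LOW_BITS} down from
   LOWEST_BIT_SET to bit position SHIFT and return the result, so that the
   "focused" value can be materialized directly and then shifted back into
   its final position. */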
2955 static unsigned HOST_WIDE_INT
2956 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2957 unsigned HOST_WIDE_INT low_bits,
2958 int lowest_bit_set, int shift)
2959 {
2960 HOST_WIDE_INT hi, lo;
2961
2962 if (lowest_bit_set < 32)
2963 {
2964 lo = (low_bits >> lowest_bit_set) << shift;
2965 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2966 }
2967 else
2968 {
2969 lo = 0;
2970 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2971 }
2972 gcc_assert (! (hi & lo));
2973 return (hi | lo);
2974 }
2975
2976 /* Here we are sure to be arch64 and this is an integer constant
2977 being loaded into a register. Emit the most efficient
2978 insn sequence possible. Detection of all the 1-insn cases
2979 has been done already. */
2980 static void
2981 sparc_emit_set_const64 (rtx op0, rtx op1)
2982 {
2983 unsigned HOST_WIDE_INT high_bits, low_bits;
2984 int lowest_bit_set, highest_bit_set;
2985 int all_bits_between_are_set;
2986 rtx temp = 0;
2987
2988 /* Sanity check that we know what we are working with. */
2989 gcc_assert (TARGET_ARCH64
2990 && (GET_CODE (op0) == SUBREG
2991 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2992
2993 if (! can_create_pseudo_p ())
2994 temp = op0;
2995
2996 if (GET_CODE (op1) != CONST_INT)
2997 {
2998 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2999 return;
3000 }
3001
3002 if (! temp)
3003 temp = gen_reg_rtx (DImode);
3004
3005 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3006 low_bits = (INTVAL (op1) & 0xffffffff);
3007
3008 /* low_bits bits 0 --> 31
3009 high_bits bits 32 --> 63 */
3010
3011 analyze_64bit_constant (high_bits, low_bits,
3012 &highest_bit_set, &lowest_bit_set,
3013 &all_bits_between_are_set);
3014
3015 /* First try for a 2-insn sequence. */
3016
3017 /* These situations are preferred because the optimizer can
3018 * do more things with them:
3019 * 1) mov -1, %reg
3020 * sllx %reg, shift, %reg
3021 * 2) mov -1, %reg
3022 * srlx %reg, shift, %reg
3023 * 3) mov some_small_const, %reg
3024 * sllx %reg, shift, %reg
3025 */
3026 if (((highest_bit_set == 63
3027 || lowest_bit_set == 0)
3028 && all_bits_between_are_set != 0)
3029 || ((highest_bit_set - lowest_bit_set) < 12))
3030 {
3031 HOST_WIDE_INT the_const = -1;
3032 int shift = lowest_bit_set;
3033
3034 if ((highest_bit_set != 63
3035 && lowest_bit_set != 0)
3036 || all_bits_between_are_set == 0)
3037 {
3038 the_const =
3039 create_simple_focus_bits (high_bits, low_bits,
3040 lowest_bit_set, 0);
3041 }
3042 else if (lowest_bit_set == 0)
3043 shift = -(63 - highest_bit_set);
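/* A negative shift selects the logical right shift (LSHIFTRT) form
   emitted below. */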
3044
3045 gcc_assert (SPARC_SIMM13_P (the_const));
3046 gcc_assert (shift != 0);
3047
3048 emit_insn (gen_safe_SET64 (temp, the_const));
3049 if (shift > 0)
3050 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3051 GEN_INT (shift))));
3052 else if (shift < 0)
3053 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3054 GEN_INT (-shift))));
3055 return;
3056 }
3057
3058 /* Now a range of 22 or fewer bits set somewhere.
3059 * 1) sethi %hi(focus_bits), %reg
3060 * sllx %reg, shift, %reg
3061 * 2) sethi %hi(focus_bits), %reg
3062 * srlx %reg, shift, %reg
3063 */
3064 if ((highest_bit_set - lowest_bit_set) < 21)
3065 {
3066 unsigned HOST_WIDE_INT focus_bits =
3067 create_simple_focus_bits (high_bits, low_bits,
3068 lowest_bit_set, 10);
3069
3070 gcc_assert (SPARC_SETHI_P (focus_bits));
3071 gcc_assert (lowest_bit_set != 10);
3072
3073 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3074
3075 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3076 if (lowest_bit_set < 10)
3077 emit_insn (gen_rtx_SET (op0,
3078 gen_rtx_LSHIFTRT (DImode, temp,
3079 GEN_INT (10 - lowest_bit_set))));
3080 else if (lowest_bit_set > 10)
3081 emit_insn (gen_rtx_SET (op0,
3082 gen_rtx_ASHIFT (DImode, temp,
3083 GEN_INT (lowest_bit_set - 10))));
3084 return;
3085 }
3086
3087 /* 1) sethi %hi(low_bits), %reg
3088 * or %reg, %lo(low_bits), %reg
3089 * 2) sethi %hi(~low_bits), %reg
3090 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3091 */
3092 if (high_bits == 0
3093 || high_bits == 0xffffffff)
3094 {
3095 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3096 (high_bits == 0xffffffff));
3097 return;
3098 }
3099
3100 /* Now, try 3-insn sequences. */
3101
3102 /* 1) sethi %hi(high_bits), %reg
3103 * or %reg, %lo(high_bits), %reg
3104 * sllx %reg, 32, %reg
3105 */
3106 if (low_bits == 0)
3107 {
3108 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3109 return;
3110 }
3111
3112 /* We may be able to do something quick
3113 when the constant is negated, so try that. */
3114 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3115 (~low_bits) & 0xfffffc00))
3116 {
3117 /* NOTE: The trailing bits get XOR'd so we need the
3118 non-negated bits, not the negated ones. */
3119 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3120
3121 if ((((~high_bits) & 0xffffffff) == 0
3122 && ((~low_bits) & 0x80000000) == 0)
3123 || (((~high_bits) & 0xffffffff) == 0xffffffff
3124 && ((~low_bits) & 0x80000000) != 0))
3125 {
3126 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3127
3128 if ((SPARC_SETHI_P (fast_int)
3129 && (~high_bits & 0xffffffff) == 0)
3130 || SPARC_SIMM13_P (fast_int))
3131 emit_insn (gen_safe_SET64 (temp, fast_int));
3132 else
3133 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3134 }
3135 else
3136 {
3137 rtx negated_const;
3138 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3139 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3140 sparc_emit_set_const64 (temp, negated_const);
3141 }
3142
3143 /* If we are XOR'ing with -1, then we should emit a one's complement
3144 instead. This way the combiner will notice logical operations
3145 such as ANDN later on and substitute. */
3146 if (trailing_bits == 0x3ff)
3147 {
3148 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3149 }
3150 else
3151 {
3152 emit_insn (gen_rtx_SET (op0,
3153 gen_safe_XOR64 (temp,
3154 (-0x400 | trailing_bits))));
3155 }
3156 return;
3157 }
3158
3159 /* 1) sethi %hi(xxx), %reg
3160 * or %reg, %lo(xxx), %reg
3161 * sllx %reg, yyy, %reg
3162 *
3163 * ??? This is just a generalized version of the low_bits==0
3164 * thing above, FIXME...
3165 */
3166 if ((highest_bit_set - lowest_bit_set) < 32)
3167 {
3168 unsigned HOST_WIDE_INT focus_bits =
3169 create_simple_focus_bits (high_bits, low_bits,
3170 lowest_bit_set, 0);
3171
3172 /* If the set bits were confined to one 32-bit half, an earlier case would have handled the constant. */
3173 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3174
3175 /* So what we know is that the set bits straddle the
3176 middle of the 64-bit word. */
3177 sparc_emit_set_const64_quick2 (op0, temp,
3178 focus_bits, 0,
3179 lowest_bit_set);
3180 return;
3181 }
3182
3183 /* 1) sethi %hi(high_bits), %reg
3184 * or %reg, %lo(high_bits), %reg
3185 * sllx %reg, 32, %reg
3186 * or %reg, low_bits, %reg
3187 */
3188 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3189 {
3190 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3191 return;
3192 }
3193
3194 /* When all else fails, the easiest way is full decomposition. */
3195 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3196 }
3197
3198 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3199
3200 static bool
3201 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3202 {
3203 *p1 = SPARC_ICC_REG;
3204 *p2 = SPARC_FCC_REG;
3205 return true;
3206 }
3207
3208 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3209
3210 static unsigned int
3211 sparc_min_arithmetic_precision (void)
3212 {
3213 return 32;
3214 }
3215
3216 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3217 return the mode to be used for the comparison. For floating-point,
3218 CCFP[E]mode is used. CCNZmode should be used when the first operand
3219 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3220 processing is needed. */
3221
3222 machine_mode
3223 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3224 {
3225 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3226 {
3227 switch (op)
3228 {
3229 case EQ:
3230 case NE:
3231 case UNORDERED:
3232 case ORDERED:
3233 case UNLT:
3234 case UNLE:
3235 case UNGT:
3236 case UNGE:
3237 case UNEQ:
3238 case LTGT:
3239 return CCFPmode;
3240
3241 case LT:
3242 case LE:
3243 case GT:
3244 case GE:
3245 return CCFPEmode;
3246
3247 default:
3248 gcc_unreachable ();
3249 }
3250 }
3251 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3252 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3253 && y == const0_rtx)
3254 {
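/* Only the N and Z flags set by these operations agree with a comparison
   of the result against zero, hence the NZ modes. */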
3255 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3256 return CCXNZmode;
3257 else
3258 return CCNZmode;
3259 }
3260 else
3261 {
3262 /* This is for the cmp<mode>_sne pattern. */
3263 if (GET_CODE (x) == NOT && y == constm1_rtx)
3264 {
3265 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3266 return CCXCmode;
3267 else
3268 return CCCmode;
3269 }
3270
3271 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3272 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3273 {
3274 if (GET_CODE (y) == UNSPEC
3275 && (XINT (y, 1) == UNSPEC_ADDV
3276 || XINT (y, 1) == UNSPEC_SUBV
3277 || XINT (y, 1) == UNSPEC_NEGV))
3278 return CCVmode;
3279 else
3280 return CCCmode;
3281 }
3282
3283 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3284 return CCXmode;
3285 else
3286 return CCmode;
3287 }
3288 }
3289
3290 /* Emit the compare insn and return the CC reg for a CODE comparison
3291 with operands X and Y. */
3292
3293 static rtx
3294 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3295 {
3296 machine_mode mode;
3297 rtx cc_reg;
3298
3299 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3300 return x;
3301
3302 mode = SELECT_CC_MODE (code, x, y);
3303
3304 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3305 fcc regs (cse can't tell they're really call clobbered regs and will
3306 remove a duplicate comparison even if there is an intervening function
3307 call - it will then try to reload the cc reg via an int reg which is why
3308 we need the movcc patterns). It is possible to provide the movcc
3309 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3310 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3311 to tell cse that CCFPE mode registers (even pseudos) are call
3312 clobbered. */
3313
3314 /* ??? This is an experiment. Rather than making changes to cse which may
3315 or may not be easy/clean, we do our own cse. This is possible because
3316 we will generate hard registers. Cse knows they're call clobbered (it
3317 doesn't know the same thing about pseudos). If we guess wrong, no big
3318 deal, but if we win, great! */
3319
3320 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3321 #if 1 /* experiment */
3322 {
3323 int reg;
3324 /* We cycle through the registers to ensure they're all exercised. */
3325 static int next_fcc_reg = 0;
3326 /* Previous x,y for each fcc reg. */
3327 static rtx prev_args[4][2];
3328
3329 /* Scan prev_args for x,y. */
3330 for (reg = 0; reg < 4; reg++)
3331 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3332 break;
3333 if (reg == 4)
3334 {
3335 reg = next_fcc_reg;
3336 prev_args[reg][0] = x;
3337 prev_args[reg][1] = y;
3338 next_fcc_reg = (next_fcc_reg + 1) & 3;
3339 }
3340 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3341 }
3342 #else
3343 cc_reg = gen_reg_rtx (mode);
3344 #endif /* ! experiment */
3345 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3346 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3347 else
3348 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3349
3350 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3351 will only result in an unrecognizable insn so no point in asserting. */
3352 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3353
3354 return cc_reg;
3355 }
3356
3357
3358 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3359
3360 rtx
3361 gen_compare_reg (rtx cmp)
3362 {
3363 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3364 }
3365
3366 /* This function is used for v9 only.
3367 DEST is the target of the Scc insn.
3368 CODE is the code for an Scc's comparison.
3369 X and Y are the values we compare.
3370
3371 This function is needed to turn
3372
3373 (set (reg:SI 110)
3374 (gt (reg:CCX 100 %icc)
3375 (const_int 0)))
3376 into
3377 (set (reg:SI 110)
3378 (gt:DI (reg:CCX 100 %icc)
3379 (const_int 0)))
3380
3381 I.e., the instruction recognizer needs to see the mode of the comparison to
3382 find the right instruction. We could use "gt:DI" right in the
3383 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3384
3385 static int
3386 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3387 {
3388 if (! TARGET_ARCH64
3389 && (GET_MODE (x) == DImode
3390 || GET_MODE (dest) == DImode))
3391 return 0;
3392
3393 /* Try to use the movrCC insns. */
3394 if (TARGET_ARCH64
3395 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3396 && y == const0_rtx
3397 && v9_regcmp_p (compare_code))
3398 {
3399 rtx op0 = x;
3400 rtx temp;
3401
3402 /* Special case for op0 != 0. This can be done with one instruction if
3403 dest == x. */
3404
3405 if (compare_code == NE
3406 && GET_MODE (dest) == DImode
3407 && rtx_equal_p (op0, dest))
3408 {
3409 emit_insn (gen_rtx_SET (dest,
3410 gen_rtx_IF_THEN_ELSE (DImode,
3411 gen_rtx_fmt_ee (compare_code, DImode,
3412 op0, const0_rtx),
3413 const1_rtx,
3414 dest)));
3415 return 1;
3416 }
3417
3418 if (reg_overlap_mentioned_p (dest, op0))
3419 {
3420 /* Handle the case where dest == x.
3421 We "early clobber" the result. */
3422 op0 = gen_reg_rtx (GET_MODE (x));
3423 emit_move_insn (op0, x);
3424 }
3425
3426 emit_insn (gen_rtx_SET (dest, const0_rtx));
3427 if (GET_MODE (op0) != DImode)
3428 {
3429 temp = gen_reg_rtx (DImode);
3430 convert_move (temp, op0, 0);
3431 }
3432 else
3433 temp = op0;
3434 emit_insn (gen_rtx_SET (dest,
3435 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3436 gen_rtx_fmt_ee (compare_code, DImode,
3437 temp, const0_rtx),
3438 const1_rtx,
3439 dest)));
3440 return 1;
3441 }
3442 else
3443 {
3444 x = gen_compare_reg_1 (compare_code, x, y);
3445 y = const0_rtx;
3446
3447 emit_insn (gen_rtx_SET (dest, const0_rtx));
3448 emit_insn (gen_rtx_SET (dest,
3449 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3450 gen_rtx_fmt_ee (compare_code,
3451 GET_MODE (x), x, y),
3452 const1_rtx, dest)));
3453 return 1;
3454 }
3455 }
3456
3457
3458 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3459 without jumps using the addx/subx instructions. */
3460
3461 bool
3462 emit_scc_insn (rtx operands[])
3463 {
3464 rtx tem, x, y;
3465 enum rtx_code code;
3466 machine_mode mode;
3467
3468 /* The quad-word fp compare library routines all return nonzero to indicate
3469 true, which is different from the equivalent libgcc routines, so we must
3470 handle them specially here. */
3471 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3472 {
3473 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3474 GET_CODE (operands[1]));
3475 operands[2] = XEXP (operands[1], 0);
3476 operands[3] = XEXP (operands[1], 1);
3477 }
3478
3479 code = GET_CODE (operands[1]);
3480 x = operands[2];
3481 y = operands[3];
3482 mode = GET_MODE (x);
3483
3484 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3485 more applications). The exception to this is "reg != 0" which can
3486 be done in one instruction on v9 (so we do it). */
3487 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3488 {
3489 if (y != const0_rtx)
3490 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3491
3492 rtx pat = gen_rtx_SET (operands[0],
3493 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3494 x, const0_rtx));
3495
3496 /* If we can use addx/subx or addxc, add a clobber for CC. */
3497 if (mode == SImode || (code == NE && TARGET_VIS3))
3498 {
3499 rtx clobber
3500 = gen_rtx_CLOBBER (VOIDmode,
3501 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3502 SPARC_ICC_REG));
3503 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3504 }
3505
3506 emit_insn (pat);
3507 return true;
3508 }
3509
3510 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3511 if (TARGET_ARCH64
3512 && mode == DImode
3513 && !((code == LTU || code == GTU) && TARGET_VIS3)
3514 && gen_v9_scc (operands[0], code, x, y))
3515 return true;
3516
3517 /* We can do LTU and GEU using the addx/subx instructions too. And
3518 for GTU/LEU, if both operands are registers, swap them and fall
3519 back to the easy case. */
3520 if (code == GTU || code == LEU)
3521 {
3522 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3523 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3524 {
3525 tem = x;
3526 x = y;
3527 y = tem;
3528 code = swap_condition (code);
3529 }
3530 }
3531
3532 if (code == LTU || code == GEU)
3533 {
3534 emit_insn (gen_rtx_SET (operands[0],
3535 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3536 gen_compare_reg_1 (code, x, y),
3537 const0_rtx)));
3538 return true;
3539 }
3540
3541 /* All the possibilities to use addx/subx based sequences have been
3542 exhausted; try for a 3-instruction sequence using v9 conditional
3543 moves. */
3544 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3545 return true;
3546
3547 /* Nope, do branches. */
3548 return false;
3549 }
3550
3551 /* Emit a conditional jump insn for the v9 architecture using comparison code
3552 CODE and jump target LABEL.
3553 This function exists to take advantage of the v9 brxx insns. */
3554
3555 static void
3556 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3557 {
3558 emit_jump_insn (gen_rtx_SET (pc_rtx,
3559 gen_rtx_IF_THEN_ELSE (VOIDmode,
3560 gen_rtx_fmt_ee (code, GET_MODE (op0),
3561 op0, const0_rtx),
3562 gen_rtx_LABEL_REF (VOIDmode, label),
3563 pc_rtx)));
3564 }
3565
3566 /* Emit a conditional jump insn for the UA2011 architecture using
3567 comparison code CODE and jump target LABEL. This function exists
3568 to take advantage of the UA2011 Compare and Branch insns. */
3569
3570 static void
3571 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3572 {
3573 rtx if_then_else;
3574
3575 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3576 gen_rtx_fmt_ee(code, GET_MODE(op0),
3577 op0, op1),
3578 gen_rtx_LABEL_REF (VOIDmode, label),
3579 pc_rtx);
3580
3581 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3582 }
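
/* Illustrative sketch (not from the original sources): a compare-and-branch
   instruction fuses the comparison and the branch into one insn, along the
   lines of

	cwbne	%o1, %o2, .Llabel	! compare words, branch if not equal
	cxbe	%o3, 5, .Llabel		! compare doublewords, branch if equal

   where the second operand may also be a small signed immediate, which is
   why emit_conditional_branch_insn below checks SPARC_SIMM5_P before using
   this form.  */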
3583
3584 void
3585 emit_conditional_branch_insn (rtx operands[])
3586 {
3587 /* The quad-word fp compare library routines all return nonzero to indicate
3588 true, which is different from the equivalent libgcc routines, so we must
3589 handle them specially here. */
3590 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3591 {
3592 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3593 GET_CODE (operands[0]));
3594 operands[1] = XEXP (operands[0], 0);
3595 operands[2] = XEXP (operands[0], 1);
3596 }
3597
3598 /* If we can tell early on that the comparison is against a constant
3599 that won't fit in the 5-bit signed immediate field of a cbcond,
3600 use one of the other v9 conditional branch sequences. */
3601 if (TARGET_CBCOND
3602 && GET_CODE (operands[1]) == REG
3603 && (GET_MODE (operands[1]) == SImode
3604 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3605 && (GET_CODE (operands[2]) != CONST_INT
3606 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3607 {
3608 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3609 return;
3610 }
3611
3612 if (TARGET_ARCH64 && operands[2] == const0_rtx
3613 && GET_CODE (operands[1]) == REG
3614 && GET_MODE (operands[1]) == DImode)
3615 {
3616 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3617 return;
3618 }
3619
3620 operands[1] = gen_compare_reg (operands[0]);
3621 operands[2] = const0_rtx;
3622 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3623 operands[1], operands[2]);
3624 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3625 operands[3]));
3626 }
3627
3628
3629 /* Generate a DFmode part of a hard TFmode register.
3630 REG is the TFmode hard register, LOW is 1 for the
3631 low 64 bits of the register and 0 otherwise.
3632 */
3633 rtx
3634 gen_df_reg (rtx reg, int low)
3635 {
3636 int regno = REGNO (reg);
3637
3638 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3639 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3640 return gen_rtx_REG (DFmode, regno);
3641 }
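
/* Illustrative note (not from the original sources): with the big-endian
   word ordering normally used on SPARC, a TFmode value living in %f4 has
   its high-order DFmode half in %f4 and its low-order DFmode half in %f6,
   which is what the register-number adjustment above computes.  */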
3642 \f
3643 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3644 Unlike normal calls, TFmode operands are passed by reference. It is
3645 assumed that no more than 3 operands are required. */
3646
3647 static void
3648 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3649 {
3650 rtx ret_slot = NULL, arg[3], func_sym;
3651 int i;
3652
3653 /* We only expect to be called for conversions, unary, and binary ops. */
3654 gcc_assert (nargs == 2 || nargs == 3);
3655
3656 for (i = 0; i < nargs; ++i)
3657 {
3658 rtx this_arg = operands[i];
3659 rtx this_slot;
3660
3661 /* TFmode arguments and return values are passed by reference. */
3662 if (GET_MODE (this_arg) == TFmode)
3663 {
3664 int force_stack_temp;
3665
3666 force_stack_temp = 0;
3667 if (TARGET_BUGGY_QP_LIB && i == 0)
3668 force_stack_temp = 1;
3669
3670 if (GET_CODE (this_arg) == MEM
3671 && ! force_stack_temp)
3672 {
3673 tree expr = MEM_EXPR (this_arg);
3674 if (expr)
3675 mark_addressable (expr);
3676 this_arg = XEXP (this_arg, 0);
3677 }
3678 else if (CONSTANT_P (this_arg)
3679 && ! force_stack_temp)
3680 {
3681 this_slot = force_const_mem (TFmode, this_arg);
3682 this_arg = XEXP (this_slot, 0);
3683 }
3684 else
3685 {
3686 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3687
3688 /* Operand 0 is the return value. We'll copy it out later. */
3689 if (i > 0)
3690 emit_move_insn (this_slot, this_arg);
3691 else
3692 ret_slot = this_slot;
3693
3694 this_arg = XEXP (this_slot, 0);
3695 }
3696 }
3697
3698 arg[i] = this_arg;
3699 }
3700
3701 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3702
3703 if (GET_MODE (operands[0]) == TFmode)
3704 {
3705 if (nargs == 2)
3706 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3707 arg[0], GET_MODE (arg[0]),
3708 arg[1], GET_MODE (arg[1]));
3709 else
3710 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3711 arg[0], GET_MODE (arg[0]),
3712 arg[1], GET_MODE (arg[1]),
3713 arg[2], GET_MODE (arg[2]));
3714
3715 if (ret_slot)
3716 emit_move_insn (operands[0], ret_slot);
3717 }
3718 else
3719 {
3720 rtx ret;
3721
3722 gcc_assert (nargs == 2);
3723
3724 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3725 GET_MODE (operands[0]),
3726 arg[1], GET_MODE (arg[1]));
3727
3728 if (ret != operands[0])
3729 emit_move_insn (operands[0], ret);
3730 }
3731 }
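
/* Illustrative sketch (not from the original sources): the quad-precision
   emulation routines take their TFmode operands and result by reference,
   so a TFmode addition ends up as a call roughly equivalent to

	extern void _Qp_add (long double *res, const long double *a,
			     const long double *b);
	_Qp_add (&r, &x, &y);

   which is why the code above passes addresses of MEMs, stack temporaries
   or constant-pool entries rather than the values themselves.  */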
3732
3733 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3734
3735 static void
3736 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3737 {
3738 const char *func;
3739
3740 switch (code)
3741 {
3742 case PLUS:
3743 func = "_Qp_add";
3744 break;
3745 case MINUS:
3746 func = "_Qp_sub";
3747 break;
3748 case MULT:
3749 func = "_Qp_mul";
3750 break;
3751 case DIV:
3752 func = "_Qp_div";
3753 break;
3754 default:
3755 gcc_unreachable ();
3756 }
3757
3758 emit_soft_tfmode_libcall (func, 3, operands);
3759 }
3760
3761 static void
3762 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3763 {
3764 const char *func;
3765
3766 gcc_assert (code == SQRT);
3767 func = "_Qp_sqrt";
3768
3769 emit_soft_tfmode_libcall (func, 2, operands);
3770 }
3771
3772 static void
3773 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3774 {
3775 const char *func;
3776
3777 switch (code)
3778 {
3779 case FLOAT_EXTEND:
3780 switch (GET_MODE (operands[1]))
3781 {
3782 case E_SFmode:
3783 func = "_Qp_stoq";
3784 break;
3785 case E_DFmode:
3786 func = "_Qp_dtoq";
3787 break;
3788 default:
3789 gcc_unreachable ();
3790 }
3791 break;
3792
3793 case FLOAT_TRUNCATE:
3794 switch (GET_MODE (operands[0]))
3795 {
3796 case E_SFmode:
3797 func = "_Qp_qtos";
3798 break;
3799 case E_DFmode:
3800 func = "_Qp_qtod";
3801 break;
3802 default:
3803 gcc_unreachable ();
3804 }
3805 break;
3806
3807 case FLOAT:
3808 switch (GET_MODE (operands[1]))
3809 {
3810 case E_SImode:
3811 func = "_Qp_itoq";
3812 if (TARGET_ARCH64)
3813 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3814 break;
3815 case E_DImode:
3816 func = "_Qp_xtoq";
3817 break;
3818 default:
3819 gcc_unreachable ();
3820 }
3821 break;
3822
3823 case UNSIGNED_FLOAT:
3824 switch (GET_MODE (operands[1]))
3825 {
3826 case E_SImode:
3827 func = "_Qp_uitoq";
3828 if (TARGET_ARCH64)
3829 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3830 break;
3831 case E_DImode:
3832 func = "_Qp_uxtoq";
3833 break;
3834 default:
3835 gcc_unreachable ();
3836 }
3837 break;
3838
3839 case FIX:
3840 switch (GET_MODE (operands[0]))
3841 {
3842 case E_SImode:
3843 func = "_Qp_qtoi";
3844 break;
3845 case E_DImode:
3846 func = "_Qp_qtox";
3847 break;
3848 default:
3849 gcc_unreachable ();
3850 }
3851 break;
3852
3853 case UNSIGNED_FIX:
3854 switch (GET_MODE (operands[0]))
3855 {
3856 case E_SImode:
3857 func = "_Qp_qtoui";
3858 break;
3859 case E_DImode:
3860 func = "_Qp_qtoux";
3861 break;
3862 default:
3863 gcc_unreachable ();
3864 }
3865 break;
3866
3867 default:
3868 gcc_unreachable ();
3869 }
3870
3871 emit_soft_tfmode_libcall (func, 2, operands);
3872 }
3873
3874 /* Expand a hard-float TFmode operation. All arguments must be in
3875 registers. */
3876
3877 static void
3878 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3879 {
3880 rtx op, dest;
3881
3882 if (GET_RTX_CLASS (code) == RTX_UNARY)
3883 {
3884 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3885 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3886 }
3887 else
3888 {
3889 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3890 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3891 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3892 operands[1], operands[2]);
3893 }
3894
3895 if (register_operand (operands[0], VOIDmode))
3896 dest = operands[0];
3897 else
3898 dest = gen_reg_rtx (GET_MODE (operands[0]));
3899
3900 emit_insn (gen_rtx_SET (dest, op));
3901
3902 if (dest != operands[0])
3903 emit_move_insn (operands[0], dest);
3904 }
3905
3906 void
3907 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3908 {
3909 if (TARGET_HARD_QUAD)
3910 emit_hard_tfmode_operation (code, operands);
3911 else
3912 emit_soft_tfmode_binop (code, operands);
3913 }
3914
3915 void
3916 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3917 {
3918 if (TARGET_HARD_QUAD)
3919 emit_hard_tfmode_operation (code, operands);
3920 else
3921 emit_soft_tfmode_unop (code, operands);
3922 }
3923
3924 void
3925 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3926 {
3927 if (TARGET_HARD_QUAD)
3928 emit_hard_tfmode_operation (code, operands);
3929 else
3930 emit_soft_tfmode_cvt (code, operands);
3931 }
3932 \f
3933 /* Return nonzero if a branch/jump/call instruction will be emitting
3934 a nop into its delay slot. */
3935
3936 int
3937 empty_delay_slot (rtx_insn *insn)
3938 {
3939 rtx seq;
3940
3941 /* If no previous instruction (should not happen), return true. */
3942 if (PREV_INSN (insn) == NULL)
3943 return 1;
3944
3945 seq = NEXT_INSN (PREV_INSN (insn));
3946 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3947 return 0;
3948
3949 return 1;
3950 }
3951
3952 /* Return nonzero if we should emit a nop after a cbcond instruction.
3953 The cbcond instruction does not have a delay slot; however, there is
3954 a severe performance penalty if a control transfer appears right
3955 after a cbcond. Therefore we emit a nop when we detect this
3956 situation. */
3957
3958 int
3959 emit_cbcond_nop (rtx_insn *insn)
3960 {
3961 rtx next = next_active_insn (insn);
3962
3963 if (!next)
3964 return 1;
3965
3966 if (NONJUMP_INSN_P (next)
3967 && GET_CODE (PATTERN (next)) == SEQUENCE)
3968 next = XVECEXP (PATTERN (next), 0, 0);
3969 else if (CALL_P (next)
3970 && GET_CODE (PATTERN (next)) == PARALLEL)
3971 {
3972 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3973
3974 if (GET_CODE (delay) == RETURN)
3975 {
3976 /* It's a sibling call. Do not emit the nop if we're going
3977 to emit something other than the jump itself as the first
3978 instruction of the sibcall sequence. */
3979 if (sparc_leaf_function_p || TARGET_FLAT)
3980 return 0;
3981 }
3982 }
3983
3984 if (NONJUMP_INSN_P (next))
3985 return 0;
3986
3987 return 1;
3988 }
3989
3990 /* Return nonzero if TRIAL can go into the call delay slot. */
3991
3992 int
3993 eligible_for_call_delay (rtx_insn *trial)
3994 {
3995 rtx pat;
3996
3997 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3998 return 0;
3999
4000 /* Binutils allows
4001 call __tls_get_addr, %tgd_call (foo)
4002 add %l7, %o0, %o0, %tgd_add (foo)
4003 while Sun as/ld does not. */
4004 if (TARGET_GNU_TLS || !TARGET_TLS)
4005 return 1;
4006
4007 pat = PATTERN (trial);
4008
4009 /* We must reject tgd_add{32|64}, i.e.
4010 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
4011 and tldm_add{32|64}, i.e.
4012 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
4013 for Sun as/ld. */
4014 if (GET_CODE (pat) == SET
4015 && GET_CODE (SET_SRC (pat)) == PLUS)
4016 {
4017 rtx unspec = XEXP (SET_SRC (pat), 1);
4018
4019 if (GET_CODE (unspec) == UNSPEC
4020 && (XINT (unspec, 1) == UNSPEC_TLSGD
4021 || XINT (unspec, 1) == UNSPEC_TLSLDM))
4022 return 0;
4023 }
4024
4025 return 1;
4026 }
4027
4028 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4029 instruction. RETURN_P is true if the v9 variant 'return' is to be
4030 considered in the test too.
4031
4032 TRIAL must be a SET whose destination is a REG appropriate for the
4033 'restore' instruction or, if RETURN_P is true, for the 'return'
4034 instruction. */
4035
4036 static int
4037 eligible_for_restore_insn (rtx trial, bool return_p)
4038 {
4039 rtx pat = PATTERN (trial);
4040 rtx src = SET_SRC (pat);
4041 bool src_is_freg = false;
4042 rtx src_reg;
4043
4044 /* Since we can now do moves between float and integer registers when
4045 VIS3 is enabled, we have to catch this case. We can allow such
4046 moves when doing a 'return', however. */
4047 src_reg = src;
4048 if (GET_CODE (src_reg) == SUBREG)
4049 src_reg = SUBREG_REG (src_reg);
4050 if (GET_CODE (src_reg) == REG
4051 && SPARC_FP_REG_P (REGNO (src_reg)))
4052 src_is_freg = true;
4053
4054 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4055 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4056 && arith_operand (src, GET_MODE (src))
4057 && ! src_is_freg)
4058 {
4059 if (TARGET_ARCH64)
4060 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4061 else
4062 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4063 }
4064
4065 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4066 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4067 && arith_double_operand (src, GET_MODE (src))
4068 && ! src_is_freg)
4069 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4070
4071 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4072 else if (! TARGET_FPU && register_operand (src, SFmode))
4073 return 1;
4074
4075 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4076 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4077 return 1;
4078
4079 /* If we have the 'return' instruction, anything that does not use
4080 local or output registers and can go into a delay slot wins. */
4081 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4082 return 1;
4083
4084 /* The 'restore src1,src2,dest' pattern for SImode. */
4085 else if (GET_CODE (src) == PLUS
4086 && register_operand (XEXP (src, 0), SImode)
4087 && arith_operand (XEXP (src, 1), SImode))
4088 return 1;
4089
4090 /* The 'restore src1,src2,dest' pattern for DImode. */
4091 else if (GET_CODE (src) == PLUS
4092 && register_operand (XEXP (src, 0), DImode)
4093 && arith_double_operand (XEXP (src, 1), DImode))
4094 return 1;
4095
4096 /* The 'restore src1,%lo(src2),dest' pattern. */
4097 else if (GET_CODE (src) == LO_SUM
4098 && ! TARGET_CM_MEDMID
4099 && ((register_operand (XEXP (src, 0), SImode)
4100 && immediate_operand (XEXP (src, 1), SImode))
4101 || (TARGET_ARCH64
4102 && register_operand (XEXP (src, 0), DImode)
4103 && immediate_operand (XEXP (src, 1), DImode))))
4104 return 1;
4105
4106 /* The 'restore src,src,dest' pattern. */
4107 else if (GET_CODE (src) == ASHIFT
4108 && (register_operand (XEXP (src, 0), SImode)
4109 || register_operand (XEXP (src, 0), DImode))
4110 && XEXP (src, 1) == const1_rtx)
4111 return 1;
4112
4113 return 0;
4114 }
4115
4116 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4117
4118 int
4119 eligible_for_return_delay (rtx_insn *trial)
4120 {
4121 int regno;
4122 rtx pat;
4123
4124 /* If the function uses __builtin_eh_return, the eh_return machinery
4125 occupies the delay slot. */
4126 if (crtl->calls_eh_return)
4127 return 0;
4128
4129 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4130 return 0;
4131
4132 /* In the case of a leaf or flat function, anything can go into the slot. */
4133 if (sparc_leaf_function_p || TARGET_FLAT)
4134 return 1;
4135
4136 if (!NONJUMP_INSN_P (trial))
4137 return 0;
4138
4139 pat = PATTERN (trial);
4140 if (GET_CODE (pat) == PARALLEL)
4141 {
4142 int i;
4143
4144 if (! TARGET_V9)
4145 return 0;
4146 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4147 {
4148 rtx expr = XVECEXP (pat, 0, i);
4149 if (GET_CODE (expr) != SET)
4150 return 0;
4151 if (GET_CODE (SET_DEST (expr)) != REG)
4152 return 0;
4153 regno = REGNO (SET_DEST (expr));
4154 if (regno >= 8 && regno < 24)
4155 return 0;
4156 }
4157 return !epilogue_renumber (&pat, 1);
4158 }
4159
4160 if (GET_CODE (pat) != SET)
4161 return 0;
4162
4163 if (GET_CODE (SET_DEST (pat)) != REG)
4164 return 0;
4165
4166 regno = REGNO (SET_DEST (pat));
4167
4168 /* Otherwise, only operations which can be done in tandem with
4169 a `restore' or `return' insn can go into the delay slot. */
4170 if (regno >= 8 && regno < 24)
4171 return 0;
4172
4173 /* If this instruction sets up a floating-point register and we have a return
4174 instruction, it can probably go in. But restore will not work
4175 with FP_REGS. */
4176 if (! SPARC_INT_REG_P (regno))
4177 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4178
4179 return eligible_for_restore_insn (trial, true);
4180 }
4181
4182 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4183
4184 int
4185 eligible_for_sibcall_delay (rtx_insn *trial)
4186 {
4187 rtx pat;
4188
4189 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4190 return 0;
4191
4192 if (!NONJUMP_INSN_P (trial))
4193 return 0;
4194
4195 pat = PATTERN (trial);
4196
4197 if (sparc_leaf_function_p || TARGET_FLAT)
4198 {
4199 /* If the tail call is done using the call instruction,
4200 we have to restore %o7 in the delay slot. */
4201 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4202 return 0;
4203
4204 /* %g1 is used to build the function address. */
4205 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4206 return 0;
4207
4208 return 1;
4209 }
4210
4211 if (GET_CODE (pat) != SET)
4212 return 0;
4213
4214 /* Otherwise, only operations which can be done in tandem with
4215 a `restore' insn can go into the delay slot. */
4216 if (GET_CODE (SET_DEST (pat)) != REG
4217 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4218 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4219 return 0;
4220
4221 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4222 in most cases. */
4223 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4224 return 0;
4225
4226 return eligible_for_restore_insn (trial, false);
4227 }
4228 \f
4229 /* Determine if it's legal to put X into the constant pool. This
4230 is not possible if X contains the address of a symbol that is
4231 not constant (TLS) or not known at final link time (PIC). */
4232
4233 static bool
4234 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4235 {
4236 switch (GET_CODE (x))
4237 {
4238 case CONST_INT:
4239 case CONST_WIDE_INT:
4240 case CONST_DOUBLE:
4241 case CONST_VECTOR:
4242 /* Accept all non-symbolic constants. */
4243 return false;
4244
4245 case LABEL_REF:
4246 /* Labels are OK iff we are non-PIC. */
4247 return flag_pic != 0;
4248
4249 case SYMBOL_REF:
4250 /* 'Naked' TLS symbol references are never OK,
4251 non-TLS symbols are OK iff we are non-PIC. */
4252 if (SYMBOL_REF_TLS_MODEL (x))
4253 return true;
4254 else
4255 return flag_pic != 0;
4256
4257 case CONST:
4258 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4259 case PLUS:
4260 case MINUS:
4261 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4262 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4263 case UNSPEC:
4264 return true;
4265 default:
4266 gcc_unreachable ();
4267 }
4268 }
4269 \f
4270 /* Global Offset Table support. */
4271 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4272 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4273
4274 /* Return the SYMBOL_REF for the Global Offset Table. */
4275
4276 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4277
4278 static rtx
4279 sparc_got (void)
4280 {
4281 if (!sparc_got_symbol)
4282 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4283
4284 return sparc_got_symbol;
4285 }
4286
4287 /* Ensure that we are not using patterns that are not OK with PIC. */
4288
4289 int
4290 check_pic (int i)
4291 {
4292 rtx op;
4293
4294 switch (flag_pic)
4295 {
4296 case 1:
4297 op = recog_data.operand[i];
4298 gcc_assert (GET_CODE (op) != SYMBOL_REF
4299 && (GET_CODE (op) != CONST
4300 || (GET_CODE (XEXP (op, 0)) == MINUS
4301 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4302 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4303 /* fallthrough */
4304 case 2:
4305 default:
4306 return 1;
4307 }
4308 }
4309
4310 /* Return true if X is an address which needs a temporary register when
4311 reloaded while generating PIC code. */
4312
4313 int
4314 pic_address_needs_scratch (rtx x)
4315 {
4316 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
4317 if (GET_CODE (x) == CONST
4318 && GET_CODE (XEXP (x, 0)) == PLUS
4319 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4320 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4321 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4322 return 1;
4323
4324 return 0;
4325 }
4326
4327 /* Determine if a given RTX is a valid constant. We already know this
4328 satisfies CONSTANT_P. */
4329
4330 static bool
4331 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4332 {
4333 switch (GET_CODE (x))
4334 {
4335 case CONST:
4336 case SYMBOL_REF:
4337 if (sparc_tls_referenced_p (x))
4338 return false;
4339 break;
4340
4341 case CONST_DOUBLE:
4342 /* Floating-point constants are generally not OK.
4343 The only exceptions are 0.0 and all-ones in VIS. */
4344 if (TARGET_VIS
4345 && SCALAR_FLOAT_MODE_P (mode)
4346 && (const_zero_operand (x, mode)
4347 || const_all_ones_operand (x, mode)))
4348 return true;
4349
4350 return false;
4351
4352 case CONST_VECTOR:
4353 /* Vector constants are generally not OK.
4354 The only exceptions are 0 and -1 in VIS. */
4355 if (TARGET_VIS
4356 && (const_zero_operand (x, mode)
4357 || const_all_ones_operand (x, mode)))
4358 return true;
4359
4360 return false;
4361
4362 default:
4363 break;
4364 }
4365
4366 return true;
4367 }
4368
4369 /* Determine if a given RTX is a valid constant address. */
4370
4371 bool
4372 constant_address_p (rtx x)
4373 {
4374 switch (GET_CODE (x))
4375 {
4376 case LABEL_REF:
4377 case CONST_INT:
4378 case HIGH:
4379 return true;
4380
4381 case CONST:
4382 if (flag_pic && pic_address_needs_scratch (x))
4383 return false;
4384 return sparc_legitimate_constant_p (Pmode, x);
4385
4386 case SYMBOL_REF:
4387 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4388
4389 default:
4390 return false;
4391 }
4392 }
4393
4394 /* Nonzero if the constant value X is a legitimate general operand
4395 when generating PIC code. It is given that flag_pic is on and
4396 that X satisfies CONSTANT_P. */
4397
4398 bool
4399 legitimate_pic_operand_p (rtx x)
4400 {
4401 if (pic_address_needs_scratch (x))
4402 return false;
4403 if (sparc_tls_referenced_p (x))
4404 return false;
4405 return true;
4406 }
4407
4408 /* Return true if X is a representation of the PIC register. */
4409
4410 static bool
4411 sparc_pic_register_p (rtx x)
4412 {
4413 if (!REG_P (x) || !pic_offset_table_rtx)
4414 return false;
4415
4416 if (x == pic_offset_table_rtx)
4417 return true;
4418
4419 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4420 && (HARD_REGISTER_P (x) || lra_in_progress)
4421 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4422 return true;
4423
4424 return false;
4425 }
4426
4427 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4428 (CONST_INT_P (X) \
4429 && INTVAL (X) >= -0x1000 \
4430 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4431
4432 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4433 (CONST_INT_P (X) \
4434 && INTVAL (X) >= -0x1000 \
4435 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
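
/* Clarifying note (not from the original sources): SPARC load/store
   immediates are 13-bit signed values, i.e. in the range [-4096, 4095].
   The macros above also subtract the access size so that the whole access
   stays addressable; e.g. an SImode (4-byte) access is accepted for
   offsets from -4096 up to 4092 by RTX_OK_FOR_OFFSET_P.  The tighter
   0xc00 bound of RTX_OK_FOR_OLO10_P presumably leaves room for the
   10-bit %lo() part that is added on top of the offset.  */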
4436
4437 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4438
4439 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4440 ordinarily. This changes a bit when generating PIC. */
4441
4442 static bool
4443 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4444 {
4445 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4446
4447 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4448 rs1 = addr;
4449 else if (GET_CODE (addr) == PLUS)
4450 {
4451 rs1 = XEXP (addr, 0);
4452 rs2 = XEXP (addr, 1);
4453
4454 /* Canonicalize. REG comes first; if there are no regs,
4455 LO_SUM comes first. */
4456 if (!REG_P (rs1)
4457 && GET_CODE (rs1) != SUBREG
4458 && (REG_P (rs2)
4459 || GET_CODE (rs2) == SUBREG
4460 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4461 {
4462 rs1 = XEXP (addr, 1);
4463 rs2 = XEXP (addr, 0);
4464 }
4465
4466 if ((flag_pic == 1
4467 && sparc_pic_register_p (rs1)
4468 && !REG_P (rs2)
4469 && GET_CODE (rs2) != SUBREG
4470 && GET_CODE (rs2) != LO_SUM
4471 && GET_CODE (rs2) != MEM
4472 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4473 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4474 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4475 || ((REG_P (rs1)
4476 || GET_CODE (rs1) == SUBREG)
4477 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4478 {
4479 imm1 = rs2;
4480 rs2 = NULL;
4481 }
4482 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4483 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4484 {
4485 /* We prohibit REG + REG for TFmode when there are no quad move insns
4486 and we consequently need to split. We do this because REG+REG
4487 is not an offsettable address. If we get the situation in reload
4488 where source and destination of a movtf pattern are both MEMs with
4489 REG+REG address, then only one of them gets converted to an
4490 offsettable address. */
4491 if (mode == TFmode
4492 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4493 return 0;
4494
4495 /* Likewise for TImode, but in all cases. */
4496 if (mode == TImode)
4497 return 0;
4498
4499 /* We prohibit REG + REG on ARCH32 if not optimizing for
4500 DFmode/DImode because then mem_min_alignment is likely to be zero
4501 after reload and the forced split would lack a matching splitter
4502 pattern. */
4503 if (TARGET_ARCH32 && !optimize
4504 && (mode == DFmode || mode == DImode))
4505 return 0;
4506 }
4507 else if (USE_AS_OFFSETABLE_LO10
4508 && GET_CODE (rs1) == LO_SUM
4509 && TARGET_ARCH64
4510 && ! TARGET_CM_MEDMID
4511 && RTX_OK_FOR_OLO10_P (rs2, mode))
4512 {
4513 rs2 = NULL;
4514 imm1 = XEXP (rs1, 1);
4515 rs1 = XEXP (rs1, 0);
4516 if (!CONSTANT_P (imm1)
4517 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4518 return 0;
4519 }
4520 }
4521 else if (GET_CODE (addr) == LO_SUM)
4522 {
4523 rs1 = XEXP (addr, 0);
4524 imm1 = XEXP (addr, 1);
4525
4526 if (!CONSTANT_P (imm1)
4527 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4528 return 0;
4529
4530 /* We can't allow TFmode in 32-bit mode, because an offset greater
4531 than the alignment (8) may cause the LO_SUM to overflow. */
4532 if (mode == TFmode && TARGET_ARCH32)
4533 return 0;
4534
4535 /* During reload, accept the HIGH+LO_SUM construct generated by
4536 sparc_legitimize_reload_address. */
4537 if (reload_in_progress
4538 && GET_CODE (rs1) == HIGH
4539 && XEXP (rs1, 0) == imm1)
4540 return 1;
4541 }
4542 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4543 return 1;
4544 else
4545 return 0;
4546
4547 if (GET_CODE (rs1) == SUBREG)
4548 rs1 = SUBREG_REG (rs1);
4549 if (!REG_P (rs1))
4550 return 0;
4551
4552 if (rs2)
4553 {
4554 if (GET_CODE (rs2) == SUBREG)
4555 rs2 = SUBREG_REG (rs2);
4556 if (!REG_P (rs2))
4557 return 0;
4558 }
4559
4560 if (strict)
4561 {
4562 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4563 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4564 return 0;
4565 }
4566 else
4567 {
4568 if ((! SPARC_INT_REG_P (REGNO (rs1))
4569 && REGNO (rs1) != FRAME_POINTER_REGNUM
4570 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4571 || (rs2
4572 && (! SPARC_INT_REG_P (REGNO (rs2))
4573 && REGNO (rs2) != FRAME_POINTER_REGNUM
4574 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4575 return 0;
4576 }
4577 return 1;
4578 }
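
/* Informal summary (not from the original sources) of the address forms
   accepted by sparc_legitimate_address_p above:

	[%reg]			register indirect
	[%reg + %reg]		register plus register
	[%reg + simm13]		register plus 13-bit signed immediate
	[%reg + %lo(sym)]	LO_SUM, paired with a preceding sethi %hi(sym)

   subject to the PIC, TFmode/TImode and alignment restrictions spelled out
   in the code.  */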
4579
4580 /* Return the SYMBOL_REF for the tls_get_addr function. */
4581
4582 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4583
4584 static rtx
4585 sparc_tls_get_addr (void)
4586 {
4587 if (!sparc_tls_symbol)
4588 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4589
4590 return sparc_tls_symbol;
4591 }
4592
4593 /* Return the Global Offset Table to be used in TLS mode. */
4594
4595 static rtx
4596 sparc_tls_got (void)
4597 {
4598 /* In PIC mode, this is just the PIC offset table. */
4599 if (flag_pic)
4600 {
4601 crtl->uses_pic_offset_table = 1;
4602 return pic_offset_table_rtx;
4603 }
4604
4605 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4606 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4607 if (TARGET_SUN_TLS && TARGET_ARCH32)
4608 {
4609 load_got_register ();
4610 return global_offset_table_rtx;
4611 }
4612
4613 /* In all other cases, we load a new pseudo with the GOT symbol. */
4614 return copy_to_reg (sparc_got ());
4615 }
4616
4617 /* Return true if X contains a thread-local symbol. */
4618
4619 static bool
4620 sparc_tls_referenced_p (rtx x)
4621 {
4622 if (!TARGET_HAVE_TLS)
4623 return false;
4624
4625 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4626 x = XEXP (XEXP (x, 0), 0);
4627
4628 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4629 return true;
4630
4631 /* That's all we handle in sparc_legitimize_tls_address for now. */
4632 return false;
4633 }
4634
4635 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4636 this (thread-local) address. */
4637
4638 static rtx
4639 sparc_legitimize_tls_address (rtx addr)
4640 {
4641 rtx temp1, temp2, temp3, ret, o0, got;
4642 rtx_insn *insn;
4643
4644 gcc_assert (can_create_pseudo_p ());
4645
4646 if (GET_CODE (addr) == SYMBOL_REF)
4647 switch (SYMBOL_REF_TLS_MODEL (addr))
4648 {
4649 case TLS_MODEL_GLOBAL_DYNAMIC:
4650 start_sequence ();
4651 temp1 = gen_reg_rtx (SImode);
4652 temp2 = gen_reg_rtx (SImode);
4653 ret = gen_reg_rtx (Pmode);
4654 o0 = gen_rtx_REG (Pmode, 8);
4655 got = sparc_tls_got ();
4656 emit_insn (gen_tgd_hi22 (temp1, addr));
4657 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4658 if (TARGET_ARCH32)
4659 {
4660 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4661 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4662 addr, const1_rtx));
4663 }
4664 else
4665 {
4666 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4667 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4668 addr, const1_rtx));
4669 }
4670 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4671 insn = get_insns ();
4672 end_sequence ();
4673 emit_libcall_block (insn, ret, o0, addr);
4674 break;
4675
4676 case TLS_MODEL_LOCAL_DYNAMIC:
4677 start_sequence ();
4678 temp1 = gen_reg_rtx (SImode);
4679 temp2 = gen_reg_rtx (SImode);
4680 temp3 = gen_reg_rtx (Pmode);
4681 ret = gen_reg_rtx (Pmode);
4682 o0 = gen_rtx_REG (Pmode, 8);
4683 got = sparc_tls_got ();
4684 emit_insn (gen_tldm_hi22 (temp1));
4685 emit_insn (gen_tldm_lo10 (temp2, temp1));
4686 if (TARGET_ARCH32)
4687 {
4688 emit_insn (gen_tldm_add32 (o0, got, temp2));
4689 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4690 const1_rtx));
4691 }
4692 else
4693 {
4694 emit_insn (gen_tldm_add64 (o0, got, temp2));
4695 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4696 const1_rtx));
4697 }
4698 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4699 insn = get_insns ();
4700 end_sequence ();
4701 emit_libcall_block (insn, temp3, o0,
4702 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4703 UNSPEC_TLSLD_BASE));
4704 temp1 = gen_reg_rtx (SImode);
4705 temp2 = gen_reg_rtx (SImode);
4706 emit_insn (gen_tldo_hix22 (temp1, addr));
4707 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4708 if (TARGET_ARCH32)
4709 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4710 else
4711 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4712 break;
4713
4714 case TLS_MODEL_INITIAL_EXEC:
4715 temp1 = gen_reg_rtx (SImode);
4716 temp2 = gen_reg_rtx (SImode);
4717 temp3 = gen_reg_rtx (Pmode);
4718 got = sparc_tls_got ();
4719 emit_insn (gen_tie_hi22 (temp1, addr));
4720 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4721 if (TARGET_ARCH32)
4722 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4723 else
4724 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4725 if (TARGET_SUN_TLS)
4726 {
4727 ret = gen_reg_rtx (Pmode);
4728 if (TARGET_ARCH32)
4729 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4730 temp3, addr));
4731 else
4732 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4733 temp3, addr));
4734 }
4735 else
4736 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4737 break;
4738
4739 case TLS_MODEL_LOCAL_EXEC:
4740 temp1 = gen_reg_rtx (Pmode);
4741 temp2 = gen_reg_rtx (Pmode);
4742 if (TARGET_ARCH32)
4743 {
4744 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4745 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4746 }
4747 else
4748 {
4749 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4750 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4751 }
4752 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4753 break;
4754
4755 default:
4756 gcc_unreachable ();
4757 }
4758
4759 else if (GET_CODE (addr) == CONST)
4760 {
4761 rtx base, offset;
4762
4763 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4764
4765 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4766 offset = XEXP (XEXP (addr, 0), 1);
4767
4768 base = force_operand (base, NULL_RTX);
4769 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4770 offset = force_reg (Pmode, offset);
4771 ret = gen_rtx_PLUS (Pmode, base, offset);
4772 }
4773
4774 else
4775 gcc_unreachable (); /* for now ... */
4776
4777 return ret;
4778 }
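
/* Illustrative sketch (not from the original sources): for the local-exec
   model handled above, the generated code amounts to something like

	sethi	%tle_hix22(sym), %tmp
	xor	%tmp, %tle_lox10(sym), %tmp
	add	%g7, %tmp, dest		! %g7 holds the thread pointer

   i.e. a constant offset from the thread pointer, while the dynamic models
   go through the GOT and, for global/local dynamic, a call to
   __tls_get_addr.  */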
4779
4780 /* Legitimize PIC addresses. If the address is already position-independent,
4781 we return ORIG. Newly generated position-independent addresses go into a
4782 reg. This is REG if nonzero, otherwise we allocate register(s) as
4783 necessary. */
4784
4785 static rtx
4786 sparc_legitimize_pic_address (rtx orig, rtx reg)
4787 {
4788 if (GET_CODE (orig) == SYMBOL_REF
4789 /* See the comment in sparc_expand_move. */
4790 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4791 {
4792 bool gotdata_op = false;
4793 rtx pic_ref, address;
4794 rtx_insn *insn;
4795
4796 if (!reg)
4797 {
4798 gcc_assert (can_create_pseudo_p ());
4799 reg = gen_reg_rtx (Pmode);
4800 }
4801
4802 if (flag_pic == 2)
4803 {
4804 /* If not during reload, allocate another temp reg here for loading
4805 in the address, so that these instructions can be optimized
4806 properly. */
4807 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4808
4809 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4810 won't get confused into thinking that these two instructions
4811 are loading in the true address of the symbol. If in the
4812 future a PIC rtx exists, that should be used instead. */
4813 if (TARGET_ARCH64)
4814 {
4815 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4816 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4817 }
4818 else
4819 {
4820 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4821 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4822 }
4823
4824 address = temp_reg;
4825 gotdata_op = true;
4826 }
4827 else
4828 address = orig;
4829
4830 crtl->uses_pic_offset_table = 1;
4831 if (gotdata_op)
4832 {
4833 if (TARGET_ARCH64)
4834 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4835 pic_offset_table_rtx,
4836 address, orig));
4837 else
4838 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4839 pic_offset_table_rtx,
4840 address, orig));
4841 }
4842 else
4843 {
4844 pic_ref
4845 = gen_const_mem (Pmode,
4846 gen_rtx_PLUS (Pmode,
4847 pic_offset_table_rtx, address));
4848 insn = emit_move_insn (reg, pic_ref);
4849 }
4850
4851 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4852 by the loop optimizer. */
4853 set_unique_reg_note (insn, REG_EQUAL, orig);
4854 return reg;
4855 }
4856 else if (GET_CODE (orig) == CONST)
4857 {
4858 rtx base, offset;
4859
4860 if (GET_CODE (XEXP (orig, 0)) == PLUS
4861 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4862 return orig;
4863
4864 if (!reg)
4865 {
4866 gcc_assert (can_create_pseudo_p ());
4867 reg = gen_reg_rtx (Pmode);
4868 }
4869
4870 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4871 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4872 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4873 base == reg ? NULL_RTX : reg);
4874
4875 if (GET_CODE (offset) == CONST_INT)
4876 {
4877 if (SMALL_INT (offset))
4878 return plus_constant (Pmode, base, INTVAL (offset));
4879 else if (can_create_pseudo_p ())
4880 offset = force_reg (Pmode, offset);
4881 else
4882 /* If we reach here, then something is seriously wrong. */
4883 gcc_unreachable ();
4884 }
4885 return gen_rtx_PLUS (Pmode, base, offset);
4886 }
4887 else if (GET_CODE (orig) == LABEL_REF)
4888 /* ??? We ought to be checking that the register is live instead, in case
4889 it is eliminated. */
4890 crtl->uses_pic_offset_table = 1;
4891
4892 return orig;
4893 }
4894
4895 /* Try machine-dependent ways of modifying an illegitimate address X
4896 to be legitimate. If we find one, return the new, valid address.
4897
4898 OLDX is the address as it was before break_out_memory_refs was called.
4899 In some cases it is useful to look at this to decide what needs to be done.
4900
4901 MODE is the mode of the operand pointed to by X.
4902
4903 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4904
4905 static rtx
4906 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4907 machine_mode mode)
4908 {
4909 rtx orig_x = x;
4910
4911 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4912 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4913 force_operand (XEXP (x, 0), NULL_RTX));
4914 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4915 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4916 force_operand (XEXP (x, 1), NULL_RTX));
4917 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4918 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4919 XEXP (x, 1));
4920 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4921 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4922 force_operand (XEXP (x, 1), NULL_RTX));
4923
4924 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4925 return x;
4926
4927 if (sparc_tls_referenced_p (x))
4928 x = sparc_legitimize_tls_address (x);
4929 else if (flag_pic)
4930 x = sparc_legitimize_pic_address (x, NULL_RTX);
4931 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4932 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4933 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4934 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4935 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4936 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4937 else if (GET_CODE (x) == SYMBOL_REF
4938 || GET_CODE (x) == CONST
4939 || GET_CODE (x) == LABEL_REF)
4940 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4941
4942 return x;
4943 }
4944
4945 /* Delegitimize an address that was legitimized by the above function. */
4946
4947 static rtx
4948 sparc_delegitimize_address (rtx x)
4949 {
4950 x = delegitimize_mem_from_attrs (x);
4951
4952 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4953 switch (XINT (XEXP (x, 1), 1))
4954 {
4955 case UNSPEC_MOVE_PIC:
4956 case UNSPEC_TLSLE:
4957 x = XVECEXP (XEXP (x, 1), 0, 0);
4958 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4959 break;
4960 default:
4961 break;
4962 }
4963
4964 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4965 if (GET_CODE (x) == MINUS
4966 && sparc_pic_register_p (XEXP (x, 0))
4967 && GET_CODE (XEXP (x, 1)) == LO_SUM
4968 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4969 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4970 {
4971 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4972 gcc_assert (GET_CODE (x) == LABEL_REF
4973 || (GET_CODE (x) == CONST
4974 && GET_CODE (XEXP (x, 0)) == PLUS
4975 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4976 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
4977 }
4978
4979 return x;
4980 }
4981
4982 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4983 replace the input X, or the original X if no replacement is called for.
4984 The output parameter *WIN is 1 if the calling macro should goto WIN,
4985 0 if it should not.
4986
4987 For SPARC, we wish to handle addresses by splitting them into
4988 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4989 This cuts the number of extra insns by one.
4990
4991 Do nothing when generating PIC code and the address is a symbolic
4992 operand or requires a scratch register. */
4993
4994 rtx
4995 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4996 int opnum, int type,
4997 int ind_levels ATTRIBUTE_UNUSED, int *win)
4998 {
4999 /* Decompose SImode constants into HIGH+LO_SUM. */
5000 if (CONSTANT_P (x)
5001 && (mode != TFmode || TARGET_ARCH64)
5002 && GET_MODE (x) == SImode
5003 && GET_CODE (x) != LO_SUM
5004 && GET_CODE (x) != HIGH
5005 && sparc_cmodel <= CM_MEDLOW
5006 && !(flag_pic
5007 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5008 {
5009 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5010 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5011 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5012 opnum, (enum reload_type)type);
5013 *win = 1;
5014 return x;
5015 }
5016
5017 /* We have to recognize what we have already generated above. */
5018 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5019 {
5020 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5021 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5022 opnum, (enum reload_type)type);
5023 *win = 1;
5024 return x;
5025 }
5026
5027 *win = 0;
5028 return x;
5029 }
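
/* Illustrative sketch (not from the original sources): the HIGH+LO_SUM
   decomposition performed above corresponds to the classic two-instruction
   absolute addressing sequence

	sethi	%hi(sym), %reg
	ld	[%reg + %lo(sym)], dest

   and keeping the LO_SUM inside the memory reference saves the separate
   "or %reg, %lo(sym), %reg" that a full address computation would need.  */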
5030
5031 /* Return true if ADDR (a legitimate address expression)
5032 has an effect that depends on the machine mode it is used for.
5033
5034 In PIC mode,
5035
5036 (mem:HI [%l7+a])
5037
5038 is not equivalent to
5039
5040 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5041
5042 because [%l7+a+1] is interpreted as the address of (a+1). */
5043
5044
5045 static bool
5046 sparc_mode_dependent_address_p (const_rtx addr,
5047 addr_space_t as ATTRIBUTE_UNUSED)
5048 {
5049 if (GET_CODE (addr) == PLUS
5050 && sparc_pic_register_p (XEXP (addr, 0))
5051 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5052 return true;
5053
5054 return false;
5055 }
5056
5057 #ifdef HAVE_GAS_HIDDEN
5058 # define USE_HIDDEN_LINKONCE 1
5059 #else
5060 # define USE_HIDDEN_LINKONCE 0
5061 #endif
5062
5063 static void
5064 get_pc_thunk_name (char name[32], unsigned int regno)
5065 {
5066 const char *reg_name = reg_names[regno];
5067
5068 /* Skip the leading '%' as that cannot be used in a
5069 symbol name. */
5070 reg_name += 1;
5071
5072 if (USE_HIDDEN_LINKONCE)
5073 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
5074 else
5075 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
5076 }
5077
5078 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
5079
5080 static rtx
5081 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
5082 {
5083 int orig_flag_pic = flag_pic;
5084 rtx insn;
5085
5086 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
5087 flag_pic = 0;
5088 if (TARGET_ARCH64)
5089 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
5090 else
5091 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
5092 flag_pic = orig_flag_pic;
5093
5094 return insn;
5095 }
5096
5097 /* Emit code to load the GOT register. */
5098
5099 void
5100 load_got_register (void)
5101 {
5102 if (!global_offset_table_rtx)
5103 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
5104
5105 if (TARGET_VXWORKS_RTP)
5106 emit_insn (gen_vxworks_load_got ());
5107 else
5108 {
5109 /* The GOT symbol is subject to a PC-relative relocation so we need a
5110 helper function to add the PC value and thus get the final value. */
5111 if (!got_helper_rtx)
5112 {
5113 char name[32];
5114 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
5115 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5116 }
5117
5118 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
5119 got_helper_rtx));
5120 }
5121 }
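
/* Illustrative sketch (not from the original sources): on ELF targets the
   helper-based sequence emitted above typically looks like

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the address of the call into %l7, so that %l7 ends
   up pointing at the GOT; the exact offsets come from the load_pcrel_sym
   patterns in sparc.md.  */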
5122
5123 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5124 address of the call target. */
5125
5126 void
5127 sparc_emit_call_insn (rtx pat, rtx addr)
5128 {
5129 rtx_insn *insn;
5130
5131 insn = emit_call_insn (pat);
5132
5133 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5134 if (TARGET_VXWORKS_RTP
5135 && flag_pic
5136 && GET_CODE (addr) == SYMBOL_REF
5137 && (SYMBOL_REF_DECL (addr)
5138 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5139 : !SYMBOL_REF_LOCAL_P (addr)))
5140 {
5141 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5142 crtl->uses_pic_offset_table = 1;
5143 }
5144 }
5145 \f
5146 /* Return 1 if RTX is a MEM which is known to be aligned to at
5147 least a DESIRED byte boundary. */
5148
5149 int
5150 mem_min_alignment (rtx mem, int desired)
5151 {
5152 rtx addr, base, offset;
5153
5154 /* If it's not a MEM we can't accept it. */
5155 if (GET_CODE (mem) != MEM)
5156 return 0;
5157
5158 /* Obviously... */
5159 if (!TARGET_UNALIGNED_DOUBLES
5160 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5161 return 1;
5162
5163 /* ??? The rest of the function predates MEM_ALIGN so
5164 there is probably a bit of redundancy. */
5165 addr = XEXP (mem, 0);
5166 base = offset = NULL_RTX;
5167 if (GET_CODE (addr) == PLUS)
5168 {
5169 if (GET_CODE (XEXP (addr, 0)) == REG)
5170 {
5171 base = XEXP (addr, 0);
5172
5173 /* What we are saying here is that if the base
5174 REG is aligned properly, the compiler will make
5175 sure any REG-based index upon it will be so
5176 as well. */
5177 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5178 offset = XEXP (addr, 1);
5179 else
5180 offset = const0_rtx;
5181 }
5182 }
5183 else if (GET_CODE (addr) == REG)
5184 {
5185 base = addr;
5186 offset = const0_rtx;
5187 }
5188
5189 if (base != NULL_RTX)
5190 {
5191 int regno = REGNO (base);
5192
5193 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5194 {
5195 /* Check if the compiler has recorded some information
5196 about the alignment of the base REG. If reload has
5197 completed, we already matched with proper alignments.
5198 If not running global_alloc, reload might give us
5199 an unaligned pointer to the local stack, though. */
5200 if (((cfun != 0
5201 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5202 || (optimize && reload_completed))
5203 && (INTVAL (offset) & (desired - 1)) == 0)
5204 return 1;
5205 }
5206 else
5207 {
5208 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5209 return 1;
5210 }
5211 }
5212 else if (! TARGET_UNALIGNED_DOUBLES
5213 || CONSTANT_P (addr)
5214 || GET_CODE (addr) == LO_SUM)
5215 {
5216 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5217 is true, in which case we can only assume that an access is aligned if
5218 it is to a constant address, or the address involves a LO_SUM. */
5219 return 1;
5220 }
5221
5222 /* An obviously unaligned address. */
5223 return 0;
5224 }
5225
5226 \f
5227 /* Vectors to keep interesting information about registers where it can easily
5228 be found. We used to use the actual mode value as the bit number, but there
5229 are more than 32 modes now. Instead we use two tables: one indexed by
5230 hard register number, and one indexed by mode. */
5231
5232 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5233 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5234 mapped into one sparc_mode_class mode. */
5235
5236 enum sparc_mode_class {
5237 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5238 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5239 CC_MODE, CCFP_MODE
5240 };
5241
5242 /* Modes for single-word and smaller quantities. */
5243 #define S_MODES \
5244 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5245
5246 /* Modes for double-word and smaller quantities. */
5247 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5248
5249 /* Modes for quad-word and smaller quantities. */
5250 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5251
5252 /* Modes for 8-word and smaller quantities. */
5253 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5254
5255 /* Modes for single-float quantities. */
5256 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5257
5258 /* Modes for double-float and smaller quantities. */
5259 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5260
5261 /* Modes for quad-float and smaller quantities. */
5262 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5263
5264 /* Modes for quad-float pairs and smaller quantities. */
5265 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5266
5267 /* Modes for double-float only quantities. */
5268 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5269
5270 /* Modes for quad-float and double-float only quantities. */
5271 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5272
5273 /* Modes for quad-float pairs and double-float only quantities. */
5274 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5275
5276 /* Modes for condition codes. */
5277 #define CC_MODES (1 << (int) CC_MODE)
5278 #define CCFP_MODES (1 << (int) CCFP_MODE)
5279
5280 /* Value is 1 if register/mode pair is acceptable on sparc.
5281
5282 The funny mixture of D and T modes is because integer operations
5283 do not specially operate on tetra quantities, so non-quad-aligned
5284 registers can hold quadword quantities (except %o4 and %i4 because
5285 they cross fixed registers).
5286
5287 ??? Note that, despite the settings, non-double-aligned parameter
5288 registers can hold double-word quantities in 32-bit mode. */
5289
5290 /* This points to either the 32-bit or the 64-bit version. */
5291 static const int *hard_regno_mode_classes;
5292
5293 static const int hard_32bit_mode_classes[] = {
5294 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5295 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5296 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5297 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5298
5299 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5300 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5301 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5302 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5303
5304 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5305 and none can hold SFmode/SImode values. */
5306 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5307 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5308 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5309 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5310
5311 /* %fcc[0123] */
5312 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5313
5314 /* %icc, %sfp, %gsr */
5315 CC_MODES, 0, D_MODES
5316 };
5317
5318 static const int hard_64bit_mode_classes[] = {
5319 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5320 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5321 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5322 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5323
5324 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5325 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5326 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5327 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5328
5329 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5330 and none can hold SFmode/SImode values. */
5331 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5332 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5333 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5334 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5335
5336 /* %fcc[0123] */
5337 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5338
5339 /* %icc, %sfp, %gsr */
5340 CC_MODES, 0, D_MODES
5341 };
5342
5343 static int sparc_mode_class [NUM_MACHINE_MODES];
5344
5345 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5346
5347 static void
5348 sparc_init_modes (void)
5349 {
5350 int i;
5351
5352 for (i = 0; i < NUM_MACHINE_MODES; i++)
5353 {
5354 machine_mode m = (machine_mode) i;
5355 unsigned int size = GET_MODE_SIZE (m);
5356
5357 switch (GET_MODE_CLASS (m))
5358 {
5359 case MODE_INT:
5360 case MODE_PARTIAL_INT:
5361 case MODE_COMPLEX_INT:
5362 if (size < 4)
5363 sparc_mode_class[i] = 1 << (int) H_MODE;
5364 else if (size == 4)
5365 sparc_mode_class[i] = 1 << (int) S_MODE;
5366 else if (size == 8)
5367 sparc_mode_class[i] = 1 << (int) D_MODE;
5368 else if (size == 16)
5369 sparc_mode_class[i] = 1 << (int) T_MODE;
5370 else if (size == 32)
5371 sparc_mode_class[i] = 1 << (int) O_MODE;
5372 else
5373 sparc_mode_class[i] = 0;
5374 break;
5375 case MODE_VECTOR_INT:
5376 if (size == 4)
5377 sparc_mode_class[i] = 1 << (int) SF_MODE;
5378 else if (size == 8)
5379 sparc_mode_class[i] = 1 << (int) DF_MODE;
5380 else
5381 sparc_mode_class[i] = 0;
5382 break;
5383 case MODE_FLOAT:
5384 case MODE_COMPLEX_FLOAT:
5385 if (size == 4)
5386 sparc_mode_class[i] = 1 << (int) SF_MODE;
5387 else if (size == 8)
5388 sparc_mode_class[i] = 1 << (int) DF_MODE;
5389 else if (size == 16)
5390 sparc_mode_class[i] = 1 << (int) TF_MODE;
5391 else if (size == 32)
5392 sparc_mode_class[i] = 1 << (int) OF_MODE;
5393 else
5394 sparc_mode_class[i] = 0;
5395 break;
5396 case MODE_CC:
5397 if (m == CCFPmode || m == CCFPEmode)
5398 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5399 else
5400 sparc_mode_class[i] = 1 << (int) CC_MODE;
5401 break;
5402 default:
5403 sparc_mode_class[i] = 0;
5404 break;
5405 }
5406 }
5407
5408 if (TARGET_ARCH64)
5409 hard_regno_mode_classes = hard_64bit_mode_classes;
5410 else
5411 hard_regno_mode_classes = hard_32bit_mode_classes;
5412
5413 /* Initialize the array used by REGNO_REG_CLASS. */
5414 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5415 {
5416 if (i < 16 && TARGET_V8PLUS)
5417 sparc_regno_reg_class[i] = I64_REGS;
5418 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5419 sparc_regno_reg_class[i] = GENERAL_REGS;
5420 else if (i < 64)
5421 sparc_regno_reg_class[i] = FP_REGS;
5422 else if (i < 96)
5423 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5424 else if (i < 100)
5425 sparc_regno_reg_class[i] = FPCC_REGS;
5426 else
5427 sparc_regno_reg_class[i] = NO_REGS;
5428 }
5429 }
5430 \f
5431 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5432
5433 static inline bool
5434 save_global_or_fp_reg_p (unsigned int regno,
5435 int leaf_function ATTRIBUTE_UNUSED)
5436 {
5437 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5438 }
5439
5440 /* Return whether the return address register (%i7) is needed. */
5441
5442 static inline bool
5443 return_addr_reg_needed_p (int leaf_function)
5444 {
5445 /* If it is live, for example because of __builtin_return_address (0). */
5446 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5447 return true;
5448
5449 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5450 if (!leaf_function
5451 /* Loading the GOT register clobbers %o7. */
5452 || crtl->uses_pic_offset_table
5453 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5454 return true;
5455
5456 return false;
5457 }
5458
5459 /* Return whether REGNO, a local or in register, must be saved/restored. */
5460
5461 static bool
5462 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5463 {
5464 /* General case: call-saved registers live at some point. */
5465 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5466 return true;
5467
5468 /* Frame pointer register (%fp) if needed. */
5469 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5470 return true;
5471
5472 /* Return address register (%i7) if needed. */
5473 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5474 return true;
5475
5476 /* GOT register (%l7) if needed. */
5477 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5478 return true;
5479
5480 /* If the function accesses prior frames, the frame pointer and the return
5481 address of the previous frame must be saved on the stack. */
5482 if (crtl->accesses_prior_frames
5483 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5484 return true;
5485
5486 return false;
5487 }
5488
5489 /* Compute the frame size required by the function. This function is called
5490 during the reload pass and also by sparc_expand_prologue. */
5491
5492 static HOST_WIDE_INT
5493 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5494 {
5495 HOST_WIDE_INT frame_size, apparent_frame_size;
5496 int args_size, n_global_fp_regs = 0;
5497 bool save_local_in_regs_p = false;
5498 unsigned int i;
5499
5500 /* If the function allocates dynamic stack space, the dynamic offset is
5501 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5502 if (leaf_function && !cfun->calls_alloca)
5503 args_size = 0;
5504 else
5505 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5506
5507 /* Calculate space needed for global registers. */
5508 if (TARGET_ARCH64)
5509 {
5510 for (i = 0; i < 8; i++)
5511 if (save_global_or_fp_reg_p (i, 0))
5512 n_global_fp_regs += 2;
5513 }
5514 else
5515 {
5516 for (i = 0; i < 8; i += 2)
5517 if (save_global_or_fp_reg_p (i, 0)
5518 || save_global_or_fp_reg_p (i + 1, 0))
5519 n_global_fp_regs += 2;
5520 }
5521
5522 /* In the flat window model, find out which local and in registers need to
5523 be saved. We don't reserve space in the current frame for them as they
5524 will be spilled into the register window save area of the caller's frame.
5525 However, as soon as we use this register window save area, we must create
5526 that of the current frame to make it the live one. */
5527 if (TARGET_FLAT)
5528 for (i = 16; i < 32; i++)
5529 if (save_local_or_in_reg_p (i, leaf_function))
5530 {
5531 save_local_in_regs_p = true;
5532 break;
5533 }
5534
5535 /* Calculate space needed for FP registers. */
5536 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5537 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5538 n_global_fp_regs += 2;
5539
5540 if (size == 0
5541 && n_global_fp_regs == 0
5542 && args_size == 0
5543 && !save_local_in_regs_p)
5544 frame_size = apparent_frame_size = 0;
5545 else
5546 {
5547 /* Start from the apparent frame size. */
5548 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5549
5550 /* We need to add the size of the outgoing argument area. */
5551 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5552
5553 /* And that of the register window save area. */
5554 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5555
5556 /* Finally, bump to the appropriate alignment. */
5557 frame_size = SPARC_STACK_ALIGN (frame_size);
5558 }
5559
5560 /* Set up values for use in prologue and epilogue. */
5561 sparc_frame_size = frame_size;
5562 sparc_apparent_frame_size = apparent_frame_size;
5563 sparc_n_global_fp_regs = n_global_fp_regs;
5564 sparc_save_local_in_regs_p = save_local_in_regs_p;
5565
5566 return frame_size;
5567 }
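
/* An illustrative walk-through of the computation above; FIRST_PARM_OFFSET
   and the final alignment are left symbolic since their values depend on
   the ABI selected:

     size = 40 bytes of locals, n_global_fp_regs = 2, args_size = 20

     apparent_frame_size = ROUND_UP (40, 8) + 2 * 4          =  48
     frame_size          = 48 + ROUND_UP (20, 8)             =  72
     frame_size         += FIRST_PARM_OFFSET (cfun->decl)
     frame_size          = SPARC_STACK_ALIGN (frame_size)

   i.e. the frame covers the locals, the save area for the call-saved
   global and FP registers, the outgoing argument area and the register
   window save area, rounded up to the stack alignment.  */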
5568
5569 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5570
5571 int
5572 sparc_initial_elimination_offset (int to)
5573 {
5574 int offset;
5575
5576 if (to == STACK_POINTER_REGNUM)
5577 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5578 else
5579 offset = 0;
5580
5581 offset += SPARC_STACK_BIAS;
5582 return offset;
5583 }
5584
5585 /* Output any necessary .register pseudo-ops. */
5586
5587 void
5588 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5589 {
5590 int i;
5591
5592 if (TARGET_ARCH32)
5593 return;
5594
5595 /* Check if %g[2367] were used without
5596 .register being printed for them already. */
5597 for (i = 2; i < 8; i++)
5598 {
5599 if (df_regs_ever_live_p (i)
5600 && ! sparc_hard_reg_printed [i])
5601 {
5602 sparc_hard_reg_printed [i] = 1;
5603 /* %g7 is used as TLS base register, use #ignore
5604 for it instead of #scratch. */
5605 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5606 i == 7 ? "ignore" : "scratch");
5607 }
5608 if (i == 3) i = 5;
5609 }
5610 }
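
/* For example, if %g2 and %g7 are both live in a 64-bit function and no
   .register directive has been printed for them yet, the loop above emits:

	.register	%g2, #scratch
	.register	%g7, #ignore

   Only %g2, %g3, %g6 and %g7 are considered: the "if (i == 3) i = 5;"
   bump skips %g4 and %g5.  */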
5611
5612 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5613
5614 #if PROBE_INTERVAL > 4096
5615 #error Cannot use indexed addressing mode for stack probing
5616 #endif
5617
5618 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5619 inclusive. These are offsets from the current stack pointer.
5620
5621 Note that we don't use the REG+REG addressing mode for the probes because
5622 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5623 so the advantage of having a single code path wins here. */
5624
5625 static void
5626 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5627 {
5628 rtx g1 = gen_rtx_REG (Pmode, 1);
5629
5630 /* See if we have a constant small number of probes to generate. If so,
5631 that's the easy case. */
5632 if (size <= PROBE_INTERVAL)
5633 {
5634 emit_move_insn (g1, GEN_INT (first));
5635 emit_insn (gen_rtx_SET (g1,
5636 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5637 emit_stack_probe (plus_constant (Pmode, g1, -size));
5638 }
5639
5640 /* The run-time loop is made up of 9 insns in the generic case while the
5641 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5642 else if (size <= 4 * PROBE_INTERVAL)
5643 {
5644 HOST_WIDE_INT i;
5645
5646 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5647 emit_insn (gen_rtx_SET (g1,
5648 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5649 emit_stack_probe (g1);
5650
5651 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5652 it exceeds SIZE. If only two probes are needed, this will not
5653 generate any code. Then probe at FIRST + SIZE. */
5654 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5655 {
5656 emit_insn (gen_rtx_SET (g1,
5657 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5658 emit_stack_probe (g1);
5659 }
5660
5661 emit_stack_probe (plus_constant (Pmode, g1,
5662 (i - PROBE_INTERVAL) - size));
5663 }
5664
5665 /* Otherwise, do the same as above, but in a loop. Note that we must be
5666 extra careful with variables wrapping around because we might be at
5667 the very top (or the very bottom) of the address space and we have
5668 to be able to handle this case properly; in particular, we use an
5669 equality test for the loop condition. */
5670 else
5671 {
5672 HOST_WIDE_INT rounded_size;
5673 rtx g4 = gen_rtx_REG (Pmode, 4);
5674
5675 emit_move_insn (g1, GEN_INT (first));
5676
5677
5678 /* Step 1: round SIZE to the previous multiple of the interval. */
5679
5680 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5681 emit_move_insn (g4, GEN_INT (rounded_size));
5682
5683
5684 /* Step 2: compute initial and final value of the loop counter. */
5685
5686 /* TEST_ADDR = SP + FIRST. */
5687 emit_insn (gen_rtx_SET (g1,
5688 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5689
5690 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5691 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5692
5693
5694 /* Step 3: the loop
5695
5696 while (TEST_ADDR != LAST_ADDR)
5697 {
5698 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5699 probe at TEST_ADDR
5700 }
5701
5702 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5703 until it is equal to ROUNDED_SIZE. */
5704
5705 if (TARGET_ARCH64)
5706 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5707 else
5708 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5709
5710
5711 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5712 that SIZE is equal to ROUNDED_SIZE. */
5713
5714 if (size != rounded_size)
5715 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5716 }
5717
5718 /* Make sure nothing is scheduled before we are done. */
5719 emit_insn (gen_blockage ());
5720 }
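
/* Illustrative cases for the routine above, assuming the default
   PROBE_INTERVAL of 4096 bytes:

     size = 3000:   a single probe at SP - (first + 3000);
     size = 10000:  unrolled probes at SP - (first + 4096),
                    SP - (first + 8192) and SP - (first + 10000);
     size = 50000:  rounded_size = ROUND_DOWN (50000, 4096) = 49152, so
                    the loop probes every 4096 bytes down to
                    SP - (first + 49152) and a final probe covers the
                    remaining 848 bytes.  */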
5721
5722 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5723 absolute addresses. */
5724
5725 const char *
5726 output_probe_stack_range (rtx reg1, rtx reg2)
5727 {
5728 static int labelno = 0;
5729 char loop_lab[32];
5730 rtx xops[2];
5731
5732 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5733
5734 /* Loop. */
5735 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5736
5737 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5738 xops[0] = reg1;
5739 xops[1] = GEN_INT (-PROBE_INTERVAL);
5740 output_asm_insn ("add\t%0, %1, %0", xops);
5741
5742 /* Test if TEST_ADDR == LAST_ADDR. */
5743 xops[1] = reg2;
5744 output_asm_insn ("cmp\t%0, %1", xops);
5745
5746 /* Probe at TEST_ADDR and branch. */
5747 if (TARGET_ARCH64)
5748 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5749 else
5750 fputs ("\tbne\t", asm_out_file);
5751 assemble_name_raw (asm_out_file, loop_lab);
5752 fputc ('\n', asm_out_file);
5753 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5754 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5755
5756 return "";
5757 }
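
/* With the default 4096-byte interval, the loop emitted above looks
   roughly as follows in the 64-bit case, %g1 being TEST_ADDR and %g4
   being LAST_ADDR as set up by sparc_emit_probe_stack_range:

   .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne,pt	%xcc, .LPSRL0
	 st	%g0, [%g1+2047]

   where the store in the delay slot is the actual probe and 2047 is the
   64-bit SPARC_STACK_BIAS.  */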
5758
5759 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5760 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5761 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5762 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5763 the action to be performed if it returns false. Return the new offset. */
5764
5765 typedef bool (*sorr_pred_t) (unsigned int, int);
5766 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5767
5768 static int
5769 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5770 int offset, int leaf_function, sorr_pred_t save_p,
5771 sorr_act_t action_true, sorr_act_t action_false)
5772 {
5773 unsigned int i;
5774 rtx mem;
5775 rtx_insn *insn;
5776
5777 if (TARGET_ARCH64 && high <= 32)
5778 {
5779 int fp_offset = -1;
5780
5781 for (i = low; i < high; i++)
5782 {
5783 if (save_p (i, leaf_function))
5784 {
5785 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5786 base, offset));
5787 if (action_true == SORR_SAVE)
5788 {
5789 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5790 RTX_FRAME_RELATED_P (insn) = 1;
5791 }
5792 else /* action_true == SORR_RESTORE */
5793 {
5794 /* The frame pointer must be restored last since its old
5795 value may be used as base address for the frame. This
5796 is problematic in 64-bit mode only because of the lack
5797 of a double-word load instruction. */
5798 if (i == HARD_FRAME_POINTER_REGNUM)
5799 fp_offset = offset;
5800 else
5801 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5802 }
5803 offset += 8;
5804 }
5805 else if (action_false == SORR_ADVANCE)
5806 offset += 8;
5807 }
5808
5809 if (fp_offset >= 0)
5810 {
5811 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5812 emit_move_insn (hard_frame_pointer_rtx, mem);
5813 }
5814 }
5815 else
5816 {
5817 for (i = low; i < high; i += 2)
5818 {
5819 bool reg0 = save_p (i, leaf_function);
5820 bool reg1 = save_p (i + 1, leaf_function);
5821 machine_mode mode;
5822 int regno;
5823
5824 if (reg0 && reg1)
5825 {
5826 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5827 regno = i;
5828 }
5829 else if (reg0)
5830 {
5831 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5832 regno = i;
5833 }
5834 else if (reg1)
5835 {
5836 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5837 regno = i + 1;
5838 offset += 4;
5839 }
5840 else
5841 {
5842 if (action_false == SORR_ADVANCE)
5843 offset += 8;
5844 continue;
5845 }
5846
5847 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5848 if (action_true == SORR_SAVE)
5849 {
5850 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5851 RTX_FRAME_RELATED_P (insn) = 1;
5852 if (mode == DImode)
5853 {
5854 rtx set1, set2;
5855 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5856 offset));
5857 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5858 RTX_FRAME_RELATED_P (set1) = 1;
5859 mem
5860 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5861 offset + 4));
5862 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5863 RTX_FRAME_RELATED_P (set2) = 1;
5864 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5865 gen_rtx_PARALLEL (VOIDmode,
5866 gen_rtvec (2, set1, set2)));
5867 }
5868 }
5869 else /* action_true == SORR_RESTORE */
5870 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5871
5872 /* Bump and round down to double word
5873 in case we already bumped by 4. */
5874 offset = ROUND_DOWN (offset + 8, 8);
5875 }
5876 }
5877
5878 return offset;
5879 }
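
/* For example, in 32-bit mode two adjacent live registers such as %l0/%l1
   are handled with a single DImode move, a lone live register with an
   SImode move (bumping the offset by 4 first if it is the second one of
   the pair), and live FP register pairs with a DFmode move; the offset is
   then advanced to the next double-word boundary.  */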
5880
5881 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5882
5883 static rtx
5884 emit_adjust_base_to_offset (rtx base, int offset)
5885 {
5886 /* ??? This might be optimized a little as %g1 might already have a
5887 value close enough that a single add insn will do. */
5888 /* ??? Although, all of this is probably only a temporary fix because
5889 if %g1 can hold a function result, then sparc_expand_epilogue will
5890 lose (the result will be clobbered). */
5891 rtx new_base = gen_rtx_REG (Pmode, 1);
5892 emit_move_insn (new_base, GEN_INT (offset));
5893 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5894 return new_base;
5895 }
5896
5897 /* Emit code to save/restore call-saved global and FP registers. */
5898
5899 static void
5900 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5901 {
5902 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5903 {
5904 base = emit_adjust_base_to_offset (base, offset);
5905 offset = 0;
5906 }
5907
5908 offset
5909 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5910 save_global_or_fp_reg_p, action, SORR_NONE);
5911 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5912 save_global_or_fp_reg_p, action, SORR_NONE);
5913 }
5914
5915 /* Emit code to save/restore call-saved local and in registers. */
5916
5917 static void
5918 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5919 {
5920 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5921 {
5922 base = emit_adjust_base_to_offset (base, offset);
5923 offset = 0;
5924 }
5925
5926 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5927 save_local_or_in_reg_p, action, SORR_ADVANCE);
5928 }
5929
5930 /* Emit a window_save insn. */
5931
5932 static rtx_insn *
5933 emit_window_save (rtx increment)
5934 {
5935 rtx_insn *insn = emit_insn (gen_window_save (increment));
5936 RTX_FRAME_RELATED_P (insn) = 1;
5937
5938 /* The incoming return address (%o7) is saved in %i7. */
5939 add_reg_note (insn, REG_CFA_REGISTER,
5940 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5941 gen_rtx_REG (Pmode,
5942 INCOMING_RETURN_ADDR_REGNUM)));
5943
5944 /* The window save event. */
5945 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5946
5947 /* The CFA is %fp, the hard frame pointer. */
5948 add_reg_note (insn, REG_CFA_DEF_CFA,
5949 plus_constant (Pmode, hard_frame_pointer_rtx,
5950 INCOMING_FRAME_SP_OFFSET));
5951
5952 return insn;
5953 }
5954
5955 /* Generate an increment for the stack pointer. */
5956
5957 static rtx
5958 gen_stack_pointer_inc (rtx increment)
5959 {
5960 return gen_rtx_SET (stack_pointer_rtx,
5961 gen_rtx_PLUS (Pmode,
5962 stack_pointer_rtx,
5963 increment));
5964 }
5965
5966 /* Expand the function prologue. The prologue is responsible for reserving
5967 storage for the frame, saving the call-saved registers and loading the
5968 GOT register if needed. */
5969
5970 void
5971 sparc_expand_prologue (void)
5972 {
5973 HOST_WIDE_INT size;
5974 rtx_insn *insn;
5975
5976 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5977 on the final value of the flag means deferring the prologue/epilogue
5978 expansion until just before the second scheduling pass, which is too
5979 late to emit multiple epilogues or return insns.
5980
5981 Of course we are making the assumption that the value of the flag
5982 will not change between now and its final value. Of the three parts
5983 of the formula, only the last one can reasonably vary. Let's take a
5984 closer look, after assuming that the first two are set to true
5985 (otherwise the last value is effectively silenced).
5986
5987 If only_leaf_regs_used returns false, the global predicate will also
5988 be false so the actual frame size calculated below will be positive.
5989 As a consequence, the save_register_window insn will be emitted in
5990 the instruction stream; now this insn explicitly references %fp
5991 which is not a leaf register so only_leaf_regs_used will always
5992 return false subsequently.
5993
5994 If only_leaf_regs_used returns true, we hope that the subsequent
5995 optimization passes won't cause non-leaf registers to pop up. For
5996 example, the regrename pass has special provisions to not rename to
5997 non-leaf registers in a leaf function. */
5998 sparc_leaf_function_p
5999 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6000
6001 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6002
6003 if (flag_stack_usage_info)
6004 current_function_static_stack_size = size;
6005
6006 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6007 || flag_stack_clash_protection)
6008 {
6009 if (crtl->is_leaf && !cfun->calls_alloca)
6010 {
6011 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6012 sparc_emit_probe_stack_range (get_stack_check_protect (),
6013 size - get_stack_check_protect ());
6014 }
6015 else if (size > 0)
6016 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6017 }
6018
6019 if (size == 0)
6020 ; /* do nothing. */
6021 else if (sparc_leaf_function_p)
6022 {
6023 rtx size_int_rtx = GEN_INT (-size);
6024
6025 if (size <= 4096)
6026 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6027 else if (size <= 8192)
6028 {
6029 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6030 RTX_FRAME_RELATED_P (insn) = 1;
6031
6032 /* %sp is still the CFA register. */
6033 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6034 }
6035 else
6036 {
6037 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6038 emit_move_insn (size_rtx, size_int_rtx);
6039 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6040 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6041 gen_stack_pointer_inc (size_int_rtx));
6042 }
6043
6044 RTX_FRAME_RELATED_P (insn) = 1;
6045 }
6046 else
6047 {
6048 rtx size_int_rtx = GEN_INT (-size);
6049
6050 if (size <= 4096)
6051 emit_window_save (size_int_rtx);
6052 else if (size <= 8192)
6053 {
6054 emit_window_save (GEN_INT (-4096));
6055
6056 /* %sp is not the CFA register anymore. */
6057 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6058
6059 /* Make sure no %fp-based store is issued until after the frame is
6060 established. The offset between the frame pointer and the stack
6061 pointer is calculated relative to the value of the stack pointer
6062 at the end of the function prologue, and moving instructions that
6063 access the stack via the frame pointer between the instructions
6064 that decrement the stack pointer could result in accessing the
6065 register window save area, which is volatile. */
6066 emit_insn (gen_frame_blockage ());
6067 }
6068 else
6069 {
6070 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6071 emit_move_insn (size_rtx, size_int_rtx);
6072 emit_window_save (size_rtx);
6073 }
6074 }
6075
6076 if (sparc_leaf_function_p)
6077 {
6078 sparc_frame_base_reg = stack_pointer_rtx;
6079 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6080 }
6081 else
6082 {
6083 sparc_frame_base_reg = hard_frame_pointer_rtx;
6084 sparc_frame_base_offset = SPARC_STACK_BIAS;
6085 }
6086
6087 if (sparc_n_global_fp_regs > 0)
6088 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6089 sparc_frame_base_offset
6090 - sparc_apparent_frame_size,
6091 SORR_SAVE);
6092
6093 /* Advertise that the data calculated just above are now valid. */
6094 sparc_prologue_data_valid_p = true;
6095 }
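
/* To illustrate the size-dependent strategy above: a leaf function with a
   96-byte frame gets a single %sp decrement, a 6000-byte frame is split
   into -4096 and -1904 adjustments so that each constant fits in the
   13-bit signed immediate field of the instruction, and anything larger
   is decremented through %g1.  A non-leaf function uses a register window
   save (emit_window_save) with the same three-way split.  */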
6096
6097 /* Expand the function prologue in the flat window model. The prologue is
6098 responsible for reserving storage for the frame, saving the call-saved
6099 registers and loading the GOT register if needed. */
6100
6101 void
6102 sparc_flat_expand_prologue (void)
6103 {
6104 HOST_WIDE_INT size;
6105 rtx_insn *insn;
6106
6107 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6108
6109 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6110
6111 if (flag_stack_usage_info)
6112 current_function_static_stack_size = size;
6113
6114 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6115 || flag_stack_clash_protection)
6116 {
6117 if (crtl->is_leaf && !cfun->calls_alloca)
6118 {
6119 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6120 sparc_emit_probe_stack_range (get_stack_check_protect (),
6121 size - get_stack_check_protect ());
6122 }
6123 else if (size > 0)
6124 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6125 }
6126
6127 if (sparc_save_local_in_regs_p)
6128 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6129 SORR_SAVE);
6130
6131 if (size == 0)
6132 ; /* do nothing. */
6133 else
6134 {
6135 rtx size_int_rtx, size_rtx;
6136
6137 size_rtx = size_int_rtx = GEN_INT (-size);
6138
6139 /* We establish the frame (i.e. decrement the stack pointer) first, even
6140 if we use a frame pointer, because we cannot clobber any call-saved
6141 registers, including the frame pointer, if we haven't created a new
6142 register save area, for the sake of compatibility with the ABI. */
6143 if (size <= 4096)
6144 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6145 else if (size <= 8192 && !frame_pointer_needed)
6146 {
6147 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6148 RTX_FRAME_RELATED_P (insn) = 1;
6149 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6150 }
6151 else
6152 {
6153 size_rtx = gen_rtx_REG (Pmode, 1);
6154 emit_move_insn (size_rtx, size_int_rtx);
6155 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6156 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6157 gen_stack_pointer_inc (size_int_rtx));
6158 }
6159 RTX_FRAME_RELATED_P (insn) = 1;
6160
6161 /* Ensure nothing is scheduled until after the frame is established. */
6162 emit_insn (gen_blockage ());
6163
6164 if (frame_pointer_needed)
6165 {
6166 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6167 gen_rtx_MINUS (Pmode,
6168 stack_pointer_rtx,
6169 size_rtx)));
6170 RTX_FRAME_RELATED_P (insn) = 1;
6171
6172 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6173 gen_rtx_SET (hard_frame_pointer_rtx,
6174 plus_constant (Pmode, stack_pointer_rtx,
6175 size)));
6176 }
6177
6178 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6179 {
6180 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6181 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6182
6183 insn = emit_move_insn (i7, o7);
6184 RTX_FRAME_RELATED_P (insn) = 1;
6185
6186 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6187
6188 /* Prevent this instruction from ever being considered dead,
6189 even if this function has no epilogue. */
6190 emit_use (i7);
6191 }
6192 }
6193
6194 if (frame_pointer_needed)
6195 {
6196 sparc_frame_base_reg = hard_frame_pointer_rtx;
6197 sparc_frame_base_offset = SPARC_STACK_BIAS;
6198 }
6199 else
6200 {
6201 sparc_frame_base_reg = stack_pointer_rtx;
6202 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6203 }
6204
6205 if (sparc_n_global_fp_regs > 0)
6206 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6207 sparc_frame_base_offset
6208 - sparc_apparent_frame_size,
6209 SORR_SAVE);
6210
6211 /* Advertise that the data calculated just above are now valid. */
6212 sparc_prologue_data_valid_p = true;
6213 }
6214
6215 /* This function generates the assembly code for function entry, which boils
6216 down to emitting the necessary .register directives. */
6217
6218 static void
6219 sparc_asm_function_prologue (FILE *file)
6220 {
6221 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6222 if (!TARGET_FLAT)
6223 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6224
6225 sparc_output_scratch_registers (file);
6226 }
6227
6228 /* Expand the function epilogue, either normal or part of a sibcall.
6229 We emit all the instructions except the return or the call. */
6230
6231 void
6232 sparc_expand_epilogue (bool for_eh)
6233 {
6234 HOST_WIDE_INT size = sparc_frame_size;
6235
6236 if (cfun->calls_alloca)
6237 emit_insn (gen_frame_blockage ());
6238
6239 if (sparc_n_global_fp_regs > 0)
6240 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6241 sparc_frame_base_offset
6242 - sparc_apparent_frame_size,
6243 SORR_RESTORE);
6244
6245 if (size == 0 || for_eh)
6246 ; /* do nothing. */
6247 else if (sparc_leaf_function_p)
6248 {
6249 if (size <= 4096)
6250 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6251 else if (size <= 8192)
6252 {
6253 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6254 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6255 }
6256 else
6257 {
6258 rtx reg = gen_rtx_REG (Pmode, 1);
6259 emit_move_insn (reg, GEN_INT (size));
6260 emit_insn (gen_stack_pointer_inc (reg));
6261 }
6262 }
6263 }
6264
6265 /* Expand the function epilogue in the flat window model, either normal or
6266 part of a sibcall. We emit all the instructions except the return or the call. */
6267
6268 void
6269 sparc_flat_expand_epilogue (bool for_eh)
6270 {
6271 HOST_WIDE_INT size = sparc_frame_size;
6272
6273 if (sparc_n_global_fp_regs > 0)
6274 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6275 sparc_frame_base_offset
6276 - sparc_apparent_frame_size,
6277 SORR_RESTORE);
6278
6279 /* If we have a frame pointer, we'll need both to restore it before the
6280 frame is destroyed and use its current value in destroying the frame.
6281 Since we don't have an atomic way to do that in the flat window model,
6282 we save the current value into a temporary register (%g1). */
6283 if (frame_pointer_needed && !for_eh)
6284 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6285
6286 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6287 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6288 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6289
6290 if (sparc_save_local_in_regs_p)
6291 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6292 sparc_frame_base_offset,
6293 SORR_RESTORE);
6294
6295 if (size == 0 || for_eh)
6296 ; /* do nothing. */
6297 else if (frame_pointer_needed)
6298 {
6299 /* Make sure the frame is destroyed after everything else is done. */
6300 emit_insn (gen_blockage ());
6301
6302 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6303 }
6304 else
6305 {
6306 /* Likewise. */
6307 emit_insn (gen_blockage ());
6308
6309 if (size <= 4096)
6310 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6311 else if (size <= 8192)
6312 {
6313 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6314 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6315 }
6316 else
6317 {
6318 rtx reg = gen_rtx_REG (Pmode, 1);
6319 emit_move_insn (reg, GEN_INT (size));
6320 emit_insn (gen_stack_pointer_inc (reg));
6321 }
6322 }
6323 }
6324
6325 /* Return true if it is appropriate to emit `return' instructions in the
6326 body of a function. */
6327
6328 bool
6329 sparc_can_use_return_insn_p (void)
6330 {
6331 return sparc_prologue_data_valid_p
6332 && sparc_n_global_fp_regs == 0
6333 && TARGET_FLAT
6334 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6335 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6336 }
6337
6338 /* This function generates the assembly code for function exit. */
6339
6340 static void
6341 sparc_asm_function_epilogue (FILE *file)
6342 {
6343 /* If the last two instructions of a function are "call foo; dslot;"
6344 the return address might point to the first instruction in the next
6345 function and we have to output a dummy nop for the sake of sane
6346 backtraces in such cases. This is pointless for sibling calls since
6347 the return address is explicitly adjusted. */
6348
6349 rtx_insn *insn = get_last_insn ();
6350
6351 rtx last_real_insn = prev_real_insn (insn);
6352 if (last_real_insn
6353 && NONJUMP_INSN_P (last_real_insn)
6354 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6355 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6356
6357 if (last_real_insn
6358 && CALL_P (last_real_insn)
6359 && !SIBLING_CALL_P (last_real_insn))
6360 fputs("\tnop\n", file);
6361
6362 sparc_output_deferred_case_vectors ();
6363 }
6364
6365 /* Output a 'restore' instruction. */
6366
6367 static void
6368 output_restore (rtx pat)
6369 {
6370 rtx operands[3];
6371
6372 if (! pat)
6373 {
6374 fputs ("\t restore\n", asm_out_file);
6375 return;
6376 }
6377
6378 gcc_assert (GET_CODE (pat) == SET);
6379
6380 operands[0] = SET_DEST (pat);
6381 pat = SET_SRC (pat);
6382
6383 switch (GET_CODE (pat))
6384 {
6385 case PLUS:
6386 operands[1] = XEXP (pat, 0);
6387 operands[2] = XEXP (pat, 1);
6388 output_asm_insn (" restore %r1, %2, %Y0", operands);
6389 break;
6390 case LO_SUM:
6391 operands[1] = XEXP (pat, 0);
6392 operands[2] = XEXP (pat, 1);
6393 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6394 break;
6395 case ASHIFT:
6396 operands[1] = XEXP (pat, 0);
6397 gcc_assert (XEXP (pat, 1) == const1_rtx);
6398 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6399 break;
6400 default:
6401 operands[1] = pat;
6402 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6403 break;
6404 }
6405 }
6406
6407 /* Output a return. */
6408
6409 const char *
6410 output_return (rtx_insn *insn)
6411 {
6412 if (crtl->calls_eh_return)
6413 {
6414 /* If the function uses __builtin_eh_return, the eh_return
6415 machinery occupies the delay slot. */
6416 gcc_assert (!final_sequence);
6417
6418 if (flag_delayed_branch)
6419 {
6420 if (!TARGET_FLAT && TARGET_V9)
6421 fputs ("\treturn\t%i7+8\n", asm_out_file);
6422 else
6423 {
6424 if (!TARGET_FLAT)
6425 fputs ("\trestore\n", asm_out_file);
6426
6427 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6428 }
6429
6430 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6431 }
6432 else
6433 {
6434 if (!TARGET_FLAT)
6435 fputs ("\trestore\n", asm_out_file);
6436
6437 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6438 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6439 }
6440 }
6441 else if (sparc_leaf_function_p || TARGET_FLAT)
6442 {
6443 /* This is a leaf or flat function so we don't have to bother restoring
6444 the register window, which frees us from dealing with the convoluted
6445 semantics of restore/return. We simply output the jump to the
6446 return address and the insn in the delay slot (if any). */
6447
6448 return "jmp\t%%o7+%)%#";
6449 }
6450 else
6451 {
6452 /* This is a regular function so we have to restore the register window.
6453 We may have a pending insn for the delay slot, which will be either
6454 combined with the 'restore' instruction or put in the delay slot of
6455 the 'return' instruction. */
6456
6457 if (final_sequence)
6458 {
6459 rtx_insn *delay;
6460 rtx pat;
6461
6462 delay = NEXT_INSN (insn);
6463 gcc_assert (delay);
6464
6465 pat = PATTERN (delay);
6466
6467 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6468 {
6469 epilogue_renumber (&pat, 0);
6470 return "return\t%%i7+%)%#";
6471 }
6472 else
6473 {
6474 output_asm_insn ("jmp\t%%i7+%)", NULL);
6475
6476 /* We're going to output the insn in the delay slot manually.
6477 Make sure to output its source location first. */
6478 PATTERN (delay) = gen_blockage ();
6479 INSN_CODE (delay) = -1;
6480 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6481 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6482
6483 output_restore (pat);
6484 }
6485 }
6486 else
6487 {
6488 /* The delay slot is empty. */
6489 if (TARGET_V9)
6490 return "return\t%%i7+%)\n\t nop";
6491 else if (flag_delayed_branch)
6492 return "jmp\t%%i7+%)\n\t restore";
6493 else
6494 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6495 }
6496 }
6497
6498 return "";
6499 }
6500
6501 /* Output a sibling call. */
6502
6503 const char *
6504 output_sibcall (rtx_insn *insn, rtx call_operand)
6505 {
6506 rtx operands[1];
6507
6508 gcc_assert (flag_delayed_branch);
6509
6510 operands[0] = call_operand;
6511
6512 if (sparc_leaf_function_p || TARGET_FLAT)
6513 {
6514 /* This is a leaf or flat function so we don't have to bother restoring
6515 the register window. We simply output the jump to the function and
6516 the insn in the delay slot (if any). */
6517
6518 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6519
6520 if (final_sequence)
6521 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6522 operands);
6523 else
6524 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6525 it into a branch if possible. */
6526 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6527 operands);
6528 }
6529 else
6530 {
6531 /* This is a regular function so we have to restore the register window.
6532 We may have a pending insn for the delay slot, which will be combined
6533 with the 'restore' instruction. */
6534
6535 output_asm_insn ("call\t%a0, 0", operands);
6536
6537 if (final_sequence)
6538 {
6539 rtx_insn *delay;
6540 rtx pat;
6541
6542 delay = NEXT_INSN (insn);
6543 gcc_assert (delay);
6544
6545 pat = PATTERN (delay);
6546
6547 /* We're going to output the insn in the delay slot manually.
6548 Make sure to output its source location first. */
6549 PATTERN (delay) = gen_blockage ();
6550 INSN_CODE (delay) = -1;
6551 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6552 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6553
6554 output_restore (pat);
6555 }
6556 else
6557 output_restore (NULL_RTX);
6558 }
6559
6560 return "";
6561 }
6562 \f
6563 /* Functions for handling argument passing.
6564
6565 For 32-bit, the first 6 args are normally in registers and the rest are
6566 pushed. Any arg that starts within the first 6 words is at least
6567 partially passed in a register unless its data type forbids.
6568
6569 For 64-bit, the argument registers are laid out as an array of 16 elements
6570 and arguments are added sequentially. The first 6 int args and up to the
6571 first 16 fp args (depending on size) are passed in regs.
6572
6573 Slot Stack Integral Float Float in structure Double Long Double
6574 ---- ----- -------- ----- ------------------ ------ -----------
6575 15 [SP+248] %f31 %f30,%f31 %d30
6576 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6577 13 [SP+232] %f27 %f26,%f27 %d26
6578 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6579 11 [SP+216] %f23 %f22,%f23 %d22
6580 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6581 9 [SP+200] %f19 %f18,%f19 %d18
6582 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6583 7 [SP+184] %f15 %f14,%f15 %d14
6584 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6585 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6586 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6587 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6588 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6589 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6590 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6591
6592 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6593
6594 Integral arguments are always passed as 64-bit quantities appropriately
6595 extended.
6596
6597 Passing of floating point values is handled as follows.
6598 If a prototype is in scope:
6599 If the value is in a named argument (i.e. not a stdarg function or a
6600 value not part of the `...') then the value is passed in the appropriate
6601 fp reg.
6602 If the value is part of the `...' and is passed in one of the first 6
6603 slots then the value is passed in the appropriate int reg.
6604 If the value is part of the `...' and is not passed in one of the first 6
6605 slots then the value is passed in memory.
6606 If a prototype is not in scope:
6607 If the value is one of the first 6 arguments the value is passed in the
6608 appropriate integer reg and the appropriate fp reg.
6609 If the value is not one of the first 6 arguments the value is passed in
6610 the appropriate fp reg and in memory.
6611
6612
6613 Summary of the calling conventions implemented by GCC on the SPARC:
6614
6615 32-bit ABI:
6616 size argument return value
6617
6618 small integer <4 int. reg. int. reg.
6619 word 4 int. reg. int. reg.
6620 double word 8 int. reg. int. reg.
6621
6622 _Complex small integer <8 int. reg. int. reg.
6623 _Complex word 8 int. reg. int. reg.
6624 _Complex double word 16 memory int. reg.
6625
6626 vector integer <=8 int. reg. FP reg.
6627 vector integer >8 memory memory
6628
6629 float 4 int. reg. FP reg.
6630 double 8 int. reg. FP reg.
6631 long double 16 memory memory
6632
6633 _Complex float 8 memory FP reg.
6634 _Complex double 16 memory FP reg.
6635 _Complex long double 32 memory FP reg.
6636
6637 vector float any memory memory
6638
6639 aggregate any memory memory
6640
6641
6642
6643 64-bit ABI:
6644 size argument return value
6645
6646 small integer <8 int. reg. int. reg.
6647 word 8 int. reg. int. reg.
6648 double word 16 int. reg. int. reg.
6649
6650 _Complex small integer <16 int. reg. int. reg.
6651 _Complex word 16 int. reg. int. reg.
6652 _Complex double word 32 memory int. reg.
6653
6654 vector integer <=16 FP reg. FP reg.
6655 vector integer 16<s<=32 memory FP reg.
6656 vector integer >32 memory memory
6657
6658 float 4 FP reg. FP reg.
6659 double 8 FP reg. FP reg.
6660 long double 16 FP reg. FP reg.
6661
6662 _Complex float 8 FP reg. FP reg.
6663 _Complex double 16 FP reg. FP reg.
6664 _Complex long double 32 memory FP reg.
6665
6666 vector float <=16 FP reg. FP reg.
6667 vector float 16<s<=32 memory FP reg.
6668 vector float >32 memory memory
6669
6670 aggregate <=16 reg. reg.
6671 aggregate 16<s<=32 memory reg.
6672 aggregate >32 memory memory
6673
6674
6675
6676 Note #1: complex floating-point types follow the extended SPARC ABIs as
6677 implemented by the Sun compiler.
6678
6679 Note #2: integer vector types follow the scalar floating-point types
6680 conventions to match what is implemented by the Sun VIS SDK.
6681
6682 Note #3: floating-point vector types follow the aggregate types
6683 conventions. */
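
/* As a concrete reading of the tables above for a prototyped 64-bit call,
   with "struct two_floats" being just an illustrative name:

     struct two_floats { float x, y; };
     void f (int i, double d, struct two_floats s);

   the int occupies slot 0 and is passed in %o0, the double occupies
   slot 1 and is passed in %d2, and the 8-byte structure occupies slot 2
   and is passed in %f4/%f5, since aggregates of at most 16 bytes go in
   registers.  */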
6684
6685
6686 /* Maximum number of int regs for args. */
6687 #define SPARC_INT_ARG_MAX 6
6688 /* Maximum number of fp regs for args. */
6689 #define SPARC_FP_ARG_MAX 16
6690 /* Number of words (partially) occupied for a given size in units. */
6691 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
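
/* E.g. CEIL_NWORDS (12) is 2 on a 64-bit target (UNITS_PER_WORD == 8)
   and 3 on a 32-bit target (UNITS_PER_WORD == 4).  */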
6692
6693 /* Handle the INIT_CUMULATIVE_ARGS macro.
6694 Initialize a variable CUM of type CUMULATIVE_ARGS
6695 for a call to a function whose data type is FNTYPE.
6696 For a library call, FNTYPE is 0. */
6697
6698 void
6699 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6700 {
6701 cum->words = 0;
6702 cum->prototype_p = fntype && prototype_p (fntype);
6703 cum->libcall_p = !fntype;
6704 }
6705
6706 /* Handle promotion of pointer and integer arguments. */
6707
6708 static machine_mode
6709 sparc_promote_function_mode (const_tree type, machine_mode mode,
6710 int *punsignedp, const_tree, int)
6711 {
6712 if (type && POINTER_TYPE_P (type))
6713 {
6714 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6715 return Pmode;
6716 }
6717
6718 /* Integral arguments are passed as full words, as per the ABI. */
6719 if (GET_MODE_CLASS (mode) == MODE_INT
6720 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6721 return word_mode;
6722
6723 return mode;
6724 }
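
/* For instance, a 'short' or 'char' argument is promoted to word_mode
   (SImode in 32-bit mode, DImode in 64-bit mode), while pointer arguments
   are extended to Pmode.  */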
6725
6726 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6727
6728 static bool
6729 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6730 {
6731 return TARGET_ARCH64 ? true : false;
6732 }
6733
6734 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6735 Specify whether to pass the argument by reference. */
6736
6737 static bool
6738 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6739 machine_mode mode, const_tree type,
6740 bool named ATTRIBUTE_UNUSED)
6741 {
6742 if (TARGET_ARCH32)
6743 /* Original SPARC 32-bit ABI says that structures and unions,
6744 and quad-precision floats are passed by reference.
6745 All other base types are passed in registers.
6746
6747 Extended ABI (as implemented by the Sun compiler) says that all
6748 complex floats are passed by reference. Pass complex integers
6749 in registers up to 8 bytes. More generally, enforce the 2-word
6750 cap for passing arguments in registers.
6751
6752 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6753 vectors are passed like floats of the same size, that is in
6754 registers up to 8 bytes. Pass all vector floats by reference
6755 like structure and unions. */
6756 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6757 || mode == SCmode
6758 /* Catch CDImode, TFmode, DCmode and TCmode. */
6759 || GET_MODE_SIZE (mode) > 8
6760 || (type
6761 && VECTOR_TYPE_P (type)
6762 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6763 else
6764 /* Original SPARC 64-bit ABI says that structures and unions
6765 smaller than 16 bytes are passed in registers, as well as
6766 all other base types.
6767
6768 Extended ABI (as implemented by the Sun compiler) says that
6769 complex floats are passed in registers up to 16 bytes. Pass
6770 all complex integers in registers up to 16 bytes. More generally,
6771 enforce the 2-word cap for passing arguments in registers.
6772
6773 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6774 vectors are passed like floats of the same size, that is in
6775 registers (up to 16 bytes). Pass all vector floats like structure
6776 and unions. */
6777 return ((type
6778 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6779 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6780 /* Catch CTImode and TCmode. */
6781 || GET_MODE_SIZE (mode) > 16);
6782 }
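
/* A few consequences of the rules above: in 32-bit mode a quad-precision
   float (TFmode, 16 bytes) and all structures are passed by reference,
   whereas a '_Complex int' (8 bytes) still goes in registers; in 64-bit
   mode a 16-byte structure is passed in registers but a 32-byte one is
   passed by reference.  */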
6783
6784 /* Traverse the record TYPE recursively and call FUNC on its fields.
6785 NAMED is true if this is for a named parameter. DATA is passed
6786 to FUNC for each field. OFFSET is the starting position and
6787 PACKED is true if we are inside a packed record. */
6788
6789 template <typename T, void Func (const_tree, int, bool, T*)>
6790 static void
6791 traverse_record_type (const_tree type, bool named, T *data,
6792 int offset = 0, bool packed = false)
6793 {
6794 /* The ABI obviously doesn't specify how packed structures are passed.
6795 These are passed in integer regs if possible, otherwise memory. */
6796 if (!packed)
6797 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6798 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6799 {
6800 packed = true;
6801 break;
6802 }
6803
6804 /* Walk the real fields, but skip those with no size or a zero size.
6805 ??? Fields with variable offset are handled as having zero offset. */
6806 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6807 if (TREE_CODE (field) == FIELD_DECL)
6808 {
6809 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6810 continue;
6811
6812 int bitpos = offset;
6813 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6814 bitpos += int_bit_position (field);
6815
6816 tree field_type = TREE_TYPE (field);
6817 if (TREE_CODE (field_type) == RECORD_TYPE)
6818 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6819 packed);
6820 else
6821 {
6822 const bool fp_type
6823 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6824 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6825 data);
6826 }
6827 }
6828 }
6829
6830 /* Handle recursive register classifying for structure layout. */
6831
6832 typedef struct
6833 {
6834 bool fp_regs; /* true if field eligible to FP registers. */
6835 bool fp_regs_in_first_word; /* true if such field in first word. */
6836 } classify_data_t;
6837
6838 /* A subroutine of function_arg_slotno. Classify the field. */
6839
6840 inline void
6841 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6842 {
6843 if (fp)
6844 {
6845 data->fp_regs = true;
6846 if (bitpos < BITS_PER_WORD)
6847 data->fp_regs_in_first_word = true;
6848 }
6849 }
6850
6851 /* Compute the slot number to pass an argument in.
6852 Return the slot number or -1 if passing on the stack.
6853
6854 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6855 the preceding args and about the function being called.
6856 MODE is the argument's machine mode.
6857 TYPE is the data type of the argument (as a tree).
6858 This is null for libcalls where that information may
6859 not be available.
6860 NAMED is nonzero if this argument is a named parameter
6861 (otherwise it is an extra parameter matching an ellipsis).
6862 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6863 *PREGNO records the register number to use if scalar type.
6864 *PPADDING records the amount of padding needed in words. */
6865
6866 static int
6867 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6868 const_tree type, bool named, bool incoming,
6869 int *pregno, int *ppadding)
6870 {
6871 const int regbase
6872 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6873 int slotno = cum->words, regno;
6874 enum mode_class mclass = GET_MODE_CLASS (mode);
6875
6876 if (type && TREE_ADDRESSABLE (type))
6877 return -1;
6878
6879 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6880 if (TARGET_ARCH64
6881 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6882 && (slotno & 1) != 0)
6883 {
6884 slotno++;
6885 *ppadding = 1;
6886 }
6887 else
6888 *ppadding = 0;
6889
6890 /* Vector types deserve special treatment because they are polymorphic wrt
6891 their mode, depending upon whether VIS instructions are enabled. */
6892 if (type && VECTOR_TYPE_P (type))
6893 {
6894 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6895 {
6896 /* The SPARC port defines no floating-point vector modes. */
6897 gcc_assert (mode == BLKmode);
6898 }
6899 else
6900 {
6901 /* Integer vector types should either have a vector
6902 mode or an integral mode, because we are guaranteed
6903 by pass_by_reference that their size is not greater
6904 than 16 bytes and TImode is 16-byte wide. */
6905 gcc_assert (mode != BLKmode);
6906
6907 /* Integer vectors are handled like floats as per
6908 the Sun VIS SDK. */
6909 mclass = MODE_FLOAT;
6910 }
6911 }
6912
6913 switch (mclass)
6914 {
6915 case MODE_FLOAT:
6916 case MODE_COMPLEX_FLOAT:
6917 case MODE_VECTOR_INT:
6918 if (TARGET_ARCH64 && TARGET_FPU && named)
6919 {
6920 /* If all arg slots are filled, then must pass on stack. */
6921 if (slotno >= SPARC_FP_ARG_MAX)
6922 return -1;
6923
6924 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6925 /* Arguments filling only one single FP register are
6926 right-justified in the outer double FP register. */
6927 if (GET_MODE_SIZE (mode) <= 4)
6928 regno++;
6929 break;
6930 }
6931 /* fallthrough */
6932
6933 case MODE_INT:
6934 case MODE_COMPLEX_INT:
6935 /* If all arg slots are filled, then must pass on stack. */
6936 if (slotno >= SPARC_INT_ARG_MAX)
6937 return -1;
6938
6939 regno = regbase + slotno;
6940 break;
6941
6942 case MODE_RANDOM:
6943 /* MODE is VOIDmode when generating the actual call. */
6944 if (mode == VOIDmode)
6945 return -1;
6946
6947 if (TARGET_64BIT && TARGET_FPU && named
6948 && type
6949 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6950 {
6951 /* If all arg slots are filled, then must pass on stack. */
6952 if (slotno >= SPARC_FP_ARG_MAX)
6953 return -1;
6954
6955 if (TREE_CODE (type) == RECORD_TYPE)
6956 {
6957 classify_data_t data = { false, false };
6958 traverse_record_type<classify_data_t, classify_registers>
6959 (type, named, &data);
6960
6961 if (data.fp_regs)
6962 {
6963 /* If all FP slots are filled except for the last one and
6964 there is no FP field in the first word, then must pass
6965 on stack. */
6966 if (slotno >= SPARC_FP_ARG_MAX - 1
6967 && !data.fp_regs_in_first_word)
6968 return -1;
6969 }
6970 else
6971 {
6972 /* If all int slots are filled, then must pass on stack. */
6973 if (slotno >= SPARC_INT_ARG_MAX)
6974 return -1;
6975 }
6976
6977 /* PREGNO isn't set since both int and FP regs can be used. */
6978 return slotno;
6979 }
6980
6981 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6982 }
6983 else
6984 {
6985 /* If all arg slots are filled, then must pass on stack. */
6986 if (slotno >= SPARC_INT_ARG_MAX)
6987 return -1;
6988
6989 regno = regbase + slotno;
6990 }
6991 break;
6992
6993 default :
6994 gcc_unreachable ();
6995 }
6996
6997 *pregno = regno;
6998 return slotno;
6999 }
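
/* Example: for a prototyped 64-bit call f (int, int, double), the double
   is a named argument in slot 2 with class MODE_FLOAT, so it gets
   regno = SPARC_FP_ARG_FIRST + 2 * 2, i.e. the double register %d4 of
   the table further above; had the argument been a 4-byte float, regno
   would have been bumped by one so that the value sits in the odd
   half %f5.  */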
7000
7001 /* Handle recursive register counting/assigning for structure layout. */
7002
7003 typedef struct
7004 {
7005 int slotno; /* slot number of the argument. */
7006 int regbase; /* regno of the base register. */
7007 int intoffset; /* offset of the first pending integer field. */
7008 int nregs; /* number of words passed in registers. */
7009 bool stack; /* true if part of the argument is on the stack. */
7010 rtx ret; /* return expression being built. */
7011 } assign_data_t;
7012
7013 /* A subroutine of function_arg_record_value. Compute the number of integer
7014 registers to be assigned between PARMS->intoffset and BITPOS. Return
7015 true if at least one integer register is assigned or false otherwise. */
7016
7017 static bool
7018 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7019 {
7020 if (data->intoffset < 0)
7021 return false;
7022
7023 const int intoffset = data->intoffset;
7024 data->intoffset = -1;
7025
7026 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7027 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7028 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7029 int nregs = (endbit - startbit) / BITS_PER_WORD;
7030
7031 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7032 {
7033 nregs = SPARC_INT_ARG_MAX - this_slotno;
7034
7035 /* We need to pass this field (partly) on the stack. */
7036 data->stack = 1;
7037 }
7038
7039 if (nregs <= 0)
7040 return false;
7041
7042 *pnregs = nregs;
7043 return true;
7044 }
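
/* For instance, on a 64-bit target with intoffset = 64 and bitpos = 200,
   startbit = 64 and endbit = ROUND_UP (200, 64) = 256, so 3 integer
   registers cover the pending integer fields; if this_slotno were 4,
   nregs would be capped at SPARC_INT_ARG_MAX - 4 = 2 and the remainder
   of the record would go on the stack.  */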
7045
7046 /* A subroutine of function_arg_record_value. Compute the number and the mode
7047 of the FP registers to be assigned for FIELD. Return true if at least one
7048 FP register is assigned or false otherwise. */
7049
7050 static bool
7051 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7052 int *pnregs, machine_mode *pmode)
7053 {
7054 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7055 machine_mode mode = DECL_MODE (field);
7056 int nregs, nslots;
7057
7058 /* Slots are counted as words while regs are counted as having the size of
7059 the (inner) mode. */
7060 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7061 {
7062 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7063 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7064 }
7065 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7066 {
7067 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7068 nregs = 2;
7069 }
7070 else
7071 nregs = 1;
7072
7073 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7074
7075 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7076 {
7077 nslots = SPARC_FP_ARG_MAX - this_slotno;
7078 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7079
7080 /* We need to pass this field (partly) on the stack. */
7081 data->stack = 1;
7082
7083 if (nregs <= 0)
7084 return false;
7085 }
7086
7087 *pnregs = nregs;
7088 *pmode = mode;
7089 return true;
7090 }
7091
7092 /* A subroutine of function_arg_record_value. Count the number of registers
7093 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7094
7095 inline void
7096 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7097 {
7098 if (fp)
7099 {
7100 int nregs;
7101 machine_mode mode;
7102
7103 if (compute_int_layout (bitpos, data, &nregs))
7104 data->nregs += nregs;
7105
7106 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7107 data->nregs += nregs;
7108 }
7109 else
7110 {
7111 if (data->intoffset < 0)
7112 data->intoffset = bitpos;
7113 }
7114 }
7115
7116 /* A subroutine of function_arg_record_value. Assign the bits of the
7117 structure between PARMS->intoffset and BITPOS to integer registers. */
7118
7119 static void
7120 assign_int_registers (int bitpos, assign_data_t *data)
7121 {
7122 int intoffset = data->intoffset;
7123 machine_mode mode;
7124 int nregs;
7125
7126 if (!compute_int_layout (bitpos, data, &nregs))
7127 return;
7128
7129 /* If this is the trailing part of a word, only load that much into
7130 the register. Otherwise load the whole register. Note that in
7131 the latter case we may pick up unwanted bits. It's not a problem
7132 at the moment but we may wish to revisit this. */
7133 if (intoffset % BITS_PER_WORD != 0)
7134 mode = smallest_int_mode_for_size (BITS_PER_WORD
7135 - intoffset % BITS_PER_WORD);
7136 else
7137 mode = word_mode;
7138
7139 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7140 unsigned int regno = data->regbase + this_slotno;
7141 intoffset /= BITS_PER_UNIT;
7142
7143 do
7144 {
7145 rtx reg = gen_rtx_REG (mode, regno);
7146 XVECEXP (data->ret, 0, data->stack + data->nregs)
7147 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7148 data->nregs += 1;
7149 mode = word_mode;
7150 regno += 1;
7151 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7152 }
7153 while (--nregs > 0);
7154 }
7155
7156 /* A subroutine of function_arg_record_value. Assign FIELD at position
7157 BITPOS to FP registers. */
7158
7159 static void
7160 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7161 {
7162 int nregs;
7163 machine_mode mode;
7164
7165 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7166 return;
7167
7168 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7169 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7170 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7171 regno++;
7172 int pos = bitpos / BITS_PER_UNIT;
7173
7174 do
7175 {
7176 rtx reg = gen_rtx_REG (mode, regno);
7177 XVECEXP (data->ret, 0, data->stack + data->nregs)
7178 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7179 data->nregs += 1;
7180 regno += GET_MODE_SIZE (mode) / 4;
7181 pos += GET_MODE_SIZE (mode);
7182 }
7183 while (--nregs > 0);
7184 }
7185
7186 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7187 the structure between PARMS->intoffset and BITPOS to registers. */
7188
7189 inline void
7190 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7191 {
7192 if (fp)
7193 {
7194 assign_int_registers (bitpos, data);
7195
7196 assign_fp_registers (field, bitpos, data);
7197 }
7198 else
7199 {
7200 if (data->intoffset < 0)
7201 data->intoffset = bitpos;
7202 }
7203 }
7204
7205 /* Used by function_arg and function_value to implement the complex
7206 conventions of the 64-bit ABI for passing and returning structures.
7207 Return an expression valid as a return value for the FUNCTION_ARG
7208 and TARGET_FUNCTION_VALUE.
7209
7210 TYPE is the data type of the argument (as a tree).
7211 This is null for libcalls where that information may
7212 not be available.
7213 MODE is the argument's machine mode.
7214 SLOTNO is the index number of the argument's slot in the parameter array.
7215 NAMED is true if this argument is a named parameter
7216 (otherwise it is an extra parameter matching an ellipsis).
7217 REGBASE is the regno of the base register for the parameter array. */
7218
7219 static rtx
7220 function_arg_record_value (const_tree type, machine_mode mode,
7221 int slotno, bool named, int regbase)
7222 {
7223 const int size = int_size_in_bytes (type);
7224 assign_data_t data;
7225 int nregs;
7226
7227 data.slotno = slotno;
7228 data.regbase = regbase;
7229
7230 /* Count how many registers we need. */
7231 data.nregs = 0;
7232 data.intoffset = 0;
7233 data.stack = false;
7234 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7235
7236 /* Take into account pending integer fields. */
7237 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7238 data.nregs += nregs;
7239
7240 /* Allocate the vector and handle some annoying special cases. */
7241 nregs = data.nregs;
7242
7243 if (nregs == 0)
7244 {
7245 /* ??? Empty structure has no value? Duh? */
7246 if (size <= 0)
7247 {
7248 /* Though there's nothing really to store, return a word register
7249 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7250 leads to breakage due to the fact that there are zero bytes to
7251 load. */
7252 return gen_rtx_REG (mode, regbase);
7253 }
7254
7255 /* ??? C++ has structures with no fields, and yet a size. Give up
7256 for now and pass everything back in integer registers. */
7257 nregs = CEIL_NWORDS (size);
7258 if (nregs + slotno > SPARC_INT_ARG_MAX)
7259 nregs = SPARC_INT_ARG_MAX - slotno;
7260 }
7261
7262 gcc_assert (nregs > 0);
7263
7264 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7265
7266 /* If at least one field must be passed on the stack, generate
7267 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7268 also be passed on the stack. We can't do much better because the
7269 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7270 of structures for which the fields passed exclusively in registers
7271 are not at the beginning of the structure. */
7272 if (data.stack)
7273 XVECEXP (data.ret, 0, 0)
7274 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7275
7276 /* Assign the registers. */
7277 data.nregs = 0;
7278 data.intoffset = 0;
7279 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7280
7281 /* Assign pending integer fields. */
7282 assign_int_registers (size * BITS_PER_UNIT, &data);
7283
7284 gcc_assert (data.nregs == nregs);
7285
7286 return data.ret;
7287 }
7288
7289 /* Used by function_arg and function_value to implement the conventions
7290 of the 64-bit ABI for passing and returning unions.
7291 Return an expression valid as a return value for the TARGET_FUNCTION_ARG
7292 and TARGET_FUNCTION_VALUE target hooks.
7293
7294 SIZE is the size in bytes of the union.
7295 MODE is the argument's machine mode.
7296 SLOTNO is the index number of the argument's slot in the parameter array.
7297 REGNO is the hard register the union will be passed in. */
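
/* For instance (sketch only), a 16-byte union passed starting in %o2 comes
   back as

     (parallel [(expr_list (reg:DI %o2) (const_int 0))
                (expr_list (reg:DI %o3) (const_int 8))])

   i.e. consecutive word registers at increasing word offsets, so the union
   is left-justified in its argument slots.  */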
7298
7299 static rtx
7300 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7301 {
7302 unsigned int nwords;
7303
7304 /* See comment in function_arg_record_value for empty structures. */
7305 if (size <= 0)
7306 return gen_rtx_REG (mode, regno);
7307
7308 if (slotno == SPARC_INT_ARG_MAX - 1)
7309 nwords = 1;
7310 else
7311 nwords = CEIL_NWORDS (size);
7312
7313 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7314
7315 /* Unions are passed left-justified. */
7316 for (unsigned int i = 0; i < nwords; i++)
7317 XVECEXP (regs, 0, i)
7318 = gen_rtx_EXPR_LIST (VOIDmode,
7319 gen_rtx_REG (word_mode, regno + i),
7320 GEN_INT (UNITS_PER_WORD * i));
7321
7322 return regs;
7323 }
7324
7325 /* Used by function_arg and function_value to implement the conventions
7326 of the 64-bit ABI for passing and returning BLKmode vectors.
7327 Return an expression valid as a return value for the TARGET_FUNCTION_ARG
7328 and TARGET_FUNCTION_VALUE target hooks.
7329
7330 SIZE is the size in bytes of the vector.
7331 SLOTNO is the index number of the argument's slot in the parameter array.
7332 NAMED is true if this argument is a named parameter
7333 (otherwise it is an extra parameter matching an ellipsis).
7334 REGNO is the hard register the vector will be passed in. */
7335
7336 static rtx
7337 function_arg_vector_value (int size, int slotno, bool named, int regno)
7338 {
7339 const int mult = (named ? 2 : 1);
7340 unsigned int nwords;
7341
7342 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7343 nwords = 1;
7344 else
7345 nwords = CEIL_NWORDS (size);
7346
7347 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7348
7349 if (size < UNITS_PER_WORD)
7350 XVECEXP (regs, 0, 0)
7351 = gen_rtx_EXPR_LIST (VOIDmode,
7352 gen_rtx_REG (SImode, regno),
7353 const0_rtx);
7354 else
7355 for (unsigned int i = 0; i < nwords; i++)
7356 XVECEXP (regs, 0, i)
7357 = gen_rtx_EXPR_LIST (VOIDmode,
7358 gen_rtx_REG (word_mode, regno + i * mult),
7359 GEN_INT (i * UNITS_PER_WORD));
7360
7361 return regs;
7362 }
7363
7364 /* Determine where to put an argument to a function.
7365 Value is zero to push the argument on the stack,
7366 or a hard register in which to store the argument.
7367
7368 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7369 the preceding args and about the function being called.
7370 MODE is the argument's machine mode.
7371 TYPE is the data type of the argument (as a tree).
7372 This is null for libcalls where that information may
7373 not be available.
7374 NAMED is true if this argument is a named parameter
7375 (otherwise it is an extra parameter matching an ellipsis).
7376 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7377 TARGET_FUNCTION_INCOMING_ARG. */
7378
7379 static rtx
7380 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7381 const_tree type, bool named, bool incoming)
7382 {
7383 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7384 const int regbase
7385 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7386 int slotno, regno, padding;
7387 enum mode_class mclass = GET_MODE_CLASS (mode);
7388
7389 slotno
7390 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7391 if (slotno == -1)
7392 return 0;
7393
7394 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7395 if (type && VECTOR_INTEGER_TYPE_P (type))
7396 mclass = MODE_FLOAT;
7397
7398 if (TARGET_ARCH32)
7399 return gen_rtx_REG (mode, regno);
7400
7401 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7402 and are promoted to registers if possible. */
7403 if (type && TREE_CODE (type) == RECORD_TYPE)
7404 {
7405 const int size = int_size_in_bytes (type);
7406 gcc_assert (size <= 16);
7407
7408 return function_arg_record_value (type, mode, slotno, named, regbase);
7409 }
7410
7411 /* Unions up to 16 bytes in size are passed in integer registers. */
7412 else if (type && TREE_CODE (type) == UNION_TYPE)
7413 {
7414 const int size = int_size_in_bytes (type);
7415 gcc_assert (size <= 16);
7416
7417 return function_arg_union_value (size, mode, slotno, regno);
7418 }
7419
7420 /* Floating-point vectors up to 16 bytes are passed in registers. */
7421 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7422 {
7423 const int size = int_size_in_bytes (type);
7424 gcc_assert (size <= 16);
7425
7426 return function_arg_vector_value (size, slotno, named, regno);
7427 }
7428
7429 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7430 but also have the slot allocated for them.
7431 If no prototype is in scope, fp values in register slots get passed
7432 in two places: either in fp regs and int regs, or in fp regs and memory. */
7433 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7434 && SPARC_FP_REG_P (regno))
7435 {
7436 rtx reg = gen_rtx_REG (mode, regno);
7437 if (cum->prototype_p || cum->libcall_p)
7438 return reg;
7439 else
7440 {
7441 rtx v0, v1;
7442
7443 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7444 {
7445 int intreg;
7446
7447 /* On incoming, we don't need to know that the value
7448 is passed in %f0 and %i0; that knowledge confuses other
7449 parts and causes needless spillage even in the simplest cases. */
7450 if (incoming)
7451 return reg;
7452
7453 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7454 + (regno - SPARC_FP_ARG_FIRST) / 2);
7455
7456 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7457 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7458 const0_rtx);
7459 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7460 }
7461 else
7462 {
7463 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7464 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7465 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7466 }
7467 }
7468 }
7469
7470 /* All other aggregate types are passed in an integer register in a mode
7471 corresponding to the size of the type. */
7472 else if (type && AGGREGATE_TYPE_P (type))
7473 {
7474 const int size = int_size_in_bytes (type);
7475 gcc_assert (size <= 16);
7476
7477 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7478 }
7479
7480 return gen_rtx_REG (mode, regno);
7481 }
7482
7483 /* Handle the TARGET_FUNCTION_ARG target hook. */
7484
7485 static rtx
7486 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7487 const_tree type, bool named)
7488 {
7489 return sparc_function_arg_1 (cum, mode, type, named, false);
7490 }
7491
7492 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7493
7494 static rtx
7495 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7496 const_tree type, bool named)
7497 {
7498 return sparc_function_arg_1 (cum, mode, type, named, true);
7499 }
7500
7501 /* For sparc64, objects requiring 16-byte alignment are passed with that alignment. */
7502
7503 static unsigned int
7504 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7505 {
7506 return ((TARGET_ARCH64
7507 && (GET_MODE_ALIGNMENT (mode) == 128
7508 || (type && TYPE_ALIGN (type) == 128)))
7509 ? 128
7510 : PARM_BOUNDARY);
7511 }
7512
7513 /* For an arg passed partly in registers and partly in memory,
7514 this is the number of bytes of registers used.
7515 For args passed entirely in registers or entirely in memory, zero.
7516
7517 Any arg that starts in the first 6 regs but won't entirely fit in them
7518 needs partial registers on v8. On v9, structures with integer
7519 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7520 values that begin in the last fp reg [where "last fp reg" varies with the
7521 mode] will be split between that reg and memory. */
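
/* A concrete illustration (sketch, assuming the 64-bit ABI with its 6
   integer argument slots): a 16-byte aggregate whose first word lands in
   the last integer slot has its first 8 bytes passed in %o5 and its last
   8 bytes passed on the stack, so this hook reports UNITS_PER_WORD bytes
   passed in registers.  */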
7522
7523 static int
7524 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7525 tree type, bool named)
7526 {
7527 int slotno, regno, padding;
7528
7529 /* We pass false for incoming here; it doesn't matter. */
7530 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7531 false, &regno, &padding);
7532
7533 if (slotno == -1)
7534 return 0;
7535
7536 if (TARGET_ARCH32)
7537 {
7538 /* We are guaranteed by pass_by_reference that the size of the
7539 argument is not greater than 8 bytes, so we only need to return
7540 one word if the argument is partially passed in registers. */
7541 const int size = GET_MODE_SIZE (mode);
7542
7543 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7544 return UNITS_PER_WORD;
7545 }
7546 else
7547 {
7548 /* We are guaranteed by pass_by_reference that the size of the
7549 argument is not greater than 16 bytes, so we only need to return
7550 one word if the argument is partially passed in registers. */
7551 if (type && AGGREGATE_TYPE_P (type))
7552 {
7553 const int size = int_size_in_bytes (type);
7554
7555 if (size > UNITS_PER_WORD
7556 && (slotno == SPARC_INT_ARG_MAX - 1
7557 || slotno == SPARC_FP_ARG_MAX - 1))
7558 return UNITS_PER_WORD;
7559 }
7560 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7561 || ((GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7562 || (type && VECTOR_TYPE_P (type)))
7563 && !(TARGET_FPU && named)))
7564 {
7565 const int size = (type && VECTOR_FLOAT_TYPE_P (type))
7566 ? int_size_in_bytes (type)
7567 : GET_MODE_SIZE (mode);
7568
7569 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7570 return UNITS_PER_WORD;
7571 }
7572 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7573 || (type && VECTOR_TYPE_P (type)))
7574 {
7575 const int size = (type && VECTOR_FLOAT_TYPE_P (type))
7576 ? int_size_in_bytes (type)
7577 : GET_MODE_SIZE (mode);
7578
7579 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7580 return UNITS_PER_WORD;
7581 }
7582 }
7583
7584 return 0;
7585 }
7586
7587 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7588 Update the data in CUM to advance over an argument
7589 of mode MODE and data type TYPE.
7590 TYPE is null for libcalls where that information may not be available. */
7591
7592 static void
7593 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7594 const_tree type, bool named)
7595 {
7596 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7597 int regno, padding;
7598
7599 /* We pass false for incoming here; it doesn't matter. */
7600 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7601
7602 /* If the argument requires leading padding, add it. */
7603 cum->words += padding;
7604
7605 if (TARGET_ARCH32)
7606 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7607 else
7608 {
7609 /* For types that can have BLKmode, get the size from the type. */
7610 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7611 {
7612 const int size = int_size_in_bytes (type);
7613
7614 /* See comment in function_arg_record_value for empty structures. */
7615 if (size <= 0)
7616 cum->words++;
7617 else
7618 cum->words += CEIL_NWORDS (size);
7619 }
7620 else
7621 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7622 }
7623 }
7624
7625 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7626 are always stored left-shifted in their argument slot. */
7627
7628 static pad_direction
7629 sparc_function_arg_padding (machine_mode mode, const_tree type)
7630 {
7631 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7632 return PAD_UPWARD;
7633
7634 /* Fall back to the default. */
7635 return default_function_arg_padding (mode, type);
7636 }
7637
7638 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7639 Specify whether to return the return value in memory. */
7640
7641 static bool
7642 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7643 {
7644 if (TARGET_ARCH32)
7645 /* Original SPARC 32-bit ABI says that structures and unions, and
7646 quad-precision floats are returned in memory. But note that the
7647 first part is implemented through -fpcc-struct-return being the
7648 default, so here we only implement -freg-struct-return instead.
7649 All other base types are returned in registers.
7650
7651 Extended ABI (as implemented by the Sun compiler) says that
7652 all complex floats are returned in registers (8 FP registers
7653 at most for '_Complex long double'). Return all complex integers
7654 in registers (4 at most for '_Complex long long').
7655
7656 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7657 integers are returned like floats of the same size, that is in
7658 registers up to 8 bytes and in memory otherwise. Return all
7659 vector floats in memory like structures and unions; note that
7660 they always have BLKmode like the latter. */
7661 return (TYPE_MODE (type) == BLKmode
7662 || TYPE_MODE (type) == TFmode
7663 || (TREE_CODE (type) == VECTOR_TYPE
7664 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7665 else
7666 /* Original SPARC 64-bit ABI says that structures and unions
7667 smaller than 32 bytes are returned in registers, as well as
7668 all other base types.
7669
7670 Extended ABI (as implemented by the Sun compiler) says that all
7671 complex floats are returned in registers (8 FP registers at most
7672 for '_Complex long double'). Return all complex integers in
7673 registers (4 at most for '_Complex TItype').
7674
7675 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7676 integers are returned like floats of the same size, that is in
7677 registers. Return all vector floats like structures and unions;
7678 note that they always have BLKmode like the latter. */
7679 return (TYPE_MODE (type) == BLKmode
7680 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7681 }
7682
7683 /* Handle the TARGET_STRUCT_VALUE target hook.
7684 Return where to find the structure return value address. */
7685
7686 static rtx
7687 sparc_struct_value_rtx (tree fndecl, int incoming)
7688 {
7689 if (TARGET_ARCH64)
7690 return NULL_RTX;
7691 else
7692 {
7693 rtx mem;
7694
7695 if (incoming)
7696 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7697 STRUCT_VALUE_OFFSET));
7698 else
7699 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7700 STRUCT_VALUE_OFFSET));
7701
7702 /* Only follow the SPARC ABI for fixed-size structure returns.
7703 Variable-size structure returns are handled per the normal
7704 procedures in GCC. This is enabled by -mstd-struct-return. */
7705 if (incoming == 2
7706 && sparc_std_struct_return
7707 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7708 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7709 {
7710 /* We must check and adjust the return address, because the caller
7711 may or may not have really provided the return object. */
7712 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7713 rtx scratch = gen_reg_rtx (SImode);
7714 rtx_code_label *endlab = gen_label_rtx ();
7715
7716 /* Calculate the return object size. */
7717 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7718 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7719 /* Construct a temporary return value. */
7720 rtx temp_val
7721 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7722
7723 /* Implement SPARC 32-bit psABI callee return struct checking:
7724
7725 Fetch the instruction where we will return to and see if
7726 it's an unimp instruction (the most significant 10 bits
7727 will be zero). */
7728 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7729 plus_constant (Pmode,
7730 ret_reg, 8)));
7731 /* Assume the size is valid and pre-adjust. */
7732 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7733 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7734 0, endlab);
7735 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7736 /* Write the address of the memory pointed to by temp_val into
7737 the memory pointed to by mem. */
7738 emit_move_insn (mem, XEXP (temp_val, 0));
7739 emit_label (endlab);
7740 }
7741
7742 return mem;
7743 }
7744 }
7745
7746 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7747 For v9, function return values are subject to the same rules as arguments,
7748 except that up to 32 bytes may be returned in registers. */
7749
7750 static rtx
7751 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7752 {
7753 /* Beware that the two values are swapped here with respect to function_arg. */
7754 const int regbase
7755 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7756 enum mode_class mclass = GET_MODE_CLASS (mode);
7757 int regno;
7758
7759 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7760 Note that integer vectors larger than 16 bytes have BLKmode so
7761 they need to be handled like floating-point vectors below. */
7762 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7763 mclass = MODE_FLOAT;
7764
7765 if (TARGET_ARCH64 && type)
7766 {
7767 /* Structures up to 32 bytes in size are returned in registers. */
7768 if (TREE_CODE (type) == RECORD_TYPE)
7769 {
7770 const int size = int_size_in_bytes (type);
7771 gcc_assert (size <= 32);
7772
7773 return function_arg_record_value (type, mode, 0, true, regbase);
7774 }
7775
7776 /* Unions up to 32 bytes in size are returned in integer registers. */
7777 else if (TREE_CODE (type) == UNION_TYPE)
7778 {
7779 const int size = int_size_in_bytes (type);
7780 gcc_assert (size <= 32);
7781
7782 return function_arg_union_value (size, mode, 0, regbase);
7783 }
7784
7785 /* Vectors up to 32 bytes are returned in FP registers. */
7786 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7787 {
7788 const int size = int_size_in_bytes (type);
7789 gcc_assert (size <= 32);
7790
7791 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7792 }
7793
7794 /* Objects that require it are returned in FP registers. */
7795 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7796 ;
7797
7798 /* All other aggregate types are returned in an integer register in a
7799 mode corresponding to the size of the type. */
7800 else if (AGGREGATE_TYPE_P (type))
7801 {
7802 /* All other aggregate types are passed in an integer register
7803 in a mode corresponding to the size of the type. */
7804 const int size = int_size_in_bytes (type);
7805 gcc_assert (size <= 32);
7806
7807 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7808
7809 /* ??? We probably should have made the same ABI change in
7810 3.4.0 as the one we made for unions. The latter was
7811 required by the SCD though, while the former is not
7812 specified, so we favored compatibility and efficiency.
7813
7814 Now we're stuck for aggregates larger than 16 bytes,
7815 because OImode vanished in the meantime. Let's not
7816 try to be unduly clever, and simply follow the ABI
7817 for unions in that case. */
7818 if (mode == BLKmode)
7819 return function_arg_union_value (size, mode, 0, regbase);
7820 else
7821 mclass = MODE_INT;
7822 }
7823
7824 /* We should only have pointer and integer types at this point. This
7825 must match sparc_promote_function_mode. */
7826 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7827 mode = word_mode;
7828 }
7829
7830 /* We should only have pointer and integer types at this point, except with
7831 -freg-struct-return. This must match sparc_promote_function_mode. */
7832 else if (TARGET_ARCH32
7833 && !(type && AGGREGATE_TYPE_P (type))
7834 && mclass == MODE_INT
7835 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7836 mode = word_mode;
7837
7838 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7839 regno = SPARC_FP_ARG_FIRST;
7840 else
7841 regno = regbase;
7842
7843 return gen_rtx_REG (mode, regno);
7844 }
7845
7846 /* Handle TARGET_FUNCTION_VALUE.
7847 On the SPARC, the value is found in the first "output" register, but the
7848 called function leaves it in the first "input" register. */
7849
7850 static rtx
7851 sparc_function_value (const_tree valtype,
7852 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7853 bool outgoing)
7854 {
7855 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7856 }
7857
7858 /* Handle TARGET_LIBCALL_VALUE. */
7859
7860 static rtx
7861 sparc_libcall_value (machine_mode mode,
7862 const_rtx fun ATTRIBUTE_UNUSED)
7863 {
7864 return sparc_function_value_1 (NULL_TREE, mode, false);
7865 }
7866
7867 /* Handle TARGET_FUNCTION_VALUE_REGNO_P.
7868 On the SPARC, the first "output" reg is used for integer values, and the
7869 first floating point register is used for floating point values. */
7870
7871 static bool
7872 sparc_function_value_regno_p (const unsigned int regno)
7873 {
7874 return (regno == 8 || (TARGET_FPU && regno == 32));
7875 }
7876
7877 /* Do what is necessary for `va_start'. We look at the current function
7878 to determine if stdarg or varargs is used and return the address of
7879 the first unnamed parameter. */
7880
7881 static rtx
7882 sparc_builtin_saveregs (void)
7883 {
7884 int first_reg = crtl->args.info.words;
7885 rtx address;
7886 int regno;
7887
7888 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7889 emit_move_insn (gen_rtx_MEM (word_mode,
7890 gen_rtx_PLUS (Pmode,
7891 frame_pointer_rtx,
7892 GEN_INT (FIRST_PARM_OFFSET (0)
7893 + (UNITS_PER_WORD
7894 * regno)))),
7895 gen_rtx_REG (word_mode,
7896 SPARC_INCOMING_INT_ARG_FIRST + regno));
7897
7898 address = gen_rtx_PLUS (Pmode,
7899 frame_pointer_rtx,
7900 GEN_INT (FIRST_PARM_OFFSET (0)
7901 + UNITS_PER_WORD * first_reg));
7902
7903 return address;
7904 }
7905
7906 /* Implement `va_start' for stdarg. */
7907
7908 static void
7909 sparc_va_start (tree valist, rtx nextarg)
7910 {
7911 nextarg = expand_builtin_saveregs ();
7912 std_expand_builtin_va_start (valist, nextarg);
7913 }
7914
7915 /* Implement `va_arg' for stdarg. */
7916
7917 static tree
7918 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7919 gimple_seq *post_p)
7920 {
7921 HOST_WIDE_INT size, rsize, align;
7922 tree addr, incr;
7923 bool indirect;
7924 tree ptrtype = build_pointer_type (type);
7925
7926 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7927 {
7928 indirect = true;
7929 size = rsize = UNITS_PER_WORD;
7930 align = 0;
7931 }
7932 else
7933 {
7934 indirect = false;
7935 size = int_size_in_bytes (type);
7936 rsize = ROUND_UP (size, UNITS_PER_WORD);
7937 align = 0;
7938
7939 if (TARGET_ARCH64)
7940 {
7941 /* For SPARC64, objects requiring 16-byte alignment get it. */
7942 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7943 align = 2 * UNITS_PER_WORD;
7944
7945 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7946 are left-justified in their slots. */
7947 if (AGGREGATE_TYPE_P (type))
7948 {
7949 if (size == 0)
7950 size = rsize = UNITS_PER_WORD;
7951 else
7952 size = rsize;
7953 }
7954 }
7955 }
7956
7957 incr = valist;
7958 if (align)
7959 {
7960 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7961 incr = fold_convert (sizetype, incr);
7962 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7963 size_int (-align));
7964 incr = fold_convert (ptr_type_node, incr);
7965 }
7966
7967 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7968 addr = incr;
7969
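  /* On this big-endian target, a value smaller than a full slot is stored
     in the most significant end of the slot, so step past the padding to
     reach the first byte of the actual value.  */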
7970 if (BYTES_BIG_ENDIAN && size < rsize)
7971 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7972
7973 if (indirect)
7974 {
7975 addr = fold_convert (build_pointer_type (ptrtype), addr);
7976 addr = build_va_arg_indirect_ref (addr);
7977 }
7978
7979 /* If the address isn't aligned properly for the type, we need a temporary.
7980 FIXME: This is inefficient; usually we can do this in registers. */
7981 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7982 {
7983 tree tmp = create_tmp_var (type, "va_arg_tmp");
7984 tree dest_addr = build_fold_addr_expr (tmp);
7985 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7986 3, dest_addr, addr, size_int (rsize));
7987 TREE_ADDRESSABLE (tmp) = 1;
7988 gimplify_and_add (copy, pre_p);
7989 addr = dest_addr;
7990 }
7991
7992 else
7993 addr = fold_convert (ptrtype, addr);
7994
7995 incr = fold_build_pointer_plus_hwi (incr, rsize);
7996 gimplify_assign (valist, incr, post_p);
7997
7998 return build_va_arg_indirect_ref (addr);
7999 }
8000 \f
8001 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8002 Specify whether the vector mode is supported by the hardware. */
8003
8004 static bool
8005 sparc_vector_mode_supported_p (machine_mode mode)
8006 {
8007 return TARGET_VIS && VECTOR_MODE_P (mode);
8008 }
8009 \f
8010 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8011
8012 static machine_mode
8013 sparc_preferred_simd_mode (scalar_mode mode)
8014 {
8015 if (TARGET_VIS)
8016 switch (mode)
8017 {
8018 case E_SImode:
8019 return V2SImode;
8020 case E_HImode:
8021 return V4HImode;
8022 case E_QImode:
8023 return V8QImode;
8024
8025 default:;
8026 }
8027
8028 return word_mode;
8029 }
8030 \f
8031 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8032
8033 static bool
8034 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8035 {
8036 /* Do not fold unconditional jumps that have been created for crossing
8037 partition boundaries. */
8038 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8039 return false;
8040
8041 return true;
8042 }
8043
8044 /* Return the string to output an unconditional branch to LABEL, which is
8045 the operand number of the label.
8046
8047 DEST is the destination insn (i.e. the label), INSN is the source. */
8048
8049 const char *
8050 output_ubranch (rtx dest, rtx_insn *insn)
8051 {
8052 static char string[64];
8053 bool v9_form = false;
8054 int delta;
8055 char *p;
8056
8057 /* Even if we are trying to use cbcond for this, evaluate
8058 whether we can use V9 branches as our backup plan. */
8059 delta = 5000000;
8060 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8061 delta = (INSN_ADDRESSES (INSN_UID (dest))
8062 - INSN_ADDRESSES (INSN_UID (insn)));
8063
8064 /* Leave some instructions for "slop". */
8065 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8066 v9_form = true;
8067
8068 if (TARGET_CBCOND)
8069 {
8070 bool emit_nop = emit_cbcond_nop (insn);
8071 bool far = false;
8072 const char *rval;
8073
8074 if (delta < -500 || delta > 500)
8075 far = true;
8076
8077 if (far)
8078 {
8079 if (v9_form)
8080 rval = "ba,a,pt\t%%xcc, %l0";
8081 else
8082 rval = "b,a\t%l0";
8083 }
8084 else
8085 {
8086 if (emit_nop)
8087 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8088 else
8089 rval = "cwbe\t%%g0, %%g0, %l0";
8090 }
8091 return rval;
8092 }
8093
8094 if (v9_form)
8095 strcpy (string, "ba%*,pt\t%%xcc, ");
8096 else
8097 strcpy (string, "b%*\t");
8098
8099 p = strchr (string, '\0');
8100 *p++ = '%';
8101 *p++ = 'l';
8102 *p++ = '0';
8103 *p++ = '%';
8104 *p++ = '(';
8105 *p = '\0';
8106
8107 return string;
8108 }
8109
8110 /* Return the string to output a conditional branch to LABEL, which is
8111 the operand number of the label. OP is the conditional expression.
8112 XEXP (OP, 0) is assumed to be a condition code register (integer or
8113 floating point) and its mode specifies what kind of comparison we made.
8114
8115 DEST is the destination insn (i.e. the label), INSN is the source.
8116
8117 REVERSED is nonzero if we should reverse the sense of the comparison.
8118
8119 ANNUL is nonzero if we should generate an annulling branch. */
8120
8121 const char *
8122 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8123 rtx_insn *insn)
8124 {
8125 static char string[64];
8126 enum rtx_code code = GET_CODE (op);
8127 rtx cc_reg = XEXP (op, 0);
8128 machine_mode mode = GET_MODE (cc_reg);
8129 const char *labelno, *branch;
8130 int spaces = 8, far;
8131 char *p;
8132
8133 /* v9 branches are limited to +-1MB. If it is too far away,
8134 change
8135
8136 bne,pt %xcc, .LC30
8137
8138 to
8139
8140 be,pn %xcc, .+12
8141 nop
8142 ba .LC30
8143
8144 and
8145
8146 fbne,a,pn %fcc2, .LC29
8147
8148 to
8149
8150 fbe,pt %fcc2, .+16
8151 nop
8152 ba .LC29 */
8153
8154 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8155 if (reversed ^ far)
8156 {
8157 /* Reversal of FP compares takes care of the ordered/unordered distinction:
8158 an ordered compare becomes an unordered compare and vice versa. */
8159 if (mode == CCFPmode || mode == CCFPEmode)
8160 code = reverse_condition_maybe_unordered (code);
8161 else
8162 code = reverse_condition (code);
8163 }
8164
8165 /* Start by writing the branch condition. */
8166 if (mode == CCFPmode || mode == CCFPEmode)
8167 {
8168 switch (code)
8169 {
8170 case NE:
8171 branch = "fbne";
8172 break;
8173 case EQ:
8174 branch = "fbe";
8175 break;
8176 case GE:
8177 branch = "fbge";
8178 break;
8179 case GT:
8180 branch = "fbg";
8181 break;
8182 case LE:
8183 branch = "fble";
8184 break;
8185 case LT:
8186 branch = "fbl";
8187 break;
8188 case UNORDERED:
8189 branch = "fbu";
8190 break;
8191 case ORDERED:
8192 branch = "fbo";
8193 break;
8194 case UNGT:
8195 branch = "fbug";
8196 break;
8197 case UNLT:
8198 branch = "fbul";
8199 break;
8200 case UNEQ:
8201 branch = "fbue";
8202 break;
8203 case UNGE:
8204 branch = "fbuge";
8205 break;
8206 case UNLE:
8207 branch = "fbule";
8208 break;
8209 case LTGT:
8210 branch = "fblg";
8211 break;
8212 default:
8213 gcc_unreachable ();
8214 }
8215
8216 /* ??? !v9: FP branches cannot be preceded by another floating point
8217 insn. Because there is currently no concept of pre-delay slots,
8218 we can fix this only by always emitting a nop before a floating
8219 point branch. */
8220
8221 string[0] = '\0';
8222 if (! TARGET_V9)
8223 strcpy (string, "nop\n\t");
8224 strcat (string, branch);
8225 }
8226 else
8227 {
8228 switch (code)
8229 {
8230 case NE:
8231 if (mode == CCVmode || mode == CCXVmode)
8232 branch = "bvs";
8233 else
8234 branch = "bne";
8235 break;
8236 case EQ:
8237 if (mode == CCVmode || mode == CCXVmode)
8238 branch = "bvc";
8239 else
8240 branch = "be";
8241 break;
8242 case GE:
8243 if (mode == CCNZmode || mode == CCXNZmode)
8244 branch = "bpos";
8245 else
8246 branch = "bge";
8247 break;
8248 case GT:
8249 branch = "bg";
8250 break;
8251 case LE:
8252 branch = "ble";
8253 break;
8254 case LT:
8255 if (mode == CCNZmode || mode == CCXNZmode)
8256 branch = "bneg";
8257 else
8258 branch = "bl";
8259 break;
8260 case GEU:
8261 branch = "bgeu";
8262 break;
8263 case GTU:
8264 branch = "bgu";
8265 break;
8266 case LEU:
8267 branch = "bleu";
8268 break;
8269 case LTU:
8270 branch = "blu";
8271 break;
8272 default:
8273 gcc_unreachable ();
8274 }
8275 strcpy (string, branch);
8276 }
8277 spaces -= strlen (branch);
8278 p = strchr (string, '\0');
8279
8280 /* Now add the annulling, the label, and a possible noop. */
8281 if (annul && ! far)
8282 {
8283 strcpy (p, ",a");
8284 p += 2;
8285 spaces -= 2;
8286 }
8287
8288 if (TARGET_V9)
8289 {
8290 rtx note;
8291 int v8 = 0;
8292
8293 if (! far && insn && INSN_ADDRESSES_SET_P ())
8294 {
8295 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8296 - INSN_ADDRESSES (INSN_UID (insn)));
8297 /* Leave some instructions for "slop". */
8298 if (delta < -260000 || delta >= 260000)
8299 v8 = 1;
8300 }
8301
8302 switch (mode)
8303 {
8304 case E_CCmode:
8305 case E_CCNZmode:
8306 case E_CCCmode:
8307 case E_CCVmode:
8308 labelno = "%%icc, ";
8309 if (v8)
8310 labelno = "";
8311 break;
8312 case E_CCXmode:
8313 case E_CCXNZmode:
8314 case E_CCXCmode:
8315 case E_CCXVmode:
8316 labelno = "%%xcc, ";
8317 gcc_assert (!v8);
8318 break;
8319 case E_CCFPmode:
8320 case E_CCFPEmode:
8321 {
8322 static char v9_fcc_labelno[] = "%%fccX, ";
8323 /* Set the char indicating the number of the fcc reg to use. */
8324 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8325 labelno = v9_fcc_labelno;
8326 if (v8)
8327 {
8328 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8329 labelno = "";
8330 }
8331 }
8332 break;
8333 default:
8334 gcc_unreachable ();
8335 }
8336
8337 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8338 {
8339 strcpy (p,
8340 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8341 >= profile_probability::even ()) ^ far)
8342 ? ",pt" : ",pn");
8343 p += 3;
8344 spaces -= 3;
8345 }
8346 }
8347 else
8348 labelno = "";
8349
8350 if (spaces > 0)
8351 *p++ = '\t';
8352 else
8353 *p++ = ' ';
8354 strcpy (p, labelno);
8355 p = strchr (p, '\0');
8356 if (far)
8357 {
8358 strcpy (p, ".+12\n\t nop\n\tb\t");
8359 /* Skip the next insn if requested or
8360 if we know that it will be a nop. */
8361 if (annul || ! final_sequence)
8362 p[3] = '6';
8363 p += 14;
8364 }
8365 *p++ = '%';
8366 *p++ = 'l';
8367 *p++ = label + '0';
8368 *p++ = '%';
8369 *p++ = '#';
8370 *p = '\0';
8371
8372 return string;
8373 }
8374
8375 /* Emit a library call comparison between floating point X and Y.
8376 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8377 Return the new operator to be used in the comparison sequence.
8378
8379 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8380 values as arguments instead of the TFmode registers themselves;
8381 that is why we cannot call emit_float_lib_cmp. */
8382
8383 rtx
8384 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8385 {
8386 const char *qpfunc;
8387 rtx slot0, slot1, result, tem, tem2, libfunc;
8388 machine_mode mode;
8389 enum rtx_code new_comparison;
8390
8391 switch (comparison)
8392 {
8393 case EQ:
8394 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8395 break;
8396
8397 case NE:
8398 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8399 break;
8400
8401 case GT:
8402 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8403 break;
8404
8405 case GE:
8406 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8407 break;
8408
8409 case LT:
8410 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8411 break;
8412
8413 case LE:
8414 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8415 break;
8416
8417 case ORDERED:
8418 case UNORDERED:
8419 case UNGT:
8420 case UNLT:
8421 case UNEQ:
8422 case UNGE:
8423 case UNLE:
8424 case LTGT:
8425 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8426 break;
8427
8428 default:
8429 gcc_unreachable ();
8430 }
8431
8432 if (TARGET_ARCH64)
8433 {
8434 if (MEM_P (x))
8435 {
8436 tree expr = MEM_EXPR (x);
8437 if (expr)
8438 mark_addressable (expr);
8439 slot0 = x;
8440 }
8441 else
8442 {
8443 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8444 emit_move_insn (slot0, x);
8445 }
8446
8447 if (MEM_P (y))
8448 {
8449 tree expr = MEM_EXPR (y);
8450 if (expr)
8451 mark_addressable (expr);
8452 slot1 = y;
8453 }
8454 else
8455 {
8456 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8457 emit_move_insn (slot1, y);
8458 }
8459
8460 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8461 emit_library_call (libfunc, LCT_NORMAL,
8462 DImode,
8463 XEXP (slot0, 0), Pmode,
8464 XEXP (slot1, 0), Pmode);
8465 mode = DImode;
8466 }
8467 else
8468 {
8469 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8470 emit_library_call (libfunc, LCT_NORMAL,
8471 SImode,
8472 x, TFmode, y, TFmode);
8473 mode = SImode;
8474 }
8475
8476
8477 /* Immediately move the result of the libcall into a pseudo
8478 register so reload doesn't clobber the value if it needs
8479 the return register for a spill reg. */
8480 result = gen_reg_rtx (mode);
8481 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8482
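  /* Decode the libcall result. The code below assumes the usual SPARC
     quad-float comparison convention, in which _Q_cmp/_Qp_cmp return 0 for
     equal, 1 for less, 2 for greater and 3 for unordered, while the direct
     predicates (_Q_feq, _Q_flt, ...) simply return nonzero when the
     relation holds.  */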
8483 switch (comparison)
8484 {
8485 default:
8486 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8487 case ORDERED:
8488 case UNORDERED:
8489 new_comparison = (comparison == UNORDERED ? EQ : NE);
8490 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8491 case UNGT:
8492 case UNGE:
8493 new_comparison = (comparison == UNGT ? GT : NE);
8494 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8495 case UNLE:
8496 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8497 case UNLT:
8498 tem = gen_reg_rtx (mode);
8499 if (TARGET_ARCH32)
8500 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8501 else
8502 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8503 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8504 case UNEQ:
8505 case LTGT:
8506 tem = gen_reg_rtx (mode);
8507 if (TARGET_ARCH32)
8508 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8509 else
8510 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8511 tem2 = gen_reg_rtx (mode);
8512 if (TARGET_ARCH32)
8513 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8514 else
8515 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8516 new_comparison = (comparison == UNEQ ? EQ : NE);
8517 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8518 }
8519
8520 gcc_unreachable ();
8521 }
8522
8523 /* Generate an unsigned DImode to FP conversion. This is the same code
8524 optabs would emit if we didn't have TFmode patterns. */
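
/* In outline (matching the code below): if the DImode input is less than
   2^63, it can be converted directly as a signed value; otherwise we halve
   it first, OR in the bit that was shifted out so that rounding still sees
   it, convert, and then double the result:

     out = (in < 2^63) ? (FP) in
                       : 2.0 * (FP) ((in >> 1) | (in & 1));  */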
8525
8526 void
8527 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8528 {
8529 rtx i0, i1, f0, in, out;
8530
8531 out = operands[0];
8532 in = force_reg (DImode, operands[1]);
8533 rtx_code_label *neglab = gen_label_rtx ();
8534 rtx_code_label *donelab = gen_label_rtx ();
8535 i0 = gen_reg_rtx (DImode);
8536 i1 = gen_reg_rtx (DImode);
8537 f0 = gen_reg_rtx (mode);
8538
8539 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8540
8541 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8542 emit_jump_insn (gen_jump (donelab));
8543 emit_barrier ();
8544
8545 emit_label (neglab);
8546
8547 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8548 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8549 emit_insn (gen_iordi3 (i0, i0, i1));
8550 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8551 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8552
8553 emit_label (donelab);
8554 }
8555
8556 /* Generate an FP to unsigned DImode conversion. This is the same code
8557 optabs would emit if we didn't have TFmode patterns. */
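
/* In outline (matching the code below): values below 2^63 fit a signed
   fix, so they are converted directly; larger values have 2^63 subtracted
   first and the sign bit of the fixed result is then flipped to add it
   back:

     out = (in < 2^63.0) ? (DI) in
                         : (DI) (in - 2^63.0) ^ (1 << 63);  */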
8558
8559 void
8560 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8561 {
8562 rtx i0, i1, f0, in, out, limit;
8563
8564 out = operands[0];
8565 in = force_reg (mode, operands[1]);
8566 rtx_code_label *neglab = gen_label_rtx ();
8567 rtx_code_label *donelab = gen_label_rtx ();
8568 i0 = gen_reg_rtx (DImode);
8569 i1 = gen_reg_rtx (DImode);
8570 limit = gen_reg_rtx (mode);
8571 f0 = gen_reg_rtx (mode);
8572
8573 emit_move_insn (limit,
8574 const_double_from_real_value (
8575 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8576 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8577
8578 emit_insn (gen_rtx_SET (out,
8579 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8580 emit_jump_insn (gen_jump (donelab));
8581 emit_barrier ();
8582
8583 emit_label (neglab);
8584
8585 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8586 emit_insn (gen_rtx_SET (i0,
8587 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8588 emit_insn (gen_movdi (i1, const1_rtx));
8589 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8590 emit_insn (gen_xordi3 (out, i0, i1));
8591
8592 emit_label (donelab);
8593 }
8594
8595 /* Return the string to output a compare and branch instruction to DEST.
8596 DEST is the destination insn (i.e. the label), INSN is the source,
8597 and OP is the conditional expression. */
8598
8599 const char *
8600 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8601 {
8602 machine_mode mode = GET_MODE (XEXP (op, 0));
8603 enum rtx_code code = GET_CODE (op);
8604 const char *cond_str, *tmpl;
8605 int far, emit_nop, len;
8606 static char string[64];
8607 char size_char;
8608
8609 /* Compare and Branch is limited to +-2KB. If it is too far away,
8610 change
8611
8612 cxbne X, Y, .LC30
8613
8614 to
8615
8616 cxbe X, Y, .+16
8617 nop
8618 ba,pt xcc, .LC30
8619 nop */
8620
8621 len = get_attr_length (insn);
8622
8623 far = len == 4;
8624 emit_nop = len == 2;
8625
8626 if (far)
8627 code = reverse_condition (code);
8628
8629 size_char = ((mode == SImode) ? 'w' : 'x');
8630
8631 switch (code)
8632 {
8633 case NE:
8634 cond_str = "ne";
8635 break;
8636
8637 case EQ:
8638 cond_str = "e";
8639 break;
8640
8641 case GE:
8642 cond_str = "ge";
8643 break;
8644
8645 case GT:
8646 cond_str = "g";
8647 break;
8648
8649 case LE:
8650 cond_str = "le";
8651 break;
8652
8653 case LT:
8654 cond_str = "l";
8655 break;
8656
8657 case GEU:
8658 cond_str = "cc";
8659 break;
8660
8661 case GTU:
8662 cond_str = "gu";
8663 break;
8664
8665 case LEU:
8666 cond_str = "leu";
8667 break;
8668
8669 case LTU:
8670 cond_str = "cs";
8671 break;
8672
8673 default:
8674 gcc_unreachable ();
8675 }
8676
8677 if (far)
8678 {
8679 int veryfar = 1, delta;
8680
8681 if (INSN_ADDRESSES_SET_P ())
8682 {
8683 delta = (INSN_ADDRESSES (INSN_UID (dest))
8684 - INSN_ADDRESSES (INSN_UID (insn)));
8685 /* Leave some instructions for "slop". */
8686 if (delta >= -260000 && delta < 260000)
8687 veryfar = 0;
8688 }
8689
8690 if (veryfar)
8691 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8692 else
8693 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8694 }
8695 else
8696 {
8697 if (emit_nop)
8698 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8699 else
8700 tmpl = "c%cb%s\t%%1, %%2, %%3";
8701 }
8702
8703 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8704
8705 return string;
8706 }
8707
8708 /* Return the string to output a conditional branch to LABEL, testing
8709 register REG. LABEL is the operand number of the label; REG is the
8710 operand number of the reg. OP is the conditional expression. The mode
8711 of REG says what kind of comparison we made.
8712
8713 DEST is the destination insn (i.e. the label), INSN is the source.
8714
8715 REVERSED is nonzero if we should reverse the sense of the comparison.
8716
8717 ANNUL is nonzero if we should generate an annulling branch. */
8718
8719 const char *
8720 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8721 int annul, rtx_insn *insn)
8722 {
8723 static char string[64];
8724 enum rtx_code code = GET_CODE (op);
8725 machine_mode mode = GET_MODE (XEXP (op, 0));
8726 rtx note;
8727 int far;
8728 char *p;
8729
8730 /* Branches on registers are limited to +-128KB. If it is too far away,
8731 change
8732
8733 brnz,pt %g1, .LC30
8734
8735 to
8736
8737 brz,pn %g1, .+12
8738 nop
8739 ba,pt %xcc, .LC30
8740
8741 and
8742
8743 brgez,a,pn %o1, .LC29
8744
8745 to
8746
8747 brlz,pt %o1, .+16
8748 nop
8749 ba,pt %xcc, .LC29 */
8750
8751 far = get_attr_length (insn) >= 3;
8752
8753 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8754 if (reversed ^ far)
8755 code = reverse_condition (code);
8756
8757 /* Only 64-bit versions of these instructions exist. */
8758 gcc_assert (mode == DImode);
8759
8760 /* Start by writing the branch condition. */
8761
8762 switch (code)
8763 {
8764 case NE:
8765 strcpy (string, "brnz");
8766 break;
8767
8768 case EQ:
8769 strcpy (string, "brz");
8770 break;
8771
8772 case GE:
8773 strcpy (string, "brgez");
8774 break;
8775
8776 case LT:
8777 strcpy (string, "brlz");
8778 break;
8779
8780 case LE:
8781 strcpy (string, "brlez");
8782 break;
8783
8784 case GT:
8785 strcpy (string, "brgz");
8786 break;
8787
8788 default:
8789 gcc_unreachable ();
8790 }
8791
8792 p = strchr (string, '\0');
8793
8794 /* Now add the annulling, reg, label, and nop. */
8795 if (annul && ! far)
8796 {
8797 strcpy (p, ",a");
8798 p += 2;
8799 }
8800
8801 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8802 {
8803 strcpy (p,
8804 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8805 >= profile_probability::even ()) ^ far)
8806 ? ",pt" : ",pn");
8807 p += 3;
8808 }
8809
8810 *p = p < string + 8 ? '\t' : ' ';
8811 p++;
8812 *p++ = '%';
8813 *p++ = '0' + reg;
8814 *p++ = ',';
8815 *p++ = ' ';
8816 if (far)
8817 {
8818 int veryfar = 1, delta;
8819
8820 if (INSN_ADDRESSES_SET_P ())
8821 {
8822 delta = (INSN_ADDRESSES (INSN_UID (dest))
8823 - INSN_ADDRESSES (INSN_UID (insn)));
8824 /* Leave some instructions for "slop". */
8825 if (delta >= -260000 && delta < 260000)
8826 veryfar = 0;
8827 }
8828
8829 strcpy (p, ".+12\n\t nop\n\t");
8830 /* Skip the next insn if requested or
8831 if we know that it will be a nop. */
8832 if (annul || ! final_sequence)
8833 p[3] = '6';
8834 p += 12;
8835 if (veryfar)
8836 {
8837 strcpy (p, "b\t");
8838 p += 2;
8839 }
8840 else
8841 {
8842 strcpy (p, "ba,pt\t%%xcc, ");
8843 p += 13;
8844 }
8845 }
8846 *p++ = '%';
8847 *p++ = 'l';
8848 *p++ = '0' + label;
8849 *p++ = '%';
8850 *p++ = '#';
8851 *p = '\0';
8852
8853 return string;
8854 }
8855
8856 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8857 Such instructions cannot be used in the delay slot of the return insn on v9.
8858 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8859 counterparts. */
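
/* Note that OUTGOING_REGNO maps an incoming register onto the register the
   caller sees for it, e.g. %i0 (hard reg 24) onto %o0 (hard reg 8), which
   matches the view of those registers once the register window has been
   restored.  */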
8860
8861 static int
8862 epilogue_renumber (register rtx *where, int test)
8863 {
8864 register const char *fmt;
8865 register int i;
8866 register enum rtx_code code;
8867
8868 if (*where == 0)
8869 return 0;
8870
8871 code = GET_CODE (*where);
8872
8873 switch (code)
8874 {
8875 case REG:
8876 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8877 return 1;
8878 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8879 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8880 /* fallthrough */
8881 case SCRATCH:
8882 case CC0:
8883 case PC:
8884 case CONST_INT:
8885 case CONST_WIDE_INT:
8886 case CONST_DOUBLE:
8887 return 0;
8888
8889 /* Do not replace the frame pointer with the stack pointer because
8890 it can cause the delayed instruction to load below the stack.
8891 This occurs when instructions like:
8892
8893 (set (reg/i:SI 24 %i0)
8894 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8895 (const_int -20 [0xffffffec])) 0))
8896
8897 are in the return delay slot. */
8898 case PLUS:
8899 if (GET_CODE (XEXP (*where, 0)) == REG
8900 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8901 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8902 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8903 return 1;
8904 break;
8905
8906 case MEM:
8907 if (SPARC_STACK_BIAS
8908 && GET_CODE (XEXP (*where, 0)) == REG
8909 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8910 return 1;
8911 break;
8912
8913 default:
8914 break;
8915 }
8916
8917 fmt = GET_RTX_FORMAT (code);
8918
8919 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8920 {
8921 if (fmt[i] == 'E')
8922 {
8923 register int j;
8924 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8925 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8926 return 1;
8927 }
8928 else if (fmt[i] == 'e'
8929 && epilogue_renumber (&(XEXP (*where, i)), test))
8930 return 1;
8931 }
8932 return 0;
8933 }
8934 \f
8935 /* Leaf functions and non-leaf functions have different needs. */
8936
8937 static const int
8938 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8939
8940 static const int
8941 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8942
8943 static const int *const reg_alloc_orders[] = {
8944 reg_leaf_alloc_order,
8945 reg_nonleaf_alloc_order};
8946
8947 void
8948 order_regs_for_local_alloc (void)
8949 {
8950 static int last_order_nonleaf = 1;
8951
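  /* Hard register 15 is %o7, which becomes live when this function makes
     calls (it then holds their return address), so its liveness is used
     here as the leaf/non-leaf test.  */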
8952 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8953 {
8954 last_order_nonleaf = !last_order_nonleaf;
8955 memcpy ((char *) reg_alloc_order,
8956 (const char *) reg_alloc_orders[last_order_nonleaf],
8957 FIRST_PSEUDO_REGISTER * sizeof (int));
8958 }
8959 }
8960 \f
8961 /* Return 1 if REG and MEM are legitimate enough to allow the various
8962 MEM<-->REG splits to be run. */
8963
8964 int
8965 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8966 {
8967 /* Punt if we are here by mistake. */
8968 gcc_assert (reload_completed);
8969
8970 /* We must have an offsettable memory reference. */
8971 if (!offsettable_memref_p (mem))
8972 return 0;
8973
8974 /* If we have legitimate args for ldd/std, we do not want
8975 the split to happen. */
8976 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8977 return 0;
8978
8979 /* Success. */
8980 return 1;
8981 }
8982
8983 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8984
8985 void
8986 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8987 {
8988 rtx high_part = gen_highpart (mode, dest);
8989 rtx low_part = gen_lowpart (mode, dest);
8990 rtx word0 = adjust_address (src, mode, 0);
8991 rtx word1 = adjust_address (src, mode, 4);
8992
8993 if (reg_overlap_mentioned_p (high_part, word1))
8994 {
8995 emit_move_insn_1 (low_part, word1);
8996 emit_move_insn_1 (high_part, word0);
8997 }
8998 else
8999 {
9000 emit_move_insn_1 (high_part, word0);
9001 emit_move_insn_1 (low_part, word1);
9002 }
9003 }
9004
9005 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9006
9007 void
9008 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9009 {
9010 rtx word0 = adjust_address (dest, mode, 0);
9011 rtx word1 = adjust_address (dest, mode, 4);
9012 rtx high_part = gen_highpart (mode, src);
9013 rtx low_part = gen_lowpart (mode, src);
9014
9015 emit_move_insn_1 (word0, high_part);
9016 emit_move_insn_1 (word1, low_part);
9017 }
9018
9019 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9020
9021 int
9022 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9023 {
9024 /* Punt if we are here by mistake. */
9025 gcc_assert (reload_completed);
9026
9027 if (GET_CODE (reg1) == SUBREG)
9028 reg1 = SUBREG_REG (reg1);
9029 if (GET_CODE (reg1) != REG)
9030 return 0;
9031 const int regno1 = REGNO (reg1);
9032
9033 if (GET_CODE (reg2) == SUBREG)
9034 reg2 = SUBREG_REG (reg2);
9035 if (GET_CODE (reg2) != REG)
9036 return 0;
9037 const int regno2 = REGNO (reg2);
9038
9039 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9040 return 1;
9041
9042 if (TARGET_VIS3)
9043 {
9044 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9045 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9046 return 1;
9047 }
9048
9049 return 0;
9050 }
9051
9052 /* Split a REG <--> REG move into a pair of moves in MODE. */
9053
9054 void
9055 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9056 {
9057 rtx dest1 = gen_highpart (mode, dest);
9058 rtx dest2 = gen_lowpart (mode, dest);
9059 rtx src1 = gen_highpart (mode, src);
9060 rtx src2 = gen_lowpart (mode, src);
9061
9062 /* Now emit using the real source and destination we found, swapping
9063 the order if we detect overlap. */
9064 if (reg_overlap_mentioned_p (dest1, src2))
9065 {
9066 emit_move_insn_1 (dest2, src2);
9067 emit_move_insn_1 (dest1, src1);
9068 }
9069 else
9070 {
9071 emit_move_insn_1 (dest1, src1);
9072 emit_move_insn_1 (dest2, src2);
9073 }
9074 }
9075
9076 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9077 This makes them candidates for using ldd and std insns.
9078
9079 Note reg1 and reg2 *must* be hard registers. */
9080
9081 int
9082 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9083 {
9084 /* We might have been passed a SUBREG. */
9085 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9086 return 0;
9087
9088 if (REGNO (reg1) % 2 != 0)
9089 return 0;
9090
9091 /* Integer ldd is deprecated in SPARC V9. */
9092 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9093 return 0;
9094
9095 return (REGNO (reg1) == REGNO (reg2) - 1);
9096 }
9097
9098 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9099 an ldd or std insn.
9100
9101 This can only happen when addr1 and addr2, the addresses in mem1
9102 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9103 addr1 must also be aligned on a 64-bit boundary.
9104
9105 Also, if dependent_reg_rtx is not null, it should not be used to
9106 compute the address for mem1, i.e. we cannot optimize a sequence
9107 like:
9108 ld [%o0], %o0
9109 ld [%o0 + 4], %o1
9110 to
9111 ldd [%o0], %o0
9112 nor:
9113 ld [%g3 + 4], %g3
9114 ld [%g3], %g2
9115 to
9116 ldd [%g3], %g2
9117
9118 But, note that the transformation from:
9119 ld [%g2 + 4], %g3
9120 ld [%g2], %g2
9121 to
9122 ldd [%g2], %g2
9123 is perfectly fine. Thus, the peephole2 patterns always pass us
9124 the destination register of the first load, never the second one.
9125
9126 For stores we don't have a similar problem, so dependent_reg_rtx is
9127 NULL_RTX. */
9128
9129 int
9130 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9131 {
9132 rtx addr1, addr2;
9133 unsigned int reg1;
9134 HOST_WIDE_INT offset1;
9135
9136 /* The mems cannot be volatile. */
9137 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9138 return 0;
9139
9140 /* MEM1 should be aligned on a 64-bit boundary. */
9141 if (MEM_ALIGN (mem1) < 64)
9142 return 0;
9143
9144 addr1 = XEXP (mem1, 0);
9145 addr2 = XEXP (mem2, 0);
9146
9147 /* Extract a register number and offset (if used) from the first addr. */
9148 if (GET_CODE (addr1) == PLUS)
9149 {
9150 /* If not a REG, return zero. */
9151 if (GET_CODE (XEXP (addr1, 0)) != REG)
9152 return 0;
9153 else
9154 {
9155 reg1 = REGNO (XEXP (addr1, 0));
9156 /* The offset must be constant! */
9157 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9158 return 0;
9159 offset1 = INTVAL (XEXP (addr1, 1));
9160 }
9161 }
9162 else if (GET_CODE (addr1) != REG)
9163 return 0;
9164 else
9165 {
9166 reg1 = REGNO (addr1);
9167 /* This was a simple (mem (reg)) expression. Offset is 0. */
9168 offset1 = 0;
9169 }
9170
9171 /* Make sure the second address is of the form (plus (reg) (const_int)). */
9172 if (GET_CODE (addr2) != PLUS)
9173 return 0;
9174
9175 if (GET_CODE (XEXP (addr2, 0)) != REG
9176 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9177 return 0;
9178
9179 if (reg1 != REGNO (XEXP (addr2, 0)))
9180 return 0;
9181
9182 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9183 return 0;
9184
9185 /* The first offset must be evenly divisible by 8 to ensure the
9186 address is 64-bit aligned. */
9187 if (offset1 % 8 != 0)
9188 return 0;
9189
9190 /* The offset for the second addr must be 4 more than the first addr. */
9191 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9192 return 0;
9193
9194 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9195 instructions. */
9196 return 1;
9197 }
9198
9199 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9200
9201 rtx
9202 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9203 {
9204 rtx x = widen_memory_access (mem1, mode, 0);
9205 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9206 return x;
9207 }
9208
9209 /* Return 1 if reg is a pseudo, or is the first register in
9210 a hard register pair. This makes it suitable for use in
9211 ldd and std insns. */
9212
9213 int
9214 register_ok_for_ldd (rtx reg)
9215 {
9216 /* We might have been passed a SUBREG. */
9217 if (!REG_P (reg))
9218 return 0;
9219
9220 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9221 return (REGNO (reg) % 2 == 0);
9222
9223 return 1;
9224 }
9225
9226 /* Return 1 if OP, a MEM, has an address which is known to be
9227 aligned to an 8-byte boundary. */
9228
9229 int
9230 memory_ok_for_ldd (rtx op)
9231 {
9232 /* In 64-bit mode, we assume that the address is word-aligned. */
9233 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9234 return 0;
9235
9236 if (! can_create_pseudo_p ()
9237 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9238 return 0;
9239
9240 return 1;
9241 }
9242 \f
9243 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9244
9245 static bool
9246 sparc_print_operand_punct_valid_p (unsigned char code)
9247 {
9248 if (code == '#'
9249 || code == '*'
9250 || code == '('
9251 || code == ')'
9252 || code == '_'
9253 || code == '&')
9254 return true;
9255
9256 return false;
9257 }
9258
9259 /* Implement TARGET_PRINT_OPERAND.
9260 Print operand X (an rtx) in assembler syntax to file FILE.
9261 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9262 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9263
9264 static void
9265 sparc_print_operand (FILE *file, rtx x, int code)
9266 {
9267 const char *s;
9268
9269 switch (code)
9270 {
9271 case '#':
9272 /* Output an insn in a delay slot. */
9273 if (final_sequence)
9274 sparc_indent_opcode = 1;
9275 else
9276 fputs ("\n\t nop", file);
9277 return;
9278 case '*':
9279 /* Output an annul flag if there's nothing for the delay slot and we
9280 are optimizing. This is always used with '(' below.
9281 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9282 this is a dbx bug. So, we only do this when optimizing.
9283 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9284 Always emit a nop in case the next instruction is a branch. */
9285 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9286 fputs (",a", file);
9287 return;
9288 case '(':
9289 /* Output a 'nop' if there's nothing for the delay slot and we are
9290 not optimizing. This is always used with '*' above. */
9291 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9292 fputs ("\n\t nop", file);
9293 else if (final_sequence)
9294 sparc_indent_opcode = 1;
9295 return;
9296 case ')':
9297 /* Output the right displacement from the saved PC on function return.
9298 The caller may have placed an "unimp" insn immediately after the call
9299 so we have to account for it. This insn is used in the 32-bit ABI
9300 when calling a function that returns a non zero-sized structure. The
9301 64-bit ABI doesn't have it. Be careful to have this test be the same
9302 as that for the call. The exception is when sparc_std_struct_return
9303 is enabled, the psABI is followed exactly and the adjustment is made
9304 by the code in sparc_struct_value_rtx. The call emitted is the same
9305 when sparc_std_struct_return is enabled. */
9306 if (!TARGET_ARCH64
9307 && cfun->returns_struct
9308 && !sparc_std_struct_return
9309 && DECL_SIZE (DECL_RESULT (current_function_decl))
9310 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9311 == INTEGER_CST
9312 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9313 fputs ("12", file);
9314 else
9315 fputc ('8', file);
9316 return;
9317 case '_':
9318 /* Output the Embedded Medium/Anywhere code model base register. */
9319 fputs (EMBMEDANY_BASE_REG, file);
9320 return;
9321 case '&':
9322 /* Print some local dynamic TLS name. */
9323 if (const char *name = get_some_local_dynamic_name ())
9324 assemble_name (file, name);
9325 else
9326 output_operand_lossage ("'%%&' used without any "
9327 "local dynamic TLS references");
9328 return;
9329
9330 case 'Y':
9331 /* Adjust the operand to take into account a RESTORE operation. */
9332 if (GET_CODE (x) == CONST_INT)
9333 break;
9334 else if (GET_CODE (x) != REG)
9335 output_operand_lossage ("invalid %%Y operand");
9336 else if (REGNO (x) < 8)
9337 fputs (reg_names[REGNO (x)], file);
9338 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9339 fputs (reg_names[REGNO (x)-16], file);
9340 else
9341 output_operand_lossage ("invalid %%Y operand");
9342 return;
9343 case 'L':
9344 /* Print out the low order register name of a register pair. */
9345 if (WORDS_BIG_ENDIAN)
9346 fputs (reg_names[REGNO (x)+1], file);
9347 else
9348 fputs (reg_names[REGNO (x)], file);
9349 return;
9350 case 'H':
9351 /* Print out the high order register name of a register pair. */
9352 if (WORDS_BIG_ENDIAN)
9353 fputs (reg_names[REGNO (x)], file);
9354 else
9355 fputs (reg_names[REGNO (x)+1], file);
9356 return;
9357 case 'R':
9358 /* Print out the second register name of a register pair or quad.
9359 I.e., R (%o0) => %o1. */
9360 fputs (reg_names[REGNO (x)+1], file);
9361 return;
9362 case 'S':
9363 /* Print out the third register name of a register quad.
9364 I.e., S (%o0) => %o2. */
9365 fputs (reg_names[REGNO (x)+2], file);
9366 return;
9367 case 'T':
9368 /* Print out the fourth register name of a register quad.
9369 I.e., T (%o0) => %o3. */
9370 fputs (reg_names[REGNO (x)+3], file);
9371 return;
9372 case 'x':
9373 /* Print a condition code register. */
9374 if (REGNO (x) == SPARC_ICC_REG)
9375 {
9376 switch (GET_MODE (x))
9377 {
9378 case E_CCmode:
9379 case E_CCNZmode:
9380 case E_CCCmode:
9381 case E_CCVmode:
9382 s = "%icc";
9383 break;
9384 case E_CCXmode:
9385 case E_CCXNZmode:
9386 case E_CCXCmode:
9387 case E_CCXVmode:
9388 s = "%xcc";
9389 break;
9390 default:
9391 gcc_unreachable ();
9392 }
9393 fputs (s, file);
9394 }
9395 else
9396 /* %fccN register */
9397 fputs (reg_names[REGNO (x)], file);
9398 return;
9399 case 'm':
9400 /* Print the operand's address only. */
9401 output_address (GET_MODE (x), XEXP (x, 0));
9402 return;
9403 case 'r':
9404 /* In this case we need a register. Use %g0 if the
9405 operand is const0_rtx. */
9406 if (x == const0_rtx
9407 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9408 {
9409 fputs ("%g0", file);
9410 return;
9411 }
9412 else
9413 break;
9414
9415 case 'A':
9416 switch (GET_CODE (x))
9417 {
9418 case IOR:
9419 s = "or";
9420 break;
9421 case AND:
9422 s = "and";
9423 break;
9424 case XOR:
9425 s = "xor";
9426 break;
9427 default:
9428 output_operand_lossage ("invalid %%A operand");
9429 s = "";
9430 break;
9431 }
9432 fputs (s, file);
9433 return;
9434
9435 case 'B':
9436 switch (GET_CODE (x))
9437 {
9438 case IOR:
9439 s = "orn";
9440 break;
9441 case AND:
9442 s = "andn";
9443 break;
9444 case XOR:
9445 s = "xnor";
9446 break;
9447 default:
9448 output_operand_lossage ("invalid %%B operand");
9449 s = "";
9450 break;
9451 }
9452 fputs (s, file);
9453 return;
9454
9455 /* This is used by the conditional move instructions. */
9456 case 'C':
9457 {
9458 machine_mode mode = GET_MODE (XEXP (x, 0));
9459 switch (GET_CODE (x))
9460 {
9461 case NE:
9462 if (mode == CCVmode || mode == CCXVmode)
9463 s = "vs";
9464 else
9465 s = "ne";
9466 break;
9467 case EQ:
9468 if (mode == CCVmode || mode == CCXVmode)
9469 s = "vc";
9470 else
9471 s = "e";
9472 break;
9473 case GE:
9474 if (mode == CCNZmode || mode == CCXNZmode)
9475 s = "pos";
9476 else
9477 s = "ge";
9478 break;
9479 case GT:
9480 s = "g";
9481 break;
9482 case LE:
9483 s = "le";
9484 break;
9485 case LT:
9486 if (mode == CCNZmode || mode == CCXNZmode)
9487 s = "neg";
9488 else
9489 s = "l";
9490 break;
9491 case GEU:
9492 s = "geu";
9493 break;
9494 case GTU:
9495 s = "gu";
9496 break;
9497 case LEU:
9498 s = "leu";
9499 break;
9500 case LTU:
9501 s = "lu";
9502 break;
9503 case LTGT:
9504 s = "lg";
9505 break;
9506 case UNORDERED:
9507 s = "u";
9508 break;
9509 case ORDERED:
9510 s = "o";
9511 break;
9512 case UNLT:
9513 s = "ul";
9514 break;
9515 case UNLE:
9516 s = "ule";
9517 break;
9518 case UNGT:
9519 s = "ug";
9520 break;
9521 case UNGE:
9522 s = "uge";
9523 break;
9524 case UNEQ:
9525 s = "ue";
9526 break;
9527 default:
9528 output_operand_lossage ("invalid %%C operand");
9529 s = "";
9530 break;
9531 }
9532 fputs (s, file);
9533 return;
9534 }
9535
9536 /* These are used by the movr instruction pattern. */
9537 case 'D':
9538 {
9539 switch (GET_CODE (x))
9540 {
9541 case NE:
9542 s = "ne";
9543 break;
9544 case EQ:
9545 s = "e";
9546 break;
9547 case GE:
9548 s = "gez";
9549 break;
9550 case LT:
9551 s = "lz";
9552 break;
9553 case LE:
9554 s = "lez";
9555 break;
9556 case GT:
9557 s = "gz";
9558 break;
9559 default:
9560 output_operand_lossage ("invalid %%D operand");
9561 s = "";
9562 break;
9563 }
9564 fputs (s, file);
9565 return;
9566 }
9567
9568 case 'b':
9569 {
9570 /* Print a sign-extended character. */
9571 int i = trunc_int_for_mode (INTVAL (x), QImode);
9572 fprintf (file, "%d", i);
9573 return;
9574 }
9575
9576 case 'f':
9577 /* Operand must be a MEM; write its address. */
9578 if (GET_CODE (x) != MEM)
9579 output_operand_lossage ("invalid %%f operand");
9580 output_address (GET_MODE (x), XEXP (x, 0));
9581 return;
9582
9583 case 's':
9584 {
9585 /* Print a sign-extended 32-bit value. */
9586 HOST_WIDE_INT i;
9587 if (GET_CODE (x) == CONST_INT)
9588 i = INTVAL (x);
9589 else
9590 {
9591 output_operand_lossage ("invalid %%s operand");
9592 return;
9593 }
9594 i = trunc_int_for_mode (i, SImode);
9595 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9596 return;
9597 }
9598
9599 case 0:
9600 /* Do nothing special. */
9601 break;
9602
9603 default:
9604 /* Undocumented flag. */
9605 output_operand_lossage ("invalid operand output code");
9606 }
9607
9608 if (GET_CODE (x) == REG)
9609 fputs (reg_names[REGNO (x)], file);
9610 else if (GET_CODE (x) == MEM)
9611 {
9612 fputc ('[', file);
9613 /* Poor Sun assembler doesn't understand absolute addressing. */
9614 if (CONSTANT_P (XEXP (x, 0)))
9615 fputs ("%g0+", file);
9616 output_address (GET_MODE (x), XEXP (x, 0));
9617 fputc (']', file);
9618 }
9619 else if (GET_CODE (x) == HIGH)
9620 {
9621 fputs ("%hi(", file);
9622 output_addr_const (file, XEXP (x, 0));
9623 fputc (')', file);
9624 }
9625 else if (GET_CODE (x) == LO_SUM)
9626 {
9627 sparc_print_operand (file, XEXP (x, 0), 0);
9628 if (TARGET_CM_MEDMID)
9629 fputs ("+%l44(", file);
9630 else
9631 fputs ("+%lo(", file);
9632 output_addr_const (file, XEXP (x, 1));
9633 fputc (')', file);
9634 }
9635 else if (GET_CODE (x) == CONST_DOUBLE)
9636 output_operand_lossage ("floating-point constant not a valid immediate operand");
9637 else
9638 output_addr_const (file, x);
9639 }
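/* Illustrative note (not from the original sources): with no code letter, a
   REG operand simply prints as its register name, while a MEM whose address
   is a bare constant prints as "[%g0+<constant>]" because of the Sun
   assembler workaround above.  */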
9640
9641 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9642
9643 static void
9644 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9645 {
9646 register rtx base, index = 0;
9647 int offset = 0;
9648 register rtx addr = x;
9649
9650 if (REG_P (addr))
9651 fputs (reg_names[REGNO (addr)], file);
9652 else if (GET_CODE (addr) == PLUS)
9653 {
9654 if (CONST_INT_P (XEXP (addr, 0)))
9655 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9656 else if (CONST_INT_P (XEXP (addr, 1)))
9657 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9658 else
9659 base = XEXP (addr, 0), index = XEXP (addr, 1);
9660 if (GET_CODE (base) == LO_SUM)
9661 {
9662 gcc_assert (USE_AS_OFFSETABLE_LO10
9663 && TARGET_ARCH64
9664 && ! TARGET_CM_MEDMID);
9665 output_operand (XEXP (base, 0), 0);
9666 fputs ("+%lo(", file);
9667 output_address (VOIDmode, XEXP (base, 1));
9668 fprintf (file, ")+%d", offset);
9669 }
9670 else
9671 {
9672 fputs (reg_names[REGNO (base)], file);
9673 if (index == 0)
9674 fprintf (file, "%+d", offset);
9675 else if (REG_P (index))
9676 fprintf (file, "+%s", reg_names[REGNO (index)]);
9677 else if (GET_CODE (index) == SYMBOL_REF
9678 || GET_CODE (index) == LABEL_REF
9679 || GET_CODE (index) == CONST)
9680 fputc ('+', file), output_addr_const (file, index);
9681 else gcc_unreachable ();
9682 }
9683 }
9684 else if (GET_CODE (addr) == MINUS
9685 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9686 {
9687 output_addr_const (file, XEXP (addr, 0));
9688 fputs ("-(", file);
9689 output_addr_const (file, XEXP (addr, 1));
9690 fputs ("-.)", file);
9691 }
9692 else if (GET_CODE (addr) == LO_SUM)
9693 {
9694 output_operand (XEXP (addr, 0), 0);
9695 if (TARGET_CM_MEDMID)
9696 fputs ("+%l44(", file);
9697 else
9698 fputs ("+%lo(", file);
9699 output_address (VOIDmode, XEXP (addr, 1));
9700 fputc (')', file);
9701 }
9702 else if (flag_pic
9703 && GET_CODE (addr) == CONST
9704 && GET_CODE (XEXP (addr, 0)) == MINUS
9705 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9706 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9707 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9708 {
9709 addr = XEXP (addr, 0);
9710 output_addr_const (file, XEXP (addr, 0));
9711 /* Group the args of the second CONST in parentheses. */
9712 fputs ("-(", file);
9713 /* Skip past the second CONST--it does nothing for us. */
9714 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9715 /* Close the parenthesis. */
9716 fputc (')', file);
9717 }
9718 else
9719 {
9720 output_addr_const (file, addr);
9721 }
9722 }
9723 \f
9724 /* Target hook for assembling integer objects. The sparc version has
9725 special handling for aligned DI-mode objects. */
9726
9727 static bool
9728 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9729 {
9730 /* ??? We only output .xword's for symbols and only then in environments
9731 where the assembler can handle them. */
9732 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9733 {
9734 if (TARGET_V9)
9735 {
9736 assemble_integer_with_op ("\t.xword\t", x);
9737 return true;
9738 }
9739 else
9740 {
9741 assemble_aligned_integer (4, const0_rtx);
9742 assemble_aligned_integer (4, x);
9743 return true;
9744 }
9745 }
9746 return default_assemble_integer (x, size, aligned_p);
9747 }
9748 \f
9749 /* Return the value of a code used in the .proc pseudo-op that says
9750 what kind of result this function returns. For non-C types, we pick
9751 the closest C type. */
9752
9753 #ifndef SHORT_TYPE_SIZE
9754 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9755 #endif
9756
9757 #ifndef INT_TYPE_SIZE
9758 #define INT_TYPE_SIZE BITS_PER_WORD
9759 #endif
9760
9761 #ifndef LONG_TYPE_SIZE
9762 #define LONG_TYPE_SIZE BITS_PER_WORD
9763 #endif
9764
9765 #ifndef LONG_LONG_TYPE_SIZE
9766 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9767 #endif
9768
9769 #ifndef FLOAT_TYPE_SIZE
9770 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9771 #endif
9772
9773 #ifndef DOUBLE_TYPE_SIZE
9774 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9775 #endif
9776
9777 #ifndef LONG_DOUBLE_TYPE_SIZE
9778 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9779 #endif
9780
9781 unsigned long
9782 sparc_type_code (register tree type)
9783 {
9784 register unsigned long qualifiers = 0;
9785 register unsigned shift;
9786
9787 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9788 setting more, since some assemblers will give an error for this. Also,
9789 we must be careful to avoid shifts of 32 bits or more to avoid getting
9790 unpredictable results. */
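/* Rough worked example (illustration only, assuming a 32-bit signed "int"):
   for the type "int *", the first loop iteration below sees a POINTER_TYPE
   and ORs in 1 at shift 6, giving 0x40; the second iteration reaches the
   INTEGER_TYPE case and returns 0x40 | 4 = 0x44.  */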
9791
9792 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9793 {
9794 switch (TREE_CODE (type))
9795 {
9796 case ERROR_MARK:
9797 return qualifiers;
9798
9799 case ARRAY_TYPE:
9800 qualifiers |= (3 << shift);
9801 break;
9802
9803 case FUNCTION_TYPE:
9804 case METHOD_TYPE:
9805 qualifiers |= (2 << shift);
9806 break;
9807
9808 case POINTER_TYPE:
9809 case REFERENCE_TYPE:
9810 case OFFSET_TYPE:
9811 qualifiers |= (1 << shift);
9812 break;
9813
9814 case RECORD_TYPE:
9815 return (qualifiers | 8);
9816
9817 case UNION_TYPE:
9818 case QUAL_UNION_TYPE:
9819 return (qualifiers | 9);
9820
9821 case ENUMERAL_TYPE:
9822 return (qualifiers | 10);
9823
9824 case VOID_TYPE:
9825 return (qualifiers | 16);
9826
9827 case INTEGER_TYPE:
9828 /* If this is a range type, consider it to be the underlying
9829 type. */
9830 if (TREE_TYPE (type) != 0)
9831 break;
9832
9833 /* Carefully distinguish all the standard types of C,
9834 without messing up if the language is not C. We do this by
9835 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9836 look at both the names and the above fields, but that's redundant.
9837 Any type whose size is between two C types will be considered
9838 to be the wider of the two types. Also, we do not have a
9839 special code to use for "long long", so anything wider than
9840 long is treated the same. Note that we can't distinguish
9841 between "int" and "long" in this code if they are the same
9842 size, but that's fine, since neither can the assembler. */
9843
9844 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9845 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9846
9847 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9848 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9849
9850 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9851 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9852
9853 else
9854 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9855
9856 case REAL_TYPE:
9857 /* If this is a range type, consider it to be the underlying
9858 type. */
9859 if (TREE_TYPE (type) != 0)
9860 break;
9861
9862 /* Carefully distinguish all the standard types of C,
9863 without messing up if the language is not C. */
9864
9865 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9866 return (qualifiers | 6);
9867
9868 else
9869 return (qualifiers | 7);
9870
9871 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9872 /* ??? We need to distinguish between double and float complex types,
9873 but I don't know how yet because I can't reach this code from
9874 existing front-ends. */
9875 return (qualifiers | 7); /* Who knows? */
9876
9877 case VECTOR_TYPE:
9878 case BOOLEAN_TYPE: /* Boolean truth value type. */
9879 case LANG_TYPE:
9880 case NULLPTR_TYPE:
9881 return qualifiers;
9882
9883 default:
9884 gcc_unreachable (); /* Not a type! */
9885 }
9886 }
9887
9888 return qualifiers;
9889 }
9890 \f
9891 /* Nested function support. */
9892
9893 /* Emit RTL insns to initialize the variable parts of a trampoline.
9894 FNADDR is an RTX for the address of the function's pure code.
9895 CXT is an RTX for the static chain value for the function.
9896
9897 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9898 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9899 (to store insns). This is a bit excessive. Perhaps a different
9900 mechanism would be better here.
9901
9902 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9903
9904 static void
9905 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9906 {
9907 /* SPARC 32-bit trampoline:
9908
9909 sethi %hi(fn), %g1
9910 sethi %hi(static), %g2
9911 jmp %g1+%lo(fn)
9912 or %g2, %lo(static), %g2
9913
9914 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9915 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9916 */
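/* Explanatory note (not from the original sources): the constants ORed in
   below appear to be the template instructions above with zeroed operand
   fields -- 0x03000000 is "sethi 0, %g1", 0x05000000 is "sethi 0, %g2",
   0x81c06000 is "jmpl %g1+0, %g0" and 0x8410a000 is "or %g2, 0, %g2"; the
   shifts and masks then supply the %hi/%lo pieces of FNADDR and CXT.  */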
9917
9918 emit_move_insn
9919 (adjust_address (m_tramp, SImode, 0),
9920 expand_binop (SImode, ior_optab,
9921 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9922 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9923 NULL_RTX, 1, OPTAB_DIRECT));
9924
9925 emit_move_insn
9926 (adjust_address (m_tramp, SImode, 4),
9927 expand_binop (SImode, ior_optab,
9928 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9929 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9930 NULL_RTX, 1, OPTAB_DIRECT));
9931
9932 emit_move_insn
9933 (adjust_address (m_tramp, SImode, 8),
9934 expand_binop (SImode, ior_optab,
9935 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9936 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9937 NULL_RTX, 1, OPTAB_DIRECT));
9938
9939 emit_move_insn
9940 (adjust_address (m_tramp, SImode, 12),
9941 expand_binop (SImode, ior_optab,
9942 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9943 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9944 NULL_RTX, 1, OPTAB_DIRECT));
9945
9946 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9947 aligned on a 16 byte boundary so one flush clears it all. */
9948 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9949 if (sparc_cpu != PROCESSOR_ULTRASPARC
9950 && sparc_cpu != PROCESSOR_ULTRASPARC3
9951 && sparc_cpu != PROCESSOR_NIAGARA
9952 && sparc_cpu != PROCESSOR_NIAGARA2
9953 && sparc_cpu != PROCESSOR_NIAGARA3
9954 && sparc_cpu != PROCESSOR_NIAGARA4
9955 && sparc_cpu != PROCESSOR_NIAGARA7
9956 && sparc_cpu != PROCESSOR_M8)
9957 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9958
9959 /* Call __enable_execute_stack after writing onto the stack to make sure
9960 the stack address is accessible. */
9961 #ifdef HAVE_ENABLE_EXECUTE_STACK
9962 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9963 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9964 #endif
9965
9966 }
9967
9968 /* The 64-bit version is simpler because it makes more sense to load the
9969 values as "immediate" data out of the trampoline. It's also easier since
9970 we can read the PC without clobbering a register. */
9971
9972 static void
9973 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9974 {
9975 /* SPARC 64-bit trampoline:
9976
9977 rd %pc, %g1
9978 ldx [%g1+24], %g5
9979 jmp %g5
9980 ldx [%g1+16], %g5
9981 +16 bytes data
9982 */
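/* Explanatory note (not from the original sources): the four SImode
   constants stored below appear to be the fixed instructions of the
   template above -- 0x83414000 is "rd %pc, %g1", 0xca586018 is
   "ldx [%g1+24], %g5", 0x81c14000 is "jmp %g5" and 0xca586010 is
   "ldx [%g1+16], %g5" -- while CXT and FNADDR become the 16 bytes of data
   that those ldx instructions read.  */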
9983
9984 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9985 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9986 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9987 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9988 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9989 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9990 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9991 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9992 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9993 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9994 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9995
9996 if (sparc_cpu != PROCESSOR_ULTRASPARC
9997 && sparc_cpu != PROCESSOR_ULTRASPARC3
9998 && sparc_cpu != PROCESSOR_NIAGARA
9999 && sparc_cpu != PROCESSOR_NIAGARA2
10000 && sparc_cpu != PROCESSOR_NIAGARA3
10001 && sparc_cpu != PROCESSOR_NIAGARA4
10002 && sparc_cpu != PROCESSOR_NIAGARA7
10003 && sparc_cpu != PROCESSOR_M8)
10004 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10005
10006 /* Call __enable_execute_stack after writing onto the stack to make sure
10007 the stack address is accessible. */
10008 #ifdef HAVE_ENABLE_EXECUTE_STACK
10009 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10010 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10011 #endif
10012 }
10013
10014 /* Worker for TARGET_TRAMPOLINE_INIT. */
10015
10016 static void
10017 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10018 {
10019 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10020 cxt = force_reg (Pmode, cxt);
10021 if (TARGET_ARCH64)
10022 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10023 else
10024 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10025 }
10026 \f
10027 /* Adjust the cost of a scheduling dependency. Return the new cost of
10028 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10029
10030 static int
10031 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10032 int cost)
10033 {
10034 enum attr_type insn_type;
10035
10036 if (recog_memoized (insn) < 0)
10037 return cost;
10038
10039 insn_type = get_attr_type (insn);
10040
10041 if (dep_type == 0)
10042 {
10043 /* Data dependency; DEP_INSN writes a register that INSN reads some
10044 cycles later. */
10045
10046 /* If a load, then the dependence must be on the memory address;
10047 add an extra "cycle". Note that the cost could be two cycles
10048 if the reg was written late in an instruction group; we cannot tell
10049 here. */
10050 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10051 return cost + 3;
10052
10053 /* Get the delay only if the address of the store is the dependence. */
10054 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10055 {
10056 rtx pat = PATTERN (insn);
10057 rtx dep_pat = PATTERN (dep_insn);
10058
10059 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10060 return cost; /* This should not happen! */
10061
10062 /* The dependency between the two instructions was on the data that
10063 is being stored. Assume that this implies that the address of the
10064 store is not dependent. */
10065 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10066 return cost;
10067
10068 return cost + 3; /* An approximation. */
10069 }
10070
10071 /* A shift instruction cannot receive its data from an instruction
10072 in the same cycle; add a one cycle penalty. */
10073 if (insn_type == TYPE_SHIFT)
10074 return cost + 3; /* Split before cascade into shift. */
10075 }
10076 else
10077 {
10078 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10079 INSN writes some cycles later. */
10080
10081 /* These are only significant for the fpu unit; writing a fp reg before
10082 the fpu has finished with it stalls the processor. */
10083
10084 /* Reusing an integer register causes no problems. */
10085 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10086 return 0;
10087 }
10088
10089 return cost;
10090 }
10091
10092 static int
10093 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10094 int cost)
10095 {
10096 enum attr_type insn_type, dep_type;
10097 rtx pat = PATTERN (insn);
10098 rtx dep_pat = PATTERN (dep_insn);
10099
10100 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10101 return cost;
10102
10103 insn_type = get_attr_type (insn);
10104 dep_type = get_attr_type (dep_insn);
10105
10106 switch (dtype)
10107 {
10108 case 0:
10109 /* Data dependency; DEP_INSN writes a register that INSN reads some
10110 cycles later. */
10111
10112 switch (insn_type)
10113 {
10114 case TYPE_STORE:
10115 case TYPE_FPSTORE:
10116 /* Get the delay iff the address of the store is the dependence. */
10117 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10118 return cost;
10119
10120 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10121 return cost;
10122 return cost + 3;
10123
10124 case TYPE_LOAD:
10125 case TYPE_SLOAD:
10126 case TYPE_FPLOAD:
10127 /* If a load, then the dependence must be on the memory address. If
10128 the addresses aren't equal, then it might be a false dependency. */
10129 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10130 {
10131 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10132 || GET_CODE (SET_DEST (dep_pat)) != MEM
10133 || GET_CODE (SET_SRC (pat)) != MEM
10134 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10135 XEXP (SET_SRC (pat), 0)))
10136 return cost + 2;
10137
10138 return cost + 8;
10139 }
10140 break;
10141
10142 case TYPE_BRANCH:
10143 /* Compare to branch latency is 0. There is no benefit from
10144 separating compare and branch. */
10145 if (dep_type == TYPE_COMPARE)
10146 return 0;
10147 /* Floating point compare to branch latency is less than
10148 compare to conditional move. */
10149 if (dep_type == TYPE_FPCMP)
10150 return cost - 1;
10151 break;
10152 default:
10153 break;
10154 }
10155 break;
10156
10157 case REG_DEP_ANTI:
10158 /* Anti-dependencies only penalize the fpu unit. */
10159 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10160 return 0;
10161 break;
10162
10163 default:
10164 break;
10165 }
10166
10167 return cost;
10168 }
10169
10170 static int
10171 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10172 unsigned int)
10173 {
10174 switch (sparc_cpu)
10175 {
10176 case PROCESSOR_SUPERSPARC:
10177 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10178 break;
10179 case PROCESSOR_HYPERSPARC:
10180 case PROCESSOR_SPARCLITE86X:
10181 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10182 break;
10183 default:
10184 break;
10185 }
10186 return cost;
10187 }
10188
10189 static void
10190 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10191 int sched_verbose ATTRIBUTE_UNUSED,
10192 int max_ready ATTRIBUTE_UNUSED)
10193 {}
10194
10195 static int
10196 sparc_use_sched_lookahead (void)
10197 {
10198 switch (sparc_cpu)
10199 {
10200 case PROCESSOR_ULTRASPARC:
10201 case PROCESSOR_ULTRASPARC3:
10202 return 4;
10203 case PROCESSOR_SUPERSPARC:
10204 case PROCESSOR_HYPERSPARC:
10205 case PROCESSOR_SPARCLITE86X:
10206 return 3;
10207 case PROCESSOR_NIAGARA4:
10208 case PROCESSOR_NIAGARA7:
10209 case PROCESSOR_M8:
10210 return 2;
10211 case PROCESSOR_NIAGARA:
10212 case PROCESSOR_NIAGARA2:
10213 case PROCESSOR_NIAGARA3:
10214 default:
10215 return 0;
10216 }
10217 }
10218
10219 static int
10220 sparc_issue_rate (void)
10221 {
10222 switch (sparc_cpu)
10223 {
10224 case PROCESSOR_ULTRASPARC:
10225 case PROCESSOR_ULTRASPARC3:
10226 case PROCESSOR_M8:
10227 return 4;
10228 case PROCESSOR_SUPERSPARC:
10229 return 3;
10230 case PROCESSOR_HYPERSPARC:
10231 case PROCESSOR_SPARCLITE86X:
10232 case PROCESSOR_V9:
10233 /* Assume V9 processors are capable of at least dual-issue. */
10234 case PROCESSOR_NIAGARA4:
10235 case PROCESSOR_NIAGARA7:
10236 return 2;
10237 case PROCESSOR_NIAGARA:
10238 case PROCESSOR_NIAGARA2:
10239 case PROCESSOR_NIAGARA3:
10240 default:
10241 return 1;
10242 }
10243 }
10244
10245 int
10246 sparc_branch_cost (bool speed_p, bool predictable_p)
10247 {
10248 if (!speed_p)
10249 return 2;
10250
10251 /* For pre-V9 processors we use a single value (usually 3) to take into
10252 account the potential annulling of the delay slot (which ends up being
10253 a bubble in the pipeline slot) plus a cycle to take into consideration
10254 the instruction cache effects.
10255
10256 On V9 and later processors, which have branch prediction facilities,
10257 we take into account whether the branch is (easily) predictable. */
10258 const int cost = sparc_costs->branch_cost;
10259
10260 switch (sparc_cpu)
10261 {
10262 case PROCESSOR_V9:
10263 case PROCESSOR_ULTRASPARC:
10264 case PROCESSOR_ULTRASPARC3:
10265 case PROCESSOR_NIAGARA:
10266 case PROCESSOR_NIAGARA2:
10267 case PROCESSOR_NIAGARA3:
10268 case PROCESSOR_NIAGARA4:
10269 case PROCESSOR_NIAGARA7:
10270 case PROCESSOR_M8:
10271 return cost + (predictable_p ? 0 : 2);
10272
10273 default:
10274 return cost;
10275 }
10276 }
10277
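/* Return 1 if the single SET in INSN is known to zero the high 32 bits of
   its destination, -1 if it sign-extends a 32-bit value into them, and 0 if
   the state of the high bits is unknown.  */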
10278 static int
10279 set_extends (rtx_insn *insn)
10280 {
10281 register rtx pat = PATTERN (insn);
10282
10283 switch (GET_CODE (SET_SRC (pat)))
10284 {
10285 /* Load and some shift instructions zero extend. */
10286 case MEM:
10287 case ZERO_EXTEND:
10288 /* sethi clears the high bits */
10289 case HIGH:
10290 /* LO_SUM is used with sethi. sethi cleared the high
10291 bits and the values used with lo_sum are positive */
10292 case LO_SUM:
10293 /* Store flag stores 0 or 1 */
10294 case LT: case LTU:
10295 case GT: case GTU:
10296 case LE: case LEU:
10297 case GE: case GEU:
10298 case EQ:
10299 case NE:
10300 return 1;
10301 case AND:
10302 {
10303 rtx op0 = XEXP (SET_SRC (pat), 0);
10304 rtx op1 = XEXP (SET_SRC (pat), 1);
10305 if (GET_CODE (op1) == CONST_INT)
10306 return INTVAL (op1) >= 0;
10307 if (GET_CODE (op0) != REG)
10308 return 0;
10309 if (sparc_check_64 (op0, insn) == 1)
10310 return 1;
10311 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10312 }
10313 case IOR:
10314 case XOR:
10315 {
10316 rtx op0 = XEXP (SET_SRC (pat), 0);
10317 rtx op1 = XEXP (SET_SRC (pat), 1);
10318 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10319 return 0;
10320 if (GET_CODE (op1) == CONST_INT)
10321 return INTVAL (op1) >= 0;
10322 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10323 }
10324 case LSHIFTRT:
10325 return GET_MODE (SET_SRC (pat)) == SImode;
10326 /* Positive integers leave the high bits zero. */
10327 case CONST_INT:
10328 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10329 case ASHIFTRT:
10330 case SIGN_EXTEND:
10331 return - (GET_MODE (SET_SRC (pat)) == SImode);
10332 case REG:
10333 return sparc_check_64 (SET_SRC (pat), insn);
10334 default:
10335 return 0;
10336 }
10337 }
10338
10339 /* We _ought_ to have only one kind per function, but... */
10340 static GTY(()) rtx sparc_addr_diff_list;
10341 static GTY(()) rtx sparc_addr_list;
10342
10343 void
10344 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10345 {
10346 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10347 if (diff)
10348 sparc_addr_diff_list
10349 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10350 else
10351 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10352 }
10353
10354 static void
10355 sparc_output_addr_vec (rtx vec)
10356 {
10357 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10358 int idx, vlen = XVECLEN (body, 0);
10359
10360 #ifdef ASM_OUTPUT_ADDR_VEC_START
10361 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10362 #endif
10363
10364 #ifdef ASM_OUTPUT_CASE_LABEL
10365 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10366 NEXT_INSN (lab));
10367 #else
10368 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10369 #endif
10370
10371 for (idx = 0; idx < vlen; idx++)
10372 {
10373 ASM_OUTPUT_ADDR_VEC_ELT
10374 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10375 }
10376
10377 #ifdef ASM_OUTPUT_ADDR_VEC_END
10378 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10379 #endif
10380 }
10381
10382 static void
10383 sparc_output_addr_diff_vec (rtx vec)
10384 {
10385 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10386 rtx base = XEXP (XEXP (body, 0), 0);
10387 int idx, vlen = XVECLEN (body, 1);
10388
10389 #ifdef ASM_OUTPUT_ADDR_VEC_START
10390 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10391 #endif
10392
10393 #ifdef ASM_OUTPUT_CASE_LABEL
10394 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10395 NEXT_INSN (lab));
10396 #else
10397 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10398 #endif
10399
10400 for (idx = 0; idx < vlen; idx++)
10401 {
10402 ASM_OUTPUT_ADDR_DIFF_ELT
10403 (asm_out_file,
10404 body,
10405 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10406 CODE_LABEL_NUMBER (base));
10407 }
10408
10409 #ifdef ASM_OUTPUT_ADDR_VEC_END
10410 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10411 #endif
10412 }
10413
10414 static void
10415 sparc_output_deferred_case_vectors (void)
10416 {
10417 rtx t;
10418 int align;
10419
10420 if (sparc_addr_list == NULL_RTX
10421 && sparc_addr_diff_list == NULL_RTX)
10422 return;
10423
10424 /* Align to cache line in the function's code section. */
10425 switch_to_section (current_function_section ());
10426
10427 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10428 if (align > 0)
10429 ASM_OUTPUT_ALIGN (asm_out_file, align);
10430
10431 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10432 sparc_output_addr_vec (XEXP (t, 0));
10433 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10434 sparc_output_addr_diff_vec (XEXP (t, 0));
10435
10436 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10437 }
10438
10439 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10440 unknown. Return 1 if the high bits are zero, -1 if the register is
10441 sign extended. */
10442 int
10443 sparc_check_64 (rtx x, rtx_insn *insn)
10444 {
10445 /* If a register is set only once it is safe to ignore insns this
10446 code does not know how to handle. The loop will either recognize
10447 the single set and return the correct value or fail to recognize
10448 it and return 0. */
10449 int set_once = 0;
10450 rtx y = x;
10451
10452 gcc_assert (GET_CODE (x) == REG);
10453
10454 if (GET_MODE (x) == DImode)
10455 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10456
10457 if (flag_expensive_optimizations
10458 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10459 set_once = 1;
10460
10461 if (insn == 0)
10462 {
10463 if (set_once)
10464 insn = get_last_insn_anywhere ();
10465 else
10466 return 0;
10467 }
10468
10469 while ((insn = PREV_INSN (insn)))
10470 {
10471 switch (GET_CODE (insn))
10472 {
10473 case JUMP_INSN:
10474 case NOTE:
10475 break;
10476 case CODE_LABEL:
10477 case CALL_INSN:
10478 default:
10479 if (! set_once)
10480 return 0;
10481 break;
10482 case INSN:
10483 {
10484 rtx pat = PATTERN (insn);
10485 if (GET_CODE (pat) != SET)
10486 return 0;
10487 if (rtx_equal_p (x, SET_DEST (pat)))
10488 return set_extends (insn);
10489 if (y && rtx_equal_p (y, SET_DEST (pat)))
10490 return set_extends (insn);
10491 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10492 return 0;
10493 }
10494 }
10495 }
10496 return 0;
10497 }
10498
10499 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10500 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10501
10502 const char *
10503 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10504 {
10505 static char asm_code[60];
10506
10507 /* The scratch register is only required when the destination
10508 register is not a 64-bit global or out register. */
10509 if (which_alternative != 2)
10510 operands[3] = operands[0];
10511
10512 /* We can only shift by constants <= 63. */
10513 if (GET_CODE (operands[2]) == CONST_INT)
10514 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10515
10516 if (GET_CODE (operands[1]) == CONST_INT)
10517 {
10518 output_asm_insn ("mov\t%1, %3", operands);
10519 }
10520 else
10521 {
10522 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10523 if (sparc_check_64 (operands[1], insn) <= 0)
10524 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10525 output_asm_insn ("or\t%L1, %3, %3", operands);
10526 }
10527
10528 strcpy (asm_code, opcode);
10529
10530 if (which_alternative != 2)
10531 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10532 else
10533 return
10534 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10535 }
10536 \f
10537 /* Output rtl to increment the profiler label LABELNO
10538 for profiling a function entry. */
10539
10540 void
10541 sparc_profile_hook (int labelno)
10542 {
10543 char buf[32];
10544 rtx lab, fun;
10545
10546 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10547 if (NO_PROFILE_COUNTERS)
10548 {
10549 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10550 }
10551 else
10552 {
10553 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10554 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10555 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10556 }
10557 }
10558 \f
10559 #ifdef TARGET_SOLARIS
10560 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10561
10562 static void
10563 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10564 tree decl ATTRIBUTE_UNUSED)
10565 {
10566 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10567 {
10568 solaris_elf_asm_comdat_section (name, flags, decl);
10569 return;
10570 }
10571
10572 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10573
10574 if (!(flags & SECTION_DEBUG))
10575 fputs (",#alloc", asm_out_file);
10576 #if HAVE_GAS_SECTION_EXCLUDE
10577 if (flags & SECTION_EXCLUDE)
10578 fputs (",#exclude", asm_out_file);
10579 #endif
10580 if (flags & SECTION_WRITE)
10581 fputs (",#write", asm_out_file);
10582 if (flags & SECTION_TLS)
10583 fputs (",#tls", asm_out_file);
10584 if (flags & SECTION_CODE)
10585 fputs (",#execinstr", asm_out_file);
10586
10587 if (flags & SECTION_NOTYPE)
10588 ;
10589 else if (flags & SECTION_BSS)
10590 fputs (",#nobits", asm_out_file);
10591 else
10592 fputs (",#progbits", asm_out_file);
10593
10594 fputc ('\n', asm_out_file);
10595 }
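/* Illustrative note (not from the original sources): a writable data
   section with a hypothetical name ".mydata" and no other special flags
   would be emitted by the code above as
       .section  ".mydata",#alloc,#write,#progbits  */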
10596 #endif /* TARGET_SOLARIS */
10597
10598 /* We do not allow indirect calls to be optimized into sibling calls.
10599
10600 We cannot use sibling calls when delayed branches are disabled
10601 because they will likely require the call delay slot to be filled.
10602
10603 Also, on SPARC 32-bit we cannot emit a sibling call when the
10604 current function returns a structure. This is because the "unimp
10605 after call" convention would cause the callee to return to the
10606 wrong place. The generic code already disallows cases where the
10607 function being called returns a structure.
10608
10609 It may seem strange how this last case could occur. Usually there
10610 is code after the call which jumps to epilogue code which dumps the
10611 return value into the struct return area. That ought to invalidate
10612 the sibling call right? Well, in the C++ case we can end up passing
10613 the pointer to the struct return area to a constructor (which returns
10614 void) and then nothing else happens. Such a sibling call would look
10615 valid without the added check here.
10616
10617 VxWorks PIC PLT entries require the global pointer to be initialized
10618 on entry. We therefore can't emit sibling calls to them. */
10619 static bool
10620 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10621 {
10622 return (decl
10623 && flag_delayed_branch
10624 && (TARGET_ARCH64 || ! cfun->returns_struct)
10625 && !(TARGET_VXWORKS_RTP
10626 && flag_pic
10627 && !targetm.binds_local_p (decl)));
10628 }
10629 \f
10630 /* libfunc renaming. */
10631
10632 static void
10633 sparc_init_libfuncs (void)
10634 {
10635 if (TARGET_ARCH32)
10636 {
10637 /* Use the subroutines that Sun's library provides for integer
10638 multiply and divide. The `*' prevents an underscore from
10639 being prepended by the compiler. .umul is a little faster
10640 than .mul. */
10641 set_optab_libfunc (smul_optab, SImode, "*.umul");
10642 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10643 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10644 set_optab_libfunc (smod_optab, SImode, "*.rem");
10645 set_optab_libfunc (umod_optab, SImode, "*.urem");
10646
10647 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10648 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10649 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10650 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10651 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10652 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10653
10654 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10655 is because with soft-float, the SFmode and DFmode sqrt
10656 instructions will be absent, and the compiler will notice and
10657 try to use the TFmode sqrt instruction for calls to the
10658 builtin function sqrt, but this fails. */
10659 if (TARGET_FPU)
10660 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10661
10662 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10663 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10664 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10665 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10666 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10667 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10668
10669 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10670 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10671 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10672 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10673
10674 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10675 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10676 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10677 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10678
10679 if (DITF_CONVERSION_LIBFUNCS)
10680 {
10681 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10682 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10683 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10684 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10685 }
10686
10687 if (SUN_CONVERSION_LIBFUNCS)
10688 {
10689 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10690 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10691 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10692 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10693 }
10694 }
10695 if (TARGET_ARCH64)
10696 {
10697 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10698 do not exist in the library. Make sure the compiler does not
10699 emit calls to them by accident. (It should always use the
10700 hardware instructions.) */
10701 set_optab_libfunc (smul_optab, SImode, 0);
10702 set_optab_libfunc (sdiv_optab, SImode, 0);
10703 set_optab_libfunc (udiv_optab, SImode, 0);
10704 set_optab_libfunc (smod_optab, SImode, 0);
10705 set_optab_libfunc (umod_optab, SImode, 0);
10706
10707 if (SUN_INTEGER_MULTIPLY_64)
10708 {
10709 set_optab_libfunc (smul_optab, DImode, "__mul64");
10710 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10711 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10712 set_optab_libfunc (smod_optab, DImode, "__rem64");
10713 set_optab_libfunc (umod_optab, DImode, "__urem64");
10714 }
10715
10716 if (SUN_CONVERSION_LIBFUNCS)
10717 {
10718 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10719 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10720 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10721 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10722 }
10723 }
10724 }
10725 \f
10726 /* SPARC builtins. */
10727 enum sparc_builtins
10728 {
10729 /* FPU builtins. */
10730 SPARC_BUILTIN_LDFSR,
10731 SPARC_BUILTIN_STFSR,
10732
10733 /* VIS 1.0 builtins. */
10734 SPARC_BUILTIN_FPACK16,
10735 SPARC_BUILTIN_FPACK32,
10736 SPARC_BUILTIN_FPACKFIX,
10737 SPARC_BUILTIN_FEXPAND,
10738 SPARC_BUILTIN_FPMERGE,
10739 SPARC_BUILTIN_FMUL8X16,
10740 SPARC_BUILTIN_FMUL8X16AU,
10741 SPARC_BUILTIN_FMUL8X16AL,
10742 SPARC_BUILTIN_FMUL8SUX16,
10743 SPARC_BUILTIN_FMUL8ULX16,
10744 SPARC_BUILTIN_FMULD8SUX16,
10745 SPARC_BUILTIN_FMULD8ULX16,
10746 SPARC_BUILTIN_FALIGNDATAV4HI,
10747 SPARC_BUILTIN_FALIGNDATAV8QI,
10748 SPARC_BUILTIN_FALIGNDATAV2SI,
10749 SPARC_BUILTIN_FALIGNDATADI,
10750 SPARC_BUILTIN_WRGSR,
10751 SPARC_BUILTIN_RDGSR,
10752 SPARC_BUILTIN_ALIGNADDR,
10753 SPARC_BUILTIN_ALIGNADDRL,
10754 SPARC_BUILTIN_PDIST,
10755 SPARC_BUILTIN_EDGE8,
10756 SPARC_BUILTIN_EDGE8L,
10757 SPARC_BUILTIN_EDGE16,
10758 SPARC_BUILTIN_EDGE16L,
10759 SPARC_BUILTIN_EDGE32,
10760 SPARC_BUILTIN_EDGE32L,
10761 SPARC_BUILTIN_FCMPLE16,
10762 SPARC_BUILTIN_FCMPLE32,
10763 SPARC_BUILTIN_FCMPNE16,
10764 SPARC_BUILTIN_FCMPNE32,
10765 SPARC_BUILTIN_FCMPGT16,
10766 SPARC_BUILTIN_FCMPGT32,
10767 SPARC_BUILTIN_FCMPEQ16,
10768 SPARC_BUILTIN_FCMPEQ32,
10769 SPARC_BUILTIN_FPADD16,
10770 SPARC_BUILTIN_FPADD16S,
10771 SPARC_BUILTIN_FPADD32,
10772 SPARC_BUILTIN_FPADD32S,
10773 SPARC_BUILTIN_FPSUB16,
10774 SPARC_BUILTIN_FPSUB16S,
10775 SPARC_BUILTIN_FPSUB32,
10776 SPARC_BUILTIN_FPSUB32S,
10777 SPARC_BUILTIN_ARRAY8,
10778 SPARC_BUILTIN_ARRAY16,
10779 SPARC_BUILTIN_ARRAY32,
10780
10781 /* VIS 2.0 builtins. */
10782 SPARC_BUILTIN_EDGE8N,
10783 SPARC_BUILTIN_EDGE8LN,
10784 SPARC_BUILTIN_EDGE16N,
10785 SPARC_BUILTIN_EDGE16LN,
10786 SPARC_BUILTIN_EDGE32N,
10787 SPARC_BUILTIN_EDGE32LN,
10788 SPARC_BUILTIN_BMASK,
10789 SPARC_BUILTIN_BSHUFFLEV4HI,
10790 SPARC_BUILTIN_BSHUFFLEV8QI,
10791 SPARC_BUILTIN_BSHUFFLEV2SI,
10792 SPARC_BUILTIN_BSHUFFLEDI,
10793
10794 /* VIS 3.0 builtins. */
10795 SPARC_BUILTIN_CMASK8,
10796 SPARC_BUILTIN_CMASK16,
10797 SPARC_BUILTIN_CMASK32,
10798 SPARC_BUILTIN_FCHKSM16,
10799 SPARC_BUILTIN_FSLL16,
10800 SPARC_BUILTIN_FSLAS16,
10801 SPARC_BUILTIN_FSRL16,
10802 SPARC_BUILTIN_FSRA16,
10803 SPARC_BUILTIN_FSLL32,
10804 SPARC_BUILTIN_FSLAS32,
10805 SPARC_BUILTIN_FSRL32,
10806 SPARC_BUILTIN_FSRA32,
10807 SPARC_BUILTIN_PDISTN,
10808 SPARC_BUILTIN_FMEAN16,
10809 SPARC_BUILTIN_FPADD64,
10810 SPARC_BUILTIN_FPSUB64,
10811 SPARC_BUILTIN_FPADDS16,
10812 SPARC_BUILTIN_FPADDS16S,
10813 SPARC_BUILTIN_FPSUBS16,
10814 SPARC_BUILTIN_FPSUBS16S,
10815 SPARC_BUILTIN_FPADDS32,
10816 SPARC_BUILTIN_FPADDS32S,
10817 SPARC_BUILTIN_FPSUBS32,
10818 SPARC_BUILTIN_FPSUBS32S,
10819 SPARC_BUILTIN_FUCMPLE8,
10820 SPARC_BUILTIN_FUCMPNE8,
10821 SPARC_BUILTIN_FUCMPGT8,
10822 SPARC_BUILTIN_FUCMPEQ8,
10823 SPARC_BUILTIN_FHADDS,
10824 SPARC_BUILTIN_FHADDD,
10825 SPARC_BUILTIN_FHSUBS,
10826 SPARC_BUILTIN_FHSUBD,
10827 SPARC_BUILTIN_FNHADDS,
10828 SPARC_BUILTIN_FNHADDD,
10829 SPARC_BUILTIN_UMULXHI,
10830 SPARC_BUILTIN_XMULX,
10831 SPARC_BUILTIN_XMULXHI,
10832
10833 /* VIS 4.0 builtins. */
10834 SPARC_BUILTIN_FPADD8,
10835 SPARC_BUILTIN_FPADDS8,
10836 SPARC_BUILTIN_FPADDUS8,
10837 SPARC_BUILTIN_FPADDUS16,
10838 SPARC_BUILTIN_FPCMPLE8,
10839 SPARC_BUILTIN_FPCMPGT8,
10840 SPARC_BUILTIN_FPCMPULE16,
10841 SPARC_BUILTIN_FPCMPUGT16,
10842 SPARC_BUILTIN_FPCMPULE32,
10843 SPARC_BUILTIN_FPCMPUGT32,
10844 SPARC_BUILTIN_FPMAX8,
10845 SPARC_BUILTIN_FPMAX16,
10846 SPARC_BUILTIN_FPMAX32,
10847 SPARC_BUILTIN_FPMAXU8,
10848 SPARC_BUILTIN_FPMAXU16,
10849 SPARC_BUILTIN_FPMAXU32,
10850 SPARC_BUILTIN_FPMIN8,
10851 SPARC_BUILTIN_FPMIN16,
10852 SPARC_BUILTIN_FPMIN32,
10853 SPARC_BUILTIN_FPMINU8,
10854 SPARC_BUILTIN_FPMINU16,
10855 SPARC_BUILTIN_FPMINU32,
10856 SPARC_BUILTIN_FPSUB8,
10857 SPARC_BUILTIN_FPSUBS8,
10858 SPARC_BUILTIN_FPSUBUS8,
10859 SPARC_BUILTIN_FPSUBUS16,
10860
10861 /* VIS 4.0B builtins. */
10862
10863 /* Note that all the DICTUNPACK* entries should be kept
10864 contiguous. */
10865 SPARC_BUILTIN_FIRST_DICTUNPACK,
10866 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10867 SPARC_BUILTIN_DICTUNPACK16,
10868 SPARC_BUILTIN_DICTUNPACK32,
10869 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10870
10871 /* Note that all the FPCMP*SHL entries should be kept
10872 contiguous. */
10873 SPARC_BUILTIN_FIRST_FPCMPSHL,
10874 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10875 SPARC_BUILTIN_FPCMPGT8SHL,
10876 SPARC_BUILTIN_FPCMPEQ8SHL,
10877 SPARC_BUILTIN_FPCMPNE8SHL,
10878 SPARC_BUILTIN_FPCMPLE16SHL,
10879 SPARC_BUILTIN_FPCMPGT16SHL,
10880 SPARC_BUILTIN_FPCMPEQ16SHL,
10881 SPARC_BUILTIN_FPCMPNE16SHL,
10882 SPARC_BUILTIN_FPCMPLE32SHL,
10883 SPARC_BUILTIN_FPCMPGT32SHL,
10884 SPARC_BUILTIN_FPCMPEQ32SHL,
10885 SPARC_BUILTIN_FPCMPNE32SHL,
10886 SPARC_BUILTIN_FPCMPULE8SHL,
10887 SPARC_BUILTIN_FPCMPUGT8SHL,
10888 SPARC_BUILTIN_FPCMPULE16SHL,
10889 SPARC_BUILTIN_FPCMPUGT16SHL,
10890 SPARC_BUILTIN_FPCMPULE32SHL,
10891 SPARC_BUILTIN_FPCMPUGT32SHL,
10892 SPARC_BUILTIN_FPCMPDE8SHL,
10893 SPARC_BUILTIN_FPCMPDE16SHL,
10894 SPARC_BUILTIN_FPCMPDE32SHL,
10895 SPARC_BUILTIN_FPCMPUR8SHL,
10896 SPARC_BUILTIN_FPCMPUR16SHL,
10897 SPARC_BUILTIN_FPCMPUR32SHL,
10898 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10899
10900 SPARC_BUILTIN_MAX
10901 };
10902
10903 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10904 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10905
10906 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10907 The instruction should require a constant operand of some sort. The
10908 function prints an error if OPVAL is not valid. */
10909
10910 static int
10911 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10912 {
10913 if (GET_CODE (opval) != CONST_INT)
10914 {
10915 error ("%qs expects a constant argument", insn_data[icode].name);
10916 return false;
10917 }
10918
10919 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10920 {
10921 error ("constant argument out of range for %qs", insn_data[icode].name);
10922 return false;
10923 }
10924 return true;
10925 }
10926
10927 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10928 function decl or NULL_TREE if the builtin was not added. */
10929
10930 static tree
10931 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10932 tree type)
10933 {
10934 tree t
10935 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10936
10937 if (t)
10938 {
10939 sparc_builtins[code] = t;
10940 sparc_builtins_icode[code] = icode;
10941 }
10942
10943 return t;
10944 }
10945
10946 /* Likewise, but also marks the function as "const". */
10947
10948 static tree
10949 def_builtin_const (const char *name, enum insn_code icode,
10950 enum sparc_builtins code, tree type)
10951 {
10952 tree t = def_builtin (name, icode, code, type);
10953
10954 if (t)
10955 TREE_READONLY (t) = 1;
10956
10957 return t;
10958 }
10959
10960 /* Implement the TARGET_INIT_BUILTINS target hook.
10961 Create builtin functions for special SPARC instructions. */
10962
10963 static void
10964 sparc_init_builtins (void)
10965 {
10966 if (TARGET_FPU)
10967 sparc_fpu_init_builtins ();
10968
10969 if (TARGET_VIS)
10970 sparc_vis_init_builtins ();
10971 }
10972
10973 /* Create builtin functions for FPU instructions. */
10974
10975 static void
10976 sparc_fpu_init_builtins (void)
10977 {
10978 tree ftype
10979 = build_function_type_list (void_type_node,
10980 build_pointer_type (unsigned_type_node), 0);
10981 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10982 SPARC_BUILTIN_LDFSR, ftype);
10983 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10984 SPARC_BUILTIN_STFSR, ftype);
10985 }
10986
10987 /* Create builtin functions for VIS instructions. */
10988
10989 static void
10990 sparc_vis_init_builtins (void)
10991 {
10992 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10993 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10994 tree v4hi = build_vector_type (intHI_type_node, 4);
10995 tree v2hi = build_vector_type (intHI_type_node, 2);
10996 tree v2si = build_vector_type (intSI_type_node, 2);
10997 tree v1si = build_vector_type (intSI_type_node, 1);
10998
10999 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11000 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11001 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11002 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11003 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11004 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11005 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11006 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11007 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11008 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11009 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11010 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11011 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11012 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11013 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11014 v8qi, v8qi,
11015 intDI_type_node, 0);
11016 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11017 v8qi, v8qi, 0);
11018 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11019 v8qi, v8qi, 0);
11020 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11021 intSI_type_node, 0);
11022 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11023 intSI_type_node, 0);
11024 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11025 intSI_type_node, 0);
11026 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11027 intDI_type_node,
11028 intDI_type_node, 0);
11029 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11030 intSI_type_node,
11031 intSI_type_node, 0);
11032 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11033 ptr_type_node,
11034 intSI_type_node, 0);
11035 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11036 ptr_type_node,
11037 intDI_type_node, 0);
11038 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11039 ptr_type_node,
11040 ptr_type_node, 0);
11041 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11042 ptr_type_node,
11043 ptr_type_node, 0);
11044 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11045 v4hi, v4hi, 0);
11046 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11047 v2si, v2si, 0);
11048 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11049 v4hi, v4hi, 0);
11050 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11051 v2si, v2si, 0);
11052 tree void_ftype_di = build_function_type_list (void_type_node,
11053 intDI_type_node, 0);
11054 tree di_ftype_void = build_function_type_list (intDI_type_node,
11055 void_type_node, 0);
11056 tree void_ftype_si = build_function_type_list (void_type_node,
11057 intSI_type_node, 0);
11058 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11059 float_type_node,
11060 float_type_node, 0);
11061 tree df_ftype_df_df = build_function_type_list (double_type_node,
11062 double_type_node,
11063 double_type_node, 0);
11064
11065 /* Packing and expanding vectors. */
11066 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11067 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11068 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11069 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11070 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11071 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11072 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11073 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11074 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11075 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11076
11077 /* Multiplications. */
11078 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11079 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11080 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11081 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11082 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11083 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11084 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11085 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11086 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11087 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11088 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11089 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11090 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11091 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11092
11093 /* Data aligning. */
11094 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11095 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11096 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11097 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11098 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11099 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11100 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11101 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11102
11103 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11104 SPARC_BUILTIN_WRGSR, void_ftype_di);
11105 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11106 SPARC_BUILTIN_RDGSR, di_ftype_void);
11107
11108 if (TARGET_ARCH64)
11109 {
11110 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11111 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11112 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11113 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11114 }
11115 else
11116 {
11117 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11118 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11119 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11120 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11121 }
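
  /* Usage sketch (illustrative, not from the sources): a misaligned 8-byte
     load is typically open-coded by calling __builtin_vis_alignaddr to
     round the pointer down and record the byte offset in %gsr, loading the
     two enclosing doublewords, and merging them with one of the
     __builtin_vis_faligndata* builtins defined above.  */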
11122
11123 /* Pixel distance. */
11124 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11125 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11126
11127 /* Edge handling. */
11128 if (TARGET_ARCH64)
11129 {
11130 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11131 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11132 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11133 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11134 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11135 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11136 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11137 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11138 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11139 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11140 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11141 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11142 }
11143 else
11144 {
11145 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11146 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11147 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11148 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11149 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11150 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11151 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11152 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11153 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11154 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11155 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11156 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11157 }
11158
11159 /* Pixel compare. */
11160 if (TARGET_ARCH64)
11161 {
11162 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11163 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11164 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11165 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11166 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11167 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11168 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11169 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11170 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11171 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11172 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11173 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11174 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11175 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11176 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11177 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11178 }
11179 else
11180 {
11181 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11182 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11183 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11184 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11185 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11186 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11187 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11188 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11189 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11190 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11191 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11192 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11193 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11194 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11195 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11196 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11197 }
11198
11199 /* Addition and subtraction. */
11200 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11201 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11202 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11203 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11204 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11205 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11206 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11207 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11208 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11209 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11210 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11211 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11212 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11213 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11214 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11215 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11216
11217 /* Three-dimensional array addressing. */
11218 if (TARGET_ARCH64)
11219 {
11220 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11221 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11222 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11223 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11224 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11225 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11226 }
11227 else
11228 {
11229 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11230 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11231 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11232 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11233 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11234 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11235 }
11236
11237 if (TARGET_VIS2)
11238 {
11239 /* Edge handling. */
11240 if (TARGET_ARCH64)
11241 {
11242 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11243 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11244 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11245 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11246 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11247 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11248 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11249 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11250 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11251 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11252 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11253 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11254 }
11255 else
11256 {
11257 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11258 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11259 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11260 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11261 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11262 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11263 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11264 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11265 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11266 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11267 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11268 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11269 }
11270
11271 /* Byte mask and shuffle. */
11272 if (TARGET_ARCH64)
11273 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11274 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11275 else
11276 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11277 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11278 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11279 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11280 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11281 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11282 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11283 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11284 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11285 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11286 }
11287
11288 if (TARGET_VIS3)
11289 {
11290 if (TARGET_ARCH64)
11291 {
11292 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11293 SPARC_BUILTIN_CMASK8, void_ftype_di);
11294 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11295 SPARC_BUILTIN_CMASK16, void_ftype_di);
11296 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11297 SPARC_BUILTIN_CMASK32, void_ftype_di);
11298 }
11299 else
11300 {
11301 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11302 SPARC_BUILTIN_CMASK8, void_ftype_si);
11303 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11304 SPARC_BUILTIN_CMASK16, void_ftype_si);
11305 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11306 SPARC_BUILTIN_CMASK32, void_ftype_si);
11307 }
11308
11309 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11310 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11311
11312 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11313 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11314 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11315 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11316 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11317 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11318 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11319 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11320 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11321 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11322 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11323 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11324 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11325 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11326 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11327 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11328
11329 if (TARGET_ARCH64)
11330 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11331 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11332 else
11333 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11334 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11335
11336 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11337 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11338 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11339 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11340 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11341 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11342
11343 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11344 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11345 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11346 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11347 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11348 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11349 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11350 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11351 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11352 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11353 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11354 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11355 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11356 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11357 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11358 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11359
11360 if (TARGET_ARCH64)
11361 {
11362 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11363 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11364 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11365 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11366 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11367 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11368 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11369 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11370 }
11371 else
11372 {
11373 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11374 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11375 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11376 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11377 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11378 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11379 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11380 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11381 }
11382
11383 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11384 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11385 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11386 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11387 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11388 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11389 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11390 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11391 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11392 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11393 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11394 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11395
11396 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11397 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11398 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11399 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11400 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11401 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11402 }
11403
11404 if (TARGET_VIS4)
11405 {
11406 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11407 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11408 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11409 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11410 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11411 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11412 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11413 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11414
11415
11416 if (TARGET_ARCH64)
11417 {
11418 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11419 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11420 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11421 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11422 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11423 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11424 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11425 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11426 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11427 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11428 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11429 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11430 }
11431 else
11432 {
11433 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11434 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11435 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11436 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11437 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11438 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11439 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11440 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11441 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11442 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11443 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11444 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11445 }
11446
11447 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11448 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11449 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11450 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11451 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11452 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11453 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11454 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11455 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11456 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11457 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11458 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11459 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11460 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11461 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11462 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11463 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11464 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11465 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11466 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11467 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11468 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11469 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11470 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11471 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11472 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11473 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11474 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11475 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11476 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11477 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11478 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11479 }
11480
11481 if (TARGET_VIS4B)
11482 {
11483 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11484 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11485 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11486 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11487 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11488 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11489
11490 if (TARGET_ARCH64)
11491 {
11492 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11493 v8qi, v8qi,
11494 intSI_type_node, 0);
11495 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11496 v4hi, v4hi,
11497 intSI_type_node, 0);
11498 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11499 v2si, v2si,
11500 intSI_type_node, 0);
11501
11502 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11503 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11504 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11505 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11506 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11507 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11508 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11509 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11510
11511 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11512 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11513 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11514 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11515 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11516 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11517 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11518 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11519
11520 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11521 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11522 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11523 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11524 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11525 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11526 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11527 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11528
11529
11530 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11531 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11532 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11533 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11534
11535 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11536 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11537 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11538 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11539
11540 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11541 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11542 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11543 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11544
11545 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11546 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11547 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11548 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11549 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11550 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11551
11552 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11553 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11554 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11555 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11556 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11557 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11558
11559 }
11560 else
11561 {
11562 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11563 v8qi, v8qi,
11564 intSI_type_node, 0);
11565 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11566 v4hi, v4hi,
11567 intSI_type_node, 0);
11568 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11569 v2si, v2si,
11570 intSI_type_node, 0);
11571
11572 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11573 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11574 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11575 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11576 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11577 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11578 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11579 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11580
11581 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11582 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11583 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11584 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11585 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11586 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11587 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11588 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11589
11590 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11591 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11592 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11593 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11594 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11595 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11596 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11597 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11598
11599
11600 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11601 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11602 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11603 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11604
11605 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11606 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11607 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11608 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11609
11610 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11611 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11612 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11613 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11614
11615 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11616 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11617 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11618 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11619 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11620 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11621
11622 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11623 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11624 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11625 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11626 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11627 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11628 }
11629 }
11630 }
11631
11632 /* Implement TARGET_BUILTIN_DECL hook. */
11633
11634 static tree
11635 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11636 {
11637 if (code >= SPARC_BUILTIN_MAX)
11638 return error_mark_node;
11639
11640 return sparc_builtins[code];
11641 }
11642
11643 /* Implement TARGET_EXPAND_BUILTIN hook. */
11644
11645 static rtx
11646 sparc_expand_builtin (tree exp, rtx target,
11647 rtx subtarget ATTRIBUTE_UNUSED,
11648 machine_mode tmode ATTRIBUTE_UNUSED,
11649 int ignore ATTRIBUTE_UNUSED)
11650 {
11651 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11652 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11653 enum insn_code icode = sparc_builtins_icode[code];
11654 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11655 call_expr_arg_iterator iter;
11656 int arg_count = 0;
11657 rtx pat, op[4];
11658 tree arg;
11659
11660 if (nonvoid)
11661 {
11662 machine_mode tmode = insn_data[icode].operand[0].mode;
11663 if (!target
11664 || GET_MODE (target) != tmode
11665 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11666 op[0] = gen_reg_rtx (tmode);
11667 else
11668 op[0] = target;
11669 }
11670 else
11671 op[0] = NULL_RTX;
11672
11673 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11674 {
11675 const struct insn_operand_data *insn_op;
11676 int idx;
11677
11678 if (arg == error_mark_node)
11679 return NULL_RTX;
11680
11681 arg_count++;
11682 idx = arg_count - !nonvoid;
11683 insn_op = &insn_data[icode].operand[idx];
11684 op[arg_count] = expand_normal (arg);
11685
11686 /* Some of the builtins require constant arguments. We check
11687 for this here. */
11688 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11689 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11690 && arg_count == 3)
11691 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11692 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11693 && arg_count == 2))
11694 {
11695 if (!check_constant_argument (icode, idx, op[arg_count]))
11696 return const0_rtx;
11697 }
11698
11699 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11700 {
11701 if (!address_operand (op[arg_count], SImode))
11702 {
11703 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11704 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11705 }
11706 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11707 }
11708
11709 else if (insn_op->mode == V1DImode
11710 && GET_MODE (op[arg_count]) == DImode)
11711 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11712
11713 else if (insn_op->mode == V1SImode
11714 && GET_MODE (op[arg_count]) == SImode)
11715 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11716
11717 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11718 insn_op->mode))
11719 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11720 }
11721
11722 switch (arg_count)
11723 {
11724 case 0:
11725 pat = GEN_FCN (icode) (op[0]);
11726 break;
11727 case 1:
11728 if (nonvoid)
11729 pat = GEN_FCN (icode) (op[0], op[1]);
11730 else
11731 pat = GEN_FCN (icode) (op[1]);
11732 break;
11733 case 2:
11734 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11735 break;
11736 case 3:
11737 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11738 break;
11739 default:
11740 gcc_unreachable ();
11741 }
11742
11743 if (!pat)
11744 return NULL_RTX;
11745
11746 emit_insn (pat);
11747
11748 return (nonvoid ? op[0] : const0_rtx);
11749 }
11750
11751 /* Return the upper 16 bits of the 8x16 multiplication. */
11752
11753 static int
11754 sparc_vis_mul8x16 (int e8, int e16)
11755 {
11756 return (e8 * e16 + 128) / 256;
11757 }
11758
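/* Worked example (illustrative): for e8 = 200 and e16 = 100 the expression
   above yields (200 * 100 + 128) / 256 = 78, i.e. the scaled product
   200 * 100 / 256 = 78.125 rounded to the nearest integer, which is the
   value the folder below pushes into the constant result vector.  */
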
11759 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11760 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11761
11762 static void
11763 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11764 tree inner_type, tree cst0, tree cst1)
11765 {
11766 unsigned i, num = VECTOR_CST_NELTS (cst0);
11767 int scale;
11768
11769 switch (fncode)
11770 {
11771 case SPARC_BUILTIN_FMUL8X16:
11772 for (i = 0; i < num; ++i)
11773 {
11774 int val
11775 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11776 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11777 n_elts->quick_push (build_int_cst (inner_type, val));
11778 }
11779 break;
11780
11781 case SPARC_BUILTIN_FMUL8X16AU:
11782 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11783
11784 for (i = 0; i < num; ++i)
11785 {
11786 int val
11787 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11788 scale);
11789 n_elts->quick_push (build_int_cst (inner_type, val));
11790 }
11791 break;
11792
11793 case SPARC_BUILTIN_FMUL8X16AL:
11794 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11795
11796 for (i = 0; i < num; ++i)
11797 {
11798 int val
11799 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11800 scale);
11801 n_elts->quick_push (build_int_cst (inner_type, val));
11802 }
11803 break;
11804
11805 default:
11806 gcc_unreachable ();
11807 }
11808 }
11809
11810 /* Implement TARGET_FOLD_BUILTIN hook.
11811
11812 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11813 result of the function call is ignored. NULL_TREE is returned if the
11814 function could not be folded. */
11815
11816 static tree
11817 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11818 tree *args, bool ignore)
11819 {
11820 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11821 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11822 tree arg0, arg1, arg2;
11823
11824 if (ignore)
11825 switch (code)
11826 {
11827 case SPARC_BUILTIN_LDFSR:
11828 case SPARC_BUILTIN_STFSR:
11829 case SPARC_BUILTIN_ALIGNADDR:
11830 case SPARC_BUILTIN_WRGSR:
11831 case SPARC_BUILTIN_BMASK:
11832 case SPARC_BUILTIN_CMASK8:
11833 case SPARC_BUILTIN_CMASK16:
11834 case SPARC_BUILTIN_CMASK32:
11835 break;
11836
11837 default:
11838 return build_zero_cst (rtype);
11839 }
11840
11841 switch (code)
11842 {
11843 case SPARC_BUILTIN_FEXPAND:
11844 arg0 = args[0];
11845 STRIP_NOPS (arg0);
11846
11847 if (TREE_CODE (arg0) == VECTOR_CST)
11848 {
11849 tree inner_type = TREE_TYPE (rtype);
11850 unsigned i;
11851
11852 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11853 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11854 {
11855 unsigned HOST_WIDE_INT val
11856 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11857 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11858 }
11859 return n_elts.build ();
11860 }
11861 break;
11862
11863 case SPARC_BUILTIN_FMUL8X16:
11864 case SPARC_BUILTIN_FMUL8X16AU:
11865 case SPARC_BUILTIN_FMUL8X16AL:
11866 arg0 = args[0];
11867 arg1 = args[1];
11868 STRIP_NOPS (arg0);
11869 STRIP_NOPS (arg1);
11870
11871 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11872 {
11873 tree inner_type = TREE_TYPE (rtype);
11874 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11875 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11876 return n_elts.build ();
11877 }
11878 break;
11879
11880 case SPARC_BUILTIN_FPMERGE:
11881 arg0 = args[0];
11882 arg1 = args[1];
11883 STRIP_NOPS (arg0);
11884 STRIP_NOPS (arg1);
11885
11886 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11887 {
11888 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11889 unsigned i;
11890 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11891 {
11892 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11893 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11894 }
11895
11896 return n_elts.build ();
11897 }
11898 break;
11899
11900 case SPARC_BUILTIN_PDIST:
11901 case SPARC_BUILTIN_PDISTN:
11902 arg0 = args[0];
11903 arg1 = args[1];
11904 STRIP_NOPS (arg0);
11905 STRIP_NOPS (arg1);
11906 if (code == SPARC_BUILTIN_PDIST)
11907 {
11908 arg2 = args[2];
11909 STRIP_NOPS (arg2);
11910 }
11911 else
11912 arg2 = integer_zero_node;
11913
11914 if (TREE_CODE (arg0) == VECTOR_CST
11915 && TREE_CODE (arg1) == VECTOR_CST
11916 && TREE_CODE (arg2) == INTEGER_CST)
11917 {
11918 bool overflow = false;
11919 widest_int result = wi::to_widest (arg2);
11920 widest_int tmp;
11921 unsigned i;
11922
11923 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11924 {
11925 tree e0 = VECTOR_CST_ELT (arg0, i);
11926 tree e1 = VECTOR_CST_ELT (arg1, i);
11927
11928 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11929
11930 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11931 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11932 if (wi::neg_p (tmp))
11933 tmp = wi::neg (tmp, &neg2_ovf);
11934 else
11935 neg2_ovf = wi::OVF_NONE;
11936 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11937 overflow |= ((neg1_ovf != wi::OVF_NONE)
11938 | (neg2_ovf != wi::OVF_NONE)
11939 | (add1_ovf != wi::OVF_NONE)
11940 | (add2_ovf != wi::OVF_NONE));
11941 }
11942
11943 gcc_assert (!overflow);
11944
11945 return wide_int_to_tree (rtype, result);
11946 }
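      /* Worked example (illustrative): for constant v8qi operands that
	 differ only in their first two elements, say 1 vs 4 and 5 vs 2,
	 and an accumulator argument of 10, the loop above folds the call
	 to the constant 10 + |1 - 4| + |5 - 2| = 16.  */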
11947
11948 default:
11949 break;
11950 }
11951
11952 return NULL_TREE;
11953 }
11954 \f
11955 /* ??? This duplicates information provided to the compiler by the
11956 ??? scheduler description. Some day, teach genautomata to output
11957 ??? the latencies and then CSE will just use that. */
11958
11959 static bool
11960 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11961 int opno ATTRIBUTE_UNUSED,
11962 int *total, bool speed ATTRIBUTE_UNUSED)
11963 {
11964 int code = GET_CODE (x);
11965 bool float_mode_p = FLOAT_MODE_P (mode);
11966
11967 switch (code)
11968 {
11969 case CONST_INT:
11970 if (SMALL_INT (x))
11971 *total = 0;
11972 else
11973 *total = 2;
11974 return true;
11975
11976 case CONST_WIDE_INT:
11977 *total = 0;
11978 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11979 *total += 2;
11980 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11981 *total += 2;
11982 return true;
11983
11984 case HIGH:
11985 *total = 2;
11986 return true;
11987
11988 case CONST:
11989 case LABEL_REF:
11990 case SYMBOL_REF:
11991 *total = 4;
11992 return true;
11993
11994 case CONST_DOUBLE:
11995 *total = 8;
11996 return true;
11997
11998 case MEM:
11999 /* If outer-code was a sign or zero extension, a cost
12000 of COSTS_N_INSNS (1) was already added in. This is
12001 why we are subtracting it back out. */
12002 if (outer_code == ZERO_EXTEND)
12003 {
12004 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12005 }
12006 else if (outer_code == SIGN_EXTEND)
12007 {
12008 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12009 }
12010 else if (float_mode_p)
12011 {
12012 *total = sparc_costs->float_load;
12013 }
12014 else
12015 {
12016 *total = sparc_costs->int_load;
12017 }
12018
12019 return true;
12020
12021 case PLUS:
12022 case MINUS:
12023 if (float_mode_p)
12024 *total = sparc_costs->float_plusminus;
12025 else
12026 *total = COSTS_N_INSNS (1);
12027 return false;
12028
12029 case FMA:
12030 {
12031 rtx sub;
12032
12033 gcc_assert (float_mode_p);
12034 *total = sparc_costs->float_mul;
12035
12036 sub = XEXP (x, 0);
12037 if (GET_CODE (sub) == NEG)
12038 sub = XEXP (sub, 0);
12039 *total += rtx_cost (sub, mode, FMA, 0, speed);
12040
12041 sub = XEXP (x, 2);
12042 if (GET_CODE (sub) == NEG)
12043 sub = XEXP (sub, 0);
12044 *total += rtx_cost (sub, mode, FMA, 2, speed);
12045 return true;
12046 }
12047
12048 case MULT:
12049 if (float_mode_p)
12050 *total = sparc_costs->float_mul;
12051 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12052 *total = COSTS_N_INSNS (25);
12053 else
12054 {
12055 int bit_cost;
12056
12057 bit_cost = 0;
12058 if (sparc_costs->int_mul_bit_factor)
12059 {
12060 int nbits;
12061
12062 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12063 {
12064 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12065 for (nbits = 0; value != 0; value &= value - 1)
12066 nbits++;
12067 }
12068 else
12069 nbits = 7;
12070
12071 if (nbits < 3)
12072 nbits = 3;
12073 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12074 bit_cost = COSTS_N_INSNS (bit_cost);
12075 }
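	      /* Worked example (illustrative): a constant multiplier with
		 9 bits set, with int_mul_bit_factor equal to 2, adds
		 (9 - 3) / 2 = 3 instructions' worth of cost on top of the
		 base int_mul/int_mulX cost selected below.  */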
12076
12077 if (mode == DImode || !TARGET_HARD_MUL)
12078 *total = sparc_costs->int_mulX + bit_cost;
12079 else
12080 *total = sparc_costs->int_mul + bit_cost;
12081 }
12082 return false;
12083
12084 case ASHIFT:
12085 case ASHIFTRT:
12086 case LSHIFTRT:
12087 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12088 return false;
12089
12090 case DIV:
12091 case UDIV:
12092 case MOD:
12093 case UMOD:
12094 if (float_mode_p)
12095 {
12096 if (mode == DFmode)
12097 *total = sparc_costs->float_div_df;
12098 else
12099 *total = sparc_costs->float_div_sf;
12100 }
12101 else
12102 {
12103 if (mode == DImode)
12104 *total = sparc_costs->int_divX;
12105 else
12106 *total = sparc_costs->int_div;
12107 }
12108 return false;
12109
12110 case NEG:
12111 if (! float_mode_p)
12112 {
12113 *total = COSTS_N_INSNS (1);
12114 return false;
12115 }
12116 /* FALLTHRU */
12117
12118 case ABS:
12119 case FLOAT:
12120 case UNSIGNED_FLOAT:
12121 case FIX:
12122 case UNSIGNED_FIX:
12123 case FLOAT_EXTEND:
12124 case FLOAT_TRUNCATE:
12125 *total = sparc_costs->float_move;
12126 return false;
12127
12128 case SQRT:
12129 if (mode == DFmode)
12130 *total = sparc_costs->float_sqrt_df;
12131 else
12132 *total = sparc_costs->float_sqrt_sf;
12133 return false;
12134
12135 case COMPARE:
12136 if (float_mode_p)
12137 *total = sparc_costs->float_cmp;
12138 else
12139 *total = COSTS_N_INSNS (1);
12140 return false;
12141
12142 case IF_THEN_ELSE:
12143 if (float_mode_p)
12144 *total = sparc_costs->float_cmove;
12145 else
12146 *total = sparc_costs->int_cmove;
12147 return false;
12148
12149 case IOR:
12150 /* Handle the NAND vector patterns. */
12151 if (sparc_vector_mode_supported_p (mode)
12152 && GET_CODE (XEXP (x, 0)) == NOT
12153 && GET_CODE (XEXP (x, 1)) == NOT)
12154 {
12155 *total = COSTS_N_INSNS (1);
12156 return true;
12157 }
12158 else
12159 return false;
12160
12161 default:
12162 return false;
12163 }
12164 }
12165
12166 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12167
12168 static inline bool
12169 general_or_i64_p (reg_class_t rclass)
12170 {
12171 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12172 }
12173
12174 /* Implement TARGET_REGISTER_MOVE_COST. */
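
/* For instance (illustrative): with VIS3 on a 64-bit target, an 8-byte move
   between the FP and integer register files is priced at 4 below; without
   VIS3 the same move has to go through memory, which costs 12 on the
   UltraSPARC/Niagara-class processors listed and 6 elsewhere.  */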
12175
12176 static int
12177 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12178 reg_class_t from, reg_class_t to)
12179 {
12180 bool need_memory = false;
12181
12182 /* This helps postreload CSE to eliminate redundant comparisons. */
12183 if (from == NO_REGS || to == NO_REGS)
12184 return 100;
12185
12186 if (from == FPCC_REGS || to == FPCC_REGS)
12187 need_memory = true;
12188 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12189 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12190 {
12191 if (TARGET_VIS3)
12192 {
12193 int size = GET_MODE_SIZE (mode);
12194 if (size == 8 || size == 4)
12195 {
12196 if (! TARGET_ARCH32 || size == 4)
12197 return 4;
12198 else
12199 return 6;
12200 }
12201 }
12202 need_memory = true;
12203 }
12204
12205 if (need_memory)
12206 {
12207 if (sparc_cpu == PROCESSOR_ULTRASPARC
12208 || sparc_cpu == PROCESSOR_ULTRASPARC3
12209 || sparc_cpu == PROCESSOR_NIAGARA
12210 || sparc_cpu == PROCESSOR_NIAGARA2
12211 || sparc_cpu == PROCESSOR_NIAGARA3
12212 || sparc_cpu == PROCESSOR_NIAGARA4
12213 || sparc_cpu == PROCESSOR_NIAGARA7
12214 || sparc_cpu == PROCESSOR_M8)
12215 return 12;
12216
12217 return 6;
12218 }
12219
12220 return 2;
12221 }
12222
12223 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12224 This is achieved by means of a manual dynamic stack space allocation in
12225 the current frame. We make the assumption that SEQ doesn't contain any
12226 function calls, with the possible exception of calls to the GOT helper. */
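
/* A rough sketch of the emitted sequence (illustrative only; the exact
   instructions depend on the word size and stack bias):

     add  %sp, -size, %sp             ! allocate fresh stack space
     stx  reg,  [%sp + bias + offset]
     stx  reg2, [%sp + bias + offset + 8]
     ...                              ! the insns of SEQ
     ldx  [%sp + bias + offset + 8], reg2
     ldx  [%sp + bias + offset], reg
     add  %sp, size, %sp              ! release the stack space  */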
12227
12228 static void
12229 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12230 {
12231 /* We must preserve the lowest 16 words for the register save area. */
12232 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12233 /* We really need only 2 words of fresh stack space. */
12234 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12235
12236 rtx slot
12237 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12238 SPARC_STACK_BIAS + offset));
12239
12240 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12241 emit_insn (gen_rtx_SET (slot, reg));
12242 if (reg2)
12243 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12244 reg2));
12245 emit_insn (seq);
12246 if (reg2)
12247 emit_insn (gen_rtx_SET (reg2,
12248 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12249 emit_insn (gen_rtx_SET (reg, slot));
12250 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12251 }
12252
12253 /* Output the assembler code for a thunk function. THUNK_DECL is the
12254 declaration for the thunk function itself, FUNCTION is the decl for
12255 the target function. DELTA is an immediate constant offset to be
12256 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12257 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
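
/* In C++ terms (illustrative), such a thunk implements the 'this'
   adjustment needed when a virtual function is overridden or inherited at
   a non-zero subobject offset: DELTA covers the statically known part of
   the adjustment and VCALL_OFFSET the part that must be fetched from the
   vtable at run time.  */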
12258
12259 static void
12260 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12261 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12262 tree function)
12263 {
12264 rtx this_rtx, funexp;
12265 rtx_insn *insn;
12266 unsigned int int_arg_first;
12267
12268 reload_completed = 1;
12269 epilogue_completed = 1;
12270
12271 emit_note (NOTE_INSN_PROLOGUE_END);
12272
12273 if (TARGET_FLAT)
12274 {
12275 sparc_leaf_function_p = 1;
12276
12277 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12278 }
12279 else if (flag_delayed_branch)
12280 {
12281 /* We will emit a regular sibcall below, so we need to instruct
12282 output_sibcall that we are in a leaf function. */
12283 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12284
12285 /* This will cause final.c to invoke leaf_renumber_regs so we
12286 must behave as if we were in a not-yet-leafified function. */
12287 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12288 }
12289 else
12290 {
12291 /* We will emit the sibcall manually below, so we will need to
12292 manually spill non-leaf registers. */
12293 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12294
12295 /* We really are in a leaf function. */
12296 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12297 }
12298
12299 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12300 returns a structure, the structure return pointer is there instead. */
12301 if (TARGET_ARCH64
12302 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12303 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12304 else
12305 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12306
12307 /* Add DELTA. When possible use a plain add, otherwise load it into
12308 a register first. */
12309 if (delta)
12310 {
12311 rtx delta_rtx = GEN_INT (delta);
12312
12313 if (! SPARC_SIMM13_P (delta))
12314 {
12315 rtx scratch = gen_rtx_REG (Pmode, 1);
12316 emit_move_insn (scratch, delta_rtx);
12317 delta_rtx = scratch;
12318 }
12319
12320 /* THIS_RTX += DELTA. */
12321 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12322 }
12323
12324 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12325 if (vcall_offset)
12326 {
12327 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12328 rtx scratch = gen_rtx_REG (Pmode, 1);
12329
12330 gcc_assert (vcall_offset < 0);
12331
12332 /* SCRATCH = *THIS_RTX. */
12333 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12334
12335 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12336 may not have any available scratch register at this point. */
12337 if (SPARC_SIMM13_P (vcall_offset))
12338 ;
12339 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12340 else if (! fixed_regs[5]
12341 /* The below sequence is made up of at least 2 insns,
12342 while the default method may need only one. */
12343 && vcall_offset < -8192)
12344 {
12345 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12346 emit_move_insn (scratch2, vcall_offset_rtx);
12347 vcall_offset_rtx = scratch2;
12348 }
12349 else
12350 {
12351 rtx increment = GEN_INT (-4096);
12352
12353 /* VCALL_OFFSET is a negative number whose typical range can be
12354 estimated as -32768..0 in 32-bit mode. In almost all cases
12355 it is therefore cheaper to emit multiple add insns than
12356 spilling and loading the constant into a register (at least
12357 6 insns). */
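	  /* Worked example (illustrative): for VCALL_OFFSET == -10000, two
	     add insns of -4096 leave -1808, which satisfies SPARC_SIMM13_P
	     and is then used directly as the displacement of the load
	     emitted below.  */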
12358 while (! SPARC_SIMM13_P (vcall_offset))
12359 {
12360 emit_insn (gen_add2_insn (scratch, increment));
12361 vcall_offset += 4096;
12362 }
12363 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12364 }
12365
12366 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12367 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12368 gen_rtx_PLUS (Pmode,
12369 scratch,
12370 vcall_offset_rtx)));
12371
12372 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12373 emit_insn (gen_add2_insn (this_rtx, scratch));
12374 }
12375
12376 /* Generate a tail call to the target function. */
12377 if (! TREE_USED (function))
12378 {
12379 assemble_external (function);
12380 TREE_USED (function) = 1;
12381 }
12382 funexp = XEXP (DECL_RTL (function), 0);
12383
12384 if (flag_delayed_branch)
12385 {
12386 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12387 insn = emit_call_insn (gen_sibcall (funexp));
12388 SIBLING_CALL_P (insn) = 1;
12389 }
12390 else
12391 {
12392 /* The hoops we have to jump through in order to generate a sibcall
12393 without using delay slots... */
12394 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12395
12396 if (flag_pic)
12397 {
12398 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12399 start_sequence ();
12400 load_got_register (); /* clobbers %o7 */
12401 if (!TARGET_VXWORKS_RTP)
12402 pic_offset_table_rtx = global_offset_table_rtx;
12403 scratch = sparc_legitimize_pic_address (funexp, scratch);
12404 seq = get_insns ();
12405 end_sequence ();
12406 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12407 }
12408 else if (TARGET_ARCH32)
12409 {
12410 emit_insn (gen_rtx_SET (scratch,
12411 gen_rtx_HIGH (SImode, funexp)));
12412 emit_insn (gen_rtx_SET (scratch,
12413 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12414 }
12415 else /* TARGET_ARCH64 */
12416 {
12417 switch (sparc_cmodel)
12418 {
12419 case CM_MEDLOW:
12420 case CM_MEDMID:
12421 /* The destination can serve as a temporary. */
12422 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12423 break;
12424
12425 case CM_MEDANY:
12426 case CM_EMBMEDANY:
12427 /* The destination cannot serve as a temporary. */
12428 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12429 start_sequence ();
12430 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12431 seq = get_insns ();
12432 end_sequence ();
12433 emit_and_preserve (seq, spill_reg, 0);
12434 break;
12435
12436 default:
12437 gcc_unreachable ();
12438 }
12439 }
12440
12441 emit_jump_insn (gen_indirect_jump (scratch));
12442 }
12443
12444 emit_barrier ();
12445
12446 /* Run just enough of rest_of_compilation to get the insns emitted.
12447 There's not really enough bulk here to make other passes such as
12448 instruction scheduling worth while. Note that use_thunk calls
12449 assemble_start_function and assemble_end_function. */
12450 insn = get_insns ();
12451 shorten_branches (insn);
12452 final_start_function (insn, file, 1);
12453 final (insn, file, 1);
12454 final_end_function ();
12455
12456 reload_completed = 0;
12457 epilogue_completed = 0;
12458 }
12459
12460 /* Return true if sparc_output_mi_thunk would be able to output the
12461 assembler code for the thunk function specified by the arguments
12462 it is passed, and false otherwise. */
12463 static bool
12464 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12465 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12466 HOST_WIDE_INT vcall_offset,
12467 const_tree function ATTRIBUTE_UNUSED)
12468 {
12469 /* Bound the loop used in the default method above. */
12470 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12471 }
12472
12473 /* How to allocate a 'struct machine_function'. */
12474
12475 static struct machine_function *
12476 sparc_init_machine_status (void)
12477 {
12478 return ggc_cleared_alloc<machine_function> ();
12479 }
12480 \f
12481 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
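
/* Background note (generic AddressSanitizer fact, not SPARC-specific): the
   sanitizer maps an address to its shadow byte as shadow = (addr >> 3) +
   offset, so the value returned below chooses where the shadow region
   lives in the address space for each ABI.  */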
12482
12483 static unsigned HOST_WIDE_INT
12484 sparc_asan_shadow_offset (void)
12485 {
12486 return TARGET_ARCH64 ? HOST_WIDE_INT_C (0x7fff8000) : (HOST_WIDE_INT_1 << 29);
12487 }
12488 \f
12489 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12490 We need to emit DTP-relative relocations. */
12491
12492 static void
12493 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12494 {
12495 switch (size)
12496 {
12497 case 4:
12498 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12499 break;
12500 case 8:
12501 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12502 break;
12503 default:
12504 gcc_unreachable ();
12505 }
12506 output_addr_const (file, x);
12507 fputs (")", file);
12508 }
12509
12510 /* Do whatever processing is required at the end of a file. */
12511
12512 static void
12513 sparc_file_end (void)
12514 {
12515 /* If we need to emit the special GOT helper function, do so now. */
12516 if (got_helper_rtx)
12517 {
12518 const char *name = XSTR (got_helper_rtx, 0);
12519 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12520 #ifdef DWARF2_UNWIND_INFO
12521 bool do_cfi;
12522 #endif
12523
12524 if (USE_HIDDEN_LINKONCE)
12525 {
12526 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12527 get_identifier (name),
12528 build_function_type_list (void_type_node,
12529 NULL_TREE));
12530 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12531 NULL_TREE, void_type_node);
12532 TREE_PUBLIC (decl) = 1;
12533 TREE_STATIC (decl) = 1;
12534 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12535 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12536 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12537 resolve_unique_section (decl, 0, flag_function_sections);
12538 allocate_struct_function (decl, true);
12539 cfun->is_thunk = 1;
12540 current_function_decl = decl;
12541 init_varasm_status ();
12542 assemble_start_function (decl, name);
12543 }
12544 else
12545 {
12546 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12547 switch_to_section (text_section);
12548 if (align > 0)
12549 ASM_OUTPUT_ALIGN (asm_out_file, align);
12550 ASM_OUTPUT_LABEL (asm_out_file, name);
12551 }
12552
12553 #ifdef DWARF2_UNWIND_INFO
12554 do_cfi = dwarf2out_do_cfi_asm ();
12555 if (do_cfi)
12556 fprintf (asm_out_file, "\t.cfi_startproc\n");
12557 #endif
12558 if (flag_delayed_branch)
12559 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12560 reg_name, reg_name);
12561 else
12562 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12563 reg_name, reg_name);
12564 #ifdef DWARF2_UNWIND_INFO
12565 if (do_cfi)
12566 fprintf (asm_out_file, "\t.cfi_endproc\n");
12567 #endif
12568 }
12569
12570 if (NEED_INDICATE_EXEC_STACK)
12571 file_end_indicate_exec_stack ();
12572
12573 #ifdef TARGET_SOLARIS
12574 solaris_file_end ();
12575 #endif
12576 }
12577
12578 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12579 /* Implement TARGET_MANGLE_TYPE. */
12580
12581 static const char *
12582 sparc_mangle_type (const_tree type)
12583 {
12584 if (TARGET_ARCH32
12585 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12586 && TARGET_LONG_DOUBLE_128)
12587 return "g";
12588
12589 /* For all other types, use normal C++ mangling. */
12590 return NULL;
12591 }
12592 #endif
12593
12594 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12595 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12596 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
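
/* For example (illustrative): LOAD_STORE == 3 together with BEFORE_AFTER == 1
   describes the barrier emitted before an operation that acts as both a load
   and a store, i.e. before an atomic read-modify-write; the memory-model
   cases below use exactly this combination to credit such operations with
   additional implied ordering.  */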
12597
12598 void
12599 sparc_emit_membar_for_model (enum memmodel model,
12600 int load_store, int before_after)
12601 {
12602 /* Bits for the MEMBAR mmask field. */
12603 const int LoadLoad = 1;
12604 const int StoreLoad = 2;
12605 const int LoadStore = 4;
12606 const int StoreStore = 8;
12607
12608 int mm = 0, implied = 0;
12609
12610 switch (sparc_memory_model)
12611 {
12612 case SMM_SC:
12613 /* Sequential Consistency. All memory transactions are immediately
12614 visible in sequential execution order. No barriers needed. */
12615 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12616 break;
12617
12618 case SMM_TSO:
12619 /* Total Store Ordering: all memory transactions with store semantics
12620 are followed by an implied StoreStore. */
12621 implied |= StoreStore;
12622
12623 /* If we're not looking for a raw barrier (before+after), then atomic
12624 operations get the benefit of being both load and store. */
12625 if (load_store == 3 && before_after == 1)
12626 implied |= StoreLoad;
12627 /* FALLTHRU */
12628
12629 case SMM_PSO:
12630 /* Partial Store Ordering: all memory transactions with load semantics
12631 are followed by an implied LoadLoad | LoadStore. */
12632 implied |= LoadLoad | LoadStore;
12633
12634 /* If we're not looking for a raw barrier (before+after), then atomic
12635 operations get the benefit of being both load and store. */
12636 if (load_store == 3 && before_after == 2)
12637 implied |= StoreLoad | StoreStore;
12638 /* FALLTHRU */
12639
12640 case SMM_RMO:
12641 /* Relaxed Memory Ordering: no implicit bits. */
12642 break;
12643
12644 default:
12645 gcc_unreachable ();
12646 }
12647
12648 if (before_after & 1)
12649 {
12650 if (is_mm_release (model) || is_mm_acq_rel (model)
12651 || is_mm_seq_cst (model))
12652 {
12653 if (load_store & 1)
12654 mm |= LoadLoad | StoreLoad;
12655 if (load_store & 2)
12656 mm |= LoadStore | StoreStore;
12657 }
12658 }
12659 if (before_after & 2)
12660 {
12661 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12662 || is_mm_seq_cst (model))
12663 {
12664 if (load_store & 1)
12665 mm |= LoadLoad | LoadStore;
12666 if (load_store & 2)
12667 mm |= StoreLoad | StoreStore;
12668 }
12669 }
12670
12671 /* Remove the bits implied by the system memory model. */
12672 mm &= ~implied;
12673
12674 /* For raw barriers (before+after), always emit a barrier.
12675 This will become a compile-time barrier if needed. */
12676 if (mm || before_after == 3)
12677 emit_insn (gen_membar (GEN_INT (mm)));
12678 }
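/* A worked example (illustrative, not from the original sources): a full
   sequentially consistent fence would reach this function as
   LOAD_STORE == 3, BEFORE_AFTER == 3 with a seq_cst model.  Under SMM_RMO
   nothing is implied, so the full mask is emitted:

     membar  #LoadLoad | #StoreLoad | #LoadStore | #StoreStore

   Under SMM_TSO everything except StoreLoad is already implied by the
   hardware memory model, so only "membar #StoreLoad" should remain.  Under
   SMM_SC the mask becomes empty, and a membar with an empty mmask is still
   emitted (serving as a compile-time barrier) because BEFORE_AFTER == 3.  */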
12679
12680 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12681 compare and swap on the word containing the byte or half-word. */
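/* A rough C-level sketch of the expansion below (illustrative only; it
   assumes a big-endian 32-bit word and uses cas32 as a stand-in for the
   atomic_compare_and_swapsi_1 pattern):

     word  = addr & -4;
     shift = ((addr & 3) ^ (QImode ? 3 : 2)) * 8;
     mask  = (QImode ? 0xff : 0xffff) << shift;
     background = *word & ~mask;
     old_field  = (oldval << shift) & mask;
     new_field  = (newval << shift) & mask;
     for (;;)
       {
         expected = background | old_field;
         desired  = background | new_field;
         observed = cas32 (word, expected, desired);
         if (observed == expected)
           break;                          (success: bool_result is 1)
         if ((observed & ~mask) == background)
           break;                          (the narrow field differed: bool_result is 0)
         background = observed & ~mask;    (only bystander bytes changed: retry)
       }
     result = (observed & mask) >> shift;  */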
12682
12683 static void
12684 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12685 rtx oldval, rtx newval)
12686 {
12687 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12688 rtx addr = gen_reg_rtx (Pmode);
12689 rtx off = gen_reg_rtx (SImode);
12690 rtx oldv = gen_reg_rtx (SImode);
12691 rtx newv = gen_reg_rtx (SImode);
12692 rtx oldvalue = gen_reg_rtx (SImode);
12693 rtx newvalue = gen_reg_rtx (SImode);
12694 rtx res = gen_reg_rtx (SImode);
12695 rtx resv = gen_reg_rtx (SImode);
12696 rtx memsi, val, mask, cc;
12697
12698 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12699
12700 if (Pmode != SImode)
12701 addr1 = gen_lowpart (SImode, addr1);
12702 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12703
12704 memsi = gen_rtx_MEM (SImode, addr);
12705 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12706 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12707
12708 val = copy_to_reg (memsi);
12709
12710 emit_insn (gen_rtx_SET (off,
12711 gen_rtx_XOR (SImode, off,
12712 GEN_INT (GET_MODE (mem) == QImode
12713 ? 3 : 2))));
12714
12715 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12716
12717 if (GET_MODE (mem) == QImode)
12718 mask = force_reg (SImode, GEN_INT (0xff));
12719 else
12720 mask = force_reg (SImode, GEN_INT (0xffff));
12721
12722 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12723
12724 emit_insn (gen_rtx_SET (val,
12725 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12726 val)));
12727
12728 oldval = gen_lowpart (SImode, oldval);
12729 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12730
12731 newval = gen_lowpart_common (SImode, newval);
12732 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12733
12734 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12735
12736 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12737
12738 rtx_code_label *end_label = gen_label_rtx ();
12739 rtx_code_label *loop_label = gen_label_rtx ();
12740 emit_label (loop_label);
12741
12742 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12743
12744 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12745
12746 emit_move_insn (bool_result, const1_rtx);
12747
12748 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12749
12750 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12751
12752 emit_insn (gen_rtx_SET (resv,
12753 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12754 res)));
12755
12756 emit_move_insn (bool_result, const0_rtx);
12757
12758 cc = gen_compare_reg_1 (NE, resv, val);
12759 emit_insn (gen_rtx_SET (val, resv));
12760
12761 /* Use cbranchcc4 to separate the compare and branch! */
12762 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12763 cc, const0_rtx, loop_label));
12764
12765 emit_label (end_label);
12766
12767 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12768
12769 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12770
12771 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12772 }
12773
12774 /* Expand code to perform a compare-and-swap. */
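/* The OPERANDS array follows the layout of the atomic_compare_and_swap<mode>
   named pattern: 0 = success flag, 1 = old-value output, 2 = memory,
   3 = expected value, 4 = desired value, 5 = weak/strong flag, 6 = success
   memory model, 7 = failure memory model, i.e. roughly the arguments of
   __atomic_compare_exchange_n.  Only the success model (operand 6) is used
   below.  */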
12775
12776 void
12777 sparc_expand_compare_and_swap (rtx operands[])
12778 {
12779 rtx bval, retval, mem, oldval, newval;
12780 machine_mode mode;
12781 enum memmodel model;
12782
12783 bval = operands[0];
12784 retval = operands[1];
12785 mem = operands[2];
12786 oldval = operands[3];
12787 newval = operands[4];
12788 model = (enum memmodel) INTVAL (operands[6]);
12789 mode = GET_MODE (mem);
12790
12791 sparc_emit_membar_for_model (model, 3, 1);
12792
12793 if (reg_overlap_mentioned_p (retval, oldval))
12794 oldval = copy_to_reg (oldval);
12795
12796 if (mode == QImode || mode == HImode)
12797 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12798 else
12799 {
12800 rtx (*gen) (rtx, rtx, rtx, rtx);
12801 rtx x;
12802
12803 if (mode == SImode)
12804 gen = gen_atomic_compare_and_swapsi_1;
12805 else
12806 gen = gen_atomic_compare_and_swapdi_1;
12807 emit_insn (gen (retval, mem, oldval, newval));
12808
12809 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12810 if (x != bval)
12811 convert_move (bval, x, 1);
12812 }
12813
12814 sparc_emit_membar_for_model (model, 3, 2);
12815 }
12816
12817 void
12818 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12819 {
12820 rtx t_1, t_2, t_3;
12821
12822 sel = gen_lowpart (DImode, sel);
12823 switch (vmode)
12824 {
12825 case E_V2SImode:
12826 /* inp = xxxxxxxAxxxxxxxB */
12827 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12828 NULL_RTX, 1, OPTAB_DIRECT);
12829 /* t_1 = ....xxxxxxxAxxx. */
12830 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12831 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12832 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12833 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12834 /* sel = .......B */
12835 /* t_1 = ...A.... */
12836 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12837 /* sel = ...A...B */
12838 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12839 /* sel = AAAABBBB * 4 */
12840 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12841 /* sel = { A*4, A*4+1, A*4+2, ... } */
12842 break;
12843
12844 case E_V4HImode:
12845 /* inp = xxxAxxxBxxxCxxxD */
12846 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12847 NULL_RTX, 1, OPTAB_DIRECT);
12848 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12849 NULL_RTX, 1, OPTAB_DIRECT);
12850 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12851 NULL_RTX, 1, OPTAB_DIRECT);
12852 /* t_1 = ..xxxAxxxBxxxCxx */
12853 /* t_2 = ....xxxAxxxBxxxC */
12854 /* t_3 = ......xxxAxxxBxx */
12855 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12856 GEN_INT (0x07),
12857 NULL_RTX, 1, OPTAB_DIRECT);
12858 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12859 GEN_INT (0x0700),
12860 NULL_RTX, 1, OPTAB_DIRECT);
12861 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12862 GEN_INT (0x070000),
12863 NULL_RTX, 1, OPTAB_DIRECT);
12864 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12865 GEN_INT (0x07000000),
12866 NULL_RTX, 1, OPTAB_DIRECT);
12867 /* sel = .......D */
12868 /* t_1 = .....C.. */
12869 /* t_2 = ...B.... */
12870 /* t_3 = .A...... */
12871 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12872 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12873 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12874 /* sel = .A.B.C.D */
12875 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12876 /* sel = AABBCCDD * 2 */
12877 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12878 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12879 break;
12880
12881 case E_V8QImode:
12882 /* input = xAxBxCxDxExFxGxH */
12883 sel = expand_simple_binop (DImode, AND, sel,
12884 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12885 | 0x0f0f0f0f),
12886 NULL_RTX, 1, OPTAB_DIRECT);
12887 /* sel = .A.B.C.D.E.F.G.H */
12888 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12889 NULL_RTX, 1, OPTAB_DIRECT);
12890 /* t_1 = ..A.B.C.D.E.F.G. */
12891 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12892 NULL_RTX, 1, OPTAB_DIRECT);
12893 /* sel = .AABBCCDDEEFFGGH */
12894 sel = expand_simple_binop (DImode, AND, sel,
12895 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12896 | 0xff00ff),
12897 NULL_RTX, 1, OPTAB_DIRECT);
12898 /* sel = ..AB..CD..EF..GH */
12899 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12900 NULL_RTX, 1, OPTAB_DIRECT);
12901 /* t_1 = ....AB..CD..EF.. */
12902 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12903 NULL_RTX, 1, OPTAB_DIRECT);
12904 /* sel = ..ABABCDCDEFEFGH */
12905 sel = expand_simple_binop (DImode, AND, sel,
12906 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12907 NULL_RTX, 1, OPTAB_DIRECT);
12908 /* sel = ....ABCD....EFGH */
12909 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12910 NULL_RTX, 1, OPTAB_DIRECT);
12911 /* t_1 = ........ABCD.... */
12912 sel = gen_lowpart (SImode, sel);
12913 t_1 = gen_lowpart (SImode, t_1);
12914 break;
12915
12916 default:
12917 gcc_unreachable ();
12918 }
12919
12920 /* Always perform the final addition/merge within the bmask insn. */
12921 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12922 }
12923
12924 /* Implement TARGET_VEC_PERM_CONST. */
12925
12926 static bool
12927 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12928 rtx op1, const vec_perm_indices &sel)
12929 {
12930 if (!TARGET_VIS2)
12931 return false;
12932
12933 /* All permutes are supported. */
12934 if (!target)
12935 return true;
12936
12937 /* Force target-independent code to convert constant permutations on other
12938 modes down to V8QI. Rely on this to avoid the complexity of the byte
12939 order of the permutation. */
12940 if (vmode != V8QImode)
12941 return false;
12942
12943 unsigned int i, mask;
12944 for (i = mask = 0; i < 8; ++i)
12945 mask |= (sel[i] & 0xf) << (28 - i*4);
12946 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12947
12948 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12949 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12950 return true;
12951 }
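/* A worked example of the nibble packing above (illustrative only): the
   even-byte interleave selector { 0, 8, 1, 9, 2, 10, 3, 11 } packs most
   significant nibble first into mask = 0x08192a3b.  The bmask insn writes
   this value into %gsr, and the subsequent bshuffle then picks, for each
   result byte i, byte sel[i] of the 16-byte concatenation of OP0 and OP1.  */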
12952
12953 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12954
12955 static bool
12956 sparc_frame_pointer_required (void)
12957 {
12958 /* If the stack pointer is dynamically modified in the function, it cannot
12959 serve as the frame pointer. */
12960 if (cfun->calls_alloca)
12961 return true;
12962
12963 /* If the function receives nonlocal gotos, it needs to save the frame
12964 pointer in the nonlocal_goto_save_area object. */
12965 if (cfun->has_nonlocal_label)
12966 return true;
12967
12968 /* In flat mode, that's it. */
12969 if (TARGET_FLAT)
12970 return false;
12971
12972 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12973 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12974 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12975 }
12976
12977 /* The way this is structured, we can't eliminate SFP in favor of SP
12978 if the frame pointer is required: we want to use the SFP->HFP elimination
12979 in that case. But the test in update_eliminables doesn't know we are
12980 assuming below that we only do the former elimination. */
12981
12982 static bool
12983 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12984 {
12985 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12986 }
12987
12988 /* Return the hard frame pointer directly to bypass the stack bias. */
12989
12990 static rtx
12991 sparc_builtin_setjmp_frame_value (void)
12992 {
12993 return hard_frame_pointer_rtx;
12994 }
12995
12996 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12997 they won't be allocated. */
12998
12999 static void
13000 sparc_conditional_register_usage (void)
13001 {
13002 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13003 {
13004 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13005 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13006 }
13007 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
13008 then honor it. */
13009 if (TARGET_ARCH32 && fixed_regs[5])
13010 fixed_regs[5] = 1;
13011 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13012 fixed_regs[5] = 0;
13013 if (! TARGET_V9)
13014 {
13015 int regno;
13016 for (regno = SPARC_FIRST_V9_FP_REG;
13017 regno <= SPARC_LAST_V9_FP_REG;
13018 regno++)
13019 fixed_regs[regno] = 1;
13020 /* %fcc0 is used by v8 and v9. */
13021 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13022 regno <= SPARC_LAST_V9_FCC_REG;
13023 regno++)
13024 fixed_regs[regno] = 1;
13025 }
13026 if (! TARGET_FPU)
13027 {
13028 int regno;
13029 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13030 fixed_regs[regno] = 1;
13031 }
13032 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
13033 then honor it. Likewise with g3 and g4. */
13034 if (fixed_regs[2] == 2)
13035 fixed_regs[2] = ! TARGET_APP_REGS;
13036 if (fixed_regs[3] == 2)
13037 fixed_regs[3] = ! TARGET_APP_REGS;
13038 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13039 fixed_regs[4] = ! TARGET_APP_REGS;
13040 else if (TARGET_CM_EMBMEDANY)
13041 fixed_regs[4] = 1;
13042 else if (fixed_regs[4] == 2)
13043 fixed_regs[4] = 0;
13044 if (TARGET_FLAT)
13045 {
13046 int regno;
13047 /* Disable leaf functions. */
13048 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13049 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13050 leaf_reg_remap [regno] = regno;
13051 }
13052 if (TARGET_VIS)
13053 global_regs[SPARC_GSR_REG] = 1;
13054 }
13055
13056 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13057
13058 static bool
13059 sparc_use_pseudo_pic_reg (void)
13060 {
13061 return !TARGET_VXWORKS_RTP && flag_pic;
13062 }
13063
13064 /* Implement TARGET_INIT_PIC_REG. */
13065
13066 static void
13067 sparc_init_pic_reg (void)
13068 {
13069 edge entry_edge;
13070 rtx_insn *seq;
13071
13072 if (!crtl->uses_pic_offset_table)
13073 return;
13074
13075 start_sequence ();
13076 load_got_register ();
13077 if (!TARGET_VXWORKS_RTP)
13078 emit_move_insn (pic_offset_table_rtx, global_offset_table_rtx);
13079 seq = get_insns ();
13080 end_sequence ();
13081
13082 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13083 insert_insn_on_edge (seq, entry_edge);
13084 commit_one_edge_insertion (entry_edge);
13085 }
13086
13087 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13088
13089 - We can't load constants into FP registers.
13090 - We can't load FP constants into integer registers when soft-float,
13091 because there is no soft-float pattern with a r/F constraint.
13092 - We can't load FP constants into integer registers for TFmode unless
13093 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13094 - Try and reload integer constants (symbolic or otherwise) back into
13095 registers directly, rather than having them dumped to memory. */
13096
13097 static reg_class_t
13098 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13099 {
13100 machine_mode mode = GET_MODE (x);
13101 if (CONSTANT_P (x))
13102 {
13103 if (FP_REG_CLASS_P (rclass)
13104 || rclass == GENERAL_OR_FP_REGS
13105 || rclass == GENERAL_OR_EXTRA_FP_REGS
13106 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13107 || (mode == TFmode && ! const_zero_operand (x, mode)))
13108 return NO_REGS;
13109
13110 if (GET_MODE_CLASS (mode) == MODE_INT)
13111 return GENERAL_REGS;
13112
13113 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13114 {
13115 if (! FP_REG_CLASS_P (rclass)
13116 || !(const_zero_operand (x, mode)
13117 || const_all_ones_operand (x, mode)))
13118 return NO_REGS;
13119 }
13120 }
13121
13122 if (TARGET_VIS3
13123 && ! TARGET_ARCH64
13124 && (rclass == EXTRA_FP_REGS
13125 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13126 {
13127 int regno = true_regnum (x);
13128
13129 if (SPARC_INT_REG_P (regno))
13130 return (rclass == EXTRA_FP_REGS
13131 ? FP_REGS : GENERAL_OR_FP_REGS);
13132 }
13133
13134 return rclass;
13135 }
13136
13137 /* Return true if we use LRA instead of reload pass. */
13138
13139 static bool
13140 sparc_lra_p (void)
13141 {
13142 return TARGET_LRA;
13143 }
13144
13145 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13146 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13147
13148 const char *
13149 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13150 {
13151 char mulstr[32];
13152
13153 gcc_assert (! TARGET_ARCH64);
13154
13155 if (sparc_check_64 (operands[1], insn) <= 0)
13156 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13157 if (which_alternative == 1)
13158 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13159 if (GET_CODE (operands[2]) == CONST_INT)
13160 {
13161 if (which_alternative == 1)
13162 {
13163 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13164 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13165 output_asm_insn (mulstr, operands);
13166 return "srlx\t%L0, 32, %H0";
13167 }
13168 else
13169 {
13170 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13171 output_asm_insn ("or\t%L1, %3, %3", operands);
13172 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13173 output_asm_insn (mulstr, operands);
13174 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13175 return "mov\t%3, %L0";
13176 }
13177 }
13178 else if (rtx_equal_p (operands[1], operands[2]))
13179 {
13180 if (which_alternative == 1)
13181 {
13182 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13183 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13184 output_asm_insn (mulstr, operands);
13185 return "srlx\t%L0, 32, %H0";
13186 }
13187 else
13188 {
13189 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13190 output_asm_insn ("or\t%L1, %3, %3", operands);
13191 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13192 output_asm_insn (mulstr, operands);
13193 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13194 return "mov\t%3, %L0";
13195 }
13196 }
13197 if (sparc_check_64 (operands[2], insn) <= 0)
13198 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13199 if (which_alternative == 1)
13200 {
13201 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13202 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13203 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13204 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13205 output_asm_insn (mulstr, operands);
13206 return "srlx\t%L0, 32, %H0";
13207 }
13208 else
13209 {
13210 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13211 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13212 output_asm_insn ("or\t%L1, %3, %3", operands);
13213 output_asm_insn ("or\t%L2, %4, %4", operands);
13214 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13215 output_asm_insn (mulstr, operands);
13216 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13217 return "mov\t%3, %L0";
13218 }
13219 }
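/* As an illustration (not from the original sources): for the last case
   above (two distinct register operands, neither known to be already
   zero-extended, OPCODE "mulx", and the constraint alternative that
   provides the scratch operands %3 and %4) the emitted sequence is
   roughly:

     srl    %L1, 0, %L1
     srl    %L2, 0, %L2
     sllx   %H1, 32, %3
     sllx   %H2, 32, %4
     or     %L1, %3, %3
     or     %L2, %4, %4
     mulx   %3, %4, %3
     srlx   %3, 32, %H0
     mov    %3, %L0

   That is, each 64-bit input is reassembled from its 32-bit halves into a
   scratch register, the 64-bit multiply is done there, and the product is
   split back into the high and low halves of operand 0.  */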
13220
13221 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13222 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
13223 and INNER_MODE are the modes describing TARGET. */
13224
13225 static void
13226 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13227 machine_mode inner_mode)
13228 {
13229 rtx t1, final_insn, sel;
13230 int bmask;
13231
13232 t1 = gen_reg_rtx (mode);
13233
13234 elt = convert_modes (SImode, inner_mode, elt, true);
13235 emit_move_insn (gen_lowpart(SImode, t1), elt);
13236
13237 switch (mode)
13238 {
13239 case E_V2SImode:
13240 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13241 bmask = 0x45674567;
13242 break;
13243 case E_V4HImode:
13244 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13245 bmask = 0x67676767;
13246 break;
13247 case E_V8QImode:
13248 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13249 bmask = 0x77777777;
13250 break;
13251 default:
13252 gcc_unreachable ();
13253 }
13254
13255 sel = force_reg (SImode, GEN_INT (bmask));
13256 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13257 emit_insn (final_insn);
13258 }
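/* For instance (illustrative only), in the V4HImode case above the 16-bit
   ELT value ends up in bytes 6 and 7 of T1, the low half-word of the SImode
   value just stored into the low 32-bit half of the register, and the bmask
   value 0x67676767 makes the bshuffle copy that byte pair into all four
   half-words of TARGET.  */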
13259
13260 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13261 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
13262
13263 static void
13264 vector_init_fpmerge (rtx target, rtx elt)
13265 {
13266 rtx t1, t2, t2_low, t3, t3_low;
13267
13268 t1 = gen_reg_rtx (V4QImode);
13269 elt = convert_modes (SImode, QImode, elt, true);
13270 emit_move_insn (gen_lowpart (SImode, t1), elt);
13271
13272 t2 = gen_reg_rtx (V8QImode);
13273 t2_low = gen_lowpart (V4QImode, t2);
13274 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13275
13276 t3 = gen_reg_rtx (V8QImode);
13277 t3_low = gen_lowpart (V4QImode, t3);
13278 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13279
13280 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13281 }
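/* A rough trace of the above (illustrative only), writing E for the byte
   value of ELT and . for a zero byte:

     t1                                = { . . . E }
     t2 = fpmerge (t1, t1)             = { . . . . . . E E }
     t3 = fpmerge (t2_low, t2_low)     = { . . . . E E E E }
     target = fpmerge (t3_low, t3_low) = { E E E E E E E E }

   Each fpmerge interleaves the bytes of its two V4QI inputs, so three
   merges suffice to replicate the single byte into all eight lanes.  */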
13282
13283 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13284 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
13285
13286 static void
13287 vector_init_faligndata (rtx target, rtx elt)
13288 {
13289 rtx t1 = gen_reg_rtx (V4HImode);
13290 int i;
13291
13292 elt = convert_modes (SImode, HImode, elt, true);
13293 emit_move_insn (gen_lowpart (SImode, t1), elt);
13294
13295 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13296 force_reg (SImode, GEN_INT (6)),
13297 const0_rtx));
13298
13299 for (i = 0; i < 4; i++)
13300 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13301 }
13302
13303 /* Emit code to initialize TARGET to values for individual fields VALS. */
13304
13305 void
13306 sparc_expand_vector_init (rtx target, rtx vals)
13307 {
13308 const machine_mode mode = GET_MODE (target);
13309 const machine_mode inner_mode = GET_MODE_INNER (mode);
13310 const int n_elts = GET_MODE_NUNITS (mode);
13311 int i, n_var = 0;
13312 bool all_same = true;
13313 rtx mem;
13314
13315 for (i = 0; i < n_elts; i++)
13316 {
13317 rtx x = XVECEXP (vals, 0, i);
13318 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13319 n_var++;
13320
13321 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13322 all_same = false;
13323 }
13324
13325 if (n_var == 0)
13326 {
13327 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13328 return;
13329 }
13330
13331 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13332 {
13333 if (GET_MODE_SIZE (inner_mode) == 4)
13334 {
13335 emit_move_insn (gen_lowpart (SImode, target),
13336 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13337 return;
13338 }
13339 else if (GET_MODE_SIZE (inner_mode) == 8)
13340 {
13341 emit_move_insn (gen_lowpart (DImode, target),
13342 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13343 return;
13344 }
13345 }
13346 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13347 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13348 {
13349 emit_move_insn (gen_highpart (word_mode, target),
13350 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13351 emit_move_insn (gen_lowpart (word_mode, target),
13352 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13353 return;
13354 }
13355
13356 if (all_same && GET_MODE_SIZE (mode) == 8)
13357 {
13358 if (TARGET_VIS2)
13359 {
13360 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13361 return;
13362 }
13363 if (mode == V8QImode)
13364 {
13365 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13366 return;
13367 }
13368 if (mode == V4HImode)
13369 {
13370 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13371 return;
13372 }
13373 }
13374
13375 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13376 for (i = 0; i < n_elts; i++)
13377 emit_move_insn (adjust_address_nv (mem, inner_mode,
13378 i * GET_MODE_SIZE (inner_mode)),
13379 XVECEXP (vals, 0, i));
13380 emit_move_insn (target, mem);
13381 }
13382
13383 /* Implement TARGET_SECONDARY_RELOAD. */
13384
13385 static reg_class_t
13386 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13387 machine_mode mode, secondary_reload_info *sri)
13388 {
13389 enum reg_class rclass = (enum reg_class) rclass_i;
13390
13391 sri->icode = CODE_FOR_nothing;
13392 sri->extra_cost = 0;
13393
13394 /* We need a temporary when loading/storing a HImode/QImode value
13395 between memory and the FPU registers. This can happen when combine puts
13396 a paradoxical subreg in a float/fix conversion insn. */
13397 if (FP_REG_CLASS_P (rclass)
13398 && (mode == HImode || mode == QImode)
13399 && (GET_CODE (x) == MEM
13400 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13401 && true_regnum (x) == -1)))
13402 return GENERAL_REGS;
13403
13404 /* On 32-bit we need a temporary when loading/storing a DFmode value
13405 between unaligned memory and the upper FPU registers. */
13406 if (TARGET_ARCH32
13407 && rclass == EXTRA_FP_REGS
13408 && mode == DFmode
13409 && GET_CODE (x) == MEM
13410 && ! mem_min_alignment (x, 8))
13411 return FP_REGS;
13412
13413 if (((TARGET_CM_MEDANY
13414 && symbolic_operand (x, mode))
13415 || (TARGET_CM_EMBMEDANY
13416 && text_segment_operand (x, mode)))
13417 && ! flag_pic)
13418 {
13419 if (in_p)
13420 sri->icode = direct_optab_handler (reload_in_optab, mode);
13421 else
13422 sri->icode = direct_optab_handler (reload_out_optab, mode);
13423 return NO_REGS;
13424 }
13425
13426 if (TARGET_VIS3 && TARGET_ARCH32)
13427 {
13428 int regno = true_regnum (x);
13429
13430 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13431 to move 8-byte values in 4-byte pieces. This only works via
13432 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13433 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13434 an FP_REGS intermediate move. */
13435 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13436 || ((general_or_i64_p (rclass)
13437 || rclass == GENERAL_OR_FP_REGS)
13438 && SPARC_FP_REG_P (regno)))
13439 {
13440 sri->extra_cost = 2;
13441 return FP_REGS;
13442 }
13443 }
13444
13445 return NO_REGS;
13446 }
13447
13448 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13449
13450 On SPARC when not VIS3 it is not possible to directly move data
13451 between GENERAL_REGS and FP_REGS. */
13452
13453 static bool
13454 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13455 reg_class_t class2)
13456 {
13457 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13458 && (! TARGET_VIS3
13459 || GET_MODE_SIZE (mode) > 8
13460 || GET_MODE_SIZE (mode) < 4));
13461 }
13462
13463 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13464
13465 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13466 because the movsi and movsf patterns don't handle r/f moves.
13467 For v8 we copy the default definition. */
13468
13469 static machine_mode
13470 sparc_secondary_memory_needed_mode (machine_mode mode)
13471 {
13472 if (TARGET_ARCH64)
13473 {
13474 if (GET_MODE_BITSIZE (mode) < 32)
13475 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13476 return mode;
13477 }
13478 else
13479 {
13480 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13481 return mode_for_size (BITS_PER_WORD,
13482 GET_MODE_CLASS (mode), 0).require ();
13483 return mode;
13484 }
13485 }
13486
13487 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13488 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13489
13490 bool
13491 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13492 {
13493 enum rtx_code rc = GET_CODE (operands[1]);
13494 machine_mode cmp_mode;
13495 rtx cc_reg, dst, cmp;
13496
13497 cmp = operands[1];
13498 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13499 return false;
13500
13501 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13502 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13503
13504 cmp_mode = GET_MODE (XEXP (cmp, 0));
13505 rc = GET_CODE (cmp);
13506
13507 dst = operands[0];
13508 if (! rtx_equal_p (operands[2], dst)
13509 && ! rtx_equal_p (operands[3], dst))
13510 {
13511 if (reg_overlap_mentioned_p (dst, cmp))
13512 dst = gen_reg_rtx (mode);
13513
13514 emit_move_insn (dst, operands[3]);
13515 }
13516 else if (operands[2] == dst)
13517 {
13518 operands[2] = operands[3];
13519
13520 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13521 rc = reverse_condition_maybe_unordered (rc);
13522 else
13523 rc = reverse_condition (rc);
13524 }
13525
13526 if (XEXP (cmp, 1) == const0_rtx
13527 && GET_CODE (XEXP (cmp, 0)) == REG
13528 && cmp_mode == DImode
13529 && v9_regcmp_p (rc))
13530 cc_reg = XEXP (cmp, 0);
13531 else
13532 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13533
13534 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13535
13536 emit_insn (gen_rtx_SET (dst,
13537 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13538
13539 if (dst != operands[0])
13540 emit_move_insn (operands[0], dst);
13541
13542 return true;
13543 }
13544
13545 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13546 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13547 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13548 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13549 code to be used for the condition mask. */
13550
13551 void
13552 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13553 {
13554 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13555 enum rtx_code code = GET_CODE (operands[3]);
13556
13557 mask = gen_reg_rtx (Pmode);
13558 cop0 = operands[4];
13559 cop1 = operands[5];
13560 if (code == LT || code == GE)
13561 {
13562 rtx t;
13563
13564 code = swap_condition (code);
13565 t = cop0; cop0 = cop1; cop1 = t;
13566 }
13567
13568 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13569
13570 fcmp = gen_rtx_UNSPEC (Pmode,
13571 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13572 fcode);
13573
13574 cmask = gen_rtx_UNSPEC (DImode,
13575 gen_rtvec (2, mask, gsr),
13576 ccode);
13577
13578 bshuf = gen_rtx_UNSPEC (mode,
13579 gen_rtvec (3, operands[1], operands[2], gsr),
13580 UNSPEC_BSHUFFLE);
13581
13582 emit_insn (gen_rtx_SET (mask, fcmp));
13583 emit_insn (gen_rtx_SET (gsr, cmask));
13584
13585 emit_insn (gen_rtx_SET (operands[0], bshuf));
13586 }
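/* In other words (an illustrative gloss, not from the original sources):
   the UNSPEC selected by FCODE performs an element-wise comparison of
   OPERANDS[4] and OPERANDS[5] and leaves its result in MASK, the UNSPEC
   selected by CCODE (a cmask-style operation) converts that result into a
   byte-selection mask stored in %gsr, and the final bshuffle then picks
   each byte of OPERANDS[0] from OPERANDS[1] or OPERANDS[2] accordingly.  */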
13587
13588 /* On SPARC, any mode which naturally allocates into the float
13589 registers should return 4 here. */
13590
13591 unsigned int
13592 sparc_regmode_natural_size (machine_mode mode)
13593 {
13594 int size = UNITS_PER_WORD;
13595
13596 if (TARGET_ARCH64)
13597 {
13598 enum mode_class mclass = GET_MODE_CLASS (mode);
13599
13600 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13601 size = 4;
13602 }
13603
13604 return size;
13605 }
13606
13607 /* Implement TARGET_HARD_REGNO_NREGS.
13608
13609 On SPARC, ordinary registers hold 32 bits worth; this means both
13610 integer and floating point registers. On v9, integer regs hold 64
13611 bits worth; floating point regs hold 32 bits worth (this includes the
13612 new fp regs as even the odd ones are included in the hard register
13613 count). */
13614
13615 static unsigned int
13616 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13617 {
13618 if (regno == SPARC_GSR_REG)
13619 return 1;
13620 if (TARGET_ARCH64)
13621 {
13622 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13623 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13624 return CEIL (GET_MODE_SIZE (mode), 4);
13625 }
13626 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13627 }
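/* For example (illustrative only): with TARGET_ARCH64 a DImode or DFmode
   value needs a single integer register but two consecutive floating-point
   registers (say %f0 and %f1, given the 32-bit FP register numbering),
   while with TARGET_ARCH32 such a value occupies two registers in either
   bank.  */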
13628
13629 /* Implement TARGET_HARD_REGNO_MODE_OK.
13630
13631 ??? Because of the funny way we pass parameters we should allow certain
13632 ??? types of float/complex values to be in integer registers during
13633 ??? RTL generation. This only matters on arch32. */
13634
13635 static bool
13636 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13637 {
13638 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13639 }
13640
13641 /* Implement TARGET_MODES_TIEABLE_P.
13642
13643 For V9 we have to deal with the fact that only the lower 32 floating
13644 point registers are 32-bit addressable. */
13645
13646 static bool
13647 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13648 {
13649 enum mode_class mclass1, mclass2;
13650 unsigned short size1, size2;
13651
13652 if (mode1 == mode2)
13653 return true;
13654
13655 mclass1 = GET_MODE_CLASS (mode1);
13656 mclass2 = GET_MODE_CLASS (mode2);
13657 if (mclass1 != mclass2)
13658 return false;
13659
13660 if (! TARGET_V9)
13661 return true;
13662
13663 /* Classes are the same and we are V9 so we have to deal with upper
13664 vs. lower floating point registers. If one of the modes is a
13665 4-byte mode, and the other is not, we have to mark them as not
13666 tieable because only the lower 32 floating point registers are
13667 addressable 32 bits at a time.
13668
13669 We can't just test explicitly for SFmode, otherwise we won't
13670 cover the vector mode cases properly. */
13671
13672 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13673 return true;
13674
13675 size1 = GET_MODE_SIZE (mode1);
13676 size2 = GET_MODE_SIZE (mode2);
13677 if ((size1 > 4 && size2 == 4)
13678 || (size2 > 4 && size1 == 4))
13679 return false;
13680
13681 return true;
13682 }
13683
13684 /* Implement TARGET_CSTORE_MODE. */
13685
13686 static scalar_int_mode
13687 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13688 {
13689 return (TARGET_ARCH64 ? DImode : SImode);
13690 }
13691
13692 /* Return the compound expression made of T1 and T2. */
13693
13694 static inline tree
13695 compound_expr (tree t1, tree t2)
13696 {
13697 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13698 }
13699
13700 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13701
13702 static void
13703 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13704 {
13705 if (!TARGET_FPU)
13706 return;
13707
13708 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13709 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13710
13711 /* We generate the equivalent of feholdexcept (&fenv_var):
13712
13713 unsigned int fenv_var;
13714 __builtin_store_fsr (&fenv_var);
13715
13716 unsigned int tmp1_var;
13717 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13718
13719 __builtin_load_fsr (&tmp1_var); */
13720
13721 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13722 TREE_ADDRESSABLE (fenv_var) = 1;
13723 tree fenv_addr = build_fold_addr_expr (fenv_var);
13724 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13725 tree hold_stfsr
13726 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13727 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13728
13729 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13730 TREE_ADDRESSABLE (tmp1_var) = 1;
13731 tree masked_fenv_var
13732 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13733 build_int_cst (unsigned_type_node,
13734 ~(accrued_exception_mask | trap_enable_mask)));
13735 tree hold_mask
13736 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13737 NULL_TREE, NULL_TREE);
13738
13739 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13740 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13741 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13742
13743 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13744
13745 /* We reload the value of tmp1_var to clear the exceptions:
13746
13747 __builtin_load_fsr (&tmp1_var); */
13748
13749 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13750
13751 /* We generate the equivalent of feupdateenv (&fenv_var):
13752
13753 unsigned int tmp2_var;
13754 __builtin_store_fsr (&tmp2_var);
13755
13756 __builtin_load_fsr (&fenv_var);
13757
13758 if (SPARC_LOW_FE_EXCEPT_VALUES)
13759 tmp2_var >>= 5;
13760 __atomic_feraiseexcept ((int) tmp2_var); */
13761
13762 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13763 TREE_ADDRESSABLE (tmp2_var) = 1;
13764 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13765 tree update_stfsr
13766 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13767 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13768
13769 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13770
13771 tree atomic_feraiseexcept
13772 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13773 tree update_call
13774 = build_call_expr (atomic_feraiseexcept, 1,
13775 fold_convert (integer_type_node, tmp2_var));
13776
13777 if (SPARC_LOW_FE_EXCEPT_VALUES)
13778 {
13779 tree shifted_tmp2_var
13780 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13781 build_int_cst (unsigned_type_node, 5));
13782 tree update_shift
13783 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13784 update_call = compound_expr (update_shift, update_call);
13785 }
13786
13787 *update
13788 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13789 }
13790
13791 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13792
13793 SImode loads to floating-point registers are not zero-extended.
13794 The definition for LOAD_EXTEND_OP specifies that integer loads
13795 narrower than BITS_PER_WORD will be zero-extended. As a result,
13796 we inhibit changes from SImode unless they are to a mode that is
13797 identical in size.
13798
13799 Likewise for SFmode, since word-mode paradoxical subregs are
13800 problematic on big-endian architectures. */
13801
13802 static bool
13803 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13804 reg_class_t rclass)
13805 {
13806 if (TARGET_ARCH64
13807 && GET_MODE_SIZE (from) == 4
13808 && GET_MODE_SIZE (to) != 4)
13809 return !reg_classes_intersect_p (rclass, FP_REGS);
13810 return true;
13811 }
13812
13813 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13814
13815 static HOST_WIDE_INT
13816 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13817 {
13818 if (TREE_CODE (exp) == STRING_CST)
13819 return MAX (align, FASTEST_ALIGNMENT);
13820 return align;
13821 }
13822
13823 #include "gt-sparc.h"