]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/sparc/sparc.c
sparc.c (parc_delegitimize_address): Recognize the GOT register and decoded HIGH...
[thirdparty/gcc.git] / gcc / config / sparc / sparc.c
1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2019 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "params.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "tree-vector-builder.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127 A value of zero indicates that the multiply costs is fixed,
128 and not variable. */
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs sparclet_costs = {
274 COSTS_N_INSNS (3), /* int load */
275 COSTS_N_INSNS (3), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (1), /* fdivs */
284 COSTS_N_INSNS (1), /* fdivd */
285 COSTS_N_INSNS (1), /* fsqrts */
286 COSTS_N_INSNS (1), /* fsqrtd */
287 COSTS_N_INSNS (5), /* imul */
288 COSTS_N_INSNS (5), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (5), /* idiv */
291 COSTS_N_INSNS (5), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs ultrasparc_costs = {
299 COSTS_N_INSNS (2), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (2), /* int zeroed load */
302 COSTS_N_INSNS (2), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (4), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (2), /* fmov, fmovr */
307 COSTS_N_INSNS (4), /* fmul */
308 COSTS_N_INSNS (13), /* fdivs */
309 COSTS_N_INSNS (23), /* fdivd */
310 COSTS_N_INSNS (13), /* fsqrts */
311 COSTS_N_INSNS (23), /* fsqrtd */
312 COSTS_N_INSNS (4), /* imul */
313 COSTS_N_INSNS (4), /* imulX */
314 2, /* imul bit factor */
315 COSTS_N_INSNS (37), /* idiv */
316 COSTS_N_INSNS (68), /* idivX */
317 COSTS_N_INSNS (2), /* movcc/movr */
318 2, /* shift penalty */
319 2 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc3_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (3), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (5), /* fcmp */
331 COSTS_N_INSNS (3), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (17), /* fdivs */
334 COSTS_N_INSNS (20), /* fdivd */
335 COSTS_N_INSNS (20), /* fsqrts */
336 COSTS_N_INSNS (29), /* fsqrtd */
337 COSTS_N_INSNS (6), /* imul */
338 COSTS_N_INSNS (6), /* imulX */
339 0, /* imul bit factor */
340 COSTS_N_INSNS (40), /* idiv */
341 COSTS_N_INSNS (71), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 0, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs niagara_costs = {
349 COSTS_N_INSNS (3), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (9), /* float load */
353 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (8), /* fadd, fsub */
355 COSTS_N_INSNS (26), /* fcmp */
356 COSTS_N_INSNS (8), /* fmov, fmovr */
357 COSTS_N_INSNS (29), /* fmul */
358 COSTS_N_INSNS (54), /* fdivs */
359 COSTS_N_INSNS (83), /* fdivd */
360 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
361 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
362 COSTS_N_INSNS (11), /* imul */
363 COSTS_N_INSNS (11), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (72), /* idiv */
366 COSTS_N_INSNS (72), /* idivX */
367 COSTS_N_INSNS (1), /* movcc/movr */
368 0, /* shift penalty */
369 4 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara2_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (3), /* float load */
378 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (6), /* fadd, fsub */
380 COSTS_N_INSNS (6), /* fcmp */
381 COSTS_N_INSNS (6), /* fmov, fmovr */
382 COSTS_N_INSNS (6), /* fmul */
383 COSTS_N_INSNS (19), /* fdivs */
384 COSTS_N_INSNS (33), /* fdivd */
385 COSTS_N_INSNS (19), /* fsqrts */
386 COSTS_N_INSNS (33), /* fsqrtd */
387 COSTS_N_INSNS (5), /* imul */
388 COSTS_N_INSNS (5), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
391 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 5 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara3_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (9), /* fadd, fsub */
405 COSTS_N_INSNS (9), /* fcmp */
406 COSTS_N_INSNS (9), /* fmov, fmovr */
407 COSTS_N_INSNS (9), /* fmul */
408 COSTS_N_INSNS (23), /* fdivs */
409 COSTS_N_INSNS (37), /* fdivd */
410 COSTS_N_INSNS (23), /* fsqrts */
411 COSTS_N_INSNS (37), /* fsqrtd */
412 COSTS_N_INSNS (9), /* imul */
413 COSTS_N_INSNS (9), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
416 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara4_costs = {
424 COSTS_N_INSNS (5), /* int load */
425 COSTS_N_INSNS (5), /* int signed load */
426 COSTS_N_INSNS (5), /* int zeroed load */
427 COSTS_N_INSNS (5), /* float load */
428 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (11), /* fadd, fsub */
430 COSTS_N_INSNS (11), /* fcmp */
431 COSTS_N_INSNS (11), /* fmov, fmovr */
432 COSTS_N_INSNS (11), /* fmul */
433 COSTS_N_INSNS (24), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (24), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (12), /* imul */
438 COSTS_N_INSNS (12), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
441 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 2 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara7_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 1 /* branch cost */
470 };
471
472 static const
473 struct processor_costs m8_costs = {
474 COSTS_N_INSNS (3), /* int load */
475 COSTS_N_INSNS (3), /* int signed load */
476 COSTS_N_INSNS (3), /* int zeroed load */
477 COSTS_N_INSNS (3), /* float load */
478 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (9), /* fadd, fsub */
480 COSTS_N_INSNS (9), /* fcmp */
481 COSTS_N_INSNS (9), /* fmov, fmovr */
482 COSTS_N_INSNS (9), /* fmul */
483 COSTS_N_INSNS (26), /* fdivs */
484 COSTS_N_INSNS (30), /* fdivd */
485 COSTS_N_INSNS (33), /* fsqrts */
486 COSTS_N_INSNS (41), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (10), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (57), /* udiv/sdiv */
491 COSTS_N_INSNS (30), /* udivx/sdivx */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const struct processor_costs *sparc_costs = &cypress_costs;
498
499 #ifdef HAVE_AS_RELAX_OPTION
500 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
501 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
502 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
503 somebody does not branch between the sethi and jmp. */
504 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
505 #else
506 #define LEAF_SIBCALL_SLOT_RESERVED_P \
507 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
508 #endif
509
510 /* Vector to say how input registers are mapped to output registers.
511 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
512 eliminate it. You must use -fomit-frame-pointer to get that. */
513 char leaf_reg_remap[] =
514 { 0, 1, 2, 3, 4, 5, 6, 7,
515 -1, -1, -1, -1, -1, -1, 14, -1,
516 -1, -1, -1, -1, -1, -1, -1, -1,
517 8, 9, 10, 11, 12, 13, -1, 15,
518
519 32, 33, 34, 35, 36, 37, 38, 39,
520 40, 41, 42, 43, 44, 45, 46, 47,
521 48, 49, 50, 51, 52, 53, 54, 55,
522 56, 57, 58, 59, 60, 61, 62, 63,
523 64, 65, 66, 67, 68, 69, 70, 71,
524 72, 73, 74, 75, 76, 77, 78, 79,
525 80, 81, 82, 83, 84, 85, 86, 87,
526 88, 89, 90, 91, 92, 93, 94, 95,
527 96, 97, 98, 99, 100, 101, 102};
528
529 /* Vector, indexed by hard register number, which contains 1
530 for a register that is allowable in a candidate for leaf
531 function treatment. */
532 char sparc_leaf_regs[] =
533 { 1, 1, 1, 1, 1, 1, 1, 1,
534 0, 0, 0, 0, 0, 0, 1, 0,
535 0, 0, 0, 0, 0, 0, 0, 0,
536 1, 1, 1, 1, 1, 1, 0, 1,
537 1, 1, 1, 1, 1, 1, 1, 1,
538 1, 1, 1, 1, 1, 1, 1, 1,
539 1, 1, 1, 1, 1, 1, 1, 1,
540 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1};
546
547 struct GTY(()) machine_function
548 {
549 /* Size of the frame of the function. */
550 HOST_WIDE_INT frame_size;
551
552 /* Size of the frame of the function minus the register window save area
553 and the outgoing argument area. */
554 HOST_WIDE_INT apparent_frame_size;
555
556 /* Register we pretend the frame pointer is allocated to. Normally, this
557 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
558 record "offset" separately as it may be too big for (reg + disp). */
559 rtx frame_base_reg;
560 HOST_WIDE_INT frame_base_offset;
561
562 /* Number of global or FP registers to be saved (as 4-byte quantities). */
563 int n_global_fp_regs;
564
565 /* True if the current function is leaf and uses only leaf regs,
566 so that the SPARC leaf function optimization can be applied.
567 Private version of crtl->uses_only_leaf_regs, see
568 sparc_expand_prologue for the rationale. */
569 int leaf_function_p;
570
571 /* True if the prologue saves local or in registers. */
572 bool save_local_in_regs_p;
573
574 /* True if the data calculated by sparc_expand_prologue are valid. */
575 bool prologue_data_valid_p;
576 };
577
578 #define sparc_frame_size cfun->machine->frame_size
579 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
580 #define sparc_frame_base_reg cfun->machine->frame_base_reg
581 #define sparc_frame_base_offset cfun->machine->frame_base_offset
582 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
583 #define sparc_leaf_function_p cfun->machine->leaf_function_p
584 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
585 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
586
587 /* 1 if the next opcode is to be specially indented. */
588 int sparc_indent_opcode = 0;
589
590 static void sparc_option_override (void);
591 static void sparc_init_modes (void);
592 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
593 const_tree, bool, bool, int *, int *);
594
595 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
596 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
597
598 static void sparc_emit_set_const32 (rtx, rtx);
599 static void sparc_emit_set_const64 (rtx, rtx);
600 static void sparc_output_addr_vec (rtx);
601 static void sparc_output_addr_diff_vec (rtx);
602 static void sparc_output_deferred_case_vectors (void);
603 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
604 static bool sparc_legitimate_constant_p (machine_mode, rtx);
605 static rtx sparc_builtin_saveregs (void);
606 static int epilogue_renumber (rtx *, int);
607 static bool sparc_assemble_integer (rtx, unsigned int, int);
608 static int set_extends (rtx_insn *);
609 static void sparc_asm_function_prologue (FILE *);
610 static void sparc_asm_function_epilogue (FILE *);
611 #ifdef TARGET_SOLARIS
612 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
613 tree) ATTRIBUTE_UNUSED;
614 #endif
615 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
616 static int sparc_issue_rate (void);
617 static void sparc_sched_init (FILE *, int, int);
618 static int sparc_use_sched_lookahead (void);
619
620 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
621 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
622 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
623 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
624 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
625
626 static bool sparc_function_ok_for_sibcall (tree, tree);
627 static void sparc_init_libfuncs (void);
628 static void sparc_init_builtins (void);
629 static void sparc_fpu_init_builtins (void);
630 static void sparc_vis_init_builtins (void);
631 static tree sparc_builtin_decl (unsigned, bool);
632 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
633 static tree sparc_fold_builtin (tree, int, tree *, bool);
634 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
635 HOST_WIDE_INT, tree);
636 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
637 HOST_WIDE_INT, const_tree);
638 static struct machine_function * sparc_init_machine_status (void);
639 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
640 static rtx sparc_tls_get_addr (void);
641 static rtx sparc_tls_got (void);
642 static int sparc_register_move_cost (machine_mode,
643 reg_class_t, reg_class_t);
644 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
645 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
646 int *, const_tree, int);
647 static bool sparc_strict_argument_naming (cumulative_args_t);
648 static void sparc_va_start (tree, rtx);
649 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
650 static bool sparc_vector_mode_supported_p (machine_mode);
651 static bool sparc_tls_referenced_p (rtx);
652 static rtx sparc_legitimize_tls_address (rtx);
653 static rtx sparc_legitimize_pic_address (rtx, rtx);
654 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
655 static rtx sparc_delegitimize_address (rtx);
656 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
657 static bool sparc_pass_by_reference (cumulative_args_t,
658 machine_mode, const_tree, bool);
659 static void sparc_function_arg_advance (cumulative_args_t,
660 machine_mode, const_tree, bool);
661 static rtx sparc_function_arg_1 (cumulative_args_t,
662 machine_mode, const_tree, bool, bool);
663 static rtx sparc_function_arg (cumulative_args_t,
664 machine_mode, const_tree, bool);
665 static rtx sparc_function_incoming_arg (cumulative_args_t,
666 machine_mode, const_tree, bool);
667 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
668 static unsigned int sparc_function_arg_boundary (machine_mode,
669 const_tree);
670 static int sparc_arg_partial_bytes (cumulative_args_t,
671 machine_mode, tree, bool);
672 static bool sparc_return_in_memory (const_tree, const_tree);
673 static rtx sparc_struct_value_rtx (tree, int);
674 static rtx sparc_function_value (const_tree, const_tree, bool);
675 static rtx sparc_libcall_value (machine_mode, const_rtx);
676 static bool sparc_function_value_regno_p (const unsigned int);
677 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
678 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
679 static void sparc_file_end (void);
680 static bool sparc_frame_pointer_required (void);
681 static bool sparc_can_eliminate (const int, const int);
682 static rtx sparc_builtin_setjmp_frame_value (void);
683 static void sparc_conditional_register_usage (void);
684 static bool sparc_use_pseudo_pic_reg (void);
685 static void sparc_init_pic_reg (void);
686 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
687 static const char *sparc_mangle_type (const_tree);
688 #endif
689 static void sparc_trampoline_init (rtx, tree, rtx);
690 static machine_mode sparc_preferred_simd_mode (scalar_mode);
691 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
692 static bool sparc_lra_p (void);
693 static bool sparc_print_operand_punct_valid_p (unsigned char);
694 static void sparc_print_operand (FILE *, rtx, int);
695 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
696 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
697 machine_mode,
698 secondary_reload_info *);
699 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
700 reg_class_t);
701 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
702 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
703 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
704 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
705 static unsigned int sparc_min_arithmetic_precision (void);
706 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
707 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
708 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
709 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
710 reg_class_t);
711 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
712 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
713 const vec_perm_indices &);
714 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
715 \f
716 #ifdef SUBTARGET_ATTRIBUTE_TABLE
717 /* Table of valid machine attributes. */
718 static const struct attribute_spec sparc_attribute_table[] =
719 {
720 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
721 do_diagnostic, handler, exclude } */
722 SUBTARGET_ATTRIBUTE_TABLE,
723 { NULL, 0, 0, false, false, false, false, NULL, NULL }
724 };
725 #endif
726 \f
727 /* Option handling. */
728
729 /* Parsed value. */
730 enum cmodel sparc_cmodel;
731
732 char sparc_hard_reg_printed[8];
733
734 /* Initialize the GCC target structure. */
735
736 /* The default is to use .half rather than .short for aligned HI objects. */
737 #undef TARGET_ASM_ALIGNED_HI_OP
738 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
739
740 #undef TARGET_ASM_UNALIGNED_HI_OP
741 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
742 #undef TARGET_ASM_UNALIGNED_SI_OP
743 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
744 #undef TARGET_ASM_UNALIGNED_DI_OP
745 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
746
747 /* The target hook has to handle DI-mode values. */
748 #undef TARGET_ASM_INTEGER
749 #define TARGET_ASM_INTEGER sparc_assemble_integer
750
751 #undef TARGET_ASM_FUNCTION_PROLOGUE
752 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
753 #undef TARGET_ASM_FUNCTION_EPILOGUE
754 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
755
756 #undef TARGET_SCHED_ADJUST_COST
757 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
758 #undef TARGET_SCHED_ISSUE_RATE
759 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
760 #undef TARGET_SCHED_INIT
761 #define TARGET_SCHED_INIT sparc_sched_init
762 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
763 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
764
765 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
766 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
767
768 #undef TARGET_INIT_LIBFUNCS
769 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
770
771 #undef TARGET_LEGITIMIZE_ADDRESS
772 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
773 #undef TARGET_DELEGITIMIZE_ADDRESS
774 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
775 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
776 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
777
778 #undef TARGET_INIT_BUILTINS
779 #define TARGET_INIT_BUILTINS sparc_init_builtins
780 #undef TARGET_BUILTIN_DECL
781 #define TARGET_BUILTIN_DECL sparc_builtin_decl
782 #undef TARGET_EXPAND_BUILTIN
783 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
784 #undef TARGET_FOLD_BUILTIN
785 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
786
787 #if TARGET_TLS
788 #undef TARGET_HAVE_TLS
789 #define TARGET_HAVE_TLS true
790 #endif
791
792 #undef TARGET_CANNOT_FORCE_CONST_MEM
793 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
794
795 #undef TARGET_ASM_OUTPUT_MI_THUNK
796 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
797 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
798 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
799
800 #undef TARGET_RTX_COSTS
801 #define TARGET_RTX_COSTS sparc_rtx_costs
802 #undef TARGET_ADDRESS_COST
803 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
804 #undef TARGET_REGISTER_MOVE_COST
805 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
806
807 #undef TARGET_PROMOTE_FUNCTION_MODE
808 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
809 #undef TARGET_STRICT_ARGUMENT_NAMING
810 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
811
812 #undef TARGET_MUST_PASS_IN_STACK
813 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
814 #undef TARGET_PASS_BY_REFERENCE
815 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
816 #undef TARGET_ARG_PARTIAL_BYTES
817 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
818 #undef TARGET_FUNCTION_ARG_ADVANCE
819 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
820 #undef TARGET_FUNCTION_ARG
821 #define TARGET_FUNCTION_ARG sparc_function_arg
822 #undef TARGET_FUNCTION_INCOMING_ARG
823 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
824 #undef TARGET_FUNCTION_ARG_PADDING
825 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
826 #undef TARGET_FUNCTION_ARG_BOUNDARY
827 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
828
829 #undef TARGET_RETURN_IN_MEMORY
830 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
831 #undef TARGET_STRUCT_VALUE_RTX
832 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
833 #undef TARGET_FUNCTION_VALUE
834 #define TARGET_FUNCTION_VALUE sparc_function_value
835 #undef TARGET_LIBCALL_VALUE
836 #define TARGET_LIBCALL_VALUE sparc_libcall_value
837 #undef TARGET_FUNCTION_VALUE_REGNO_P
838 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
839
840 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
841 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
842
843 #undef TARGET_ASAN_SHADOW_OFFSET
844 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
845
846 #undef TARGET_EXPAND_BUILTIN_VA_START
847 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
848 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
849 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
850
851 #undef TARGET_VECTOR_MODE_SUPPORTED_P
852 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
853
854 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
855 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
856
857 #ifdef SUBTARGET_INSERT_ATTRIBUTES
858 #undef TARGET_INSERT_ATTRIBUTES
859 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
860 #endif
861
862 #ifdef SUBTARGET_ATTRIBUTE_TABLE
863 #undef TARGET_ATTRIBUTE_TABLE
864 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
865 #endif
866
867 #undef TARGET_OPTION_OVERRIDE
868 #define TARGET_OPTION_OVERRIDE sparc_option_override
869
870 #ifdef TARGET_THREAD_SSP_OFFSET
871 #undef TARGET_STACK_PROTECT_GUARD
872 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
873 #endif
874
875 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
876 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
877 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
878 #endif
879
880 #undef TARGET_ASM_FILE_END
881 #define TARGET_ASM_FILE_END sparc_file_end
882
883 #undef TARGET_FRAME_POINTER_REQUIRED
884 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
885
886 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
887 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
888
889 #undef TARGET_CAN_ELIMINATE
890 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
891
892 #undef TARGET_PREFERRED_RELOAD_CLASS
893 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
894
895 #undef TARGET_SECONDARY_RELOAD
896 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
897 #undef TARGET_SECONDARY_MEMORY_NEEDED
898 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
899 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
900 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
901
902 #undef TARGET_CONDITIONAL_REGISTER_USAGE
903 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
904
905 #undef TARGET_INIT_PIC_REG
906 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
907
908 #undef TARGET_USE_PSEUDO_PIC_REG
909 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
910
911 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
912 #undef TARGET_MANGLE_TYPE
913 #define TARGET_MANGLE_TYPE sparc_mangle_type
914 #endif
915
916 #undef TARGET_LRA_P
917 #define TARGET_LRA_P sparc_lra_p
918
919 #undef TARGET_LEGITIMATE_ADDRESS_P
920 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
921
922 #undef TARGET_LEGITIMATE_CONSTANT_P
923 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
924
925 #undef TARGET_TRAMPOLINE_INIT
926 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
927
928 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
929 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
930 #undef TARGET_PRINT_OPERAND
931 #define TARGET_PRINT_OPERAND sparc_print_operand
932 #undef TARGET_PRINT_OPERAND_ADDRESS
933 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
934
935 /* The value stored by LDSTUB. */
936 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
937 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
938
939 #undef TARGET_CSTORE_MODE
940 #define TARGET_CSTORE_MODE sparc_cstore_mode
941
942 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
943 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
944
945 #undef TARGET_FIXED_CONDITION_CODE_REGS
946 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
947
948 #undef TARGET_MIN_ARITHMETIC_PRECISION
949 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
950
951 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
952 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
953
954 #undef TARGET_HARD_REGNO_NREGS
955 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
956 #undef TARGET_HARD_REGNO_MODE_OK
957 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
958
959 #undef TARGET_MODES_TIEABLE_P
960 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
961
962 #undef TARGET_CAN_CHANGE_MODE_CLASS
963 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
964
965 #undef TARGET_CONSTANT_ALIGNMENT
966 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
967
968 #undef TARGET_VECTORIZE_VEC_PERM_CONST
969 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
970
971 #undef TARGET_CAN_FOLLOW_JUMP
972 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
973
974 struct gcc_target targetm = TARGET_INITIALIZER;
975
976 /* Return the memory reference contained in X if any, zero otherwise. */
977
978 static rtx
979 mem_ref (rtx x)
980 {
981 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
982 x = XEXP (x, 0);
983
984 if (MEM_P (x))
985 return x;
986
987 return NULL_RTX;
988 }
989
990 /* True if any of INSN's source register(s) is REG. */
991
992 static bool
993 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
994 {
995 extract_insn (insn);
996 return ((REG_P (recog_data.operand[1])
997 && REGNO (recog_data.operand[1]) == reg)
998 || (recog_data.n_operands == 3
999 && REG_P (recog_data.operand[2])
1000 && REGNO (recog_data.operand[2]) == reg));
1001 }
1002
1003 /* True if INSN is a floating-point division or square-root. */
1004
1005 static bool
1006 div_sqrt_insn_p (rtx_insn *insn)
1007 {
1008 if (GET_CODE (PATTERN (insn)) != SET)
1009 return false;
1010
1011 switch (get_attr_type (insn))
1012 {
1013 case TYPE_FPDIVS:
1014 case TYPE_FPSQRTS:
1015 case TYPE_FPDIVD:
1016 case TYPE_FPSQRTD:
1017 return true;
1018 default:
1019 return false;
1020 }
1021 }
1022
1023 /* True if INSN is a floating-point instruction. */
1024
1025 static bool
1026 fpop_insn_p (rtx_insn *insn)
1027 {
1028 if (GET_CODE (PATTERN (insn)) != SET)
1029 return false;
1030
1031 switch (get_attr_type (insn))
1032 {
1033 case TYPE_FPMOVE:
1034 case TYPE_FPCMOVE:
1035 case TYPE_FP:
1036 case TYPE_FPCMP:
1037 case TYPE_FPMUL:
1038 case TYPE_FPDIVS:
1039 case TYPE_FPSQRTS:
1040 case TYPE_FPDIVD:
1041 case TYPE_FPSQRTD:
1042 return true;
1043 default:
1044 return false;
1045 }
1046 }
1047
1048 /* True if INSN is an atomic instruction. */
1049
1050 static bool
1051 atomic_insn_for_leon3_p (rtx_insn *insn)
1052 {
1053 switch (INSN_CODE (insn))
1054 {
1055 case CODE_FOR_swapsi:
1056 case CODE_FOR_ldstub:
1057 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1058 return true;
1059 default:
1060 return false;
1061 }
1062 }
1063
1064 /* We use a machine specific pass to enable workarounds for errata.
1065
1066 We need to have the (essentially) final form of the insn stream in order
1067 to properly detect the various hazards. Therefore, this machine specific
1068 pass runs as late as possible. */
1069
1070 /* True if INSN is a md pattern or asm statement. */
1071 #define USEFUL_INSN_P(INSN) \
1072 (NONDEBUG_INSN_P (INSN) \
1073 && GET_CODE (PATTERN (INSN)) != USE \
1074 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1075
1076 static unsigned int
1077 sparc_do_work_around_errata (void)
1078 {
1079 rtx_insn *insn, *next;
1080
1081 /* Force all instructions to be split into their final form. */
1082 split_all_insns_noflow ();
1083
1084 /* Now look for specific patterns in the insn stream. */
1085 for (insn = get_insns (); insn; insn = next)
1086 {
1087 bool insert_nop = false;
1088 rtx set;
1089 rtx_insn *jump;
1090 rtx_sequence *seq;
1091
1092 /* Look into the instruction in a delay slot. */
1093 if (NONJUMP_INSN_P (insn)
1094 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1095 {
1096 jump = seq->insn (0);
1097 insn = seq->insn (1);
1098 }
1099 else if (JUMP_P (insn))
1100 jump = insn;
1101 else
1102 jump = NULL;
1103
1104 /* Place a NOP at the branch target of an integer branch if it is a
1105 floating-point operation or a floating-point branch. */
1106 if (sparc_fix_gr712rc
1107 && jump
1108 && jump_to_label_p (jump)
1109 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1110 {
1111 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1112 if (target
1113 && (fpop_insn_p (target)
1114 || (JUMP_P (target)
1115 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1116 emit_insn_before (gen_nop (), target);
1117 }
1118
1119 /* Insert a NOP between load instruction and atomic instruction. Insert
1120 a NOP at branch target if there is a load in delay slot and an atomic
1121 instruction at branch target. */
1122 if (sparc_fix_ut700
1123 && NONJUMP_INSN_P (insn)
1124 && (set = single_set (insn)) != NULL_RTX
1125 && mem_ref (SET_SRC (set))
1126 && REG_P (SET_DEST (set)))
1127 {
1128 if (jump && jump_to_label_p (jump))
1129 {
1130 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1131 if (target && atomic_insn_for_leon3_p (target))
1132 emit_insn_before (gen_nop (), target);
1133 }
1134
1135 next = next_active_insn (insn);
1136 if (!next)
1137 break;
1138
1139 if (atomic_insn_for_leon3_p (next))
1140 insert_nop = true;
1141 }
1142
1143 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1144 ends with another fdiv or fsqrt instruction with no dependencies on
1145 the former, along with an appropriate pattern in between. */
1146 if (sparc_fix_lost_divsqrt
1147 && NONJUMP_INSN_P (insn)
1148 && div_sqrt_insn_p (insn))
1149 {
1150 int i;
1151 int fp_found = 0;
1152 rtx_insn *after;
1153
1154 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1155
1156 next = next_active_insn (insn);
1157 if (!next)
1158 break;
1159
1160 for (after = next, i = 0; i < 4; i++)
1161 {
1162 /* Count floating-point operations. */
1163 if (i != 3 && fpop_insn_p (after))
1164 {
1165 /* If the insn uses the destination register of
1166 the div/sqrt, then it cannot be problematic. */
1167 if (insn_uses_reg_p (after, dest_reg))
1168 break;
1169 fp_found++;
1170 }
1171
1172 /* Count floating-point loads. */
1173 if (i != 3
1174 && (set = single_set (after)) != NULL_RTX
1175 && REG_P (SET_DEST (set))
1176 && REGNO (SET_DEST (set)) > 31)
1177 {
1178 /* If the insn uses the destination register of
1179 the div/sqrt, then it cannot be problematic. */
1180 if (REGNO (SET_DEST (set)) == dest_reg)
1181 break;
1182 fp_found++;
1183 }
1184
1185 /* Check if this is a problematic sequence. */
1186 if (i > 1
1187 && fp_found >= 2
1188 && div_sqrt_insn_p (after))
1189 {
1190 /* If this is the short version of the problematic
1191 sequence we add two NOPs in a row to also prevent
1192 the long version. */
1193 if (i == 2)
1194 emit_insn_before (gen_nop (), next);
1195 insert_nop = true;
1196 break;
1197 }
1198
1199 /* No need to scan past a second div/sqrt. */
1200 if (div_sqrt_insn_p (after))
1201 break;
1202
1203 /* Insert NOP before branch. */
1204 if (i < 3
1205 && (!NONJUMP_INSN_P (after)
1206 || GET_CODE (PATTERN (after)) == SEQUENCE))
1207 {
1208 insert_nop = true;
1209 break;
1210 }
1211
1212 after = next_active_insn (after);
1213 if (!after)
1214 break;
1215 }
1216 }
1217
1218 /* Look for either of these two sequences:
1219
1220 Sequence A:
1221 1. store of word size or less (e.g. st / stb / sth / stf)
1222 2. any single instruction that is not a load or store
1223 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1224
1225 Sequence B:
1226 1. store of double word size (e.g. std / stdf)
1227 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
1228 if (sparc_fix_b2bst
1229 && NONJUMP_INSN_P (insn)
1230 && (set = single_set (insn)) != NULL_RTX
1231 && MEM_P (SET_DEST (set)))
1232 {
1233 /* Sequence B begins with a double-word store. */
1234 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1235 rtx_insn *after;
1236 int i;
1237
1238 next = next_active_insn (insn);
1239 if (!next)
1240 break;
1241
1242 for (after = next, i = 0; i < 2; i++)
1243 {
1244 /* Skip empty assembly statements. */
1245 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1246 || (USEFUL_INSN_P (after)
1247 && (asm_noperands (PATTERN (after))>=0)
1248 && !strcmp (decode_asm_operands (PATTERN (after),
1249 NULL, NULL, NULL,
1250 NULL, NULL), "")))
1251 after = next_active_insn (after);
1252 if (!after)
1253 break;
1254
1255 /* If the insn is a branch, then it cannot be problematic. */
1256 if (!NONJUMP_INSN_P (after)
1257 || GET_CODE (PATTERN (after)) == SEQUENCE)
1258 break;
1259
1260 /* Sequence B is only two instructions long. */
1261 if (seq_b)
1262 {
1263 /* Add NOP if followed by a store. */
1264 if ((set = single_set (after)) != NULL_RTX
1265 && MEM_P (SET_DEST (set)))
1266 insert_nop = true;
1267
1268 /* Otherwise it is ok. */
1269 break;
1270 }
1271
1272 /* If the second instruction is a load or a store,
1273 then the sequence cannot be problematic. */
1274 if (i == 0)
1275 {
1276 if ((set = single_set (after)) != NULL_RTX
1277 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1278 break;
1279
1280 after = next_active_insn (after);
1281 if (!after)
1282 break;
1283 }
1284
1285 /* Add NOP if third instruction is a store. */
1286 if (i == 1
1287 && (set = single_set (after)) != NULL_RTX
1288 && MEM_P (SET_DEST (set)))
1289 insert_nop = true;
1290 }
1291 }
1292
1293 /* Look for a single-word load into an odd-numbered FP register. */
1294 else if (sparc_fix_at697f
1295 && NONJUMP_INSN_P (insn)
1296 && (set = single_set (insn)) != NULL_RTX
1297 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1298 && mem_ref (SET_SRC (set))
1299 && REG_P (SET_DEST (set))
1300 && REGNO (SET_DEST (set)) > 31
1301 && REGNO (SET_DEST (set)) % 2 != 0)
1302 {
1303 /* The wrong dependency is on the enclosing double register. */
1304 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1305 unsigned int src1, src2, dest;
1306 int code;
1307
1308 next = next_active_insn (insn);
1309 if (!next)
1310 break;
1311 /* If the insn is a branch, then it cannot be problematic. */
1312 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1313 continue;
1314
1315 extract_insn (next);
1316 code = INSN_CODE (next);
1317
1318 switch (code)
1319 {
1320 case CODE_FOR_adddf3:
1321 case CODE_FOR_subdf3:
1322 case CODE_FOR_muldf3:
1323 case CODE_FOR_divdf3:
1324 dest = REGNO (recog_data.operand[0]);
1325 src1 = REGNO (recog_data.operand[1]);
1326 src2 = REGNO (recog_data.operand[2]);
1327 if (src1 != src2)
1328 {
1329 /* Case [1-4]:
1330 ld [address], %fx+1
1331 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1332 if ((src1 == x || src2 == x)
1333 && (dest == src1 || dest == src2))
1334 insert_nop = true;
1335 }
1336 else
1337 {
1338 /* Case 5:
1339 ld [address], %fx+1
1340 FPOPd %fx, %fx, %fx */
1341 if (src1 == x
1342 && dest == src1
1343 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1344 insert_nop = true;
1345 }
1346 break;
1347
1348 case CODE_FOR_sqrtdf2:
1349 dest = REGNO (recog_data.operand[0]);
1350 src1 = REGNO (recog_data.operand[1]);
1351 /* Case 6:
1352 ld [address], %fx+1
1353 fsqrtd %fx, %fx */
1354 if (src1 == x && dest == src1)
1355 insert_nop = true;
1356 break;
1357
1358 default:
1359 break;
1360 }
1361 }
1362
1363 /* Look for a single-word load into an integer register. */
1364 else if (sparc_fix_ut699
1365 && NONJUMP_INSN_P (insn)
1366 && (set = single_set (insn)) != NULL_RTX
1367 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1368 && (mem_ref (SET_SRC (set)) != NULL_RTX
1369 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1370 && REG_P (SET_DEST (set))
1371 && REGNO (SET_DEST (set)) < 32)
1372 {
1373 /* There is no problem if the second memory access has a data
1374 dependency on the first single-cycle load. */
1375 rtx x = SET_DEST (set);
1376
1377 next = next_active_insn (insn);
1378 if (!next)
1379 break;
1380 /* If the insn is a branch, then it cannot be problematic. */
1381 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1382 continue;
1383
1384 /* Look for a second memory access to/from an integer register. */
1385 if ((set = single_set (next)) != NULL_RTX)
1386 {
1387 rtx src = SET_SRC (set);
1388 rtx dest = SET_DEST (set);
1389 rtx mem;
1390
1391 /* LDD is affected. */
1392 if ((mem = mem_ref (src)) != NULL_RTX
1393 && REG_P (dest)
1394 && REGNO (dest) < 32
1395 && !reg_mentioned_p (x, XEXP (mem, 0)))
1396 insert_nop = true;
1397
1398 /* STD is *not* affected. */
1399 else if (MEM_P (dest)
1400 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1401 && (src == CONST0_RTX (GET_MODE (dest))
1402 || (REG_P (src)
1403 && REGNO (src) < 32
1404 && REGNO (src) != REGNO (x)))
1405 && !reg_mentioned_p (x, XEXP (dest, 0)))
1406 insert_nop = true;
1407
1408 /* GOT accesses uses LD. */
1409 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1410 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1411 insert_nop = true;
1412 }
1413 }
1414
1415 /* Look for a single-word load/operation into an FP register. */
1416 else if (sparc_fix_ut699
1417 && NONJUMP_INSN_P (insn)
1418 && (set = single_set (insn)) != NULL_RTX
1419 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1420 && REG_P (SET_DEST (set))
1421 && REGNO (SET_DEST (set)) > 31)
1422 {
1423 /* Number of instructions in the problematic window. */
1424 const int n_insns = 4;
1425 /* The problematic combination is with the sibling FP register. */
1426 const unsigned int x = REGNO (SET_DEST (set));
1427 const unsigned int y = x ^ 1;
1428 rtx_insn *after;
1429 int i;
1430
1431 next = next_active_insn (insn);
1432 if (!next)
1433 break;
1434 /* If the insn is a branch, then it cannot be problematic. */
1435 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1436 continue;
1437
1438 /* Look for a second load/operation into the sibling FP register. */
1439 if (!((set = single_set (next)) != NULL_RTX
1440 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1441 && REG_P (SET_DEST (set))
1442 && REGNO (SET_DEST (set)) == y))
1443 continue;
1444
1445 /* Look for a (possible) store from the FP register in the next N
1446 instructions, but bail out if it is again modified or if there
1447 is a store from the sibling FP register before this store. */
1448 for (after = next, i = 0; i < n_insns; i++)
1449 {
1450 bool branch_p;
1451
1452 after = next_active_insn (after);
1453 if (!after)
1454 break;
1455
1456 /* This is a branch with an empty delay slot. */
1457 if (!NONJUMP_INSN_P (after))
1458 {
1459 if (++i == n_insns)
1460 break;
1461 branch_p = true;
1462 after = NULL;
1463 }
1464 /* This is a branch with a filled delay slot. */
1465 else if (rtx_sequence *seq =
1466 dyn_cast <rtx_sequence *> (PATTERN (after)))
1467 {
1468 if (++i == n_insns)
1469 break;
1470 branch_p = true;
1471 after = seq->insn (1);
1472 }
1473 /* This is a regular instruction. */
1474 else
1475 branch_p = false;
1476
1477 if (after && (set = single_set (after)) != NULL_RTX)
1478 {
1479 const rtx src = SET_SRC (set);
1480 const rtx dest = SET_DEST (set);
1481 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1482
1483 /* If the FP register is again modified before the store,
1484 then the store isn't affected. */
1485 if (REG_P (dest)
1486 && (REGNO (dest) == x
1487 || (REGNO (dest) == y && size == 8)))
1488 break;
1489
1490 if (MEM_P (dest) && REG_P (src))
1491 {
1492 /* If there is a store from the sibling FP register
1493 before the store, then the store is not affected. */
1494 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1495 break;
1496
1497 /* Otherwise, the store is affected. */
1498 if (REGNO (src) == x && size == 4)
1499 {
1500 insert_nop = true;
1501 break;
1502 }
1503 }
1504 }
1505
1506 /* If we have a branch in the first M instructions, then we
1507 cannot see the (M+2)th instruction so we play safe. */
1508 if (branch_p && i <= (n_insns - 2))
1509 {
1510 insert_nop = true;
1511 break;
1512 }
1513 }
1514 }
1515
1516 else
1517 next = NEXT_INSN (insn);
1518
1519 if (insert_nop)
1520 emit_insn_before (gen_nop (), next);
1521 }
1522
1523 return 0;
1524 }
1525
1526 namespace {
1527
1528 const pass_data pass_data_work_around_errata =
1529 {
1530 RTL_PASS, /* type */
1531 "errata", /* name */
1532 OPTGROUP_NONE, /* optinfo_flags */
1533 TV_MACH_DEP, /* tv_id */
1534 0, /* properties_required */
1535 0, /* properties_provided */
1536 0, /* properties_destroyed */
1537 0, /* todo_flags_start */
1538 0, /* todo_flags_finish */
1539 };
1540
1541 class pass_work_around_errata : public rtl_opt_pass
1542 {
1543 public:
1544 pass_work_around_errata(gcc::context *ctxt)
1545 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1546 {}
1547
1548 /* opt_pass methods: */
1549 virtual bool gate (function *)
1550 {
1551 return sparc_fix_at697f
1552 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1553 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1554 }
1555
1556 virtual unsigned int execute (function *)
1557 {
1558 return sparc_do_work_around_errata ();
1559 }
1560
1561 }; // class pass_work_around_errata
1562
1563 } // anon namespace
1564
1565 rtl_opt_pass *
1566 make_pass_work_around_errata (gcc::context *ctxt)
1567 {
1568 return new pass_work_around_errata (ctxt);
1569 }
1570
1571 /* Helpers for TARGET_DEBUG_OPTIONS. */
1572 static void
1573 dump_target_flag_bits (const int flags)
1574 {
1575 if (flags & MASK_64BIT)
1576 fprintf (stderr, "64BIT ");
1577 if (flags & MASK_APP_REGS)
1578 fprintf (stderr, "APP_REGS ");
1579 if (flags & MASK_FASTER_STRUCTS)
1580 fprintf (stderr, "FASTER_STRUCTS ");
1581 if (flags & MASK_FLAT)
1582 fprintf (stderr, "FLAT ");
1583 if (flags & MASK_FMAF)
1584 fprintf (stderr, "FMAF ");
1585 if (flags & MASK_FSMULD)
1586 fprintf (stderr, "FSMULD ");
1587 if (flags & MASK_FPU)
1588 fprintf (stderr, "FPU ");
1589 if (flags & MASK_HARD_QUAD)
1590 fprintf (stderr, "HARD_QUAD ");
1591 if (flags & MASK_POPC)
1592 fprintf (stderr, "POPC ");
1593 if (flags & MASK_PTR64)
1594 fprintf (stderr, "PTR64 ");
1595 if (flags & MASK_STACK_BIAS)
1596 fprintf (stderr, "STACK_BIAS ");
1597 if (flags & MASK_UNALIGNED_DOUBLES)
1598 fprintf (stderr, "UNALIGNED_DOUBLES ");
1599 if (flags & MASK_V8PLUS)
1600 fprintf (stderr, "V8PLUS ");
1601 if (flags & MASK_VIS)
1602 fprintf (stderr, "VIS ");
1603 if (flags & MASK_VIS2)
1604 fprintf (stderr, "VIS2 ");
1605 if (flags & MASK_VIS3)
1606 fprintf (stderr, "VIS3 ");
1607 if (flags & MASK_VIS4)
1608 fprintf (stderr, "VIS4 ");
1609 if (flags & MASK_VIS4B)
1610 fprintf (stderr, "VIS4B ");
1611 if (flags & MASK_CBCOND)
1612 fprintf (stderr, "CBCOND ");
1613 if (flags & MASK_DEPRECATED_V8_INSNS)
1614 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1615 if (flags & MASK_SPARCLET)
1616 fprintf (stderr, "SPARCLET ");
1617 if (flags & MASK_SPARCLITE)
1618 fprintf (stderr, "SPARCLITE ");
1619 if (flags & MASK_V8)
1620 fprintf (stderr, "V8 ");
1621 if (flags & MASK_V9)
1622 fprintf (stderr, "V9 ");
1623 }
1624
1625 static void
1626 dump_target_flags (const char *prefix, const int flags)
1627 {
1628 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1629 dump_target_flag_bits (flags);
1630 fprintf(stderr, "]\n");
1631 }
1632
1633 /* Validate and override various options, and do some machine dependent
1634 initialization. */
1635
1636 static void
1637 sparc_option_override (void)
1638 {
1639 static struct code_model {
1640 const char *const name;
1641 const enum cmodel value;
1642 } const cmodels[] = {
1643 { "32", CM_32 },
1644 { "medlow", CM_MEDLOW },
1645 { "medmid", CM_MEDMID },
1646 { "medany", CM_MEDANY },
1647 { "embmedany", CM_EMBMEDANY },
1648 { NULL, (enum cmodel) 0 }
1649 };
1650 const struct code_model *cmodel;
1651 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1652 static struct cpu_default {
1653 const int cpu;
1654 const enum processor_type processor;
1655 } const cpu_default[] = {
1656 /* There must be one entry here for each TARGET_CPU value. */
1657 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1658 { TARGET_CPU_v8, PROCESSOR_V8 },
1659 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1660 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1661 { TARGET_CPU_leon, PROCESSOR_LEON },
1662 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1663 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1664 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1665 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1666 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1667 { TARGET_CPU_v9, PROCESSOR_V9 },
1668 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1669 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1670 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1671 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1672 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1673 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1674 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1675 { TARGET_CPU_m8, PROCESSOR_M8 },
1676 { -1, PROCESSOR_V7 }
1677 };
1678 const struct cpu_default *def;
1679 /* Table of values for -m{cpu,tune}=. This must match the order of
1680 the enum processor_type in sparc-opts.h. */
1681 static struct cpu_table {
1682 const char *const name;
1683 const int disable;
1684 const int enable;
1685 } const cpu_table[] = {
1686 { "v7", MASK_ISA, 0 },
1687 { "cypress", MASK_ISA, 0 },
1688 { "v8", MASK_ISA, MASK_V8 },
1689 /* TI TMS390Z55 supersparc */
1690 { "supersparc", MASK_ISA, MASK_V8 },
1691 { "hypersparc", MASK_ISA, MASK_V8 },
1692 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1693 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1694 { "leon3v7", MASK_ISA, MASK_LEON3 },
1695 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1696 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1697 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1698 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1699 { "f934", MASK_ISA, MASK_SPARCLITE },
1700 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1701 { "sparclet", MASK_ISA, MASK_SPARCLET },
1702 /* TEMIC sparclet */
1703 { "tsc701", MASK_ISA, MASK_SPARCLET },
1704 { "v9", MASK_ISA, MASK_V9 },
1705 /* UltraSPARC I, II, IIi */
1706 { "ultrasparc", MASK_ISA,
1707 /* Although insns using %y are deprecated, it is a clear win. */
1708 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1709 /* UltraSPARC III */
1710 /* ??? Check if %y issue still holds true. */
1711 { "ultrasparc3", MASK_ISA,
1712 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1713 /* UltraSPARC T1 */
1714 { "niagara", MASK_ISA,
1715 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1716 /* UltraSPARC T2 */
1717 { "niagara2", MASK_ISA,
1718 MASK_V9|MASK_POPC|MASK_VIS2 },
1719 /* UltraSPARC T3 */
1720 { "niagara3", MASK_ISA,
1721 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1722 /* UltraSPARC T4 */
1723 { "niagara4", MASK_ISA,
1724 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1725 /* UltraSPARC M7 */
1726 { "niagara7", MASK_ISA,
1727 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1728 /* UltraSPARC M8 */
1729 { "m8", MASK_ISA,
1730 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1731 };
1732 const struct cpu_table *cpu;
1733 unsigned int i;
1734
1735 if (sparc_debug_string != NULL)
1736 {
1737 const char *q;
1738 char *p;
1739
1740 p = ASTRDUP (sparc_debug_string);
1741 while ((q = strtok (p, ",")) != NULL)
1742 {
1743 bool invert;
1744 int mask;
1745
1746 p = NULL;
1747 if (*q == '!')
1748 {
1749 invert = true;
1750 q++;
1751 }
1752 else
1753 invert = false;
1754
1755 if (! strcmp (q, "all"))
1756 mask = MASK_DEBUG_ALL;
1757 else if (! strcmp (q, "options"))
1758 mask = MASK_DEBUG_OPTIONS;
1759 else
1760 error ("unknown -mdebug-%s switch", q);
1761
1762 if (invert)
1763 sparc_debug &= ~mask;
1764 else
1765 sparc_debug |= mask;
1766 }
1767 }
1768
1769 /* Enable the FsMULd instruction by default if not explicitly specified by
1770 the user. It may be later disabled by the CPU (explicitly or not). */
1771 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1772 target_flags |= MASK_FSMULD;
1773
1774 if (TARGET_DEBUG_OPTIONS)
1775 {
1776 dump_target_flags("Initial target_flags", target_flags);
1777 dump_target_flags("target_flags_explicit", target_flags_explicit);
1778 }
1779
1780 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1781 SUBTARGET_OVERRIDE_OPTIONS;
1782 #endif
1783
1784 #ifndef SPARC_BI_ARCH
1785 /* Check for unsupported architecture size. */
1786 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1787 error ("%s is not supported by this configuration",
1788 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1789 #endif
1790
1791 /* We force all 64bit archs to use 128 bit long double */
1792 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1793 {
1794 error ("-mlong-double-64 not allowed with -m64");
1795 target_flags |= MASK_LONG_DOUBLE_128;
1796 }
1797
1798 /* Code model selection. */
1799 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1800
1801 #ifdef SPARC_BI_ARCH
1802 if (TARGET_ARCH32)
1803 sparc_cmodel = CM_32;
1804 #endif
1805
1806 if (sparc_cmodel_string != NULL)
1807 {
1808 if (TARGET_ARCH64)
1809 {
1810 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1811 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1812 break;
1813 if (cmodel->name == NULL)
1814 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1815 else
1816 sparc_cmodel = cmodel->value;
1817 }
1818 else
1819 error ("-mcmodel= is not supported on 32-bit systems");
1820 }
1821
1822 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1823 for (i = 8; i < 16; i++)
1824 if (!call_used_regs [i])
1825 {
1826 error ("-fcall-saved-REG is not supported for out registers");
1827 call_used_regs [i] = 1;
1828 }
1829
1830 /* Set the default CPU if no -mcpu option was specified. */
1831 if (!global_options_set.x_sparc_cpu_and_features)
1832 {
1833 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1834 if (def->cpu == TARGET_CPU_DEFAULT)
1835 break;
1836 gcc_assert (def->cpu != -1);
1837 sparc_cpu_and_features = def->processor;
1838 }
1839
1840 /* Set the default CPU if no -mtune option was specified. */
1841 if (!global_options_set.x_sparc_cpu)
1842 sparc_cpu = sparc_cpu_and_features;
1843
1844 cpu = &cpu_table[(int) sparc_cpu_and_features];
1845
1846 if (TARGET_DEBUG_OPTIONS)
1847 {
1848 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1849 dump_target_flags ("cpu->disable", cpu->disable);
1850 dump_target_flags ("cpu->enable", cpu->enable);
1851 }
1852
1853 target_flags &= ~cpu->disable;
1854 target_flags |= (cpu->enable
1855 #ifndef HAVE_AS_FMAF_HPC_VIS3
1856 & ~(MASK_FMAF | MASK_VIS3)
1857 #endif
1858 #ifndef HAVE_AS_SPARC4
1859 & ~MASK_CBCOND
1860 #endif
1861 #ifndef HAVE_AS_SPARC5_VIS4
1862 & ~(MASK_VIS4 | MASK_SUBXC)
1863 #endif
1864 #ifndef HAVE_AS_SPARC6
1865 & ~(MASK_VIS4B)
1866 #endif
1867 #ifndef HAVE_AS_LEON
1868 & ~(MASK_LEON | MASK_LEON3)
1869 #endif
1870 & ~(target_flags_explicit & MASK_FEATURES)
1871 );
1872
1873 /* FsMULd is a V8 instruction. */
1874 if (!TARGET_V8 && !TARGET_V9)
1875 target_flags &= ~MASK_FSMULD;
1876
1877 /* -mvis2 implies -mvis. */
1878 if (TARGET_VIS2)
1879 target_flags |= MASK_VIS;
1880
1881 /* -mvis3 implies -mvis2 and -mvis. */
1882 if (TARGET_VIS3)
1883 target_flags |= MASK_VIS2 | MASK_VIS;
1884
1885 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1886 if (TARGET_VIS4)
1887 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1888
1889 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1890 if (TARGET_VIS4B)
1891 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1892
1893 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1894 FPU is disabled. */
1895 if (!TARGET_FPU)
1896 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1897 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1898
1899 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1900 are available; -m64 also implies v9. */
1901 if (TARGET_VIS || TARGET_ARCH64)
1902 {
1903 target_flags |= MASK_V9;
1904 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1905 }
1906
1907 /* -mvis also implies -mv8plus on 32-bit. */
1908 if (TARGET_VIS && !TARGET_ARCH64)
1909 target_flags |= MASK_V8PLUS;
1910
1911 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1912 if (TARGET_V9 && TARGET_ARCH32)
1913 target_flags |= MASK_DEPRECATED_V8_INSNS;
1914
1915 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1916 if (!TARGET_V9 || TARGET_ARCH64)
1917 target_flags &= ~MASK_V8PLUS;
1918
1919 /* Don't use stack biasing in 32-bit mode. */
1920 if (TARGET_ARCH32)
1921 target_flags &= ~MASK_STACK_BIAS;
1922
1923 /* Use LRA instead of reload, unless otherwise instructed. */
1924 if (!(target_flags_explicit & MASK_LRA))
1925 target_flags |= MASK_LRA;
1926
1927 /* Enable applicable errata workarounds for LEON3FT. */
1928 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1929 {
1930 sparc_fix_b2bst = 1;
1931 sparc_fix_lost_divsqrt = 1;
1932 }
1933
1934 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1935 if (sparc_fix_ut699)
1936 target_flags &= ~MASK_FSMULD;
1937
1938 /* Supply a default value for align_functions. */
1939 if (flag_align_functions && !str_align_functions)
1940 {
1941 if (sparc_cpu == PROCESSOR_ULTRASPARC
1942 || sparc_cpu == PROCESSOR_ULTRASPARC3
1943 || sparc_cpu == PROCESSOR_NIAGARA
1944 || sparc_cpu == PROCESSOR_NIAGARA2
1945 || sparc_cpu == PROCESSOR_NIAGARA3
1946 || sparc_cpu == PROCESSOR_NIAGARA4)
1947 str_align_functions = "32";
1948 else if (sparc_cpu == PROCESSOR_NIAGARA7
1949 || sparc_cpu == PROCESSOR_M8)
1950 str_align_functions = "64";
1951 }
1952
1953 /* Validate PCC_STRUCT_RETURN. */
1954 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1955 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1956
1957 /* Only use .uaxword when compiling for a 64-bit target. */
1958 if (!TARGET_ARCH64)
1959 targetm.asm_out.unaligned_op.di = NULL;
1960
1961 /* Do various machine dependent initializations. */
1962 sparc_init_modes ();
1963
1964 /* Set up function hooks. */
1965 init_machine_status = sparc_init_machine_status;
1966
1967 switch (sparc_cpu)
1968 {
1969 case PROCESSOR_V7:
1970 case PROCESSOR_CYPRESS:
1971 sparc_costs = &cypress_costs;
1972 break;
1973 case PROCESSOR_V8:
1974 case PROCESSOR_SPARCLITE:
1975 case PROCESSOR_SUPERSPARC:
1976 sparc_costs = &supersparc_costs;
1977 break;
1978 case PROCESSOR_F930:
1979 case PROCESSOR_F934:
1980 case PROCESSOR_HYPERSPARC:
1981 case PROCESSOR_SPARCLITE86X:
1982 sparc_costs = &hypersparc_costs;
1983 break;
1984 case PROCESSOR_LEON:
1985 sparc_costs = &leon_costs;
1986 break;
1987 case PROCESSOR_LEON3:
1988 case PROCESSOR_LEON3V7:
1989 sparc_costs = &leon3_costs;
1990 break;
1991 case PROCESSOR_SPARCLET:
1992 case PROCESSOR_TSC701:
1993 sparc_costs = &sparclet_costs;
1994 break;
1995 case PROCESSOR_V9:
1996 case PROCESSOR_ULTRASPARC:
1997 sparc_costs = &ultrasparc_costs;
1998 break;
1999 case PROCESSOR_ULTRASPARC3:
2000 sparc_costs = &ultrasparc3_costs;
2001 break;
2002 case PROCESSOR_NIAGARA:
2003 sparc_costs = &niagara_costs;
2004 break;
2005 case PROCESSOR_NIAGARA2:
2006 sparc_costs = &niagara2_costs;
2007 break;
2008 case PROCESSOR_NIAGARA3:
2009 sparc_costs = &niagara3_costs;
2010 break;
2011 case PROCESSOR_NIAGARA4:
2012 sparc_costs = &niagara4_costs;
2013 break;
2014 case PROCESSOR_NIAGARA7:
2015 sparc_costs = &niagara7_costs;
2016 break;
2017 case PROCESSOR_M8:
2018 sparc_costs = &m8_costs;
2019 break;
2020 case PROCESSOR_NATIVE:
2021 gcc_unreachable ();
2022 };
2023
2024 if (sparc_memory_model == SMM_DEFAULT)
2025 {
2026 /* Choose the memory model for the operating system. */
2027 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
2028 if (os_default != SMM_DEFAULT)
2029 sparc_memory_model = os_default;
2030 /* Choose the most relaxed model for the processor. */
2031 else if (TARGET_V9)
2032 sparc_memory_model = SMM_RMO;
2033 else if (TARGET_LEON3)
2034 sparc_memory_model = SMM_TSO;
2035 else if (TARGET_LEON)
2036 sparc_memory_model = SMM_SC;
2037 else if (TARGET_V8)
2038 sparc_memory_model = SMM_PSO;
2039 else
2040 sparc_memory_model = SMM_SC;
2041 }
2042
2043 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
2044 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
2045 target_flags |= MASK_LONG_DOUBLE_128;
2046 #endif
2047
2048 if (TARGET_DEBUG_OPTIONS)
2049 dump_target_flags ("Final target_flags", target_flags);
2050
2051 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2052 can run at the same time. More important, it is the threshold
2053 defining when additional prefetches will be dropped by the
2054 hardware.
2055
2056 The UltraSPARC-III features a documented prefetch queue with a
2057 size of 8. Additional prefetches issued in the cpu are
2058 dropped.
2059
2060 Niagara processors are different. In these processors prefetches
2061 are handled much like regular loads. The L1 miss buffer is 32
2062 entries, but prefetches start getting affected when 30 entries
2063 become occupied. That occupation could be a mix of regular loads
2064 and prefetches though. And that buffer is shared by all threads.
2065 Once the threshold is reached, if the core is running a single
2066 thread the prefetch will retry. If more than one thread is
2067 running, the prefetch will be dropped.
2068
2069 All this makes it very difficult to determine how many
2070 simultaneous prefetches can be issued simultaneously, even in a
2071 single-threaded program. Experimental results show that setting
2072 this parameter to 32 works well when the number of threads is not
2073 high. */
2074 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2075 ((sparc_cpu == PROCESSOR_ULTRASPARC
2076 || sparc_cpu == PROCESSOR_NIAGARA
2077 || sparc_cpu == PROCESSOR_NIAGARA2
2078 || sparc_cpu == PROCESSOR_NIAGARA3
2079 || sparc_cpu == PROCESSOR_NIAGARA4)
2080 ? 2
2081 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2082 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2083 || sparc_cpu == PROCESSOR_M8)
2084 ? 32 : 3))),
2085 global_options.x_param_values,
2086 global_options_set.x_param_values);
2087
2088 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2089 bytes.
2090
2091 The Oracle SPARC Architecture (previously the UltraSPARC
2092 Architecture) specification states that when a PREFETCH[A]
2093 instruction is executed an implementation-specific amount of data
2094 is prefetched, and that it is at least 64 bytes long (aligned to
2095 at least 64 bytes).
2096
2097 However, this is not correct. The M7 (and implementations prior
2098 to that) does not guarantee a 64B prefetch into a cache if the
2099 line size is smaller. A single cache line is all that is ever
2100 prefetched. So for the M7, where the L1D$ has 32B lines and the
2101 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2102 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2103 is a read_n prefetch, which is the only type which allocates to
2104 the L1.) */
2105 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2106 (sparc_cpu == PROCESSOR_M8
2107 ? 64 : 32),
2108 global_options.x_param_values,
2109 global_options_set.x_param_values);
2110
2111 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2112 Hardvard level-1 caches) in kilobytes. Both UltraSPARC and
2113 Niagara processors feature a L1D$ of 16KB. */
2114 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2115 ((sparc_cpu == PROCESSOR_ULTRASPARC
2116 || sparc_cpu == PROCESSOR_ULTRASPARC3
2117 || sparc_cpu == PROCESSOR_NIAGARA
2118 || sparc_cpu == PROCESSOR_NIAGARA2
2119 || sparc_cpu == PROCESSOR_NIAGARA3
2120 || sparc_cpu == PROCESSOR_NIAGARA4
2121 || sparc_cpu == PROCESSOR_NIAGARA7
2122 || sparc_cpu == PROCESSOR_M8)
2123 ? 16 : 64),
2124 global_options.x_param_values,
2125 global_options_set.x_param_values);
2126
2127
2128 /* PARAM_L2_CACHE_SIZE is the size fo the L2 in kilobytes. Note
2129 that 512 is the default in params.def. */
2130 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2131 ((sparc_cpu == PROCESSOR_NIAGARA4
2132 || sparc_cpu == PROCESSOR_M8)
2133 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2134 ? 256 : 512)),
2135 global_options.x_param_values,
2136 global_options_set.x_param_values);
2137
2138
2139 /* Disable save slot sharing for call-clobbered registers by default.
2140 The IRA sharing algorithm works on single registers only and this
2141 pessimizes for double floating-point registers. */
2142 if (!global_options_set.x_flag_ira_share_save_slots)
2143 flag_ira_share_save_slots = 0;
2144
2145 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2146 redundant 32-to-64-bit extensions. */
2147 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2148 flag_ree = 0;
2149 }
2150 \f
2151 /* Miscellaneous utilities. */
2152
2153 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2154 or branch on register contents instructions. */
2155
2156 int
2157 v9_regcmp_p (enum rtx_code code)
2158 {
2159 return (code == EQ || code == NE || code == GE || code == LT
2160 || code == LE || code == GT);
2161 }
2162
2163 /* Nonzero if OP is a floating point constant which can
2164 be loaded into an integer register using a single
2165 sethi instruction. */
2166
2167 int
2168 fp_sethi_p (rtx op)
2169 {
2170 if (GET_CODE (op) == CONST_DOUBLE)
2171 {
2172 long i;
2173
2174 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2175 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2176 }
2177
2178 return 0;
2179 }
2180
2181 /* Nonzero if OP is a floating point constant which can
2182 be loaded into an integer register using a single
2183 mov instruction. */
2184
2185 int
2186 fp_mov_p (rtx op)
2187 {
2188 if (GET_CODE (op) == CONST_DOUBLE)
2189 {
2190 long i;
2191
2192 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2193 return SPARC_SIMM13_P (i);
2194 }
2195
2196 return 0;
2197 }
2198
2199 /* Nonzero if OP is a floating point constant which can
2200 be loaded into an integer register using a high/losum
2201 instruction sequence. */
2202
2203 int
2204 fp_high_losum_p (rtx op)
2205 {
2206 /* The constraints calling this should only be in
2207 SFmode move insns, so any constant which cannot
2208 be moved using a single insn will do. */
2209 if (GET_CODE (op) == CONST_DOUBLE)
2210 {
2211 long i;
2212
2213 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2214 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2215 }
2216
2217 return 0;
2218 }
2219
2220 /* Return true if the address of LABEL can be loaded by means of the
2221 mov{si,di}_pic_label_ref patterns in PIC mode. */
2222
2223 static bool
2224 can_use_mov_pic_label_ref (rtx label)
2225 {
2226 /* VxWorks does not impose a fixed gap between segments; the run-time
2227 gap can be different from the object-file gap. We therefore can't
2228 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2229 are absolutely sure that X is in the same segment as the GOT.
2230 Unfortunately, the flexibility of linker scripts means that we
2231 can't be sure of that in general, so assume that GOT-relative
2232 accesses are never valid on VxWorks. */
2233 if (TARGET_VXWORKS_RTP)
2234 return false;
2235
2236 /* Similarly, if the label is non-local, it might end up being placed
2237 in a different section than the current one; now mov_pic_label_ref
2238 requires the label and the code to be in the same section. */
2239 if (LABEL_REF_NONLOCAL_P (label))
2240 return false;
2241
2242 /* Finally, if we are reordering basic blocks and partition into hot
2243 and cold sections, this might happen for any label. */
2244 if (flag_reorder_blocks_and_partition)
2245 return false;
2246
2247 return true;
2248 }
2249
2250 /* Expand a move instruction. Return true if all work is done. */
2251
2252 bool
2253 sparc_expand_move (machine_mode mode, rtx *operands)
2254 {
2255 /* Handle sets of MEM first. */
2256 if (GET_CODE (operands[0]) == MEM)
2257 {
2258 /* 0 is a register (or a pair of registers) on SPARC. */
2259 if (register_or_zero_operand (operands[1], mode))
2260 return false;
2261
2262 if (!reload_in_progress)
2263 {
2264 operands[0] = validize_mem (operands[0]);
2265 operands[1] = force_reg (mode, operands[1]);
2266 }
2267 }
2268
2269 /* Fix up TLS cases. */
2270 if (TARGET_HAVE_TLS
2271 && CONSTANT_P (operands[1])
2272 && sparc_tls_referenced_p (operands [1]))
2273 {
2274 operands[1] = sparc_legitimize_tls_address (operands[1]);
2275 return false;
2276 }
2277
2278 /* Fix up PIC cases. */
2279 if (flag_pic && CONSTANT_P (operands[1]))
2280 {
2281 if (pic_address_needs_scratch (operands[1]))
2282 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2283
2284 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2285 if ((GET_CODE (operands[1]) == LABEL_REF
2286 && can_use_mov_pic_label_ref (operands[1]))
2287 || (GET_CODE (operands[1]) == CONST
2288 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2289 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2290 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2291 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2292 {
2293 if (mode == SImode)
2294 {
2295 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2296 return true;
2297 }
2298
2299 if (mode == DImode)
2300 {
2301 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2302 return true;
2303 }
2304 }
2305
2306 if (symbolic_operand (operands[1], mode))
2307 {
2308 operands[1]
2309 = sparc_legitimize_pic_address (operands[1],
2310 reload_in_progress
2311 ? operands[0] : NULL_RTX);
2312 return false;
2313 }
2314 }
2315
2316 /* If we are trying to toss an integer constant into FP registers,
2317 or loading a FP or vector constant, force it into memory. */
2318 if (CONSTANT_P (operands[1])
2319 && REG_P (operands[0])
2320 && (SPARC_FP_REG_P (REGNO (operands[0]))
2321 || SCALAR_FLOAT_MODE_P (mode)
2322 || VECTOR_MODE_P (mode)))
2323 {
2324 /* emit_group_store will send such bogosity to us when it is
2325 not storing directly into memory. So fix this up to avoid
2326 crashes in output_constant_pool. */
2327 if (operands [1] == const0_rtx)
2328 operands[1] = CONST0_RTX (mode);
2329
2330 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2331 always other regs. */
2332 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2333 && (const_zero_operand (operands[1], mode)
2334 || const_all_ones_operand (operands[1], mode)))
2335 return false;
2336
2337 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2338 /* We are able to build any SF constant in integer registers
2339 with at most 2 instructions. */
2340 && (mode == SFmode
2341 /* And any DF constant in integer registers if needed. */
2342 || (mode == DFmode && !can_create_pseudo_p ())))
2343 return false;
2344
2345 operands[1] = force_const_mem (mode, operands[1]);
2346 if (!reload_in_progress)
2347 operands[1] = validize_mem (operands[1]);
2348 return false;
2349 }
2350
2351 /* Accept non-constants and valid constants unmodified. */
2352 if (!CONSTANT_P (operands[1])
2353 || GET_CODE (operands[1]) == HIGH
2354 || input_operand (operands[1], mode))
2355 return false;
2356
2357 switch (mode)
2358 {
2359 case E_QImode:
2360 /* All QImode constants require only one insn, so proceed. */
2361 break;
2362
2363 case E_HImode:
2364 case E_SImode:
2365 sparc_emit_set_const32 (operands[0], operands[1]);
2366 return true;
2367
2368 case E_DImode:
2369 /* input_operand should have filtered out 32-bit mode. */
2370 sparc_emit_set_const64 (operands[0], operands[1]);
2371 return true;
2372
2373 case E_TImode:
2374 {
2375 rtx high, low;
2376 /* TImode isn't available in 32-bit mode. */
2377 split_double (operands[1], &high, &low);
2378 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2379 high));
2380 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2381 low));
2382 }
2383 return true;
2384
2385 default:
2386 gcc_unreachable ();
2387 }
2388
2389 return false;
2390 }
2391
2392 /* Load OP1, a 32-bit constant, into OP0, a register.
2393 We know it can't be done in one insn when we get
2394 here, the move expander guarantees this. */
2395
2396 static void
2397 sparc_emit_set_const32 (rtx op0, rtx op1)
2398 {
2399 machine_mode mode = GET_MODE (op0);
2400 rtx temp = op0;
2401
2402 if (can_create_pseudo_p ())
2403 temp = gen_reg_rtx (mode);
2404
2405 if (GET_CODE (op1) == CONST_INT)
2406 {
2407 gcc_assert (!small_int_operand (op1, mode)
2408 && !const_high_operand (op1, mode));
2409
2410 /* Emit them as real moves instead of a HIGH/LO_SUM,
2411 this way CSE can see everything and reuse intermediate
2412 values if it wants. */
2413 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2414 & ~(HOST_WIDE_INT) 0x3ff)));
2415
2416 emit_insn (gen_rtx_SET (op0,
2417 gen_rtx_IOR (mode, temp,
2418 GEN_INT (INTVAL (op1) & 0x3ff))));
2419 }
2420 else
2421 {
2422 /* A symbol, emit in the traditional way. */
2423 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2424 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2425 }
2426 }
2427
2428 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2429 If TEMP is nonzero, we are forbidden to use any other scratch
2430 registers. Otherwise, we are allowed to generate them as needed.
2431
2432 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2433 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2434
2435 void
2436 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2437 {
2438 rtx cst, temp1, temp2, temp3, temp4, temp5;
2439 rtx ti_temp = 0;
2440
2441 /* Deal with too large offsets. */
2442 if (GET_CODE (op1) == CONST
2443 && GET_CODE (XEXP (op1, 0)) == PLUS
2444 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2445 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2446 {
2447 gcc_assert (!temp);
2448 temp1 = gen_reg_rtx (DImode);
2449 temp2 = gen_reg_rtx (DImode);
2450 sparc_emit_set_const64 (temp2, cst);
2451 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2452 NULL_RTX);
2453 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2454 return;
2455 }
2456
2457 if (temp && GET_MODE (temp) == TImode)
2458 {
2459 ti_temp = temp;
2460 temp = gen_rtx_REG (DImode, REGNO (temp));
2461 }
2462
2463 /* SPARC-V9 code-model support. */
2464 switch (sparc_cmodel)
2465 {
2466 case CM_MEDLOW:
2467 /* The range spanned by all instructions in the object is less
2468 than 2^31 bytes (2GB) and the distance from any instruction
2469 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2470 than 2^31 bytes (2GB).
2471
2472 The executable must be in the low 4TB of the virtual address
2473 space.
2474
2475 sethi %hi(symbol), %temp1
2476 or %temp1, %lo(symbol), %reg */
2477 if (temp)
2478 temp1 = temp; /* op0 is allowed. */
2479 else
2480 temp1 = gen_reg_rtx (DImode);
2481
2482 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2483 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2484 break;
2485
2486 case CM_MEDMID:
2487 /* The range spanned by all instructions in the object is less
2488 than 2^31 bytes (2GB) and the distance from any instruction
2489 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2490 than 2^31 bytes (2GB).
2491
2492 The executable must be in the low 16TB of the virtual address
2493 space.
2494
2495 sethi %h44(symbol), %temp1
2496 or %temp1, %m44(symbol), %temp2
2497 sllx %temp2, 12, %temp3
2498 or %temp3, %l44(symbol), %reg */
2499 if (temp)
2500 {
2501 temp1 = op0;
2502 temp2 = op0;
2503 temp3 = temp; /* op0 is allowed. */
2504 }
2505 else
2506 {
2507 temp1 = gen_reg_rtx (DImode);
2508 temp2 = gen_reg_rtx (DImode);
2509 temp3 = gen_reg_rtx (DImode);
2510 }
2511
2512 emit_insn (gen_seth44 (temp1, op1));
2513 emit_insn (gen_setm44 (temp2, temp1, op1));
2514 emit_insn (gen_rtx_SET (temp3,
2515 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2516 emit_insn (gen_setl44 (op0, temp3, op1));
2517 break;
2518
2519 case CM_MEDANY:
2520 /* The range spanned by all instructions in the object is less
2521 than 2^31 bytes (2GB) and the distance from any instruction
2522 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2523 than 2^31 bytes (2GB).
2524
2525 The executable can be placed anywhere in the virtual address
2526 space.
2527
2528 sethi %hh(symbol), %temp1
2529 sethi %lm(symbol), %temp2
2530 or %temp1, %hm(symbol), %temp3
2531 sllx %temp3, 32, %temp4
2532 or %temp4, %temp2, %temp5
2533 or %temp5, %lo(symbol), %reg */
2534 if (temp)
2535 {
2536 /* It is possible that one of the registers we got for operands[2]
2537 might coincide with that of operands[0] (which is why we made
2538 it TImode). Pick the other one to use as our scratch. */
2539 if (rtx_equal_p (temp, op0))
2540 {
2541 gcc_assert (ti_temp);
2542 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2543 }
2544 temp1 = op0;
2545 temp2 = temp; /* op0 is _not_ allowed, see above. */
2546 temp3 = op0;
2547 temp4 = op0;
2548 temp5 = op0;
2549 }
2550 else
2551 {
2552 temp1 = gen_reg_rtx (DImode);
2553 temp2 = gen_reg_rtx (DImode);
2554 temp3 = gen_reg_rtx (DImode);
2555 temp4 = gen_reg_rtx (DImode);
2556 temp5 = gen_reg_rtx (DImode);
2557 }
2558
2559 emit_insn (gen_sethh (temp1, op1));
2560 emit_insn (gen_setlm (temp2, op1));
2561 emit_insn (gen_sethm (temp3, temp1, op1));
2562 emit_insn (gen_rtx_SET (temp4,
2563 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2564 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2565 emit_insn (gen_setlo (op0, temp5, op1));
2566 break;
2567
2568 case CM_EMBMEDANY:
2569 /* Old old old backwards compatibility kruft here.
2570 Essentially it is MEDLOW with a fixed 64-bit
2571 virtual base added to all data segment addresses.
2572 Text-segment stuff is computed like MEDANY, we can't
2573 reuse the code above because the relocation knobs
2574 look different.
2575
2576 Data segment: sethi %hi(symbol), %temp1
2577 add %temp1, EMBMEDANY_BASE_REG, %temp2
2578 or %temp2, %lo(symbol), %reg */
2579 if (data_segment_operand (op1, GET_MODE (op1)))
2580 {
2581 if (temp)
2582 {
2583 temp1 = temp; /* op0 is allowed. */
2584 temp2 = op0;
2585 }
2586 else
2587 {
2588 temp1 = gen_reg_rtx (DImode);
2589 temp2 = gen_reg_rtx (DImode);
2590 }
2591
2592 emit_insn (gen_embmedany_sethi (temp1, op1));
2593 emit_insn (gen_embmedany_brsum (temp2, temp1));
2594 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2595 }
2596
2597 /* Text segment: sethi %uhi(symbol), %temp1
2598 sethi %hi(symbol), %temp2
2599 or %temp1, %ulo(symbol), %temp3
2600 sllx %temp3, 32, %temp4
2601 or %temp4, %temp2, %temp5
2602 or %temp5, %lo(symbol), %reg */
2603 else
2604 {
2605 if (temp)
2606 {
2607 /* It is possible that one of the registers we got for operands[2]
2608 might coincide with that of operands[0] (which is why we made
2609 it TImode). Pick the other one to use as our scratch. */
2610 if (rtx_equal_p (temp, op0))
2611 {
2612 gcc_assert (ti_temp);
2613 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2614 }
2615 temp1 = op0;
2616 temp2 = temp; /* op0 is _not_ allowed, see above. */
2617 temp3 = op0;
2618 temp4 = op0;
2619 temp5 = op0;
2620 }
2621 else
2622 {
2623 temp1 = gen_reg_rtx (DImode);
2624 temp2 = gen_reg_rtx (DImode);
2625 temp3 = gen_reg_rtx (DImode);
2626 temp4 = gen_reg_rtx (DImode);
2627 temp5 = gen_reg_rtx (DImode);
2628 }
2629
2630 emit_insn (gen_embmedany_textuhi (temp1, op1));
2631 emit_insn (gen_embmedany_texthi (temp2, op1));
2632 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2633 emit_insn (gen_rtx_SET (temp4,
2634 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2635 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2636 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2637 }
2638 break;
2639
2640 default:
2641 gcc_unreachable ();
2642 }
2643 }
2644
2645 /* These avoid problems when cross compiling. If we do not
2646 go through all this hair then the optimizer will see
2647 invalid REG_EQUAL notes or in some cases none at all. */
2648 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2649 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2650 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2651 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2652
2653 /* The optimizer is not to assume anything about exactly
2654 which bits are set for a HIGH, they are unspecified.
2655 Unfortunately this leads to many missed optimizations
2656 during CSE. We mask out the non-HIGH bits, and matches
2657 a plain movdi, to alleviate this problem. */
2658 static rtx
2659 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2660 {
2661 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2662 }
2663
2664 static rtx
2665 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2666 {
2667 return gen_rtx_SET (dest, GEN_INT (val));
2668 }
2669
2670 static rtx
2671 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2672 {
2673 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2674 }
2675
2676 static rtx
2677 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2678 {
2679 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2680 }
2681
2682 /* Worker routines for 64-bit constant formation on arch64.
2683 One of the key things to be doing in these emissions is
2684 to create as many temp REGs as possible. This makes it
2685 possible for half-built constants to be used later when
2686 such values are similar to something required later on.
2687 Without doing this, the optimizer cannot see such
2688 opportunities. */
2689
2690 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2691 unsigned HOST_WIDE_INT, int);
2692
2693 static void
2694 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2695 unsigned HOST_WIDE_INT low_bits, int is_neg)
2696 {
2697 unsigned HOST_WIDE_INT high_bits;
2698
2699 if (is_neg)
2700 high_bits = (~low_bits) & 0xffffffff;
2701 else
2702 high_bits = low_bits;
2703
2704 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2705 if (!is_neg)
2706 {
2707 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2708 }
2709 else
2710 {
2711 /* If we are XOR'ing with -1, then we should emit a one's complement
2712 instead. This way the combiner will notice logical operations
2713 such as ANDN later on and substitute. */
2714 if ((low_bits & 0x3ff) == 0x3ff)
2715 {
2716 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2717 }
2718 else
2719 {
2720 emit_insn (gen_rtx_SET (op0,
2721 gen_safe_XOR64 (temp,
2722 (-(HOST_WIDE_INT)0x400
2723 | (low_bits & 0x3ff)))));
2724 }
2725 }
2726 }
2727
2728 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2729 unsigned HOST_WIDE_INT, int);
2730
2731 static void
2732 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2733 unsigned HOST_WIDE_INT high_bits,
2734 unsigned HOST_WIDE_INT low_immediate,
2735 int shift_count)
2736 {
2737 rtx temp2 = op0;
2738
2739 if ((high_bits & 0xfffffc00) != 0)
2740 {
2741 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2742 if ((high_bits & ~0xfffffc00) != 0)
2743 emit_insn (gen_rtx_SET (op0,
2744 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2745 else
2746 temp2 = temp;
2747 }
2748 else
2749 {
2750 emit_insn (gen_safe_SET64 (temp, high_bits));
2751 temp2 = temp;
2752 }
2753
2754 /* Now shift it up into place. */
2755 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2756 GEN_INT (shift_count))));
2757
2758 /* If there is a low immediate part piece, finish up by
2759 putting that in as well. */
2760 if (low_immediate != 0)
2761 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2762 }
2763
2764 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2765 unsigned HOST_WIDE_INT);
2766
2767 /* Full 64-bit constant decomposition. Even though this is the
2768 'worst' case, we still optimize a few things away. */
2769 static void
2770 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2771 unsigned HOST_WIDE_INT high_bits,
2772 unsigned HOST_WIDE_INT low_bits)
2773 {
2774 rtx sub_temp = op0;
2775
2776 if (can_create_pseudo_p ())
2777 sub_temp = gen_reg_rtx (DImode);
2778
2779 if ((high_bits & 0xfffffc00) != 0)
2780 {
2781 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2782 if ((high_bits & ~0xfffffc00) != 0)
2783 emit_insn (gen_rtx_SET (sub_temp,
2784 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2785 else
2786 sub_temp = temp;
2787 }
2788 else
2789 {
2790 emit_insn (gen_safe_SET64 (temp, high_bits));
2791 sub_temp = temp;
2792 }
2793
2794 if (can_create_pseudo_p ())
2795 {
2796 rtx temp2 = gen_reg_rtx (DImode);
2797 rtx temp3 = gen_reg_rtx (DImode);
2798 rtx temp4 = gen_reg_rtx (DImode);
2799
2800 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2801 GEN_INT (32))));
2802
2803 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2804 if ((low_bits & ~0xfffffc00) != 0)
2805 {
2806 emit_insn (gen_rtx_SET (temp3,
2807 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2808 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2809 }
2810 else
2811 {
2812 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2813 }
2814 }
2815 else
2816 {
2817 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2818 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2819 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2820 int to_shift = 12;
2821
2822 /* We are in the middle of reload, so this is really
2823 painful. However we do still make an attempt to
2824 avoid emitting truly stupid code. */
2825 if (low1 != const0_rtx)
2826 {
2827 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2828 GEN_INT (to_shift))));
2829 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2830 sub_temp = op0;
2831 to_shift = 12;
2832 }
2833 else
2834 {
2835 to_shift += 12;
2836 }
2837 if (low2 != const0_rtx)
2838 {
2839 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2840 GEN_INT (to_shift))));
2841 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2842 sub_temp = op0;
2843 to_shift = 8;
2844 }
2845 else
2846 {
2847 to_shift += 8;
2848 }
2849 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2850 GEN_INT (to_shift))));
2851 if (low3 != const0_rtx)
2852 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2853 /* phew... */
2854 }
2855 }
2856
2857 /* Analyze a 64-bit constant for certain properties. */
2858 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2859 unsigned HOST_WIDE_INT,
2860 int *, int *, int *);
2861
2862 static void
2863 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2864 unsigned HOST_WIDE_INT low_bits,
2865 int *hbsp, int *lbsp, int *abbasp)
2866 {
2867 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2868 int i;
2869
2870 lowest_bit_set = highest_bit_set = -1;
2871 i = 0;
2872 do
2873 {
2874 if ((lowest_bit_set == -1)
2875 && ((low_bits >> i) & 1))
2876 lowest_bit_set = i;
2877 if ((highest_bit_set == -1)
2878 && ((high_bits >> (32 - i - 1)) & 1))
2879 highest_bit_set = (64 - i - 1);
2880 }
2881 while (++i < 32
2882 && ((highest_bit_set == -1)
2883 || (lowest_bit_set == -1)));
2884 if (i == 32)
2885 {
2886 i = 0;
2887 do
2888 {
2889 if ((lowest_bit_set == -1)
2890 && ((high_bits >> i) & 1))
2891 lowest_bit_set = i + 32;
2892 if ((highest_bit_set == -1)
2893 && ((low_bits >> (32 - i - 1)) & 1))
2894 highest_bit_set = 32 - i - 1;
2895 }
2896 while (++i < 32
2897 && ((highest_bit_set == -1)
2898 || (lowest_bit_set == -1)));
2899 }
2900 /* If there are no bits set this should have gone out
2901 as one instruction! */
2902 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2903 all_bits_between_are_set = 1;
2904 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2905 {
2906 if (i < 32)
2907 {
2908 if ((low_bits & (1 << i)) != 0)
2909 continue;
2910 }
2911 else
2912 {
2913 if ((high_bits & (1 << (i - 32))) != 0)
2914 continue;
2915 }
2916 all_bits_between_are_set = 0;
2917 break;
2918 }
2919 *hbsp = highest_bit_set;
2920 *lbsp = lowest_bit_set;
2921 *abbasp = all_bits_between_are_set;
2922 }
2923
2924 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2925
2926 static int
2927 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2928 unsigned HOST_WIDE_INT low_bits)
2929 {
2930 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2931
2932 if (high_bits == 0
2933 || high_bits == 0xffffffff)
2934 return 1;
2935
2936 analyze_64bit_constant (high_bits, low_bits,
2937 &highest_bit_set, &lowest_bit_set,
2938 &all_bits_between_are_set);
2939
2940 if ((highest_bit_set == 63
2941 || lowest_bit_set == 0)
2942 && all_bits_between_are_set != 0)
2943 return 1;
2944
2945 if ((highest_bit_set - lowest_bit_set) < 21)
2946 return 1;
2947
2948 return 0;
2949 }
2950
2951 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2952 unsigned HOST_WIDE_INT,
2953 int, int);
2954
2955 static unsigned HOST_WIDE_INT
2956 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2957 unsigned HOST_WIDE_INT low_bits,
2958 int lowest_bit_set, int shift)
2959 {
2960 HOST_WIDE_INT hi, lo;
2961
2962 if (lowest_bit_set < 32)
2963 {
2964 lo = (low_bits >> lowest_bit_set) << shift;
2965 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2966 }
2967 else
2968 {
2969 lo = 0;
2970 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2971 }
2972 gcc_assert (! (hi & lo));
2973 return (hi | lo);
2974 }
2975
2976 /* Here we are sure to be arch64 and this is an integer constant
2977 being loaded into a register. Emit the most efficient
2978 insn sequence possible. Detection of all the 1-insn cases
2979 has been done already. */
2980 static void
2981 sparc_emit_set_const64 (rtx op0, rtx op1)
2982 {
2983 unsigned HOST_WIDE_INT high_bits, low_bits;
2984 int lowest_bit_set, highest_bit_set;
2985 int all_bits_between_are_set;
2986 rtx temp = 0;
2987
2988 /* Sanity check that we know what we are working with. */
2989 gcc_assert (TARGET_ARCH64
2990 && (GET_CODE (op0) == SUBREG
2991 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2992
2993 if (! can_create_pseudo_p ())
2994 temp = op0;
2995
2996 if (GET_CODE (op1) != CONST_INT)
2997 {
2998 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2999 return;
3000 }
3001
3002 if (! temp)
3003 temp = gen_reg_rtx (DImode);
3004
3005 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3006 low_bits = (INTVAL (op1) & 0xffffffff);
3007
3008 /* low_bits bits 0 --> 31
3009 high_bits bits 32 --> 63 */
3010
3011 analyze_64bit_constant (high_bits, low_bits,
3012 &highest_bit_set, &lowest_bit_set,
3013 &all_bits_between_are_set);
3014
3015 /* First try for a 2-insn sequence. */
3016
3017 /* These situations are preferred because the optimizer can
3018 * do more things with them:
3019 * 1) mov -1, %reg
3020 * sllx %reg, shift, %reg
3021 * 2) mov -1, %reg
3022 * srlx %reg, shift, %reg
3023 * 3) mov some_small_const, %reg
3024 * sllx %reg, shift, %reg
3025 */
3026 if (((highest_bit_set == 63
3027 || lowest_bit_set == 0)
3028 && all_bits_between_are_set != 0)
3029 || ((highest_bit_set - lowest_bit_set) < 12))
3030 {
3031 HOST_WIDE_INT the_const = -1;
3032 int shift = lowest_bit_set;
3033
3034 if ((highest_bit_set != 63
3035 && lowest_bit_set != 0)
3036 || all_bits_between_are_set == 0)
3037 {
3038 the_const =
3039 create_simple_focus_bits (high_bits, low_bits,
3040 lowest_bit_set, 0);
3041 }
3042 else if (lowest_bit_set == 0)
3043 shift = -(63 - highest_bit_set);
3044
3045 gcc_assert (SPARC_SIMM13_P (the_const));
3046 gcc_assert (shift != 0);
3047
3048 emit_insn (gen_safe_SET64 (temp, the_const));
3049 if (shift > 0)
3050 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3051 GEN_INT (shift))));
3052 else if (shift < 0)
3053 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3054 GEN_INT (-shift))));
3055 return;
3056 }
3057
3058 /* Now a range of 22 or less bits set somewhere.
3059 * 1) sethi %hi(focus_bits), %reg
3060 * sllx %reg, shift, %reg
3061 * 2) sethi %hi(focus_bits), %reg
3062 * srlx %reg, shift, %reg
3063 */
3064 if ((highest_bit_set - lowest_bit_set) < 21)
3065 {
3066 unsigned HOST_WIDE_INT focus_bits =
3067 create_simple_focus_bits (high_bits, low_bits,
3068 lowest_bit_set, 10);
3069
3070 gcc_assert (SPARC_SETHI_P (focus_bits));
3071 gcc_assert (lowest_bit_set != 10);
3072
3073 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3074
3075 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3076 if (lowest_bit_set < 10)
3077 emit_insn (gen_rtx_SET (op0,
3078 gen_rtx_LSHIFTRT (DImode, temp,
3079 GEN_INT (10 - lowest_bit_set))));
3080 else if (lowest_bit_set > 10)
3081 emit_insn (gen_rtx_SET (op0,
3082 gen_rtx_ASHIFT (DImode, temp,
3083 GEN_INT (lowest_bit_set - 10))));
3084 return;
3085 }
3086
3087 /* 1) sethi %hi(low_bits), %reg
3088 * or %reg, %lo(low_bits), %reg
3089 * 2) sethi %hi(~low_bits), %reg
3090 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3091 */
3092 if (high_bits == 0
3093 || high_bits == 0xffffffff)
3094 {
3095 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3096 (high_bits == 0xffffffff));
3097 return;
3098 }
3099
3100 /* Now, try 3-insn sequences. */
3101
3102 /* 1) sethi %hi(high_bits), %reg
3103 * or %reg, %lo(high_bits), %reg
3104 * sllx %reg, 32, %reg
3105 */
3106 if (low_bits == 0)
3107 {
3108 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3109 return;
3110 }
3111
3112 /* We may be able to do something quick
3113 when the constant is negated, so try that. */
3114 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3115 (~low_bits) & 0xfffffc00))
3116 {
3117 /* NOTE: The trailing bits get XOR'd so we need the
3118 non-negated bits, not the negated ones. */
3119 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3120
3121 if ((((~high_bits) & 0xffffffff) == 0
3122 && ((~low_bits) & 0x80000000) == 0)
3123 || (((~high_bits) & 0xffffffff) == 0xffffffff
3124 && ((~low_bits) & 0x80000000) != 0))
3125 {
3126 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3127
3128 if ((SPARC_SETHI_P (fast_int)
3129 && (~high_bits & 0xffffffff) == 0)
3130 || SPARC_SIMM13_P (fast_int))
3131 emit_insn (gen_safe_SET64 (temp, fast_int));
3132 else
3133 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3134 }
3135 else
3136 {
3137 rtx negated_const;
3138 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3139 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3140 sparc_emit_set_const64 (temp, negated_const);
3141 }
3142
3143 /* If we are XOR'ing with -1, then we should emit a one's complement
3144 instead. This way the combiner will notice logical operations
3145 such as ANDN later on and substitute. */
3146 if (trailing_bits == 0x3ff)
3147 {
3148 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3149 }
3150 else
3151 {
3152 emit_insn (gen_rtx_SET (op0,
3153 gen_safe_XOR64 (temp,
3154 (-0x400 | trailing_bits))));
3155 }
3156 return;
3157 }
3158
3159 /* 1) sethi %hi(xxx), %reg
3160 * or %reg, %lo(xxx), %reg
3161 * sllx %reg, yyy, %reg
3162 *
3163 * ??? This is just a generalized version of the low_bits==0
3164 * thing above, FIXME...
3165 */
3166 if ((highest_bit_set - lowest_bit_set) < 32)
3167 {
3168 unsigned HOST_WIDE_INT focus_bits =
3169 create_simple_focus_bits (high_bits, low_bits,
3170 lowest_bit_set, 0);
3171
3172 /* We can't get here in this state. */
3173 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3174
3175 /* So what we know is that the set bits straddle the
3176 middle of the 64-bit word. */
3177 sparc_emit_set_const64_quick2 (op0, temp,
3178 focus_bits, 0,
3179 lowest_bit_set);
3180 return;
3181 }
3182
3183 /* 1) sethi %hi(high_bits), %reg
3184 * or %reg, %lo(high_bits), %reg
3185 * sllx %reg, 32, %reg
3186 * or %reg, low_bits, %reg
3187 */
3188 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3189 {
3190 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3191 return;
3192 }
3193
3194 /* The easiest way when all else fails, is full decomposition. */
3195 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3196 }
3197
3198 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3199
3200 static bool
3201 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3202 {
3203 *p1 = SPARC_ICC_REG;
3204 *p2 = SPARC_FCC_REG;
3205 return true;
3206 }
3207
3208 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3209
3210 static unsigned int
3211 sparc_min_arithmetic_precision (void)
3212 {
3213 return 32;
3214 }
3215
3216 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3217 return the mode to be used for the comparison. For floating-point,
3218 CCFP[E]mode is used. CCNZmode should be used when the first operand
3219 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3220 processing is needed. */
3221
3222 machine_mode
3223 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3224 {
3225 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3226 {
3227 switch (op)
3228 {
3229 case EQ:
3230 case NE:
3231 case UNORDERED:
3232 case ORDERED:
3233 case UNLT:
3234 case UNLE:
3235 case UNGT:
3236 case UNGE:
3237 case UNEQ:
3238 case LTGT:
3239 return CCFPmode;
3240
3241 case LT:
3242 case LE:
3243 case GT:
3244 case GE:
3245 return CCFPEmode;
3246
3247 default:
3248 gcc_unreachable ();
3249 }
3250 }
3251 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3252 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3253 && y == const0_rtx)
3254 {
3255 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3256 return CCXNZmode;
3257 else
3258 return CCNZmode;
3259 }
3260 else
3261 {
3262 /* This is for the cmp<mode>_sne pattern. */
3263 if (GET_CODE (x) == NOT && y == constm1_rtx)
3264 {
3265 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3266 return CCXCmode;
3267 else
3268 return CCCmode;
3269 }
3270
3271 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3272 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3273 {
3274 if (GET_CODE (y) == UNSPEC
3275 && (XINT (y, 1) == UNSPEC_ADDV
3276 || XINT (y, 1) == UNSPEC_SUBV
3277 || XINT (y, 1) == UNSPEC_NEGV))
3278 return CCVmode;
3279 else
3280 return CCCmode;
3281 }
3282
3283 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3284 return CCXmode;
3285 else
3286 return CCmode;
3287 }
3288 }
3289
3290 /* Emit the compare insn and return the CC reg for a CODE comparison
3291 with operands X and Y. */
3292
3293 static rtx
3294 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3295 {
3296 machine_mode mode;
3297 rtx cc_reg;
3298
3299 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3300 return x;
3301
3302 mode = SELECT_CC_MODE (code, x, y);
3303
3304 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3305 fcc regs (cse can't tell they're really call clobbered regs and will
3306 remove a duplicate comparison even if there is an intervening function
3307 call - it will then try to reload the cc reg via an int reg which is why
3308 we need the movcc patterns). It is possible to provide the movcc
3309 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3310 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3311 to tell cse that CCFPE mode registers (even pseudos) are call
3312 clobbered. */
3313
3314 /* ??? This is an experiment. Rather than making changes to cse which may
3315 or may not be easy/clean, we do our own cse. This is possible because
3316 we will generate hard registers. Cse knows they're call clobbered (it
3317 doesn't know the same thing about pseudos). If we guess wrong, no big
3318 deal, but if we win, great! */
3319
3320 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3321 #if 1 /* experiment */
3322 {
3323 int reg;
3324 /* We cycle through the registers to ensure they're all exercised. */
3325 static int next_fcc_reg = 0;
3326 /* Previous x,y for each fcc reg. */
3327 static rtx prev_args[4][2];
3328
3329 /* Scan prev_args for x,y. */
3330 for (reg = 0; reg < 4; reg++)
3331 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3332 break;
3333 if (reg == 4)
3334 {
3335 reg = next_fcc_reg;
3336 prev_args[reg][0] = x;
3337 prev_args[reg][1] = y;
3338 next_fcc_reg = (next_fcc_reg + 1) & 3;
3339 }
3340 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3341 }
3342 #else
3343 cc_reg = gen_reg_rtx (mode);
3344 #endif /* ! experiment */
3345 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3346 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3347 else
3348 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3349
3350 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3351 will only result in an unrecognizable insn so no point in asserting. */
3352 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3353
3354 return cc_reg;
3355 }
3356
3357
3358 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3359
3360 rtx
3361 gen_compare_reg (rtx cmp)
3362 {
3363 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3364 }
3365
3366 /* This function is used for v9 only.
3367 DEST is the target of the Scc insn.
3368 CODE is the code for an Scc's comparison.
3369 X and Y are the values we compare.
3370
3371 This function is needed to turn
3372
3373 (set (reg:SI 110)
3374 (gt (reg:CCX 100 %icc)
3375 (const_int 0)))
3376 into
3377 (set (reg:SI 110)
3378 (gt:DI (reg:CCX 100 %icc)
3379 (const_int 0)))
3380
3381 IE: The instruction recognizer needs to see the mode of the comparison to
3382 find the right instruction. We could use "gt:DI" right in the
3383 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3384
3385 static int
3386 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3387 {
3388 if (! TARGET_ARCH64
3389 && (GET_MODE (x) == DImode
3390 || GET_MODE (dest) == DImode))
3391 return 0;
3392
3393 /* Try to use the movrCC insns. */
3394 if (TARGET_ARCH64
3395 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3396 && y == const0_rtx
3397 && v9_regcmp_p (compare_code))
3398 {
3399 rtx op0 = x;
3400 rtx temp;
3401
3402 /* Special case for op0 != 0. This can be done with one instruction if
3403 dest == x. */
3404
3405 if (compare_code == NE
3406 && GET_MODE (dest) == DImode
3407 && rtx_equal_p (op0, dest))
3408 {
3409 emit_insn (gen_rtx_SET (dest,
3410 gen_rtx_IF_THEN_ELSE (DImode,
3411 gen_rtx_fmt_ee (compare_code, DImode,
3412 op0, const0_rtx),
3413 const1_rtx,
3414 dest)));
3415 return 1;
3416 }
3417
3418 if (reg_overlap_mentioned_p (dest, op0))
3419 {
3420 /* Handle the case where dest == x.
3421 We "early clobber" the result. */
3422 op0 = gen_reg_rtx (GET_MODE (x));
3423 emit_move_insn (op0, x);
3424 }
3425
3426 emit_insn (gen_rtx_SET (dest, const0_rtx));
3427 if (GET_MODE (op0) != DImode)
3428 {
3429 temp = gen_reg_rtx (DImode);
3430 convert_move (temp, op0, 0);
3431 }
3432 else
3433 temp = op0;
3434 emit_insn (gen_rtx_SET (dest,
3435 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3436 gen_rtx_fmt_ee (compare_code, DImode,
3437 temp, const0_rtx),
3438 const1_rtx,
3439 dest)));
3440 return 1;
3441 }
3442 else
3443 {
3444 x = gen_compare_reg_1 (compare_code, x, y);
3445 y = const0_rtx;
3446
3447 emit_insn (gen_rtx_SET (dest, const0_rtx));
3448 emit_insn (gen_rtx_SET (dest,
3449 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3450 gen_rtx_fmt_ee (compare_code,
3451 GET_MODE (x), x, y),
3452 const1_rtx, dest)));
3453 return 1;
3454 }
3455 }
3456
3457
3458 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3459 without jumps using the addx/subx instructions. */
3460
3461 bool
3462 emit_scc_insn (rtx operands[])
3463 {
3464 rtx tem, x, y;
3465 enum rtx_code code;
3466 machine_mode mode;
3467
3468 /* The quad-word fp compare library routines all return nonzero to indicate
3469 true, which is different from the equivalent libgcc routines, so we must
3470 handle them specially here. */
3471 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3472 {
3473 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3474 GET_CODE (operands[1]));
3475 operands[2] = XEXP (operands[1], 0);
3476 operands[3] = XEXP (operands[1], 1);
3477 }
3478
3479 code = GET_CODE (operands[1]);
3480 x = operands[2];
3481 y = operands[3];
3482 mode = GET_MODE (x);
3483
3484 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3485 more applications). The exception to this is "reg != 0" which can
3486 be done in one instruction on v9 (so we do it). */
3487 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3488 {
3489 if (y != const0_rtx)
3490 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3491
3492 rtx pat = gen_rtx_SET (operands[0],
3493 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3494 x, const0_rtx));
3495
3496 /* If we can use addx/subx or addxc, add a clobber for CC. */
3497 if (mode == SImode || (code == NE && TARGET_VIS3))
3498 {
3499 rtx clobber
3500 = gen_rtx_CLOBBER (VOIDmode,
3501 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3502 SPARC_ICC_REG));
3503 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3504 }
3505
3506 emit_insn (pat);
3507 return true;
3508 }
3509
3510 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3511 if (TARGET_ARCH64
3512 && mode == DImode
3513 && !((code == LTU || code == GTU) && TARGET_VIS3)
3514 && gen_v9_scc (operands[0], code, x, y))
3515 return true;
3516
3517 /* We can do LTU and GEU using the addx/subx instructions too. And
3518 for GTU/LEU, if both operands are registers swap them and fall
3519 back to the easy case. */
3520 if (code == GTU || code == LEU)
3521 {
3522 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3523 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3524 {
3525 tem = x;
3526 x = y;
3527 y = tem;
3528 code = swap_condition (code);
3529 }
3530 }
3531
3532 if (code == LTU || code == GEU)
3533 {
3534 emit_insn (gen_rtx_SET (operands[0],
3535 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3536 gen_compare_reg_1 (code, x, y),
3537 const0_rtx)));
3538 return true;
3539 }
3540
3541 /* All the posibilities to use addx/subx based sequences has been
3542 exhausted, try for a 3 instruction sequence using v9 conditional
3543 moves. */
3544 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3545 return true;
3546
3547 /* Nope, do branches. */
3548 return false;
3549 }
3550
3551 /* Emit a conditional jump insn for the v9 architecture using comparison code
3552 CODE and jump target LABEL.
3553 This function exists to take advantage of the v9 brxx insns. */
3554
3555 static void
3556 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3557 {
3558 emit_jump_insn (gen_rtx_SET (pc_rtx,
3559 gen_rtx_IF_THEN_ELSE (VOIDmode,
3560 gen_rtx_fmt_ee (code, GET_MODE (op0),
3561 op0, const0_rtx),
3562 gen_rtx_LABEL_REF (VOIDmode, label),
3563 pc_rtx)));
3564 }
3565
3566 /* Emit a conditional jump insn for the UA2011 architecture using
3567 comparison code CODE and jump target LABEL. This function exists
3568 to take advantage of the UA2011 Compare and Branch insns. */
3569
3570 static void
3571 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3572 {
3573 rtx if_then_else;
3574
3575 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3576 gen_rtx_fmt_ee(code, GET_MODE(op0),
3577 op0, op1),
3578 gen_rtx_LABEL_REF (VOIDmode, label),
3579 pc_rtx);
3580
3581 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3582 }
3583
3584 void
3585 emit_conditional_branch_insn (rtx operands[])
3586 {
3587 /* The quad-word fp compare library routines all return nonzero to indicate
3588 true, which is different from the equivalent libgcc routines, so we must
3589 handle them specially here. */
3590 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3591 {
3592 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3593 GET_CODE (operands[0]));
3594 operands[1] = XEXP (operands[0], 0);
3595 operands[2] = XEXP (operands[0], 1);
3596 }
3597
3598 /* If we can tell early on that the comparison is against a constant
3599 that won't fit in the 5-bit signed immediate field of a cbcond,
3600 use one of the other v9 conditional branch sequences. */
3601 if (TARGET_CBCOND
3602 && GET_CODE (operands[1]) == REG
3603 && (GET_MODE (operands[1]) == SImode
3604 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3605 && (GET_CODE (operands[2]) != CONST_INT
3606 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3607 {
3608 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3609 return;
3610 }
3611
3612 if (TARGET_ARCH64 && operands[2] == const0_rtx
3613 && GET_CODE (operands[1]) == REG
3614 && GET_MODE (operands[1]) == DImode)
3615 {
3616 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3617 return;
3618 }
3619
3620 operands[1] = gen_compare_reg (operands[0]);
3621 operands[2] = const0_rtx;
3622 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3623 operands[1], operands[2]);
3624 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3625 operands[3]));
3626 }
3627
3628
3629 /* Generate a DFmode part of a hard TFmode register.
3630 REG is the TFmode hard register, LOW is 1 for the
3631 low 64bit of the register and 0 otherwise.
3632 */
3633 rtx
3634 gen_df_reg (rtx reg, int low)
3635 {
3636 int regno = REGNO (reg);
3637
3638 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3639 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3640 return gen_rtx_REG (DFmode, regno);
3641 }
3642 \f
3643 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3644 Unlike normal calls, TFmode operands are passed by reference. It is
3645 assumed that no more than 3 operands are required. */
3646
3647 static void
3648 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3649 {
3650 rtx ret_slot = NULL, arg[3], func_sym;
3651 int i;
3652
3653 /* We only expect to be called for conversions, unary, and binary ops. */
3654 gcc_assert (nargs == 2 || nargs == 3);
3655
3656 for (i = 0; i < nargs; ++i)
3657 {
3658 rtx this_arg = operands[i];
3659 rtx this_slot;
3660
3661 /* TFmode arguments and return values are passed by reference. */
3662 if (GET_MODE (this_arg) == TFmode)
3663 {
3664 int force_stack_temp;
3665
3666 force_stack_temp = 0;
3667 if (TARGET_BUGGY_QP_LIB && i == 0)
3668 force_stack_temp = 1;
3669
3670 if (GET_CODE (this_arg) == MEM
3671 && ! force_stack_temp)
3672 {
3673 tree expr = MEM_EXPR (this_arg);
3674 if (expr)
3675 mark_addressable (expr);
3676 this_arg = XEXP (this_arg, 0);
3677 }
3678 else if (CONSTANT_P (this_arg)
3679 && ! force_stack_temp)
3680 {
3681 this_slot = force_const_mem (TFmode, this_arg);
3682 this_arg = XEXP (this_slot, 0);
3683 }
3684 else
3685 {
3686 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3687
3688 /* Operand 0 is the return value. We'll copy it out later. */
3689 if (i > 0)
3690 emit_move_insn (this_slot, this_arg);
3691 else
3692 ret_slot = this_slot;
3693
3694 this_arg = XEXP (this_slot, 0);
3695 }
3696 }
3697
3698 arg[i] = this_arg;
3699 }
3700
3701 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3702
3703 if (GET_MODE (operands[0]) == TFmode)
3704 {
3705 if (nargs == 2)
3706 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3707 arg[0], GET_MODE (arg[0]),
3708 arg[1], GET_MODE (arg[1]));
3709 else
3710 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3711 arg[0], GET_MODE (arg[0]),
3712 arg[1], GET_MODE (arg[1]),
3713 arg[2], GET_MODE (arg[2]));
3714
3715 if (ret_slot)
3716 emit_move_insn (operands[0], ret_slot);
3717 }
3718 else
3719 {
3720 rtx ret;
3721
3722 gcc_assert (nargs == 2);
3723
3724 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3725 GET_MODE (operands[0]),
3726 arg[1], GET_MODE (arg[1]));
3727
3728 if (ret != operands[0])
3729 emit_move_insn (operands[0], ret);
3730 }
3731 }
3732
3733 /* Expand soft-float TFmode calls to sparc abi routines. */
3734
3735 static void
3736 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3737 {
3738 const char *func;
3739
3740 switch (code)
3741 {
3742 case PLUS:
3743 func = "_Qp_add";
3744 break;
3745 case MINUS:
3746 func = "_Qp_sub";
3747 break;
3748 case MULT:
3749 func = "_Qp_mul";
3750 break;
3751 case DIV:
3752 func = "_Qp_div";
3753 break;
3754 default:
3755 gcc_unreachable ();
3756 }
3757
3758 emit_soft_tfmode_libcall (func, 3, operands);
3759 }
3760
3761 static void
3762 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3763 {
3764 const char *func;
3765
3766 gcc_assert (code == SQRT);
3767 func = "_Qp_sqrt";
3768
3769 emit_soft_tfmode_libcall (func, 2, operands);
3770 }
3771
3772 static void
3773 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3774 {
3775 const char *func;
3776
3777 switch (code)
3778 {
3779 case FLOAT_EXTEND:
3780 switch (GET_MODE (operands[1]))
3781 {
3782 case E_SFmode:
3783 func = "_Qp_stoq";
3784 break;
3785 case E_DFmode:
3786 func = "_Qp_dtoq";
3787 break;
3788 default:
3789 gcc_unreachable ();
3790 }
3791 break;
3792
3793 case FLOAT_TRUNCATE:
3794 switch (GET_MODE (operands[0]))
3795 {
3796 case E_SFmode:
3797 func = "_Qp_qtos";
3798 break;
3799 case E_DFmode:
3800 func = "_Qp_qtod";
3801 break;
3802 default:
3803 gcc_unreachable ();
3804 }
3805 break;
3806
3807 case FLOAT:
3808 switch (GET_MODE (operands[1]))
3809 {
3810 case E_SImode:
3811 func = "_Qp_itoq";
3812 if (TARGET_ARCH64)
3813 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3814 break;
3815 case E_DImode:
3816 func = "_Qp_xtoq";
3817 break;
3818 default:
3819 gcc_unreachable ();
3820 }
3821 break;
3822
3823 case UNSIGNED_FLOAT:
3824 switch (GET_MODE (operands[1]))
3825 {
3826 case E_SImode:
3827 func = "_Qp_uitoq";
3828 if (TARGET_ARCH64)
3829 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3830 break;
3831 case E_DImode:
3832 func = "_Qp_uxtoq";
3833 break;
3834 default:
3835 gcc_unreachable ();
3836 }
3837 break;
3838
3839 case FIX:
3840 switch (GET_MODE (operands[0]))
3841 {
3842 case E_SImode:
3843 func = "_Qp_qtoi";
3844 break;
3845 case E_DImode:
3846 func = "_Qp_qtox";
3847 break;
3848 default:
3849 gcc_unreachable ();
3850 }
3851 break;
3852
3853 case UNSIGNED_FIX:
3854 switch (GET_MODE (operands[0]))
3855 {
3856 case E_SImode:
3857 func = "_Qp_qtoui";
3858 break;
3859 case E_DImode:
3860 func = "_Qp_qtoux";
3861 break;
3862 default:
3863 gcc_unreachable ();
3864 }
3865 break;
3866
3867 default:
3868 gcc_unreachable ();
3869 }
3870
3871 emit_soft_tfmode_libcall (func, 2, operands);
3872 }
3873
3874 /* Expand a hard-float tfmode operation. All arguments must be in
3875 registers. */
3876
3877 static void
3878 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3879 {
3880 rtx op, dest;
3881
3882 if (GET_RTX_CLASS (code) == RTX_UNARY)
3883 {
3884 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3885 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3886 }
3887 else
3888 {
3889 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3890 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3891 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3892 operands[1], operands[2]);
3893 }
3894
3895 if (register_operand (operands[0], VOIDmode))
3896 dest = operands[0];
3897 else
3898 dest = gen_reg_rtx (GET_MODE (operands[0]));
3899
3900 emit_insn (gen_rtx_SET (dest, op));
3901
3902 if (dest != operands[0])
3903 emit_move_insn (operands[0], dest);
3904 }
3905
3906 void
3907 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3908 {
3909 if (TARGET_HARD_QUAD)
3910 emit_hard_tfmode_operation (code, operands);
3911 else
3912 emit_soft_tfmode_binop (code, operands);
3913 }
3914
3915 void
3916 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3917 {
3918 if (TARGET_HARD_QUAD)
3919 emit_hard_tfmode_operation (code, operands);
3920 else
3921 emit_soft_tfmode_unop (code, operands);
3922 }
3923
3924 void
3925 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3926 {
3927 if (TARGET_HARD_QUAD)
3928 emit_hard_tfmode_operation (code, operands);
3929 else
3930 emit_soft_tfmode_cvt (code, operands);
3931 }
3932 \f
3933 /* Return nonzero if a branch/jump/call instruction will be emitting
3934 nop into its delay slot. */
3935
3936 int
3937 empty_delay_slot (rtx_insn *insn)
3938 {
3939 rtx seq;
3940
3941 /* If no previous instruction (should not happen), return true. */
3942 if (PREV_INSN (insn) == NULL)
3943 return 1;
3944
3945 seq = NEXT_INSN (PREV_INSN (insn));
3946 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3947 return 0;
3948
3949 return 1;
3950 }
3951
3952 /* Return nonzero if we should emit a nop after a cbcond instruction.
3953 The cbcond instruction does not have a delay slot, however there is
3954 a severe performance penalty if a control transfer appears right
3955 after a cbcond. Therefore we emit a nop when we detect this
3956 situation. */
3957
3958 int
3959 emit_cbcond_nop (rtx_insn *insn)
3960 {
3961 rtx next = next_active_insn (insn);
3962
3963 if (!next)
3964 return 1;
3965
3966 if (NONJUMP_INSN_P (next)
3967 && GET_CODE (PATTERN (next)) == SEQUENCE)
3968 next = XVECEXP (PATTERN (next), 0, 0);
3969 else if (CALL_P (next)
3970 && GET_CODE (PATTERN (next)) == PARALLEL)
3971 {
3972 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3973
3974 if (GET_CODE (delay) == RETURN)
3975 {
3976 /* It's a sibling call. Do not emit the nop if we're going
3977 to emit something other than the jump itself as the first
3978 instruction of the sibcall sequence. */
3979 if (sparc_leaf_function_p || TARGET_FLAT)
3980 return 0;
3981 }
3982 }
3983
3984 if (NONJUMP_INSN_P (next))
3985 return 0;
3986
3987 return 1;
3988 }
3989
3990 /* Return nonzero if TRIAL can go into the call delay slot. */
3991
3992 int
3993 eligible_for_call_delay (rtx_insn *trial)
3994 {
3995 rtx pat;
3996
3997 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3998 return 0;
3999
4000 /* Binutils allows
4001 call __tls_get_addr, %tgd_call (foo)
4002 add %l7, %o0, %o0, %tgd_add (foo)
4003 while Sun as/ld does not. */
4004 if (TARGET_GNU_TLS || !TARGET_TLS)
4005 return 1;
4006
4007 pat = PATTERN (trial);
4008
4009 /* We must reject tgd_add{32|64}, i.e.
4010 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
4011 and tldm_add{32|64}, i.e.
4012 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
4013 for Sun as/ld. */
4014 if (GET_CODE (pat) == SET
4015 && GET_CODE (SET_SRC (pat)) == PLUS)
4016 {
4017 rtx unspec = XEXP (SET_SRC (pat), 1);
4018
4019 if (GET_CODE (unspec) == UNSPEC
4020 && (XINT (unspec, 1) == UNSPEC_TLSGD
4021 || XINT (unspec, 1) == UNSPEC_TLSLDM))
4022 return 0;
4023 }
4024
4025 return 1;
4026 }
4027
4028 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4029 instruction. RETURN_P is true if the v9 variant 'return' is to be
4030 considered in the test too.
4031
4032 TRIAL must be a SET whose destination is a REG appropriate for the
4033 'restore' instruction or, if RETURN_P is true, for the 'return'
4034 instruction. */
4035
4036 static int
4037 eligible_for_restore_insn (rtx trial, bool return_p)
4038 {
4039 rtx pat = PATTERN (trial);
4040 rtx src = SET_SRC (pat);
4041 bool src_is_freg = false;
4042 rtx src_reg;
4043
4044 /* Since we now can do moves between float and integer registers when
4045 VIS3 is enabled, we have to catch this case. We can allow such
4046 moves when doing a 'return' however. */
4047 src_reg = src;
4048 if (GET_CODE (src_reg) == SUBREG)
4049 src_reg = SUBREG_REG (src_reg);
4050 if (GET_CODE (src_reg) == REG
4051 && SPARC_FP_REG_P (REGNO (src_reg)))
4052 src_is_freg = true;
4053
4054 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4055 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4056 && arith_operand (src, GET_MODE (src))
4057 && ! src_is_freg)
4058 {
4059 if (TARGET_ARCH64)
4060 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4061 else
4062 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4063 }
4064
4065 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4066 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4067 && arith_double_operand (src, GET_MODE (src))
4068 && ! src_is_freg)
4069 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4070
4071 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4072 else if (! TARGET_FPU && register_operand (src, SFmode))
4073 return 1;
4074
4075 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4076 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4077 return 1;
4078
4079 /* If we have the 'return' instruction, anything that does not use
4080 local or output registers and can go into a delay slot wins. */
4081 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4082 return 1;
4083
4084 /* The 'restore src1,src2,dest' pattern for SImode. */
4085 else if (GET_CODE (src) == PLUS
4086 && register_operand (XEXP (src, 0), SImode)
4087 && arith_operand (XEXP (src, 1), SImode))
4088 return 1;
4089
4090 /* The 'restore src1,src2,dest' pattern for DImode. */
4091 else if (GET_CODE (src) == PLUS
4092 && register_operand (XEXP (src, 0), DImode)
4093 && arith_double_operand (XEXP (src, 1), DImode))
4094 return 1;
4095
4096 /* The 'restore src1,%lo(src2),dest' pattern. */
4097 else if (GET_CODE (src) == LO_SUM
4098 && ! TARGET_CM_MEDMID
4099 && ((register_operand (XEXP (src, 0), SImode)
4100 && immediate_operand (XEXP (src, 1), SImode))
4101 || (TARGET_ARCH64
4102 && register_operand (XEXP (src, 0), DImode)
4103 && immediate_operand (XEXP (src, 1), DImode))))
4104 return 1;
4105
4106 /* The 'restore src,src,dest' pattern. */
4107 else if (GET_CODE (src) == ASHIFT
4108 && (register_operand (XEXP (src, 0), SImode)
4109 || register_operand (XEXP (src, 0), DImode))
4110 && XEXP (src, 1) == const1_rtx)
4111 return 1;
4112
4113 return 0;
4114 }
4115
4116 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4117
4118 int
4119 eligible_for_return_delay (rtx_insn *trial)
4120 {
4121 int regno;
4122 rtx pat;
4123
4124 /* If the function uses __builtin_eh_return, the eh_return machinery
4125 occupies the delay slot. */
4126 if (crtl->calls_eh_return)
4127 return 0;
4128
4129 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4130 return 0;
4131
4132 /* In the case of a leaf or flat function, anything can go into the slot. */
4133 if (sparc_leaf_function_p || TARGET_FLAT)
4134 return 1;
4135
4136 if (!NONJUMP_INSN_P (trial))
4137 return 0;
4138
4139 pat = PATTERN (trial);
4140 if (GET_CODE (pat) == PARALLEL)
4141 {
4142 int i;
4143
4144 if (! TARGET_V9)
4145 return 0;
4146 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4147 {
4148 rtx expr = XVECEXP (pat, 0, i);
4149 if (GET_CODE (expr) != SET)
4150 return 0;
4151 if (GET_CODE (SET_DEST (expr)) != REG)
4152 return 0;
4153 regno = REGNO (SET_DEST (expr));
4154 if (regno >= 8 && regno < 24)
4155 return 0;
4156 }
4157 return !epilogue_renumber (&pat, 1);
4158 }
4159
4160 if (GET_CODE (pat) != SET)
4161 return 0;
4162
4163 if (GET_CODE (SET_DEST (pat)) != REG)
4164 return 0;
4165
4166 regno = REGNO (SET_DEST (pat));
4167
4168 /* Otherwise, only operations which can be done in tandem with
4169 a `restore' or `return' insn can go into the delay slot. */
4170 if (regno >= 8 && regno < 24)
4171 return 0;
4172
4173 /* If this instruction sets up floating point register and we have a return
4174 instruction, it can probably go in. But restore will not work
4175 with FP_REGS. */
4176 if (! SPARC_INT_REG_P (regno))
4177 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4178
4179 return eligible_for_restore_insn (trial, true);
4180 }
4181
4182 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4183
4184 int
4185 eligible_for_sibcall_delay (rtx_insn *trial)
4186 {
4187 rtx pat;
4188
4189 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4190 return 0;
4191
4192 if (!NONJUMP_INSN_P (trial))
4193 return 0;
4194
4195 pat = PATTERN (trial);
4196
4197 if (sparc_leaf_function_p || TARGET_FLAT)
4198 {
4199 /* If the tail call is done using the call instruction,
4200 we have to restore %o7 in the delay slot. */
4201 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4202 return 0;
4203
4204 /* %g1 is used to build the function address */
4205 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4206 return 0;
4207
4208 return 1;
4209 }
4210
4211 if (GET_CODE (pat) != SET)
4212 return 0;
4213
4214 /* Otherwise, only operations which can be done in tandem with
4215 a `restore' insn can go into the delay slot. */
4216 if (GET_CODE (SET_DEST (pat)) != REG
4217 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4218 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4219 return 0;
4220
4221 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4222 in most cases. */
4223 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4224 return 0;
4225
4226 return eligible_for_restore_insn (trial, false);
4227 }
4228 \f
4229 /* Determine if it's legal to put X into the constant pool. This
4230 is not possible if X contains the address of a symbol that is
4231 not constant (TLS) or not known at final link time (PIC). */
4232
4233 static bool
4234 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4235 {
4236 switch (GET_CODE (x))
4237 {
4238 case CONST_INT:
4239 case CONST_WIDE_INT:
4240 case CONST_DOUBLE:
4241 case CONST_VECTOR:
4242 /* Accept all non-symbolic constants. */
4243 return false;
4244
4245 case LABEL_REF:
4246 /* Labels are OK iff we are non-PIC. */
4247 return flag_pic != 0;
4248
4249 case SYMBOL_REF:
4250 /* 'Naked' TLS symbol references are never OK,
4251 non-TLS symbols are OK iff we are non-PIC. */
4252 if (SYMBOL_REF_TLS_MODEL (x))
4253 return true;
4254 else
4255 return flag_pic != 0;
4256
4257 case CONST:
4258 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4259 case PLUS:
4260 case MINUS:
4261 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4262 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4263 case UNSPEC:
4264 return true;
4265 default:
4266 gcc_unreachable ();
4267 }
4268 }
4269 \f
4270 /* Global Offset Table support. */
4271 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4272 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4273
4274 /* Return the SYMBOL_REF for the Global Offset Table. */
4275
4276 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4277
4278 static rtx
4279 sparc_got (void)
4280 {
4281 if (!sparc_got_symbol)
4282 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4283
4284 return sparc_got_symbol;
4285 }
4286
4287 /* Ensure that we are not using patterns that are not OK with PIC. */
4288
4289 int
4290 check_pic (int i)
4291 {
4292 rtx op;
4293
4294 switch (flag_pic)
4295 {
4296 case 1:
4297 op = recog_data.operand[i];
4298 gcc_assert (GET_CODE (op) != SYMBOL_REF
4299 && (GET_CODE (op) != CONST
4300 || (GET_CODE (XEXP (op, 0)) == MINUS
4301 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4302 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4303 /* fallthrough */
4304 case 2:
4305 default:
4306 return 1;
4307 }
4308 }
4309
4310 /* Return true if X is an address which needs a temporary register when
4311 reloaded while generating PIC code. */
4312
4313 int
4314 pic_address_needs_scratch (rtx x)
4315 {
4316 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4317 if (GET_CODE (x) == CONST
4318 && GET_CODE (XEXP (x, 0)) == PLUS
4319 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4320 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4321 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4322 return 1;
4323
4324 return 0;
4325 }
4326
4327 /* Determine if a given RTX is a valid constant. We already know this
4328 satisfies CONSTANT_P. */
4329
4330 static bool
4331 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4332 {
4333 switch (GET_CODE (x))
4334 {
4335 case CONST:
4336 case SYMBOL_REF:
4337 if (sparc_tls_referenced_p (x))
4338 return false;
4339 break;
4340
4341 case CONST_DOUBLE:
4342 /* Floating point constants are generally not ok.
4343 The only exception is 0.0 and all-ones in VIS. */
4344 if (TARGET_VIS
4345 && SCALAR_FLOAT_MODE_P (mode)
4346 && (const_zero_operand (x, mode)
4347 || const_all_ones_operand (x, mode)))
4348 return true;
4349
4350 return false;
4351
4352 case CONST_VECTOR:
4353 /* Vector constants are generally not ok.
4354 The only exception is 0 or -1 in VIS. */
4355 if (TARGET_VIS
4356 && (const_zero_operand (x, mode)
4357 || const_all_ones_operand (x, mode)))
4358 return true;
4359
4360 return false;
4361
4362 default:
4363 break;
4364 }
4365
4366 return true;
4367 }
4368
4369 /* Determine if a given RTX is a valid constant address. */
4370
4371 bool
4372 constant_address_p (rtx x)
4373 {
4374 switch (GET_CODE (x))
4375 {
4376 case LABEL_REF:
4377 case CONST_INT:
4378 case HIGH:
4379 return true;
4380
4381 case CONST:
4382 if (flag_pic && pic_address_needs_scratch (x))
4383 return false;
4384 return sparc_legitimate_constant_p (Pmode, x);
4385
4386 case SYMBOL_REF:
4387 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4388
4389 default:
4390 return false;
4391 }
4392 }
4393
4394 /* Nonzero if the constant value X is a legitimate general operand
4395 when generating PIC code. It is given that flag_pic is on and
4396 that X satisfies CONSTANT_P. */
4397
4398 bool
4399 legitimate_pic_operand_p (rtx x)
4400 {
4401 if (pic_address_needs_scratch (x))
4402 return false;
4403 if (sparc_tls_referenced_p (x))
4404 return false;
4405 return true;
4406 }
4407
4408 /* Return true if X is a representation of the PIC register. */
4409
4410 static bool
4411 sparc_pic_register_p (rtx x)
4412 {
4413 if (!REG_P (x) || !pic_offset_table_rtx)
4414 return false;
4415
4416 if (x == pic_offset_table_rtx)
4417 return true;
4418
4419 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4420 && (HARD_REGISTER_P (x) || lra_in_progress)
4421 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4422 return true;
4423
4424 return false;
4425 }
4426
4427 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4428 (CONST_INT_P (X) \
4429 && INTVAL (X) >= -0x1000 \
4430 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4431
4432 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4433 (CONST_INT_P (X) \
4434 && INTVAL (X) >= -0x1000 \
4435 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4436
4437 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4438
4439 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4440 ordinarily. This changes a bit when generating PIC. */
4441
4442 static bool
4443 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4444 {
4445 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4446
4447 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4448 rs1 = addr;
4449 else if (GET_CODE (addr) == PLUS)
4450 {
4451 rs1 = XEXP (addr, 0);
4452 rs2 = XEXP (addr, 1);
4453
4454 /* Canonicalize. REG comes first, if there are no regs,
4455 LO_SUM comes first. */
4456 if (!REG_P (rs1)
4457 && GET_CODE (rs1) != SUBREG
4458 && (REG_P (rs2)
4459 || GET_CODE (rs2) == SUBREG
4460 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4461 {
4462 rs1 = XEXP (addr, 1);
4463 rs2 = XEXP (addr, 0);
4464 }
4465
4466 if ((flag_pic == 1
4467 && sparc_pic_register_p (rs1)
4468 && !REG_P (rs2)
4469 && GET_CODE (rs2) != SUBREG
4470 && GET_CODE (rs2) != LO_SUM
4471 && GET_CODE (rs2) != MEM
4472 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4473 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4474 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4475 || ((REG_P (rs1)
4476 || GET_CODE (rs1) == SUBREG)
4477 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4478 {
4479 imm1 = rs2;
4480 rs2 = NULL;
4481 }
4482 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4483 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4484 {
4485 /* We prohibit REG + REG for TFmode when there are no quad move insns
4486 and we consequently need to split. We do this because REG+REG
4487 is not an offsettable address. If we get the situation in reload
4488 where source and destination of a movtf pattern are both MEMs with
4489 REG+REG address, then only one of them gets converted to an
4490 offsettable address. */
4491 if (mode == TFmode
4492 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4493 return 0;
4494
4495 /* Likewise for TImode, but in all cases. */
4496 if (mode == TImode)
4497 return 0;
4498
4499 /* We prohibit REG + REG on ARCH32 if not optimizing for
4500 DFmode/DImode because then mem_min_alignment is likely to be zero
4501 after reload and the forced split would lack a matching splitter
4502 pattern. */
4503 if (TARGET_ARCH32 && !optimize
4504 && (mode == DFmode || mode == DImode))
4505 return 0;
4506 }
4507 else if (USE_AS_OFFSETABLE_LO10
4508 && GET_CODE (rs1) == LO_SUM
4509 && TARGET_ARCH64
4510 && ! TARGET_CM_MEDMID
4511 && RTX_OK_FOR_OLO10_P (rs2, mode))
4512 {
4513 rs2 = NULL;
4514 imm1 = XEXP (rs1, 1);
4515 rs1 = XEXP (rs1, 0);
4516 if (!CONSTANT_P (imm1)
4517 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4518 return 0;
4519 }
4520 }
4521 else if (GET_CODE (addr) == LO_SUM)
4522 {
4523 rs1 = XEXP (addr, 0);
4524 imm1 = XEXP (addr, 1);
4525
4526 if (!CONSTANT_P (imm1)
4527 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4528 return 0;
4529
4530 /* We can't allow TFmode in 32-bit mode, because an offset greater
4531 than the alignment (8) may cause the LO_SUM to overflow. */
4532 if (mode == TFmode && TARGET_ARCH32)
4533 return 0;
4534
4535 /* During reload, accept the HIGH+LO_SUM construct generated by
4536 sparc_legitimize_reload_address. */
4537 if (reload_in_progress
4538 && GET_CODE (rs1) == HIGH
4539 && XEXP (rs1, 0) == imm1)
4540 return 1;
4541 }
4542 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4543 return 1;
4544 else
4545 return 0;
4546
4547 if (GET_CODE (rs1) == SUBREG)
4548 rs1 = SUBREG_REG (rs1);
4549 if (!REG_P (rs1))
4550 return 0;
4551
4552 if (rs2)
4553 {
4554 if (GET_CODE (rs2) == SUBREG)
4555 rs2 = SUBREG_REG (rs2);
4556 if (!REG_P (rs2))
4557 return 0;
4558 }
4559
4560 if (strict)
4561 {
4562 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4563 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4564 return 0;
4565 }
4566 else
4567 {
4568 if ((! SPARC_INT_REG_P (REGNO (rs1))
4569 && REGNO (rs1) != FRAME_POINTER_REGNUM
4570 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4571 || (rs2
4572 && (! SPARC_INT_REG_P (REGNO (rs2))
4573 && REGNO (rs2) != FRAME_POINTER_REGNUM
4574 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4575 return 0;
4576 }
4577 return 1;
4578 }
4579
4580 /* Return the SYMBOL_REF for the tls_get_addr function. */
4581
4582 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4583
4584 static rtx
4585 sparc_tls_get_addr (void)
4586 {
4587 if (!sparc_tls_symbol)
4588 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4589
4590 return sparc_tls_symbol;
4591 }
4592
4593 /* Return the Global Offset Table to be used in TLS mode. */
4594
4595 static rtx
4596 sparc_tls_got (void)
4597 {
4598 /* In PIC mode, this is just the PIC offset table. */
4599 if (flag_pic)
4600 {
4601 crtl->uses_pic_offset_table = 1;
4602 return pic_offset_table_rtx;
4603 }
4604
4605 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4606 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4607 if (TARGET_SUN_TLS && TARGET_ARCH32)
4608 {
4609 load_got_register ();
4610 return global_offset_table_rtx;
4611 }
4612
4613 /* In all other cases, we load a new pseudo with the GOT symbol. */
4614 return copy_to_reg (sparc_got ());
4615 }
4616
4617 /* Return true if X contains a thread-local symbol. */
4618
4619 static bool
4620 sparc_tls_referenced_p (rtx x)
4621 {
4622 if (!TARGET_HAVE_TLS)
4623 return false;
4624
4625 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4626 x = XEXP (XEXP (x, 0), 0);
4627
4628 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4629 return true;
4630
4631 /* That's all we handle in sparc_legitimize_tls_address for now. */
4632 return false;
4633 }
4634
4635 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4636 this (thread-local) address. */
4637
4638 static rtx
4639 sparc_legitimize_tls_address (rtx addr)
4640 {
4641 rtx temp1, temp2, temp3, ret, o0, got;
4642 rtx_insn *insn;
4643
4644 gcc_assert (can_create_pseudo_p ());
4645
4646 if (GET_CODE (addr) == SYMBOL_REF)
4647 /* Although the various sethi/or sequences generate SImode values, many of
4648 them can be transformed by the linker when relaxing and, if relaxing to
4649 local-exec, will become a sethi/xor pair, which is signed and therefore
4650 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4651 values be spilled onto the stack in 64-bit mode. */
4652 switch (SYMBOL_REF_TLS_MODEL (addr))
4653 {
4654 case TLS_MODEL_GLOBAL_DYNAMIC:
4655 start_sequence ();
4656 temp1 = gen_reg_rtx (Pmode);
4657 temp2 = gen_reg_rtx (Pmode);
4658 ret = gen_reg_rtx (Pmode);
4659 o0 = gen_rtx_REG (Pmode, 8);
4660 got = sparc_tls_got ();
4661 if (TARGET_ARCH32)
4662 {
4663 emit_insn (gen_tgd_hi22si (temp1, addr));
4664 emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4665 emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4666 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4667 addr, const1_rtx));
4668 }
4669 else
4670 {
4671 emit_insn (gen_tgd_hi22di (temp1, addr));
4672 emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4673 emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4674 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4675 addr, const1_rtx));
4676 }
4677 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4678 RTL_CONST_CALL_P (insn) = 1;
4679 insn = get_insns ();
4680 end_sequence ();
4681 emit_libcall_block (insn, ret, o0, addr);
4682 break;
4683
4684 case TLS_MODEL_LOCAL_DYNAMIC:
4685 start_sequence ();
4686 temp1 = gen_reg_rtx (Pmode);
4687 temp2 = gen_reg_rtx (Pmode);
4688 temp3 = gen_reg_rtx (Pmode);
4689 ret = gen_reg_rtx (Pmode);
4690 o0 = gen_rtx_REG (Pmode, 8);
4691 got = sparc_tls_got ();
4692 if (TARGET_ARCH32)
4693 {
4694 emit_insn (gen_tldm_hi22si (temp1));
4695 emit_insn (gen_tldm_lo10si (temp2, temp1));
4696 emit_insn (gen_tldm_addsi (o0, got, temp2));
4697 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4698 const1_rtx));
4699 }
4700 else
4701 {
4702 emit_insn (gen_tldm_hi22di (temp1));
4703 emit_insn (gen_tldm_lo10di (temp2, temp1));
4704 emit_insn (gen_tldm_adddi (o0, got, temp2));
4705 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4706 const1_rtx));
4707 }
4708 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4709 RTL_CONST_CALL_P (insn) = 1;
4710 insn = get_insns ();
4711 end_sequence ();
4712 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4713 share the LD_BASE result with other LD model accesses. */
4714 emit_libcall_block (insn, temp3, o0,
4715 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4716 UNSPEC_TLSLD_BASE));
4717 temp1 = gen_reg_rtx (Pmode);
4718 temp2 = gen_reg_rtx (Pmode);
4719 if (TARGET_ARCH32)
4720 {
4721 emit_insn (gen_tldo_hix22si (temp1, addr));
4722 emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4723 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4724 }
4725 else
4726 {
4727 emit_insn (gen_tldo_hix22di (temp1, addr));
4728 emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4729 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4730 }
4731 break;
4732
4733 case TLS_MODEL_INITIAL_EXEC:
4734 temp1 = gen_reg_rtx (Pmode);
4735 temp2 = gen_reg_rtx (Pmode);
4736 temp3 = gen_reg_rtx (Pmode);
4737 got = sparc_tls_got ();
4738 if (TARGET_ARCH32)
4739 {
4740 emit_insn (gen_tie_hi22si (temp1, addr));
4741 emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4742 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4743 }
4744 else
4745 {
4746 emit_insn (gen_tie_hi22di (temp1, addr));
4747 emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4748 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4749 }
4750 if (TARGET_SUN_TLS)
4751 {
4752 ret = gen_reg_rtx (Pmode);
4753 if (TARGET_ARCH32)
4754 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4755 temp3, addr));
4756 else
4757 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4758 temp3, addr));
4759 }
4760 else
4761 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4762 break;
4763
4764 case TLS_MODEL_LOCAL_EXEC:
4765 temp1 = gen_reg_rtx (Pmode);
4766 temp2 = gen_reg_rtx (Pmode);
4767 if (TARGET_ARCH32)
4768 {
4769 emit_insn (gen_tle_hix22si (temp1, addr));
4770 emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4771 }
4772 else
4773 {
4774 emit_insn (gen_tle_hix22di (temp1, addr));
4775 emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4776 }
4777 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4778 break;
4779
4780 default:
4781 gcc_unreachable ();
4782 }
4783
4784 else if (GET_CODE (addr) == CONST)
4785 {
4786 rtx base, offset;
4787
4788 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4789
4790 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4791 offset = XEXP (XEXP (addr, 0), 1);
4792
4793 base = force_operand (base, NULL_RTX);
4794 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4795 offset = force_reg (Pmode, offset);
4796 ret = gen_rtx_PLUS (Pmode, base, offset);
4797 }
4798
4799 else
4800 gcc_unreachable (); /* for now ... */
4801
4802 return ret;
4803 }
4804
4805 /* Legitimize PIC addresses. If the address is already position-independent,
4806 we return ORIG. Newly generated position-independent addresses go into a
4807 reg. This is REG if nonzero, otherwise we allocate register(s) as
4808 necessary. */
4809
4810 static rtx
4811 sparc_legitimize_pic_address (rtx orig, rtx reg)
4812 {
4813 if (GET_CODE (orig) == SYMBOL_REF
4814 /* See the comment in sparc_expand_move. */
4815 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4816 {
4817 bool gotdata_op = false;
4818 rtx pic_ref, address;
4819 rtx_insn *insn;
4820
4821 if (!reg)
4822 {
4823 gcc_assert (can_create_pseudo_p ());
4824 reg = gen_reg_rtx (Pmode);
4825 }
4826
4827 if (flag_pic == 2)
4828 {
4829 /* If not during reload, allocate another temp reg here for loading
4830 in the address, so that these instructions can be optimized
4831 properly. */
4832 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4833
4834 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4835 won't get confused into thinking that these two instructions
4836 are loading in the true address of the symbol. If in the
4837 future a PIC rtx exists, that should be used instead. */
4838 if (TARGET_ARCH64)
4839 {
4840 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4841 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4842 }
4843 else
4844 {
4845 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4846 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4847 }
4848
4849 address = temp_reg;
4850 gotdata_op = true;
4851 }
4852 else
4853 address = orig;
4854
4855 crtl->uses_pic_offset_table = 1;
4856 if (gotdata_op)
4857 {
4858 if (TARGET_ARCH64)
4859 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4860 pic_offset_table_rtx,
4861 address, orig));
4862 else
4863 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4864 pic_offset_table_rtx,
4865 address, orig));
4866 }
4867 else
4868 {
4869 pic_ref
4870 = gen_const_mem (Pmode,
4871 gen_rtx_PLUS (Pmode,
4872 pic_offset_table_rtx, address));
4873 insn = emit_move_insn (reg, pic_ref);
4874 }
4875
4876 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4877 by loop. */
4878 set_unique_reg_note (insn, REG_EQUAL, orig);
4879 return reg;
4880 }
4881 else if (GET_CODE (orig) == CONST)
4882 {
4883 rtx base, offset;
4884
4885 if (GET_CODE (XEXP (orig, 0)) == PLUS
4886 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4887 return orig;
4888
4889 if (!reg)
4890 {
4891 gcc_assert (can_create_pseudo_p ());
4892 reg = gen_reg_rtx (Pmode);
4893 }
4894
4895 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4896 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4897 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4898 base == reg ? NULL_RTX : reg);
4899
4900 if (GET_CODE (offset) == CONST_INT)
4901 {
4902 if (SMALL_INT (offset))
4903 return plus_constant (Pmode, base, INTVAL (offset));
4904 else if (can_create_pseudo_p ())
4905 offset = force_reg (Pmode, offset);
4906 else
4907 /* If we reach here, then something is seriously wrong. */
4908 gcc_unreachable ();
4909 }
4910 return gen_rtx_PLUS (Pmode, base, offset);
4911 }
4912 else if (GET_CODE (orig) == LABEL_REF)
4913 /* ??? We ought to be checking that the register is live instead, in case
4914 it is eliminated. */
4915 crtl->uses_pic_offset_table = 1;
4916
4917 return orig;
4918 }
4919
4920 /* Try machine-dependent ways of modifying an illegitimate address X
4921 to be legitimate. If we find one, return the new, valid address.
4922
4923 OLDX is the address as it was before break_out_memory_refs was called.
4924 In some cases it is useful to look at this to decide what needs to be done.
4925
4926 MODE is the mode of the operand pointed to by X.
4927
4928 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4929
4930 static rtx
4931 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4932 machine_mode mode)
4933 {
4934 rtx orig_x = x;
4935
4936 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4937 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4938 force_operand (XEXP (x, 0), NULL_RTX));
4939 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4940 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4941 force_operand (XEXP (x, 1), NULL_RTX));
4942 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4943 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4944 XEXP (x, 1));
4945 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4946 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4947 force_operand (XEXP (x, 1), NULL_RTX));
4948
4949 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4950 return x;
4951
4952 if (sparc_tls_referenced_p (x))
4953 x = sparc_legitimize_tls_address (x);
4954 else if (flag_pic)
4955 x = sparc_legitimize_pic_address (x, NULL_RTX);
4956 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4957 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4958 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4959 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4960 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4961 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4962 else if (GET_CODE (x) == SYMBOL_REF
4963 || GET_CODE (x) == CONST
4964 || GET_CODE (x) == LABEL_REF)
4965 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4966
4967 return x;
4968 }
4969
4970 /* Delegitimize an address that was legitimized by the above function. */
4971
4972 static rtx
4973 sparc_delegitimize_address (rtx x)
4974 {
4975 x = delegitimize_mem_from_attrs (x);
4976
4977 if (GET_CODE (x) == LO_SUM)
4978 x = XEXP (x, 1);
4979
4980 if (GET_CODE (x) == UNSPEC)
4981 switch (XINT (x, 1))
4982 {
4983 case UNSPEC_MOVE_PIC:
4984 case UNSPEC_TLSLE:
4985 x = XVECEXP (x, 0, 0);
4986 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4987 break;
4988 case UNSPEC_MOVE_GOTDATA:
4989 x = XVECEXP (x, 0, 2);
4990 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4991 break;
4992 default:
4993 break;
4994 }
4995
4996 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4997 if (GET_CODE (x) == MINUS
4998 && (XEXP (x, 0) == global_offset_table_rtx
4999 || sparc_pic_register_p (XEXP (x, 0))))
5000 {
5001 rtx y = XEXP (x, 1);
5002
5003 if (GET_CODE (y) == LO_SUM)
5004 y = XEXP (y, 1);
5005
5006 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5007 {
5008 x = XVECEXP (y, 0, 0);
5009 gcc_assert (GET_CODE (x) == LABEL_REF
5010 || (GET_CODE (x) == CONST
5011 && GET_CODE (XEXP (x, 0)) == PLUS
5012 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5013 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5014 }
5015 }
5016
5017 return x;
5018 }
5019
5020 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5021 replace the input X, or the original X if no replacement is called for.
5022 The output parameter *WIN is 1 if the calling macro should goto WIN,
5023 0 if it should not.
5024
5025 For SPARC, we wish to handle addresses by splitting them into
5026 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5027 This cuts the number of extra insns by one.
5028
5029 Do nothing when generating PIC code and the address is a symbolic
5030 operand or requires a scratch register. */
5031
5032 rtx
5033 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5034 int opnum, int type,
5035 int ind_levels ATTRIBUTE_UNUSED, int *win)
5036 {
5037 /* Decompose SImode constants into HIGH+LO_SUM. */
5038 if (CONSTANT_P (x)
5039 && (mode != TFmode || TARGET_ARCH64)
5040 && GET_MODE (x) == SImode
5041 && GET_CODE (x) != LO_SUM
5042 && GET_CODE (x) != HIGH
5043 && sparc_cmodel <= CM_MEDLOW
5044 && !(flag_pic
5045 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5046 {
5047 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5048 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5049 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5050 opnum, (enum reload_type)type);
5051 *win = 1;
5052 return x;
5053 }
5054
5055 /* We have to recognize what we have already generated above. */
5056 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5057 {
5058 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5059 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5060 opnum, (enum reload_type)type);
5061 *win = 1;
5062 return x;
5063 }
5064
5065 *win = 0;
5066 return x;
5067 }
5068
5069 /* Return true if ADDR (a legitimate address expression)
5070 has an effect that depends on the machine mode it is used for.
5071
5072 In PIC mode,
5073
5074 (mem:HI [%l7+a])
5075
5076 is not equivalent to
5077
5078 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5079
5080 because [%l7+a+1] is interpreted as the address of (a+1). */
5081
5082
5083 static bool
5084 sparc_mode_dependent_address_p (const_rtx addr,
5085 addr_space_t as ATTRIBUTE_UNUSED)
5086 {
5087 if (GET_CODE (addr) == PLUS
5088 && sparc_pic_register_p (XEXP (addr, 0))
5089 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5090 return true;
5091
5092 return false;
5093 }
5094
5095 #ifdef HAVE_GAS_HIDDEN
5096 # define USE_HIDDEN_LINKONCE 1
5097 #else
5098 # define USE_HIDDEN_LINKONCE 0
5099 #endif
5100
5101 static void
5102 get_pc_thunk_name (char name[32], unsigned int regno)
5103 {
5104 const char *reg_name = reg_names[regno];
5105
5106 /* Skip the leading '%' as that cannot be used in a
5107 symbol name. */
5108 reg_name += 1;
5109
5110 if (USE_HIDDEN_LINKONCE)
5111 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
5112 else
5113 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
5114 }
5115
5116 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
5117
5118 static rtx
5119 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
5120 {
5121 int orig_flag_pic = flag_pic;
5122 rtx insn;
5123
5124 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
5125 flag_pic = 0;
5126 if (TARGET_ARCH64)
5127 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
5128 else
5129 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
5130 flag_pic = orig_flag_pic;
5131
5132 return insn;
5133 }
5134
5135 /* Emit code to load the GOT register. */
5136
5137 void
5138 load_got_register (void)
5139 {
5140 if (!global_offset_table_rtx)
5141 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
5142
5143 if (TARGET_VXWORKS_RTP)
5144 emit_insn (gen_vxworks_load_got ());
5145 else
5146 {
5147 /* The GOT symbol is subject to a PC-relative relocation so we need a
5148 helper function to add the PC value and thus get the final value. */
5149 if (!got_helper_rtx)
5150 {
5151 char name[32];
5152 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
5153 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5154 }
5155
5156 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
5157 got_helper_rtx));
5158 }
5159 }
5160
5161 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5162 address of the call target. */
5163
5164 void
5165 sparc_emit_call_insn (rtx pat, rtx addr)
5166 {
5167 rtx_insn *insn;
5168
5169 insn = emit_call_insn (pat);
5170
5171 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5172 if (TARGET_VXWORKS_RTP
5173 && flag_pic
5174 && GET_CODE (addr) == SYMBOL_REF
5175 && (SYMBOL_REF_DECL (addr)
5176 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5177 : !SYMBOL_REF_LOCAL_P (addr)))
5178 {
5179 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5180 crtl->uses_pic_offset_table = 1;
5181 }
5182 }
5183 \f
5184 /* Return 1 if RTX is a MEM which is known to be aligned to at
5185 least a DESIRED byte boundary. */
5186
5187 int
5188 mem_min_alignment (rtx mem, int desired)
5189 {
5190 rtx addr, base, offset;
5191
5192 /* If it's not a MEM we can't accept it. */
5193 if (GET_CODE (mem) != MEM)
5194 return 0;
5195
5196 /* Obviously... */
5197 if (!TARGET_UNALIGNED_DOUBLES
5198 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5199 return 1;
5200
5201 /* ??? The rest of the function predates MEM_ALIGN so
5202 there is probably a bit of redundancy. */
5203 addr = XEXP (mem, 0);
5204 base = offset = NULL_RTX;
5205 if (GET_CODE (addr) == PLUS)
5206 {
5207 if (GET_CODE (XEXP (addr, 0)) == REG)
5208 {
5209 base = XEXP (addr, 0);
5210
5211 /* What we are saying here is that if the base
5212 REG is aligned properly, the compiler will make
5213 sure any REG based index upon it will be so
5214 as well. */
5215 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5216 offset = XEXP (addr, 1);
5217 else
5218 offset = const0_rtx;
5219 }
5220 }
5221 else if (GET_CODE (addr) == REG)
5222 {
5223 base = addr;
5224 offset = const0_rtx;
5225 }
5226
5227 if (base != NULL_RTX)
5228 {
5229 int regno = REGNO (base);
5230
5231 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5232 {
5233 /* Check if the compiler has recorded some information
5234 about the alignment of the base REG. If reload has
5235 completed, we already matched with proper alignments.
5236 If not running global_alloc, reload might give us
5237 unaligned pointer to local stack though. */
5238 if (((cfun != 0
5239 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5240 || (optimize && reload_completed))
5241 && (INTVAL (offset) & (desired - 1)) == 0)
5242 return 1;
5243 }
5244 else
5245 {
5246 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5247 return 1;
5248 }
5249 }
5250 else if (! TARGET_UNALIGNED_DOUBLES
5251 || CONSTANT_P (addr)
5252 || GET_CODE (addr) == LO_SUM)
5253 {
5254 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5255 is true, in which case we can only assume that an access is aligned if
5256 it is to a constant address, or the address involves a LO_SUM. */
5257 return 1;
5258 }
5259
5260 /* An obviously unaligned address. */
5261 return 0;
5262 }
5263
5264 \f
5265 /* Vectors to keep interesting information about registers where it can easily
5266 be got. We used to use the actual mode value as the bit number, but there
5267 are more than 32 modes now. Instead we use two tables: one indexed by
5268 hard register number, and one indexed by mode. */
5269
5270 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5271 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5272 mapped into one sparc_mode_class mode. */
5273
5274 enum sparc_mode_class {
5275 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5276 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5277 CC_MODE, CCFP_MODE
5278 };
5279
5280 /* Modes for single-word and smaller quantities. */
5281 #define S_MODES \
5282 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5283
5284 /* Modes for double-word and smaller quantities. */
5285 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5286
5287 /* Modes for quad-word and smaller quantities. */
5288 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5289
5290 /* Modes for 8-word and smaller quantities. */
5291 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5292
5293 /* Modes for single-float quantities. */
5294 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5295
5296 /* Modes for double-float and smaller quantities. */
5297 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5298
5299 /* Modes for quad-float and smaller quantities. */
5300 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5301
5302 /* Modes for quad-float pairs and smaller quantities. */
5303 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5304
5305 /* Modes for double-float only quantities. */
5306 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5307
5308 /* Modes for quad-float and double-float only quantities. */
5309 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5310
5311 /* Modes for quad-float pairs and double-float only quantities. */
5312 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5313
5314 /* Modes for condition codes. */
5315 #define CC_MODES (1 << (int) CC_MODE)
5316 #define CCFP_MODES (1 << (int) CCFP_MODE)
5317
5318 /* Value is 1 if register/mode pair is acceptable on sparc.
5319
5320 The funny mixture of D and T modes is because integer operations
5321 do not specially operate on tetra quantities, so non-quad-aligned
5322 registers can hold quadword quantities (except %o4 and %i4 because
5323 they cross fixed registers).
5324
5325 ??? Note that, despite the settings, non-double-aligned parameter
5326 registers can hold double-word quantities in 32-bit mode. */
5327
5328 /* This points to either the 32-bit or the 64-bit version. */
5329 static const int *hard_regno_mode_classes;
5330
5331 static const int hard_32bit_mode_classes[] = {
5332 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5333 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5334 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5335 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5336
5337 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5338 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5339 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5340 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5341
5342 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5343 and none can hold SFmode/SImode values. */
5344 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5345 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5346 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5347 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5348
5349 /* %fcc[0123] */
5350 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5351
5352 /* %icc, %sfp, %gsr */
5353 CC_MODES, 0, D_MODES
5354 };
5355
5356 static const int hard_64bit_mode_classes[] = {
5357 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5358 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5359 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5360 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5361
5362 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5363 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5364 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5365 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5366
5367 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5368 and none can hold SFmode/SImode values. */
5369 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5370 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5371 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5372 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5373
5374 /* %fcc[0123] */
5375 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5376
5377 /* %icc, %sfp, %gsr */
5378 CC_MODES, 0, D_MODES
5379 };
5380
5381 static int sparc_mode_class [NUM_MACHINE_MODES];
5382
5383 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5384
5385 static void
5386 sparc_init_modes (void)
5387 {
5388 int i;
5389
5390 for (i = 0; i < NUM_MACHINE_MODES; i++)
5391 {
5392 machine_mode m = (machine_mode) i;
5393 unsigned int size = GET_MODE_SIZE (m);
5394
5395 switch (GET_MODE_CLASS (m))
5396 {
5397 case MODE_INT:
5398 case MODE_PARTIAL_INT:
5399 case MODE_COMPLEX_INT:
5400 if (size < 4)
5401 sparc_mode_class[i] = 1 << (int) H_MODE;
5402 else if (size == 4)
5403 sparc_mode_class[i] = 1 << (int) S_MODE;
5404 else if (size == 8)
5405 sparc_mode_class[i] = 1 << (int) D_MODE;
5406 else if (size == 16)
5407 sparc_mode_class[i] = 1 << (int) T_MODE;
5408 else if (size == 32)
5409 sparc_mode_class[i] = 1 << (int) O_MODE;
5410 else
5411 sparc_mode_class[i] = 0;
5412 break;
5413 case MODE_VECTOR_INT:
5414 if (size == 4)
5415 sparc_mode_class[i] = 1 << (int) SF_MODE;
5416 else if (size == 8)
5417 sparc_mode_class[i] = 1 << (int) DF_MODE;
5418 else
5419 sparc_mode_class[i] = 0;
5420 break;
5421 case MODE_FLOAT:
5422 case MODE_COMPLEX_FLOAT:
5423 if (size == 4)
5424 sparc_mode_class[i] = 1 << (int) SF_MODE;
5425 else if (size == 8)
5426 sparc_mode_class[i] = 1 << (int) DF_MODE;
5427 else if (size == 16)
5428 sparc_mode_class[i] = 1 << (int) TF_MODE;
5429 else if (size == 32)
5430 sparc_mode_class[i] = 1 << (int) OF_MODE;
5431 else
5432 sparc_mode_class[i] = 0;
5433 break;
5434 case MODE_CC:
5435 if (m == CCFPmode || m == CCFPEmode)
5436 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5437 else
5438 sparc_mode_class[i] = 1 << (int) CC_MODE;
5439 break;
5440 default:
5441 sparc_mode_class[i] = 0;
5442 break;
5443 }
5444 }
5445
5446 if (TARGET_ARCH64)
5447 hard_regno_mode_classes = hard_64bit_mode_classes;
5448 else
5449 hard_regno_mode_classes = hard_32bit_mode_classes;
5450
5451 /* Initialize the array used by REGNO_REG_CLASS. */
5452 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5453 {
5454 if (i < 16 && TARGET_V8PLUS)
5455 sparc_regno_reg_class[i] = I64_REGS;
5456 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5457 sparc_regno_reg_class[i] = GENERAL_REGS;
5458 else if (i < 64)
5459 sparc_regno_reg_class[i] = FP_REGS;
5460 else if (i < 96)
5461 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5462 else if (i < 100)
5463 sparc_regno_reg_class[i] = FPCC_REGS;
5464 else
5465 sparc_regno_reg_class[i] = NO_REGS;
5466 }
5467 }
5468 \f
5469 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5470
5471 static inline bool
5472 save_global_or_fp_reg_p (unsigned int regno,
5473 int leaf_function ATTRIBUTE_UNUSED)
5474 {
5475 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5476 }
5477
5478 /* Return whether the return address register (%i7) is needed. */
5479
5480 static inline bool
5481 return_addr_reg_needed_p (int leaf_function)
5482 {
5483 /* If it is live, for example because of __builtin_return_address (0). */
5484 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5485 return true;
5486
5487 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5488 if (!leaf_function
5489 /* Loading the GOT register clobbers %o7. */
5490 || crtl->uses_pic_offset_table
5491 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5492 return true;
5493
5494 return false;
5495 }
5496
5497 /* Return whether REGNO, a local or in register, must be saved/restored. */
5498
5499 static bool
5500 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5501 {
5502 /* General case: call-saved registers live at some point. */
5503 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5504 return true;
5505
5506 /* Frame pointer register (%fp) if needed. */
5507 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5508 return true;
5509
5510 /* Return address register (%i7) if needed. */
5511 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5512 return true;
5513
5514 /* GOT register (%l7) if needed. */
5515 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5516 return true;
5517
5518 /* If the function accesses prior frames, the frame pointer and the return
5519 address of the previous frame must be saved on the stack. */
5520 if (crtl->accesses_prior_frames
5521 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5522 return true;
5523
5524 return false;
5525 }
5526
5527 /* Compute the frame size required by the function. This function is called
5528 during the reload pass and also by sparc_expand_prologue. */
5529
5530 static HOST_WIDE_INT
5531 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5532 {
5533 HOST_WIDE_INT frame_size, apparent_frame_size;
5534 int args_size, n_global_fp_regs = 0;
5535 bool save_local_in_regs_p = false;
5536 unsigned int i;
5537
5538 /* If the function allocates dynamic stack space, the dynamic offset is
5539 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5540 if (leaf_function && !cfun->calls_alloca)
5541 args_size = 0;
5542 else
5543 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5544
5545 /* Calculate space needed for global registers. */
5546 if (TARGET_ARCH64)
5547 {
5548 for (i = 0; i < 8; i++)
5549 if (save_global_or_fp_reg_p (i, 0))
5550 n_global_fp_regs += 2;
5551 }
5552 else
5553 {
5554 for (i = 0; i < 8; i += 2)
5555 if (save_global_or_fp_reg_p (i, 0)
5556 || save_global_or_fp_reg_p (i + 1, 0))
5557 n_global_fp_regs += 2;
5558 }
5559
5560 /* In the flat window model, find out which local and in registers need to
5561 be saved. We don't reserve space in the current frame for them as they
5562 will be spilled into the register window save area of the caller's frame.
5563 However, as soon as we use this register window save area, we must create
5564 that of the current frame to make it the live one. */
5565 if (TARGET_FLAT)
5566 for (i = 16; i < 32; i++)
5567 if (save_local_or_in_reg_p (i, leaf_function))
5568 {
5569 save_local_in_regs_p = true;
5570 break;
5571 }
5572
5573 /* Calculate space needed for FP registers. */
5574 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5575 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5576 n_global_fp_regs += 2;
5577
5578 if (size == 0
5579 && n_global_fp_regs == 0
5580 && args_size == 0
5581 && !save_local_in_regs_p)
5582 frame_size = apparent_frame_size = 0;
5583 else
5584 {
5585 /* Start from the apparent frame size. */
5586 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5587
5588 /* We need to add the size of the outgoing argument area. */
5589 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5590
5591 /* And that of the register window save area. */
5592 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5593
5594 /* Finally, bump to the appropriate alignment. */
5595 frame_size = SPARC_STACK_ALIGN (frame_size);
5596 }
5597
5598 /* Set up values for use in prologue and epilogue. */
5599 sparc_frame_size = frame_size;
5600 sparc_apparent_frame_size = apparent_frame_size;
5601 sparc_n_global_fp_regs = n_global_fp_regs;
5602 sparc_save_local_in_regs_p = save_local_in_regs_p;
5603
5604 return frame_size;
5605 }
5606
5607 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5608
5609 int
5610 sparc_initial_elimination_offset (int to)
5611 {
5612 int offset;
5613
5614 if (to == STACK_POINTER_REGNUM)
5615 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5616 else
5617 offset = 0;
5618
5619 offset += SPARC_STACK_BIAS;
5620 return offset;
5621 }
5622
5623 /* Output any necessary .register pseudo-ops. */
5624
5625 void
5626 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5627 {
5628 int i;
5629
5630 if (TARGET_ARCH32)
5631 return;
5632
5633 /* Check if %g[2367] were used without
5634 .register being printed for them already. */
5635 for (i = 2; i < 8; i++)
5636 {
5637 if (df_regs_ever_live_p (i)
5638 && ! sparc_hard_reg_printed [i])
5639 {
5640 sparc_hard_reg_printed [i] = 1;
5641 /* %g7 is used as TLS base register, use #ignore
5642 for it instead of #scratch. */
5643 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5644 i == 7 ? "ignore" : "scratch");
5645 }
5646 if (i == 3) i = 5;
5647 }
5648 }
5649
5650 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5651
5652 #if PROBE_INTERVAL > 4096
5653 #error Cannot use indexed addressing mode for stack probing
5654 #endif
5655
5656 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5657 inclusive. These are offsets from the current stack pointer.
5658
5659 Note that we don't use the REG+REG addressing mode for the probes because
5660 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5661 so the advantages of having a single code win here. */
5662
5663 static void
5664 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5665 {
5666 rtx g1 = gen_rtx_REG (Pmode, 1);
5667
5668 /* See if we have a constant small number of probes to generate. If so,
5669 that's the easy case. */
5670 if (size <= PROBE_INTERVAL)
5671 {
5672 emit_move_insn (g1, GEN_INT (first));
5673 emit_insn (gen_rtx_SET (g1,
5674 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5675 emit_stack_probe (plus_constant (Pmode, g1, -size));
5676 }
5677
5678 /* The run-time loop is made up of 9 insns in the generic case while the
5679 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5680 else if (size <= 4 * PROBE_INTERVAL)
5681 {
5682 HOST_WIDE_INT i;
5683
5684 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5685 emit_insn (gen_rtx_SET (g1,
5686 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5687 emit_stack_probe (g1);
5688
5689 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5690 it exceeds SIZE. If only two probes are needed, this will not
5691 generate any code. Then probe at FIRST + SIZE. */
5692 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5693 {
5694 emit_insn (gen_rtx_SET (g1,
5695 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5696 emit_stack_probe (g1);
5697 }
5698
5699 emit_stack_probe (plus_constant (Pmode, g1,
5700 (i - PROBE_INTERVAL) - size));
5701 }
5702
5703 /* Otherwise, do the same as above, but in a loop. Note that we must be
5704 extra careful with variables wrapping around because we might be at
5705 the very top (or the very bottom) of the address space and we have
5706 to be able to handle this case properly; in particular, we use an
5707 equality test for the loop condition. */
5708 else
5709 {
5710 HOST_WIDE_INT rounded_size;
5711 rtx g4 = gen_rtx_REG (Pmode, 4);
5712
5713 emit_move_insn (g1, GEN_INT (first));
5714
5715
5716 /* Step 1: round SIZE to the previous multiple of the interval. */
5717
5718 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5719 emit_move_insn (g4, GEN_INT (rounded_size));
5720
5721
5722 /* Step 2: compute initial and final value of the loop counter. */
5723
5724 /* TEST_ADDR = SP + FIRST. */
5725 emit_insn (gen_rtx_SET (g1,
5726 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5727
5728 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5729 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5730
5731
5732 /* Step 3: the loop
5733
5734 while (TEST_ADDR != LAST_ADDR)
5735 {
5736 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5737 probe at TEST_ADDR
5738 }
5739
5740 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5741 until it is equal to ROUNDED_SIZE. */
5742
5743 if (TARGET_ARCH64)
5744 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5745 else
5746 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5747
5748
5749 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5750 that SIZE is equal to ROUNDED_SIZE. */
5751
5752 if (size != rounded_size)
5753 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5754 }
5755
5756 /* Make sure nothing is scheduled before we are done. */
5757 emit_insn (gen_blockage ());
5758 }
5759
5760 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5761 absolute addresses. */
5762
5763 const char *
5764 output_probe_stack_range (rtx reg1, rtx reg2)
5765 {
5766 static int labelno = 0;
5767 char loop_lab[32];
5768 rtx xops[2];
5769
5770 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5771
5772 /* Loop. */
5773 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5774
5775 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5776 xops[0] = reg1;
5777 xops[1] = GEN_INT (-PROBE_INTERVAL);
5778 output_asm_insn ("add\t%0, %1, %0", xops);
5779
5780 /* Test if TEST_ADDR == LAST_ADDR. */
5781 xops[1] = reg2;
5782 output_asm_insn ("cmp\t%0, %1", xops);
5783
5784 /* Probe at TEST_ADDR and branch. */
5785 if (TARGET_ARCH64)
5786 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5787 else
5788 fputs ("\tbne\t", asm_out_file);
5789 assemble_name_raw (asm_out_file, loop_lab);
5790 fputc ('\n', asm_out_file);
5791 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5792 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5793
5794 return "";
5795 }
5796
5797 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5798 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5799 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5800 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5801 the action to be performed if it returns false. Return the new offset. */
5802
5803 typedef bool (*sorr_pred_t) (unsigned int, int);
5804 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5805
5806 static int
5807 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5808 int offset, int leaf_function, sorr_pred_t save_p,
5809 sorr_act_t action_true, sorr_act_t action_false)
5810 {
5811 unsigned int i;
5812 rtx mem;
5813 rtx_insn *insn;
5814
5815 if (TARGET_ARCH64 && high <= 32)
5816 {
5817 int fp_offset = -1;
5818
5819 for (i = low; i < high; i++)
5820 {
5821 if (save_p (i, leaf_function))
5822 {
5823 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5824 base, offset));
5825 if (action_true == SORR_SAVE)
5826 {
5827 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5828 RTX_FRAME_RELATED_P (insn) = 1;
5829 }
5830 else /* action_true == SORR_RESTORE */
5831 {
5832 /* The frame pointer must be restored last since its old
5833 value may be used as base address for the frame. This
5834 is problematic in 64-bit mode only because of the lack
5835 of double-word load instruction. */
5836 if (i == HARD_FRAME_POINTER_REGNUM)
5837 fp_offset = offset;
5838 else
5839 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5840 }
5841 offset += 8;
5842 }
5843 else if (action_false == SORR_ADVANCE)
5844 offset += 8;
5845 }
5846
5847 if (fp_offset >= 0)
5848 {
5849 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5850 emit_move_insn (hard_frame_pointer_rtx, mem);
5851 }
5852 }
5853 else
5854 {
5855 for (i = low; i < high; i += 2)
5856 {
5857 bool reg0 = save_p (i, leaf_function);
5858 bool reg1 = save_p (i + 1, leaf_function);
5859 machine_mode mode;
5860 int regno;
5861
5862 if (reg0 && reg1)
5863 {
5864 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5865 regno = i;
5866 }
5867 else if (reg0)
5868 {
5869 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5870 regno = i;
5871 }
5872 else if (reg1)
5873 {
5874 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5875 regno = i + 1;
5876 offset += 4;
5877 }
5878 else
5879 {
5880 if (action_false == SORR_ADVANCE)
5881 offset += 8;
5882 continue;
5883 }
5884
5885 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5886 if (action_true == SORR_SAVE)
5887 {
5888 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5889 RTX_FRAME_RELATED_P (insn) = 1;
5890 if (mode == DImode)
5891 {
5892 rtx set1, set2;
5893 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5894 offset));
5895 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5896 RTX_FRAME_RELATED_P (set1) = 1;
5897 mem
5898 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5899 offset + 4));
5900 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5901 RTX_FRAME_RELATED_P (set2) = 1;
5902 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5903 gen_rtx_PARALLEL (VOIDmode,
5904 gen_rtvec (2, set1, set2)));
5905 }
5906 }
5907 else /* action_true == SORR_RESTORE */
5908 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5909
5910 /* Bump and round down to double word
5911 in case we already bumped by 4. */
5912 offset = ROUND_DOWN (offset + 8, 8);
5913 }
5914 }
5915
5916 return offset;
5917 }
5918
5919 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5920
5921 static rtx
5922 emit_adjust_base_to_offset (rtx base, int offset)
5923 {
5924 /* ??? This might be optimized a little as %g1 might already have a
5925 value close enough that a single add insn will do. */
5926 /* ??? Although, all of this is probably only a temporary fix because
5927 if %g1 can hold a function result, then sparc_expand_epilogue will
5928 lose (the result will be clobbered). */
5929 rtx new_base = gen_rtx_REG (Pmode, 1);
5930 emit_move_insn (new_base, GEN_INT (offset));
5931 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5932 return new_base;
5933 }
5934
5935 /* Emit code to save/restore call-saved global and FP registers. */
5936
5937 static void
5938 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5939 {
5940 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5941 {
5942 base = emit_adjust_base_to_offset (base, offset);
5943 offset = 0;
5944 }
5945
5946 offset
5947 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5948 save_global_or_fp_reg_p, action, SORR_NONE);
5949 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5950 save_global_or_fp_reg_p, action, SORR_NONE);
5951 }
5952
5953 /* Emit code to save/restore call-saved local and in registers. */
5954
5955 static void
5956 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5957 {
5958 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5959 {
5960 base = emit_adjust_base_to_offset (base, offset);
5961 offset = 0;
5962 }
5963
5964 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5965 save_local_or_in_reg_p, action, SORR_ADVANCE);
5966 }
5967
5968 /* Emit a window_save insn. */
5969
5970 static rtx_insn *
5971 emit_window_save (rtx increment)
5972 {
5973 rtx_insn *insn = emit_insn (gen_window_save (increment));
5974 RTX_FRAME_RELATED_P (insn) = 1;
5975
5976 /* The incoming return address (%o7) is saved in %i7. */
5977 add_reg_note (insn, REG_CFA_REGISTER,
5978 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5979 gen_rtx_REG (Pmode,
5980 INCOMING_RETURN_ADDR_REGNUM)));
5981
5982 /* The window save event. */
5983 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5984
5985 /* The CFA is %fp, the hard frame pointer. */
5986 add_reg_note (insn, REG_CFA_DEF_CFA,
5987 plus_constant (Pmode, hard_frame_pointer_rtx,
5988 INCOMING_FRAME_SP_OFFSET));
5989
5990 return insn;
5991 }
5992
5993 /* Generate an increment for the stack pointer. */
5994
5995 static rtx
5996 gen_stack_pointer_inc (rtx increment)
5997 {
5998 return gen_rtx_SET (stack_pointer_rtx,
5999 gen_rtx_PLUS (Pmode,
6000 stack_pointer_rtx,
6001 increment));
6002 }
6003
6004 /* Expand the function prologue. The prologue is responsible for reserving
6005 storage for the frame, saving the call-saved registers and loading the
6006 GOT register if needed. */
6007
6008 void
6009 sparc_expand_prologue (void)
6010 {
6011 HOST_WIDE_INT size;
6012 rtx_insn *insn;
6013
6014 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
6015 on the final value of the flag means deferring the prologue/epilogue
6016 expansion until just before the second scheduling pass, which is too
6017 late to emit multiple epilogues or return insns.
6018
6019 Of course we are making the assumption that the value of the flag
6020 will not change between now and its final value. Of the three parts
6021 of the formula, only the last one can reasonably vary. Let's take a
6022 closer look, after assuming that the first two ones are set to true
6023 (otherwise the last value is effectively silenced).
6024
6025 If only_leaf_regs_used returns false, the global predicate will also
6026 be false so the actual frame size calculated below will be positive.
6027 As a consequence, the save_register_window insn will be emitted in
6028 the instruction stream; now this insn explicitly references %fp
6029 which is not a leaf register so only_leaf_regs_used will always
6030 return false subsequently.
6031
6032 If only_leaf_regs_used returns true, we hope that the subsequent
6033 optimization passes won't cause non-leaf registers to pop up. For
6034 example, the regrename pass has special provisions to not rename to
6035 non-leaf registers in a leaf function. */
6036 sparc_leaf_function_p
6037 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6038
6039 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6040
6041 if (flag_stack_usage_info)
6042 current_function_static_stack_size = size;
6043
6044 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6045 || flag_stack_clash_protection)
6046 {
6047 if (crtl->is_leaf && !cfun->calls_alloca)
6048 {
6049 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6050 sparc_emit_probe_stack_range (get_stack_check_protect (),
6051 size - get_stack_check_protect ());
6052 }
6053 else if (size > 0)
6054 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6055 }
6056
6057 if (size == 0)
6058 ; /* do nothing. */
6059 else if (sparc_leaf_function_p)
6060 {
6061 rtx size_int_rtx = GEN_INT (-size);
6062
6063 if (size <= 4096)
6064 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6065 else if (size <= 8192)
6066 {
6067 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6068 RTX_FRAME_RELATED_P (insn) = 1;
6069
6070 /* %sp is still the CFA register. */
6071 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6072 }
6073 else
6074 {
6075 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6076 emit_move_insn (size_rtx, size_int_rtx);
6077 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6078 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6079 gen_stack_pointer_inc (size_int_rtx));
6080 }
6081
6082 RTX_FRAME_RELATED_P (insn) = 1;
6083 }
6084 else
6085 {
6086 rtx size_int_rtx = GEN_INT (-size);
6087
6088 if (size <= 4096)
6089 emit_window_save (size_int_rtx);
6090 else if (size <= 8192)
6091 {
6092 emit_window_save (GEN_INT (-4096));
6093
6094 /* %sp is not the CFA register anymore. */
6095 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6096
6097 /* Make sure no %fp-based store is issued until after the frame is
6098 established. The offset between the frame pointer and the stack
6099 pointer is calculated relative to the value of the stack pointer
6100 at the end of the function prologue, and moving instructions that
6101 access the stack via the frame pointer between the instructions
6102 that decrement the stack pointer could result in accessing the
6103 register window save area, which is volatile. */
6104 emit_insn (gen_frame_blockage ());
6105 }
6106 else
6107 {
6108 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6109 emit_move_insn (size_rtx, size_int_rtx);
6110 emit_window_save (size_rtx);
6111 }
6112 }
6113
6114 if (sparc_leaf_function_p)
6115 {
6116 sparc_frame_base_reg = stack_pointer_rtx;
6117 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6118 }
6119 else
6120 {
6121 sparc_frame_base_reg = hard_frame_pointer_rtx;
6122 sparc_frame_base_offset = SPARC_STACK_BIAS;
6123 }
6124
6125 if (sparc_n_global_fp_regs > 0)
6126 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6127 sparc_frame_base_offset
6128 - sparc_apparent_frame_size,
6129 SORR_SAVE);
6130
6131 /* Advertise that the data calculated just above are now valid. */
6132 sparc_prologue_data_valid_p = true;
6133 }
6134
6135 /* Expand the function prologue. The prologue is responsible for reserving
6136 storage for the frame, saving the call-saved registers and loading the
6137 GOT register if needed. */
6138
6139 void
6140 sparc_flat_expand_prologue (void)
6141 {
6142 HOST_WIDE_INT size;
6143 rtx_insn *insn;
6144
6145 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6146
6147 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6148
6149 if (flag_stack_usage_info)
6150 current_function_static_stack_size = size;
6151
6152 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6153 || flag_stack_clash_protection)
6154 {
6155 if (crtl->is_leaf && !cfun->calls_alloca)
6156 {
6157 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6158 sparc_emit_probe_stack_range (get_stack_check_protect (),
6159 size - get_stack_check_protect ());
6160 }
6161 else if (size > 0)
6162 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6163 }
6164
6165 if (sparc_save_local_in_regs_p)
6166 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6167 SORR_SAVE);
6168
6169 if (size == 0)
6170 ; /* do nothing. */
6171 else
6172 {
6173 rtx size_int_rtx, size_rtx;
6174
6175 size_rtx = size_int_rtx = GEN_INT (-size);
6176
6177 /* We establish the frame (i.e. decrement the stack pointer) first, even
6178 if we use a frame pointer, because we cannot clobber any call-saved
6179 registers, including the frame pointer, if we haven't created a new
6180 register save area, for the sake of compatibility with the ABI. */
6181 if (size <= 4096)
6182 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6183 else if (size <= 8192 && !frame_pointer_needed)
6184 {
6185 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6186 RTX_FRAME_RELATED_P (insn) = 1;
6187 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6188 }
6189 else
6190 {
6191 size_rtx = gen_rtx_REG (Pmode, 1);
6192 emit_move_insn (size_rtx, size_int_rtx);
6193 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6194 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6195 gen_stack_pointer_inc (size_int_rtx));
6196 }
6197 RTX_FRAME_RELATED_P (insn) = 1;
6198
6199 /* Ensure nothing is scheduled until after the frame is established. */
6200 emit_insn (gen_blockage ());
6201
6202 if (frame_pointer_needed)
6203 {
6204 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6205 gen_rtx_MINUS (Pmode,
6206 stack_pointer_rtx,
6207 size_rtx)));
6208 RTX_FRAME_RELATED_P (insn) = 1;
6209
6210 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6211 gen_rtx_SET (hard_frame_pointer_rtx,
6212 plus_constant (Pmode, stack_pointer_rtx,
6213 size)));
6214 }
6215
6216 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6217 {
6218 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6219 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6220
6221 insn = emit_move_insn (i7, o7);
6222 RTX_FRAME_RELATED_P (insn) = 1;
6223
6224 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6225
6226 /* Prevent this instruction from ever being considered dead,
6227 even if this function has no epilogue. */
6228 emit_use (i7);
6229 }
6230 }
6231
6232 if (frame_pointer_needed)
6233 {
6234 sparc_frame_base_reg = hard_frame_pointer_rtx;
6235 sparc_frame_base_offset = SPARC_STACK_BIAS;
6236 }
6237 else
6238 {
6239 sparc_frame_base_reg = stack_pointer_rtx;
6240 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6241 }
6242
6243 if (sparc_n_global_fp_regs > 0)
6244 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6245 sparc_frame_base_offset
6246 - sparc_apparent_frame_size,
6247 SORR_SAVE);
6248
6249 /* Advertise that the data calculated just above are now valid. */
6250 sparc_prologue_data_valid_p = true;
6251 }
6252
6253 /* This function generates the assembly code for function entry, which boils
6254 down to emitting the necessary .register directives. */
6255
6256 static void
6257 sparc_asm_function_prologue (FILE *file)
6258 {
6259 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6260 if (!TARGET_FLAT)
6261 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6262
6263 sparc_output_scratch_registers (file);
6264 }
6265
6266 /* Expand the function epilogue, either normal or part of a sibcall.
6267 We emit all the instructions except the return or the call. */
6268
6269 void
6270 sparc_expand_epilogue (bool for_eh)
6271 {
6272 HOST_WIDE_INT size = sparc_frame_size;
6273
6274 if (cfun->calls_alloca)
6275 emit_insn (gen_frame_blockage ());
6276
6277 if (sparc_n_global_fp_regs > 0)
6278 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6279 sparc_frame_base_offset
6280 - sparc_apparent_frame_size,
6281 SORR_RESTORE);
6282
6283 if (size == 0 || for_eh)
6284 ; /* do nothing. */
6285 else if (sparc_leaf_function_p)
6286 {
6287 if (size <= 4096)
6288 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6289 else if (size <= 8192)
6290 {
6291 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6292 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6293 }
6294 else
6295 {
6296 rtx reg = gen_rtx_REG (Pmode, 1);
6297 emit_move_insn (reg, GEN_INT (size));
6298 emit_insn (gen_stack_pointer_inc (reg));
6299 }
6300 }
6301 }
6302
6303 /* Expand the function epilogue, either normal or part of a sibcall.
6304 We emit all the instructions except the return or the call. */
6305
6306 void
6307 sparc_flat_expand_epilogue (bool for_eh)
6308 {
6309 HOST_WIDE_INT size = sparc_frame_size;
6310
6311 if (sparc_n_global_fp_regs > 0)
6312 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6313 sparc_frame_base_offset
6314 - sparc_apparent_frame_size,
6315 SORR_RESTORE);
6316
6317 /* If we have a frame pointer, we'll need both to restore it before the
6318 frame is destroyed and use its current value in destroying the frame.
6319 Since we don't have an atomic way to do that in the flat window model,
6320 we save the current value into a temporary register (%g1). */
6321 if (frame_pointer_needed && !for_eh)
6322 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6323
6324 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6325 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6326 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6327
6328 if (sparc_save_local_in_regs_p)
6329 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6330 sparc_frame_base_offset,
6331 SORR_RESTORE);
6332
6333 if (size == 0 || for_eh)
6334 ; /* do nothing. */
6335 else if (frame_pointer_needed)
6336 {
6337 /* Make sure the frame is destroyed after everything else is done. */
6338 emit_insn (gen_blockage ());
6339
6340 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6341 }
6342 else
6343 {
6344 /* Likewise. */
6345 emit_insn (gen_blockage ());
6346
6347 if (size <= 4096)
6348 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6349 else if (size <= 8192)
6350 {
6351 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6352 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6353 }
6354 else
6355 {
6356 rtx reg = gen_rtx_REG (Pmode, 1);
6357 emit_move_insn (reg, GEN_INT (size));
6358 emit_insn (gen_stack_pointer_inc (reg));
6359 }
6360 }
6361 }
6362
6363 /* Return true if it is appropriate to emit `return' instructions in the
6364 body of a function. */
6365
6366 bool
6367 sparc_can_use_return_insn_p (void)
6368 {
6369 return sparc_prologue_data_valid_p
6370 && sparc_n_global_fp_regs == 0
6371 && TARGET_FLAT
6372 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6373 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6374 }
6375
6376 /* This function generates the assembly code for function exit. */
6377
6378 static void
6379 sparc_asm_function_epilogue (FILE *file)
6380 {
6381 /* If the last two instructions of a function are "call foo; dslot;"
6382 the return address might point to the first instruction in the next
6383 function and we have to output a dummy nop for the sake of sane
6384 backtraces in such cases. This is pointless for sibling calls since
6385 the return address is explicitly adjusted. */
6386
6387 rtx_insn *insn = get_last_insn ();
6388
6389 rtx last_real_insn = prev_real_insn (insn);
6390 if (last_real_insn
6391 && NONJUMP_INSN_P (last_real_insn)
6392 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6393 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6394
6395 if (last_real_insn
6396 && CALL_P (last_real_insn)
6397 && !SIBLING_CALL_P (last_real_insn))
6398 fputs("\tnop\n", file);
6399
6400 sparc_output_deferred_case_vectors ();
6401 }
6402
6403 /* Output a 'restore' instruction. */
6404
6405 static void
6406 output_restore (rtx pat)
6407 {
6408 rtx operands[3];
6409
6410 if (! pat)
6411 {
6412 fputs ("\t restore\n", asm_out_file);
6413 return;
6414 }
6415
6416 gcc_assert (GET_CODE (pat) == SET);
6417
6418 operands[0] = SET_DEST (pat);
6419 pat = SET_SRC (pat);
6420
6421 switch (GET_CODE (pat))
6422 {
6423 case PLUS:
6424 operands[1] = XEXP (pat, 0);
6425 operands[2] = XEXP (pat, 1);
6426 output_asm_insn (" restore %r1, %2, %Y0", operands);
6427 break;
6428 case LO_SUM:
6429 operands[1] = XEXP (pat, 0);
6430 operands[2] = XEXP (pat, 1);
6431 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6432 break;
6433 case ASHIFT:
6434 operands[1] = XEXP (pat, 0);
6435 gcc_assert (XEXP (pat, 1) == const1_rtx);
6436 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6437 break;
6438 default:
6439 operands[1] = pat;
6440 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6441 break;
6442 }
6443 }
6444
6445 /* Output a return. */
6446
6447 const char *
6448 output_return (rtx_insn *insn)
6449 {
6450 if (crtl->calls_eh_return)
6451 {
6452 /* If the function uses __builtin_eh_return, the eh_return
6453 machinery occupies the delay slot. */
6454 gcc_assert (!final_sequence);
6455
6456 if (flag_delayed_branch)
6457 {
6458 if (!TARGET_FLAT && TARGET_V9)
6459 fputs ("\treturn\t%i7+8\n", asm_out_file);
6460 else
6461 {
6462 if (!TARGET_FLAT)
6463 fputs ("\trestore\n", asm_out_file);
6464
6465 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6466 }
6467
6468 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6469 }
6470 else
6471 {
6472 if (!TARGET_FLAT)
6473 fputs ("\trestore\n", asm_out_file);
6474
6475 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6476 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6477 }
6478 }
6479 else if (sparc_leaf_function_p || TARGET_FLAT)
6480 {
6481 /* This is a leaf or flat function so we don't have to bother restoring
6482 the register window, which frees us from dealing with the convoluted
6483 semantics of restore/return. We simply output the jump to the
6484 return address and the insn in the delay slot (if any). */
6485
6486 return "jmp\t%%o7+%)%#";
6487 }
6488 else
6489 {
6490 /* This is a regular function so we have to restore the register window.
6491 We may have a pending insn for the delay slot, which will be either
6492 combined with the 'restore' instruction or put in the delay slot of
6493 the 'return' instruction. */
6494
6495 if (final_sequence)
6496 {
6497 rtx_insn *delay;
6498 rtx pat;
6499
6500 delay = NEXT_INSN (insn);
6501 gcc_assert (delay);
6502
6503 pat = PATTERN (delay);
6504
6505 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6506 {
6507 epilogue_renumber (&pat, 0);
6508 return "return\t%%i7+%)%#";
6509 }
6510 else
6511 {
6512 output_asm_insn ("jmp\t%%i7+%)", NULL);
6513
6514 /* We're going to output the insn in the delay slot manually.
6515 Make sure to output its source location first. */
6516 PATTERN (delay) = gen_blockage ();
6517 INSN_CODE (delay) = -1;
6518 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6519 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6520
6521 output_restore (pat);
6522 }
6523 }
6524 else
6525 {
6526 /* The delay slot is empty. */
6527 if (TARGET_V9)
6528 return "return\t%%i7+%)\n\t nop";
6529 else if (flag_delayed_branch)
6530 return "jmp\t%%i7+%)\n\t restore";
6531 else
6532 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6533 }
6534 }
6535
6536 return "";
6537 }
6538
6539 /* Output a sibling call. */
6540
6541 const char *
6542 output_sibcall (rtx_insn *insn, rtx call_operand)
6543 {
6544 rtx operands[1];
6545
6546 gcc_assert (flag_delayed_branch);
6547
6548 operands[0] = call_operand;
6549
6550 if (sparc_leaf_function_p || TARGET_FLAT)
6551 {
6552 /* This is a leaf or flat function so we don't have to bother restoring
6553 the register window. We simply output the jump to the function and
6554 the insn in the delay slot (if any). */
6555
6556 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6557
6558 if (final_sequence)
6559 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6560 operands);
6561 else
6562 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6563 it into branch if possible. */
6564 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6565 operands);
6566 }
6567 else
6568 {
6569 /* This is a regular function so we have to restore the register window.
6570 We may have a pending insn for the delay slot, which will be combined
6571 with the 'restore' instruction. */
6572
6573 output_asm_insn ("call\t%a0, 0", operands);
6574
6575 if (final_sequence)
6576 {
6577 rtx_insn *delay;
6578 rtx pat;
6579
6580 delay = NEXT_INSN (insn);
6581 gcc_assert (delay);
6582
6583 pat = PATTERN (delay);
6584
6585 /* We're going to output the insn in the delay slot manually.
6586 Make sure to output its source location first. */
6587 PATTERN (delay) = gen_blockage ();
6588 INSN_CODE (delay) = -1;
6589 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6590 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6591
6592 output_restore (pat);
6593 }
6594 else
6595 output_restore (NULL_RTX);
6596 }
6597
6598 return "";
6599 }
6600 \f
6601 /* Functions for handling argument passing.
6602
6603 For 32-bit, the first 6 args are normally in registers and the rest are
6604 pushed. Any arg that starts within the first 6 words is at least
6605 partially passed in a register unless its data type forbids.
6606
6607 For 64-bit, the argument registers are laid out as an array of 16 elements
6608 and arguments are added sequentially. The first 6 int args and up to the
6609 first 16 fp args (depending on size) are passed in regs.
6610
6611 Slot Stack Integral Float Float in structure Double Long Double
6612 ---- ----- -------- ----- ------------------ ------ -----------
6613 15 [SP+248] %f31 %f30,%f31 %d30
6614 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6615 13 [SP+232] %f27 %f26,%f27 %d26
6616 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6617 11 [SP+216] %f23 %f22,%f23 %d22
6618 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6619 9 [SP+200] %f19 %f18,%f19 %d18
6620 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6621 7 [SP+184] %f15 %f14,%f15 %d14
6622 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6623 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6624 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6625 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6626 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6627 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6628 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6629
6630 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6631
6632 Integral arguments are always passed as 64-bit quantities appropriately
6633 extended.
6634
6635 Passing of floating point values is handled as follows.
6636 If a prototype is in scope:
6637 If the value is in a named argument (i.e. not a stdarg function or a
6638 value not part of the `...') then the value is passed in the appropriate
6639 fp reg.
6640 If the value is part of the `...' and is passed in one of the first 6
6641 slots then the value is passed in the appropriate int reg.
6642 If the value is part of the `...' and is not passed in one of the first 6
6643 slots then the value is passed in memory.
6644 If a prototype is not in scope:
6645 If the value is one of the first 6 arguments the value is passed in the
6646 appropriate integer reg and the appropriate fp reg.
6647 If the value is not one of the first 6 arguments the value is passed in
6648 the appropriate fp reg and in memory.
6649
6650
6651 Summary of the calling conventions implemented by GCC on the SPARC:
6652
6653 32-bit ABI:
6654 size argument return value
6655
6656 small integer <4 int. reg. int. reg.
6657 word 4 int. reg. int. reg.
6658 double word 8 int. reg. int. reg.
6659
6660 _Complex small integer <8 int. reg. int. reg.
6661 _Complex word 8 int. reg. int. reg.
6662 _Complex double word 16 memory int. reg.
6663
6664 vector integer <=8 int. reg. FP reg.
6665 vector integer >8 memory memory
6666
6667 float 4 int. reg. FP reg.
6668 double 8 int. reg. FP reg.
6669 long double 16 memory memory
6670
6671 _Complex float 8 memory FP reg.
6672 _Complex double 16 memory FP reg.
6673 _Complex long double 32 memory FP reg.
6674
6675 vector float any memory memory
6676
6677 aggregate any memory memory
6678
6679
6680
6681 64-bit ABI:
6682 size argument return value
6683
6684 small integer <8 int. reg. int. reg.
6685 word 8 int. reg. int. reg.
6686 double word 16 int. reg. int. reg.
6687
6688 _Complex small integer <16 int. reg. int. reg.
6689 _Complex word 16 int. reg. int. reg.
6690 _Complex double word 32 memory int. reg.
6691
6692 vector integer <=16 FP reg. FP reg.
6693 vector integer 16<s<=32 memory FP reg.
6694 vector integer >32 memory memory
6695
6696 float 4 FP reg. FP reg.
6697 double 8 FP reg. FP reg.
6698 long double 16 FP reg. FP reg.
6699
6700 _Complex float 8 FP reg. FP reg.
6701 _Complex double 16 FP reg. FP reg.
6702 _Complex long double 32 memory FP reg.
6703
6704 vector float <=16 FP reg. FP reg.
6705 vector float 16<s<=32 memory FP reg.
6706 vector float >32 memory memory
6707
6708 aggregate <=16 reg. reg.
6709 aggregate 16<s<=32 memory reg.
6710 aggregate >32 memory memory
6711
6712
6713
6714 Note #1: complex floating-point types follow the extended SPARC ABIs as
6715 implemented by the Sun compiler.
6716
6717 Note #2: integer vector types follow the scalar floating-point types
6718 conventions to match what is implemented by the Sun VIS SDK.
6719
6720 Note #3: floating-point vector types follow the aggregate types
6721 conventions. */
6722
6723
6724 /* Maximum number of int regs for args. */
6725 #define SPARC_INT_ARG_MAX 6
6726 /* Maximum number of fp regs for args. */
6727 #define SPARC_FP_ARG_MAX 16
6728 /* Number of words (partially) occupied for a given size in units. */
6729 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6730
6731 /* Handle the INIT_CUMULATIVE_ARGS macro.
6732 Initialize a variable CUM of type CUMULATIVE_ARGS
6733 for a call to a function whose data type is FNTYPE.
6734 For a library call, FNTYPE is 0. */
6735
6736 void
6737 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6738 {
6739 cum->words = 0;
6740 cum->prototype_p = fntype && prototype_p (fntype);
6741 cum->libcall_p = !fntype;
6742 }
6743
6744 /* Handle promotion of pointer and integer arguments. */
6745
6746 static machine_mode
6747 sparc_promote_function_mode (const_tree type, machine_mode mode,
6748 int *punsignedp, const_tree, int)
6749 {
6750 if (type && POINTER_TYPE_P (type))
6751 {
6752 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6753 return Pmode;
6754 }
6755
6756 /* Integral arguments are passed as full words, as per the ABI. */
6757 if (GET_MODE_CLASS (mode) == MODE_INT
6758 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6759 return word_mode;
6760
6761 return mode;
6762 }
6763
6764 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6765
6766 static bool
6767 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6768 {
6769 return TARGET_ARCH64 ? true : false;
6770 }
6771
6772 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6773 Specify whether to pass the argument by reference. */
6774
6775 static bool
6776 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6777 machine_mode mode, const_tree type,
6778 bool named ATTRIBUTE_UNUSED)
6779 {
6780 if (TARGET_ARCH32)
6781 /* Original SPARC 32-bit ABI says that structures and unions,
6782 and quad-precision floats are passed by reference.
6783 All other base types are passed in registers.
6784
6785 Extended ABI (as implemented by the Sun compiler) says that all
6786 complex floats are passed by reference. Pass complex integers
6787 in registers up to 8 bytes. More generally, enforce the 2-word
6788 cap for passing arguments in registers.
6789
6790 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6791 vectors are passed like floats of the same size, that is in
6792 registers up to 8 bytes. Pass all vector floats by reference
6793 like structure and unions. */
6794 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6795 || mode == SCmode
6796 /* Catch CDImode, TFmode, DCmode and TCmode. */
6797 || GET_MODE_SIZE (mode) > 8
6798 || (type
6799 && VECTOR_TYPE_P (type)
6800 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6801 else
6802 /* Original SPARC 64-bit ABI says that structures and unions
6803 smaller than 16 bytes are passed in registers, as well as
6804 all other base types.
6805
6806 Extended ABI (as implemented by the Sun compiler) says that
6807 complex floats are passed in registers up to 16 bytes. Pass
6808 all complex integers in registers up to 16 bytes. More generally,
6809 enforce the 2-word cap for passing arguments in registers.
6810
6811 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6812 vectors are passed like floats of the same size, that is in
6813 registers (up to 16 bytes). Pass all vector floats like structure
6814 and unions. */
6815 return ((type
6816 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6817 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6818 /* Catch CTImode and TCmode. */
6819 || GET_MODE_SIZE (mode) > 16);
6820 }
6821
6822 /* Traverse the record TYPE recursively and call FUNC on its fields.
6823 NAMED is true if this is for a named parameter. DATA is passed
6824 to FUNC for each field. OFFSET is the starting position and
6825 PACKED is true if we are inside a packed record. */
6826
6827 template <typename T, void Func (const_tree, int, bool, T*)>
6828 static void
6829 traverse_record_type (const_tree type, bool named, T *data,
6830 int offset = 0, bool packed = false)
6831 {
6832 /* The ABI obviously doesn't specify how packed structures are passed.
6833 These are passed in integer regs if possible, otherwise memory. */
6834 if (!packed)
6835 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6836 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6837 {
6838 packed = true;
6839 break;
6840 }
6841
6842 /* Walk the real fields, but skip those with no size or a zero size.
6843 ??? Fields with variable offset are handled as having zero offset. */
6844 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6845 if (TREE_CODE (field) == FIELD_DECL)
6846 {
6847 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6848 continue;
6849
6850 int bitpos = offset;
6851 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6852 bitpos += int_bit_position (field);
6853
6854 tree field_type = TREE_TYPE (field);
6855 if (TREE_CODE (field_type) == RECORD_TYPE)
6856 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6857 packed);
6858 else
6859 {
6860 const bool fp_type
6861 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6862 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6863 data);
6864 }
6865 }
6866 }
6867
6868 /* Handle recursive register classifying for structure layout. */
6869
6870 typedef struct
6871 {
6872 bool fp_regs; /* true if field eligible to FP registers. */
6873 bool fp_regs_in_first_word; /* true if such field in first word. */
6874 } classify_data_t;
6875
6876 /* A subroutine of function_arg_slotno. Classify the field. */
6877
6878 inline void
6879 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6880 {
6881 if (fp)
6882 {
6883 data->fp_regs = true;
6884 if (bitpos < BITS_PER_WORD)
6885 data->fp_regs_in_first_word = true;
6886 }
6887 }
6888
6889 /* Compute the slot number to pass an argument in.
6890 Return the slot number or -1 if passing on the stack.
6891
6892 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6893 the preceding args and about the function being called.
6894 MODE is the argument's machine mode.
6895 TYPE is the data type of the argument (as a tree).
6896 This is null for libcalls where that information may
6897 not be available.
6898 NAMED is nonzero if this argument is a named parameter
6899 (otherwise it is an extra parameter matching an ellipsis).
6900 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6901 *PREGNO records the register number to use if scalar type.
6902 *PPADDING records the amount of padding needed in words. */
6903
6904 static int
6905 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6906 const_tree type, bool named, bool incoming,
6907 int *pregno, int *ppadding)
6908 {
6909 const int regbase
6910 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6911 int slotno = cum->words, regno;
6912 enum mode_class mclass = GET_MODE_CLASS (mode);
6913
6914 /* Silence warnings in the callers. */
6915 *pregno = -1;
6916 *ppadding = -1;
6917
6918 if (type && TREE_ADDRESSABLE (type))
6919 return -1;
6920
6921 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6922 if (TARGET_ARCH64
6923 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6924 && (slotno & 1) != 0)
6925 {
6926 slotno++;
6927 *ppadding = 1;
6928 }
6929 else
6930 *ppadding = 0;
6931
6932 /* Vector types deserve special treatment because they are polymorphic wrt
6933 their mode, depending upon whether VIS instructions are enabled. */
6934 if (type && VECTOR_TYPE_P (type))
6935 {
6936 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6937 {
6938 /* The SPARC port defines no floating-point vector modes. */
6939 gcc_assert (mode == BLKmode);
6940 }
6941 else
6942 {
6943 /* Integer vector types should either have a vector
6944 mode or an integral mode, because we are guaranteed
6945 by pass_by_reference that their size is not greater
6946 than 16 bytes and TImode is 16-byte wide. */
6947 gcc_assert (mode != BLKmode);
6948
6949 /* Integer vectors are handled like floats as per
6950 the Sun VIS SDK. */
6951 mclass = MODE_FLOAT;
6952 }
6953 }
6954
6955 switch (mclass)
6956 {
6957 case MODE_FLOAT:
6958 case MODE_COMPLEX_FLOAT:
6959 case MODE_VECTOR_INT:
6960 if (TARGET_ARCH64 && TARGET_FPU && named)
6961 {
6962 /* If all arg slots are filled, then must pass on stack. */
6963 if (slotno >= SPARC_FP_ARG_MAX)
6964 return -1;
6965
6966 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6967 /* Arguments filling only one single FP register are
6968 right-justified in the outer double FP register. */
6969 if (GET_MODE_SIZE (mode) <= 4)
6970 regno++;
6971 break;
6972 }
6973 /* fallthrough */
6974
6975 case MODE_INT:
6976 case MODE_COMPLEX_INT:
6977 /* If all arg slots are filled, then must pass on stack. */
6978 if (slotno >= SPARC_INT_ARG_MAX)
6979 return -1;
6980
6981 regno = regbase + slotno;
6982 break;
6983
6984 case MODE_RANDOM:
6985 /* MODE is VOIDmode when generating the actual call. */
6986 if (mode == VOIDmode)
6987 return -1;
6988
6989 if (TARGET_64BIT && TARGET_FPU && named
6990 && type
6991 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6992 {
6993 /* If all arg slots are filled, then must pass on stack. */
6994 if (slotno >= SPARC_FP_ARG_MAX)
6995 return -1;
6996
6997 if (TREE_CODE (type) == RECORD_TYPE)
6998 {
6999 classify_data_t data = { false, false };
7000 traverse_record_type<classify_data_t, classify_registers>
7001 (type, named, &data);
7002
7003 if (data.fp_regs)
7004 {
7005 /* If all FP slots are filled except for the last one and
7006 there is no FP field in the first word, then must pass
7007 on stack. */
7008 if (slotno >= SPARC_FP_ARG_MAX - 1
7009 && !data.fp_regs_in_first_word)
7010 return -1;
7011 }
7012 else
7013 {
7014 /* If all int slots are filled, then must pass on stack. */
7015 if (slotno >= SPARC_INT_ARG_MAX)
7016 return -1;
7017 }
7018
7019 /* PREGNO isn't set since both int and FP regs can be used. */
7020 return slotno;
7021 }
7022
7023 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7024 }
7025 else
7026 {
7027 /* If all arg slots are filled, then must pass on stack. */
7028 if (slotno >= SPARC_INT_ARG_MAX)
7029 return -1;
7030
7031 regno = regbase + slotno;
7032 }
7033 break;
7034
7035 default :
7036 gcc_unreachable ();
7037 }
7038
7039 *pregno = regno;
7040 return slotno;
7041 }
7042
7043 /* Handle recursive register counting/assigning for structure layout. */
7044
7045 typedef struct
7046 {
7047 int slotno; /* slot number of the argument. */
7048 int regbase; /* regno of the base register. */
7049 int intoffset; /* offset of the first pending integer field. */
7050 int nregs; /* number of words passed in registers. */
7051 bool stack; /* true if part of the argument is on the stack. */
7052 rtx ret; /* return expression being built. */
7053 } assign_data_t;
7054
7055 /* A subroutine of function_arg_record_value. Compute the number of integer
7056 registers to be assigned between PARMS->intoffset and BITPOS. Return
7057 true if at least one integer register is assigned or false otherwise. */
7058
7059 static bool
7060 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7061 {
7062 if (data->intoffset < 0)
7063 return false;
7064
7065 const int intoffset = data->intoffset;
7066 data->intoffset = -1;
7067
7068 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7069 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7070 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7071 int nregs = (endbit - startbit) / BITS_PER_WORD;
7072
7073 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7074 {
7075 nregs = SPARC_INT_ARG_MAX - this_slotno;
7076
7077 /* We need to pass this field (partly) on the stack. */
7078 data->stack = 1;
7079 }
7080
7081 if (nregs <= 0)
7082 return false;
7083
7084 *pnregs = nregs;
7085 return true;
7086 }
7087
7088 /* A subroutine of function_arg_record_value. Compute the number and the mode
7089 of the FP registers to be assigned for FIELD. Return true if at least one
7090 FP register is assigned or false otherwise. */
7091
7092 static bool
7093 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7094 int *pnregs, machine_mode *pmode)
7095 {
7096 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7097 machine_mode mode = DECL_MODE (field);
7098 int nregs, nslots;
7099
7100 /* Slots are counted as words while regs are counted as having the size of
7101 the (inner) mode. */
7102 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7103 {
7104 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7105 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7106 }
7107 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7108 {
7109 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7110 nregs = 2;
7111 }
7112 else
7113 nregs = 1;
7114
7115 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7116
7117 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7118 {
7119 nslots = SPARC_FP_ARG_MAX - this_slotno;
7120 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7121
7122 /* We need to pass this field (partly) on the stack. */
7123 data->stack = 1;
7124
7125 if (nregs <= 0)
7126 return false;
7127 }
7128
7129 *pnregs = nregs;
7130 *pmode = mode;
7131 return true;
7132 }
7133
7134 /* A subroutine of function_arg_record_value. Count the number of registers
7135 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7136
7137 inline void
7138 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7139 {
7140 if (fp)
7141 {
7142 int nregs;
7143 machine_mode mode;
7144
7145 if (compute_int_layout (bitpos, data, &nregs))
7146 data->nregs += nregs;
7147
7148 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7149 data->nregs += nregs;
7150 }
7151 else
7152 {
7153 if (data->intoffset < 0)
7154 data->intoffset = bitpos;
7155 }
7156 }
7157
7158 /* A subroutine of function_arg_record_value. Assign the bits of the
7159 structure between PARMS->intoffset and BITPOS to integer registers. */
7160
7161 static void
7162 assign_int_registers (int bitpos, assign_data_t *data)
7163 {
7164 int intoffset = data->intoffset;
7165 machine_mode mode;
7166 int nregs;
7167
7168 if (!compute_int_layout (bitpos, data, &nregs))
7169 return;
7170
7171 /* If this is the trailing part of a word, only load that much into
7172 the register. Otherwise load the whole register. Note that in
7173 the latter case we may pick up unwanted bits. It's not a problem
7174 at the moment but may wish to revisit. */
7175 if (intoffset % BITS_PER_WORD != 0)
7176 mode = smallest_int_mode_for_size (BITS_PER_WORD
7177 - intoffset % BITS_PER_WORD);
7178 else
7179 mode = word_mode;
7180
7181 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7182 unsigned int regno = data->regbase + this_slotno;
7183 intoffset /= BITS_PER_UNIT;
7184
7185 do
7186 {
7187 rtx reg = gen_rtx_REG (mode, regno);
7188 XVECEXP (data->ret, 0, data->stack + data->nregs)
7189 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7190 data->nregs += 1;
7191 mode = word_mode;
7192 regno += 1;
7193 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7194 }
7195 while (--nregs > 0);
7196 }
7197
7198 /* A subroutine of function_arg_record_value. Assign FIELD at position
7199 BITPOS to FP registers. */
7200
7201 static void
7202 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7203 {
7204 int nregs;
7205 machine_mode mode;
7206
7207 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7208 return;
7209
7210 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7211 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7212 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7213 regno++;
7214 int pos = bitpos / BITS_PER_UNIT;
7215
7216 do
7217 {
7218 rtx reg = gen_rtx_REG (mode, regno);
7219 XVECEXP (data->ret, 0, data->stack + data->nregs)
7220 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7221 data->nregs += 1;
7222 regno += GET_MODE_SIZE (mode) / 4;
7223 pos += GET_MODE_SIZE (mode);
7224 }
7225 while (--nregs > 0);
7226 }
7227
7228 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7229 the structure between PARMS->intoffset and BITPOS to registers. */
7230
7231 inline void
7232 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7233 {
7234 if (fp)
7235 {
7236 assign_int_registers (bitpos, data);
7237
7238 assign_fp_registers (field, bitpos, data);
7239 }
7240 else
7241 {
7242 if (data->intoffset < 0)
7243 data->intoffset = bitpos;
7244 }
7245 }
7246
7247 /* Used by function_arg and function_value to implement the complex
7248 conventions of the 64-bit ABI for passing and returning structures.
7249 Return an expression valid as a return value for the FUNCTION_ARG
7250 and TARGET_FUNCTION_VALUE.
7251
7252 TYPE is the data type of the argument (as a tree).
7253 This is null for libcalls where that information may
7254 not be available.
7255 MODE is the argument's machine mode.
7256 SLOTNO is the index number of the argument's slot in the parameter array.
7257 NAMED is true if this argument is a named parameter
7258 (otherwise it is an extra parameter matching an ellipsis).
7259 REGBASE is the regno of the base register for the parameter array. */
7260
7261 static rtx
7262 function_arg_record_value (const_tree type, machine_mode mode,
7263 int slotno, bool named, int regbase)
7264 {
7265 const int size = int_size_in_bytes (type);
7266 assign_data_t data;
7267 int nregs;
7268
7269 data.slotno = slotno;
7270 data.regbase = regbase;
7271
7272 /* Count how many registers we need. */
7273 data.nregs = 0;
7274 data.intoffset = 0;
7275 data.stack = false;
7276 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7277
7278 /* Take into account pending integer fields. */
7279 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7280 data.nregs += nregs;
7281
7282 /* Allocate the vector and handle some annoying special cases. */
7283 nregs = data.nregs;
7284
7285 if (nregs == 0)
7286 {
7287 /* ??? Empty structure has no value? Duh? */
7288 if (size <= 0)
7289 {
7290 /* Though there's nothing really to store, return a word register
7291 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7292 leads to breakage due to the fact that there are zero bytes to
7293 load. */
7294 return gen_rtx_REG (mode, regbase);
7295 }
7296
7297 /* ??? C++ has structures with no fields, and yet a size. Give up
7298 for now and pass everything back in integer registers. */
7299 nregs = CEIL_NWORDS (size);
7300 if (nregs + slotno > SPARC_INT_ARG_MAX)
7301 nregs = SPARC_INT_ARG_MAX - slotno;
7302 }
7303
7304 gcc_assert (nregs > 0);
7305
7306 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7307
7308 /* If at least one field must be passed on the stack, generate
7309 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7310 also be passed on the stack. We can't do much better because the
7311 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7312 of structures for which the fields passed exclusively in registers
7313 are not at the beginning of the structure. */
7314 if (data.stack)
7315 XVECEXP (data.ret, 0, 0)
7316 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7317
7318 /* Assign the registers. */
7319 data.nregs = 0;
7320 data.intoffset = 0;
7321 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7322
7323 /* Assign pending integer fields. */
7324 assign_int_registers (size * BITS_PER_UNIT, &data);
7325
7326 gcc_assert (data.nregs == nregs);
7327
7328 return data.ret;
7329 }
7330
7331 /* Used by function_arg and function_value to implement the conventions
7332 of the 64-bit ABI for passing and returning unions.
7333 Return an expression valid as a return value for the FUNCTION_ARG
7334 and TARGET_FUNCTION_VALUE.
7335
7336 SIZE is the size in bytes of the union.
7337 MODE is the argument's machine mode.
7338 SLOTNO is the index number of the argument's slot in the parameter array.
7339 REGNO is the hard register the union will be passed in. */
7340
7341 static rtx
7342 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7343 {
7344 unsigned int nwords;
7345
7346 /* See comment in function_arg_record_value for empty structures. */
7347 if (size <= 0)
7348 return gen_rtx_REG (mode, regno);
7349
7350 if (slotno == SPARC_INT_ARG_MAX - 1)
7351 nwords = 1;
7352 else
7353 nwords = CEIL_NWORDS (size);
7354
7355 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7356
7357 /* Unions are passed left-justified. */
7358 for (unsigned int i = 0; i < nwords; i++)
7359 XVECEXP (regs, 0, i)
7360 = gen_rtx_EXPR_LIST (VOIDmode,
7361 gen_rtx_REG (word_mode, regno + i),
7362 GEN_INT (UNITS_PER_WORD * i));
7363
7364 return regs;
7365 }
7366
7367 /* Used by function_arg and function_value to implement the conventions
7368 of the 64-bit ABI for passing and returning BLKmode vectors.
7369 Return an expression valid as a return value for the FUNCTION_ARG
7370 and TARGET_FUNCTION_VALUE.
7371
7372 SIZE is the size in bytes of the vector.
7373 SLOTNO is the index number of the argument's slot in the parameter array.
7374 NAMED is true if this argument is a named parameter
7375 (otherwise it is an extra parameter matching an ellipsis).
7376 REGNO is the hard register the vector will be passed in. */
7377
7378 static rtx
7379 function_arg_vector_value (int size, int slotno, bool named, int regno)
7380 {
7381 const int mult = (named ? 2 : 1);
7382 unsigned int nwords;
7383
7384 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7385 nwords = 1;
7386 else
7387 nwords = CEIL_NWORDS (size);
7388
7389 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7390
7391 if (size < UNITS_PER_WORD)
7392 XVECEXP (regs, 0, 0)
7393 = gen_rtx_EXPR_LIST (VOIDmode,
7394 gen_rtx_REG (SImode, regno),
7395 const0_rtx);
7396 else
7397 for (unsigned int i = 0; i < nwords; i++)
7398 XVECEXP (regs, 0, i)
7399 = gen_rtx_EXPR_LIST (VOIDmode,
7400 gen_rtx_REG (word_mode, regno + i * mult),
7401 GEN_INT (i * UNITS_PER_WORD));
7402
7403 return regs;
7404 }
7405
7406 /* Determine where to put an argument to a function.
7407 Value is zero to push the argument on the stack,
7408 or a hard register in which to store the argument.
7409
7410 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7411 the preceding args and about the function being called.
7412 MODE is the argument's machine mode.
7413 TYPE is the data type of the argument (as a tree).
7414 This is null for libcalls where that information may
7415 not be available.
7416 NAMED is true if this argument is a named parameter
7417 (otherwise it is an extra parameter matching an ellipsis).
7418 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7419 TARGET_FUNCTION_INCOMING_ARG. */
7420
7421 static rtx
7422 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7423 const_tree type, bool named, bool incoming)
7424 {
7425 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7426 const int regbase
7427 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7428 int slotno, regno, padding;
7429 enum mode_class mclass = GET_MODE_CLASS (mode);
7430
7431 slotno
7432 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7433 if (slotno == -1)
7434 return 0;
7435
7436 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7437 if (type && VECTOR_INTEGER_TYPE_P (type))
7438 mclass = MODE_FLOAT;
7439
7440 if (TARGET_ARCH32)
7441 return gen_rtx_REG (mode, regno);
7442
7443 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7444 and are promoted to registers if possible. */
7445 if (type && TREE_CODE (type) == RECORD_TYPE)
7446 {
7447 const int size = int_size_in_bytes (type);
7448 gcc_assert (size <= 16);
7449
7450 return function_arg_record_value (type, mode, slotno, named, regbase);
7451 }
7452
7453 /* Unions up to 16 bytes in size are passed in integer registers. */
7454 else if (type && TREE_CODE (type) == UNION_TYPE)
7455 {
7456 const int size = int_size_in_bytes (type);
7457 gcc_assert (size <= 16);
7458
7459 return function_arg_union_value (size, mode, slotno, regno);
7460 }
7461
7462 /* Floating-point vectors up to 16 bytes are passed in registers. */
7463 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7464 {
7465 const int size = int_size_in_bytes (type);
7466 gcc_assert (size <= 16);
7467
7468 return function_arg_vector_value (size, slotno, named, regno);
7469 }
7470
7471 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7472 but also have the slot allocated for them.
7473 If no prototype is in scope fp values in register slots get passed
7474 in two places, either fp regs and int regs or fp regs and memory. */
7475 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7476 && SPARC_FP_REG_P (regno))
7477 {
7478 rtx reg = gen_rtx_REG (mode, regno);
7479 if (cum->prototype_p || cum->libcall_p)
7480 return reg;
7481 else
7482 {
7483 rtx v0, v1;
7484
7485 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7486 {
7487 int intreg;
7488
7489 /* On incoming, we don't need to know that the value
7490 is passed in %f0 and %i0, and it confuses other parts
7491 causing needless spillage even on the simplest cases. */
7492 if (incoming)
7493 return reg;
7494
7495 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7496 + (regno - SPARC_FP_ARG_FIRST) / 2);
7497
7498 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7499 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7500 const0_rtx);
7501 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7502 }
7503 else
7504 {
7505 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7506 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7507 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7508 }
7509 }
7510 }
7511
7512 /* All other aggregate types are passed in an integer register in a mode
7513 corresponding to the size of the type. */
7514 else if (type && AGGREGATE_TYPE_P (type))
7515 {
7516 const int size = int_size_in_bytes (type);
7517 gcc_assert (size <= 16);
7518
7519 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7520 }
7521
7522 return gen_rtx_REG (mode, regno);
7523 }
7524
7525 /* Handle the TARGET_FUNCTION_ARG target hook. */
7526
7527 static rtx
7528 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7529 const_tree type, bool named)
7530 {
7531 return sparc_function_arg_1 (cum, mode, type, named, false);
7532 }
7533
7534 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7535
7536 static rtx
7537 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7538 const_tree type, bool named)
7539 {
7540 return sparc_function_arg_1 (cum, mode, type, named, true);
7541 }
7542
7543 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7544
7545 static unsigned int
7546 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7547 {
7548 return ((TARGET_ARCH64
7549 && (GET_MODE_ALIGNMENT (mode) == 128
7550 || (type && TYPE_ALIGN (type) == 128)))
7551 ? 128
7552 : PARM_BOUNDARY);
7553 }
7554
7555 /* For an arg passed partly in registers and partly in memory,
7556 this is the number of bytes of registers used.
7557 For args passed entirely in registers or entirely in memory, zero.
7558
7559 Any arg that starts in the first 6 regs but won't entirely fit in them
7560 needs partial registers on v8. On v9, structures with integer
7561 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7562 values that begin in the last fp reg [where "last fp reg" varies with the
7563 mode] will be split between that reg and memory. */
7564
7565 static int
7566 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7567 tree type, bool named)
7568 {
7569 int slotno, regno, padding;
7570
7571 /* We pass false for incoming here, it doesn't matter. */
7572 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7573 false, &regno, &padding);
7574
7575 if (slotno == -1)
7576 return 0;
7577
7578 if (TARGET_ARCH32)
7579 {
7580 /* We are guaranteed by pass_by_reference that the size of the
7581 argument is not greater than 8 bytes, so we only need to return
7582 one word if the argument is partially passed in registers. */
7583 const int size = GET_MODE_SIZE (mode);
7584
7585 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7586 return UNITS_PER_WORD;
7587 }
7588 else
7589 {
7590 /* We are guaranteed by pass_by_reference that the size of the
7591 argument is not greater than 16 bytes, so we only need to return
7592 one word if the argument is partially passed in registers. */
7593 if (type && AGGREGATE_TYPE_P (type))
7594 {
7595 const int size = int_size_in_bytes (type);
7596
7597 if (size > UNITS_PER_WORD
7598 && (slotno == SPARC_INT_ARG_MAX - 1
7599 || slotno == SPARC_FP_ARG_MAX - 1))
7600 return UNITS_PER_WORD;
7601 }
7602 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7603 || ((GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7604 || (type && VECTOR_TYPE_P (type)))
7605 && !(TARGET_FPU && named)))
7606 {
7607 const int size = (type && VECTOR_FLOAT_TYPE_P (type))
7608 ? int_size_in_bytes (type)
7609 : GET_MODE_SIZE (mode);
7610
7611 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7612 return UNITS_PER_WORD;
7613 }
7614 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7615 || (type && VECTOR_TYPE_P (type)))
7616 {
7617 const int size = (type && VECTOR_FLOAT_TYPE_P (type))
7618 ? int_size_in_bytes (type)
7619 : GET_MODE_SIZE (mode);
7620
7621 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7622 return UNITS_PER_WORD;
7623 }
7624 }
7625
7626 return 0;
7627 }
7628
7629 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7630 Update the data in CUM to advance over an argument
7631 of mode MODE and data type TYPE.
7632 TYPE is null for libcalls where that information may not be available. */
7633
7634 static void
7635 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7636 const_tree type, bool named)
7637 {
7638 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7639 int regno, padding;
7640
7641 /* We pass false for incoming here, it doesn't matter. */
7642 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7643
7644 /* If argument requires leading padding, add it. */
7645 cum->words += padding;
7646
7647 if (TARGET_ARCH32)
7648 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7649 else
7650 {
7651 /* For types that can have BLKmode, get the size from the type. */
7652 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7653 {
7654 const int size = int_size_in_bytes (type);
7655
7656 /* See comment in function_arg_record_value for empty structures. */
7657 if (size <= 0)
7658 cum->words++;
7659 else
7660 cum->words += CEIL_NWORDS (size);
7661 }
7662 else
7663 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7664 }
7665 }
7666
7667 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7668 are always stored left shifted in their argument slot. */
7669
7670 static pad_direction
7671 sparc_function_arg_padding (machine_mode mode, const_tree type)
7672 {
7673 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7674 return PAD_UPWARD;
7675
7676 /* Fall back to the default. */
7677 return default_function_arg_padding (mode, type);
7678 }
7679
7680 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7681 Specify whether to return the return value in memory. */
7682
7683 static bool
7684 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7685 {
7686 if (TARGET_ARCH32)
7687 /* Original SPARC 32-bit ABI says that structures and unions, and
7688 quad-precision floats are returned in memory. But note that the
7689 first part is implemented through -fpcc-struct-return being the
7690 default, so here we only implement -freg-struct-return instead.
7691 All other base types are returned in registers.
7692
7693 Extended ABI (as implemented by the Sun compiler) says that
7694 all complex floats are returned in registers (8 FP registers
7695 at most for '_Complex long double'). Return all complex integers
7696 in registers (4 at most for '_Complex long long').
7697
7698 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7699 integers are returned like floats of the same size, that is in
7700 registers up to 8 bytes and in memory otherwise. Return all
7701 vector floats in memory like structure and unions; note that
7702 they always have BLKmode like the latter. */
7703 return (TYPE_MODE (type) == BLKmode
7704 || TYPE_MODE (type) == TFmode
7705 || (TREE_CODE (type) == VECTOR_TYPE
7706 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7707 else
7708 /* Original SPARC 64-bit ABI says that structures and unions
7709 smaller than 32 bytes are returned in registers, as well as
7710 all other base types.
7711
7712 Extended ABI (as implemented by the Sun compiler) says that all
7713 complex floats are returned in registers (8 FP registers at most
7714 for '_Complex long double'). Return all complex integers in
7715 registers (4 at most for '_Complex TItype').
7716
7717 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7718 integers are returned like floats of the same size, that is in
7719 registers. Return all vector floats like structure and unions;
7720 note that they always have BLKmode like the latter. */
7721 return (TYPE_MODE (type) == BLKmode
7722 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7723 }
7724
7725 /* Handle the TARGET_STRUCT_VALUE target hook.
7726 Return where to find the structure return value address. */
7727
7728 static rtx
7729 sparc_struct_value_rtx (tree fndecl, int incoming)
7730 {
7731 if (TARGET_ARCH64)
7732 return NULL_RTX;
7733 else
7734 {
7735 rtx mem;
7736
7737 if (incoming)
7738 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7739 STRUCT_VALUE_OFFSET));
7740 else
7741 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7742 STRUCT_VALUE_OFFSET));
7743
7744 /* Only follow the SPARC ABI for fixed-size structure returns.
7745 Variable size structure returns are handled per the normal
7746 procedures in GCC. This is enabled by -mstd-struct-return */
7747 if (incoming == 2
7748 && sparc_std_struct_return
7749 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7750 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7751 {
7752 /* We must check and adjust the return address, as it is optional
7753 as to whether the return object is really provided. */
7754 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7755 rtx scratch = gen_reg_rtx (SImode);
7756 rtx_code_label *endlab = gen_label_rtx ();
7757
7758 /* Calculate the return object size. */
7759 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7760 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7761 /* Construct a temporary return value. */
7762 rtx temp_val
7763 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7764
7765 /* Implement SPARC 32-bit psABI callee return struct checking:
7766
7767 Fetch the instruction where we will return to and see if
7768 it's an unimp instruction (the most significant 10 bits
7769 will be zero). */
7770 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7771 plus_constant (Pmode,
7772 ret_reg, 8)));
7773 /* Assume the size is valid and pre-adjust. */
7774 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7775 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7776 0, endlab);
7777 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7778 /* Write the address of the memory pointed to by temp_val into
7779 the memory pointed to by mem. */
7780 emit_move_insn (mem, XEXP (temp_val, 0));
7781 emit_label (endlab);
7782 }
7783
7784 return mem;
7785 }
7786 }
7787
7788 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7789 For v9, function return values are subject to the same rules as arguments,
7790 except that up to 32 bytes may be returned in registers. */
7791
7792 static rtx
7793 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7794 {
7795 /* Beware that the two values are swapped here wrt function_arg. */
7796 const int regbase
7797 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7798 enum mode_class mclass = GET_MODE_CLASS (mode);
7799 int regno;
7800
7801 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7802 Note that integer vectors larger than 16 bytes have BLKmode so
7803 they need to be handled like floating-point vectors below. */
7804 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7805 mclass = MODE_FLOAT;
7806
7807 if (TARGET_ARCH64 && type)
7808 {
7809 /* Structures up to 32 bytes in size are returned in registers. */
7810 if (TREE_CODE (type) == RECORD_TYPE)
7811 {
7812 const int size = int_size_in_bytes (type);
7813 gcc_assert (size <= 32);
7814
7815 return function_arg_record_value (type, mode, 0, true, regbase);
7816 }
7817
7818 /* Unions up to 32 bytes in size are returned in integer registers. */
7819 else if (TREE_CODE (type) == UNION_TYPE)
7820 {
7821 const int size = int_size_in_bytes (type);
7822 gcc_assert (size <= 32);
7823
7824 return function_arg_union_value (size, mode, 0, regbase);
7825 }
7826
7827 /* Vectors up to 32 bytes are returned in FP registers. */
7828 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7829 {
7830 const int size = int_size_in_bytes (type);
7831 gcc_assert (size <= 32);
7832
7833 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7834 }
7835
7836 /* Objects that require it are returned in FP registers. */
7837 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7838 ;
7839
7840 /* All other aggregate types are returned in an integer register in a
7841 mode corresponding to the size of the type. */
7842 else if (AGGREGATE_TYPE_P (type))
7843 {
7844 /* All other aggregate types are passed in an integer register
7845 in a mode corresponding to the size of the type. */
7846 const int size = int_size_in_bytes (type);
7847 gcc_assert (size <= 32);
7848
7849 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7850
7851 /* ??? We probably should have made the same ABI change in
7852 3.4.0 as the one we made for unions. The latter was
7853 required by the SCD though, while the former is not
7854 specified, so we favored compatibility and efficiency.
7855
7856 Now we're stuck for aggregates larger than 16 bytes,
7857 because OImode vanished in the meantime. Let's not
7858 try to be unduly clever, and simply follow the ABI
7859 for unions in that case. */
7860 if (mode == BLKmode)
7861 return function_arg_union_value (size, mode, 0, regbase);
7862 else
7863 mclass = MODE_INT;
7864 }
7865
7866 /* We should only have pointer and integer types at this point. This
7867 must match sparc_promote_function_mode. */
7868 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7869 mode = word_mode;
7870 }
7871
7872 /* We should only have pointer and integer types at this point, except with
7873 -freg-struct-return. This must match sparc_promote_function_mode. */
7874 else if (TARGET_ARCH32
7875 && !(type && AGGREGATE_TYPE_P (type))
7876 && mclass == MODE_INT
7877 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7878 mode = word_mode;
7879
7880 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7881 regno = SPARC_FP_ARG_FIRST;
7882 else
7883 regno = regbase;
7884
7885 return gen_rtx_REG (mode, regno);
7886 }
7887
7888 /* Handle TARGET_FUNCTION_VALUE.
7889 On the SPARC, the value is found in the first "output" register, but the
7890 called function leaves it in the first "input" register. */
7891
7892 static rtx
7893 sparc_function_value (const_tree valtype,
7894 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7895 bool outgoing)
7896 {
7897 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7898 }
7899
7900 /* Handle TARGET_LIBCALL_VALUE. */
7901
7902 static rtx
7903 sparc_libcall_value (machine_mode mode,
7904 const_rtx fun ATTRIBUTE_UNUSED)
7905 {
7906 return sparc_function_value_1 (NULL_TREE, mode, false);
7907 }
7908
7909 /* Handle FUNCTION_VALUE_REGNO_P.
7910 On the SPARC, the first "output" reg is used for integer values, and the
7911 first floating point register is used for floating point values. */
7912
7913 static bool
7914 sparc_function_value_regno_p (const unsigned int regno)
7915 {
7916 return (regno == 8 || (TARGET_FPU && regno == 32));
7917 }
7918
7919 /* Do what is necessary for `va_start'. We look at the current function
7920 to determine if stdarg or varargs is used and return the address of
7921 the first unnamed parameter. */
7922
7923 static rtx
7924 sparc_builtin_saveregs (void)
7925 {
7926 int first_reg = crtl->args.info.words;
7927 rtx address;
7928 int regno;
7929
7930 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7931 emit_move_insn (gen_rtx_MEM (word_mode,
7932 gen_rtx_PLUS (Pmode,
7933 frame_pointer_rtx,
7934 GEN_INT (FIRST_PARM_OFFSET (0)
7935 + (UNITS_PER_WORD
7936 * regno)))),
7937 gen_rtx_REG (word_mode,
7938 SPARC_INCOMING_INT_ARG_FIRST + regno));
7939
7940 address = gen_rtx_PLUS (Pmode,
7941 frame_pointer_rtx,
7942 GEN_INT (FIRST_PARM_OFFSET (0)
7943 + UNITS_PER_WORD * first_reg));
7944
7945 return address;
7946 }
7947
7948 /* Implement `va_start' for stdarg. */
7949
7950 static void
7951 sparc_va_start (tree valist, rtx nextarg)
7952 {
7953 nextarg = expand_builtin_saveregs ();
7954 std_expand_builtin_va_start (valist, nextarg);
7955 }
7956
7957 /* Implement `va_arg' for stdarg. */
7958
7959 static tree
7960 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7961 gimple_seq *post_p)
7962 {
7963 HOST_WIDE_INT size, rsize, align;
7964 tree addr, incr;
7965 bool indirect;
7966 tree ptrtype = build_pointer_type (type);
7967
7968 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7969 {
7970 indirect = true;
7971 size = rsize = UNITS_PER_WORD;
7972 align = 0;
7973 }
7974 else
7975 {
7976 indirect = false;
7977 size = int_size_in_bytes (type);
7978 rsize = ROUND_UP (size, UNITS_PER_WORD);
7979 align = 0;
7980
7981 if (TARGET_ARCH64)
7982 {
7983 /* For SPARC64, objects requiring 16-byte alignment get it. */
7984 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7985 align = 2 * UNITS_PER_WORD;
7986
7987 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7988 are left-justified in their slots. */
7989 if (AGGREGATE_TYPE_P (type))
7990 {
7991 if (size == 0)
7992 size = rsize = UNITS_PER_WORD;
7993 else
7994 size = rsize;
7995 }
7996 }
7997 }
7998
7999 incr = valist;
8000 if (align)
8001 {
8002 incr = fold_build_pointer_plus_hwi (incr, align - 1);
8003 incr = fold_convert (sizetype, incr);
8004 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
8005 size_int (-align));
8006 incr = fold_convert (ptr_type_node, incr);
8007 }
8008
8009 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
8010 addr = incr;
8011
8012 if (BYTES_BIG_ENDIAN && size < rsize)
8013 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8014
8015 if (indirect)
8016 {
8017 addr = fold_convert (build_pointer_type (ptrtype), addr);
8018 addr = build_va_arg_indirect_ref (addr);
8019 }
8020
8021 /* If the address isn't aligned properly for the type, we need a temporary.
8022 FIXME: This is inefficient, usually we can do this in registers. */
8023 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8024 {
8025 tree tmp = create_tmp_var (type, "va_arg_tmp");
8026 tree dest_addr = build_fold_addr_expr (tmp);
8027 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8028 3, dest_addr, addr, size_int (rsize));
8029 TREE_ADDRESSABLE (tmp) = 1;
8030 gimplify_and_add (copy, pre_p);
8031 addr = dest_addr;
8032 }
8033
8034 else
8035 addr = fold_convert (ptrtype, addr);
8036
8037 incr = fold_build_pointer_plus_hwi (incr, rsize);
8038 gimplify_assign (valist, incr, post_p);
8039
8040 return build_va_arg_indirect_ref (addr);
8041 }
8042 \f
8043 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8044 Specify whether the vector mode is supported by the hardware. */
8045
8046 static bool
8047 sparc_vector_mode_supported_p (machine_mode mode)
8048 {
8049 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8050 }
8051 \f
8052 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8053
8054 static machine_mode
8055 sparc_preferred_simd_mode (scalar_mode mode)
8056 {
8057 if (TARGET_VIS)
8058 switch (mode)
8059 {
8060 case E_SImode:
8061 return V2SImode;
8062 case E_HImode:
8063 return V4HImode;
8064 case E_QImode:
8065 return V8QImode;
8066
8067 default:;
8068 }
8069
8070 return word_mode;
8071 }
8072 \f
8073 \f/* Implement TARGET_CAN_FOLLOW_JUMP. */
8074
8075 static bool
8076 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8077 {
8078 /* Do not fold unconditional jumps that have been created for crossing
8079 partition boundaries. */
8080 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8081 return false;
8082
8083 return true;
8084 }
8085
8086 /* Return the string to output an unconditional branch to LABEL, which is
8087 the operand number of the label.
8088
8089 DEST is the destination insn (i.e. the label), INSN is the source. */
8090
8091 const char *
8092 output_ubranch (rtx dest, rtx_insn *insn)
8093 {
8094 static char string[64];
8095 bool v9_form = false;
8096 int delta;
8097 char *p;
8098
8099 /* Even if we are trying to use cbcond for this, evaluate
8100 whether we can use V9 branches as our backup plan. */
8101 delta = 5000000;
8102 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8103 delta = (INSN_ADDRESSES (INSN_UID (dest))
8104 - INSN_ADDRESSES (INSN_UID (insn)));
8105
8106 /* Leave some instructions for "slop". */
8107 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8108 v9_form = true;
8109
8110 if (TARGET_CBCOND)
8111 {
8112 bool emit_nop = emit_cbcond_nop (insn);
8113 bool far = false;
8114 const char *rval;
8115
8116 if (delta < -500 || delta > 500)
8117 far = true;
8118
8119 if (far)
8120 {
8121 if (v9_form)
8122 rval = "ba,a,pt\t%%xcc, %l0";
8123 else
8124 rval = "b,a\t%l0";
8125 }
8126 else
8127 {
8128 if (emit_nop)
8129 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8130 else
8131 rval = "cwbe\t%%g0, %%g0, %l0";
8132 }
8133 return rval;
8134 }
8135
8136 if (v9_form)
8137 strcpy (string, "ba%*,pt\t%%xcc, ");
8138 else
8139 strcpy (string, "b%*\t");
8140
8141 p = strchr (string, '\0');
8142 *p++ = '%';
8143 *p++ = 'l';
8144 *p++ = '0';
8145 *p++ = '%';
8146 *p++ = '(';
8147 *p = '\0';
8148
8149 return string;
8150 }
8151
8152 /* Return the string to output a conditional branch to LABEL, which is
8153 the operand number of the label. OP is the conditional expression.
8154 XEXP (OP, 0) is assumed to be a condition code register (integer or
8155 floating point) and its mode specifies what kind of comparison we made.
8156
8157 DEST is the destination insn (i.e. the label), INSN is the source.
8158
8159 REVERSED is nonzero if we should reverse the sense of the comparison.
8160
8161 ANNUL is nonzero if we should generate an annulling branch. */
8162
8163 const char *
8164 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8165 rtx_insn *insn)
8166 {
8167 static char string[64];
8168 enum rtx_code code = GET_CODE (op);
8169 rtx cc_reg = XEXP (op, 0);
8170 machine_mode mode = GET_MODE (cc_reg);
8171 const char *labelno, *branch;
8172 int spaces = 8, far;
8173 char *p;
8174
8175 /* v9 branches are limited to +-1MB. If it is too far away,
8176 change
8177
8178 bne,pt %xcc, .LC30
8179
8180 to
8181
8182 be,pn %xcc, .+12
8183 nop
8184 ba .LC30
8185
8186 and
8187
8188 fbne,a,pn %fcc2, .LC29
8189
8190 to
8191
8192 fbe,pt %fcc2, .+16
8193 nop
8194 ba .LC29 */
8195
8196 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8197 if (reversed ^ far)
8198 {
8199 /* Reversal of FP compares takes care -- an ordered compare
8200 becomes an unordered compare and vice versa. */
8201 if (mode == CCFPmode || mode == CCFPEmode)
8202 code = reverse_condition_maybe_unordered (code);
8203 else
8204 code = reverse_condition (code);
8205 }
8206
8207 /* Start by writing the branch condition. */
8208 if (mode == CCFPmode || mode == CCFPEmode)
8209 {
8210 switch (code)
8211 {
8212 case NE:
8213 branch = "fbne";
8214 break;
8215 case EQ:
8216 branch = "fbe";
8217 break;
8218 case GE:
8219 branch = "fbge";
8220 break;
8221 case GT:
8222 branch = "fbg";
8223 break;
8224 case LE:
8225 branch = "fble";
8226 break;
8227 case LT:
8228 branch = "fbl";
8229 break;
8230 case UNORDERED:
8231 branch = "fbu";
8232 break;
8233 case ORDERED:
8234 branch = "fbo";
8235 break;
8236 case UNGT:
8237 branch = "fbug";
8238 break;
8239 case UNLT:
8240 branch = "fbul";
8241 break;
8242 case UNEQ:
8243 branch = "fbue";
8244 break;
8245 case UNGE:
8246 branch = "fbuge";
8247 break;
8248 case UNLE:
8249 branch = "fbule";
8250 break;
8251 case LTGT:
8252 branch = "fblg";
8253 break;
8254 default:
8255 gcc_unreachable ();
8256 }
8257
8258 /* ??? !v9: FP branches cannot be preceded by another floating point
8259 insn. Because there is currently no concept of pre-delay slots,
8260 we can fix this only by always emitting a nop before a floating
8261 point branch. */
8262
8263 string[0] = '\0';
8264 if (! TARGET_V9)
8265 strcpy (string, "nop\n\t");
8266 strcat (string, branch);
8267 }
8268 else
8269 {
8270 switch (code)
8271 {
8272 case NE:
8273 if (mode == CCVmode || mode == CCXVmode)
8274 branch = "bvs";
8275 else
8276 branch = "bne";
8277 break;
8278 case EQ:
8279 if (mode == CCVmode || mode == CCXVmode)
8280 branch = "bvc";
8281 else
8282 branch = "be";
8283 break;
8284 case GE:
8285 if (mode == CCNZmode || mode == CCXNZmode)
8286 branch = "bpos";
8287 else
8288 branch = "bge";
8289 break;
8290 case GT:
8291 branch = "bg";
8292 break;
8293 case LE:
8294 branch = "ble";
8295 break;
8296 case LT:
8297 if (mode == CCNZmode || mode == CCXNZmode)
8298 branch = "bneg";
8299 else
8300 branch = "bl";
8301 break;
8302 case GEU:
8303 branch = "bgeu";
8304 break;
8305 case GTU:
8306 branch = "bgu";
8307 break;
8308 case LEU:
8309 branch = "bleu";
8310 break;
8311 case LTU:
8312 branch = "blu";
8313 break;
8314 default:
8315 gcc_unreachable ();
8316 }
8317 strcpy (string, branch);
8318 }
8319 spaces -= strlen (branch);
8320 p = strchr (string, '\0');
8321
8322 /* Now add the annulling, the label, and a possible noop. */
8323 if (annul && ! far)
8324 {
8325 strcpy (p, ",a");
8326 p += 2;
8327 spaces -= 2;
8328 }
8329
8330 if (TARGET_V9)
8331 {
8332 rtx note;
8333 int v8 = 0;
8334
8335 if (! far && insn && INSN_ADDRESSES_SET_P ())
8336 {
8337 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8338 - INSN_ADDRESSES (INSN_UID (insn)));
8339 /* Leave some instructions for "slop". */
8340 if (delta < -260000 || delta >= 260000)
8341 v8 = 1;
8342 }
8343
8344 switch (mode)
8345 {
8346 case E_CCmode:
8347 case E_CCNZmode:
8348 case E_CCCmode:
8349 case E_CCVmode:
8350 labelno = "%%icc, ";
8351 if (v8)
8352 labelno = "";
8353 break;
8354 case E_CCXmode:
8355 case E_CCXNZmode:
8356 case E_CCXCmode:
8357 case E_CCXVmode:
8358 labelno = "%%xcc, ";
8359 gcc_assert (!v8);
8360 break;
8361 case E_CCFPmode:
8362 case E_CCFPEmode:
8363 {
8364 static char v9_fcc_labelno[] = "%%fccX, ";
8365 /* Set the char indicating the number of the fcc reg to use. */
8366 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8367 labelno = v9_fcc_labelno;
8368 if (v8)
8369 {
8370 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8371 labelno = "";
8372 }
8373 }
8374 break;
8375 default:
8376 gcc_unreachable ();
8377 }
8378
8379 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8380 {
8381 strcpy (p,
8382 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8383 >= profile_probability::even ()) ^ far)
8384 ? ",pt" : ",pn");
8385 p += 3;
8386 spaces -= 3;
8387 }
8388 }
8389 else
8390 labelno = "";
8391
8392 if (spaces > 0)
8393 *p++ = '\t';
8394 else
8395 *p++ = ' ';
8396 strcpy (p, labelno);
8397 p = strchr (p, '\0');
8398 if (far)
8399 {
8400 strcpy (p, ".+12\n\t nop\n\tb\t");
8401 /* Skip the next insn if requested or
8402 if we know that it will be a nop. */
8403 if (annul || ! final_sequence)
8404 p[3] = '6';
8405 p += 14;
8406 }
8407 *p++ = '%';
8408 *p++ = 'l';
8409 *p++ = label + '0';
8410 *p++ = '%';
8411 *p++ = '#';
8412 *p = '\0';
8413
8414 return string;
8415 }
8416
8417 /* Emit a library call comparison between floating point X and Y.
8418 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8419 Return the new operator to be used in the comparison sequence.
8420
8421 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8422 values as arguments instead of the TFmode registers themselves,
8423 that's why we cannot call emit_float_lib_cmp. */
8424
8425 rtx
8426 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8427 {
8428 const char *qpfunc;
8429 rtx slot0, slot1, result, tem, tem2, libfunc;
8430 machine_mode mode;
8431 enum rtx_code new_comparison;
8432
8433 switch (comparison)
8434 {
8435 case EQ:
8436 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8437 break;
8438
8439 case NE:
8440 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8441 break;
8442
8443 case GT:
8444 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8445 break;
8446
8447 case GE:
8448 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8449 break;
8450
8451 case LT:
8452 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8453 break;
8454
8455 case LE:
8456 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8457 break;
8458
8459 case ORDERED:
8460 case UNORDERED:
8461 case UNGT:
8462 case UNLT:
8463 case UNEQ:
8464 case UNGE:
8465 case UNLE:
8466 case LTGT:
8467 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8468 break;
8469
8470 default:
8471 gcc_unreachable ();
8472 }
8473
8474 if (TARGET_ARCH64)
8475 {
8476 if (MEM_P (x))
8477 {
8478 tree expr = MEM_EXPR (x);
8479 if (expr)
8480 mark_addressable (expr);
8481 slot0 = x;
8482 }
8483 else
8484 {
8485 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8486 emit_move_insn (slot0, x);
8487 }
8488
8489 if (MEM_P (y))
8490 {
8491 tree expr = MEM_EXPR (y);
8492 if (expr)
8493 mark_addressable (expr);
8494 slot1 = y;
8495 }
8496 else
8497 {
8498 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8499 emit_move_insn (slot1, y);
8500 }
8501
8502 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8503 emit_library_call (libfunc, LCT_NORMAL,
8504 DImode,
8505 XEXP (slot0, 0), Pmode,
8506 XEXP (slot1, 0), Pmode);
8507 mode = DImode;
8508 }
8509 else
8510 {
8511 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8512 emit_library_call (libfunc, LCT_NORMAL,
8513 SImode,
8514 x, TFmode, y, TFmode);
8515 mode = SImode;
8516 }
8517
8518
8519 /* Immediately move the result of the libcall into a pseudo
8520 register so reload doesn't clobber the value if it needs
8521 the return register for a spill reg. */
8522 result = gen_reg_rtx (mode);
8523 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8524
8525 switch (comparison)
8526 {
8527 default:
8528 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8529 case ORDERED:
8530 case UNORDERED:
8531 new_comparison = (comparison == UNORDERED ? EQ : NE);
8532 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8533 case UNGT:
8534 case UNGE:
8535 new_comparison = (comparison == UNGT ? GT : NE);
8536 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8537 case UNLE:
8538 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8539 case UNLT:
8540 tem = gen_reg_rtx (mode);
8541 if (TARGET_ARCH32)
8542 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8543 else
8544 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8545 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8546 case UNEQ:
8547 case LTGT:
8548 tem = gen_reg_rtx (mode);
8549 if (TARGET_ARCH32)
8550 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8551 else
8552 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8553 tem2 = gen_reg_rtx (mode);
8554 if (TARGET_ARCH32)
8555 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8556 else
8557 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8558 new_comparison = (comparison == UNEQ ? EQ : NE);
8559 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8560 }
8561
8562 gcc_unreachable ();
8563 }
8564
8565 /* Generate an unsigned DImode to FP conversion. This is the same code
8566 optabs would emit if we didn't have TFmode patterns. */
8567
8568 void
8569 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8570 {
8571 rtx i0, i1, f0, in, out;
8572
8573 out = operands[0];
8574 in = force_reg (DImode, operands[1]);
8575 rtx_code_label *neglab = gen_label_rtx ();
8576 rtx_code_label *donelab = gen_label_rtx ();
8577 i0 = gen_reg_rtx (DImode);
8578 i1 = gen_reg_rtx (DImode);
8579 f0 = gen_reg_rtx (mode);
8580
8581 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8582
8583 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8584 emit_jump_insn (gen_jump (donelab));
8585 emit_barrier ();
8586
8587 emit_label (neglab);
8588
8589 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8590 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8591 emit_insn (gen_iordi3 (i0, i0, i1));
8592 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8593 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8594
8595 emit_label (donelab);
8596 }
8597
8598 /* Generate an FP to unsigned DImode conversion. This is the same code
8599 optabs would emit if we didn't have TFmode patterns. */
8600
8601 void
8602 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8603 {
8604 rtx i0, i1, f0, in, out, limit;
8605
8606 out = operands[0];
8607 in = force_reg (mode, operands[1]);
8608 rtx_code_label *neglab = gen_label_rtx ();
8609 rtx_code_label *donelab = gen_label_rtx ();
8610 i0 = gen_reg_rtx (DImode);
8611 i1 = gen_reg_rtx (DImode);
8612 limit = gen_reg_rtx (mode);
8613 f0 = gen_reg_rtx (mode);
8614
8615 emit_move_insn (limit,
8616 const_double_from_real_value (
8617 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8618 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8619
8620 emit_insn (gen_rtx_SET (out,
8621 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8622 emit_jump_insn (gen_jump (donelab));
8623 emit_barrier ();
8624
8625 emit_label (neglab);
8626
8627 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8628 emit_insn (gen_rtx_SET (i0,
8629 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8630 emit_insn (gen_movdi (i1, const1_rtx));
8631 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8632 emit_insn (gen_xordi3 (out, i0, i1));
8633
8634 emit_label (donelab);
8635 }
8636
8637 /* Return the string to output a compare and branch instruction to DEST.
8638 DEST is the destination insn (i.e. the label), INSN is the source,
8639 and OP is the conditional expression. */
8640
8641 const char *
8642 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8643 {
8644 machine_mode mode = GET_MODE (XEXP (op, 0));
8645 enum rtx_code code = GET_CODE (op);
8646 const char *cond_str, *tmpl;
8647 int far, emit_nop, len;
8648 static char string[64];
8649 char size_char;
8650
8651 /* Compare and Branch is limited to +-2KB. If it is too far away,
8652 change
8653
8654 cxbne X, Y, .LC30
8655
8656 to
8657
8658 cxbe X, Y, .+16
8659 nop
8660 ba,pt xcc, .LC30
8661 nop */
8662
8663 len = get_attr_length (insn);
8664
8665 far = len == 4;
8666 emit_nop = len == 2;
8667
8668 if (far)
8669 code = reverse_condition (code);
8670
8671 size_char = ((mode == SImode) ? 'w' : 'x');
8672
8673 switch (code)
8674 {
8675 case NE:
8676 cond_str = "ne";
8677 break;
8678
8679 case EQ:
8680 cond_str = "e";
8681 break;
8682
8683 case GE:
8684 cond_str = "ge";
8685 break;
8686
8687 case GT:
8688 cond_str = "g";
8689 break;
8690
8691 case LE:
8692 cond_str = "le";
8693 break;
8694
8695 case LT:
8696 cond_str = "l";
8697 break;
8698
8699 case GEU:
8700 cond_str = "cc";
8701 break;
8702
8703 case GTU:
8704 cond_str = "gu";
8705 break;
8706
8707 case LEU:
8708 cond_str = "leu";
8709 break;
8710
8711 case LTU:
8712 cond_str = "cs";
8713 break;
8714
8715 default:
8716 gcc_unreachable ();
8717 }
8718
8719 if (far)
8720 {
8721 int veryfar = 1, delta;
8722
8723 if (INSN_ADDRESSES_SET_P ())
8724 {
8725 delta = (INSN_ADDRESSES (INSN_UID (dest))
8726 - INSN_ADDRESSES (INSN_UID (insn)));
8727 /* Leave some instructions for "slop". */
8728 if (delta >= -260000 && delta < 260000)
8729 veryfar = 0;
8730 }
8731
8732 if (veryfar)
8733 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8734 else
8735 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8736 }
8737 else
8738 {
8739 if (emit_nop)
8740 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8741 else
8742 tmpl = "c%cb%s\t%%1, %%2, %%3";
8743 }
8744
8745 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8746
8747 return string;
8748 }
8749
8750 /* Return the string to output a conditional branch to LABEL, testing
8751 register REG. LABEL is the operand number of the label; REG is the
8752 operand number of the reg. OP is the conditional expression. The mode
8753 of REG says what kind of comparison we made.
8754
8755 DEST is the destination insn (i.e. the label), INSN is the source.
8756
8757 REVERSED is nonzero if we should reverse the sense of the comparison.
8758
8759 ANNUL is nonzero if we should generate an annulling branch. */
8760
8761 const char *
8762 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8763 int annul, rtx_insn *insn)
8764 {
8765 static char string[64];
8766 enum rtx_code code = GET_CODE (op);
8767 machine_mode mode = GET_MODE (XEXP (op, 0));
8768 rtx note;
8769 int far;
8770 char *p;
8771
8772 /* branch on register are limited to +-128KB. If it is too far away,
8773 change
8774
8775 brnz,pt %g1, .LC30
8776
8777 to
8778
8779 brz,pn %g1, .+12
8780 nop
8781 ba,pt %xcc, .LC30
8782
8783 and
8784
8785 brgez,a,pn %o1, .LC29
8786
8787 to
8788
8789 brlz,pt %o1, .+16
8790 nop
8791 ba,pt %xcc, .LC29 */
8792
8793 far = get_attr_length (insn) >= 3;
8794
8795 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8796 if (reversed ^ far)
8797 code = reverse_condition (code);
8798
8799 /* Only 64-bit versions of these instructions exist. */
8800 gcc_assert (mode == DImode);
8801
8802 /* Start by writing the branch condition. */
8803
8804 switch (code)
8805 {
8806 case NE:
8807 strcpy (string, "brnz");
8808 break;
8809
8810 case EQ:
8811 strcpy (string, "brz");
8812 break;
8813
8814 case GE:
8815 strcpy (string, "brgez");
8816 break;
8817
8818 case LT:
8819 strcpy (string, "brlz");
8820 break;
8821
8822 case LE:
8823 strcpy (string, "brlez");
8824 break;
8825
8826 case GT:
8827 strcpy (string, "brgz");
8828 break;
8829
8830 default:
8831 gcc_unreachable ();
8832 }
8833
8834 p = strchr (string, '\0');
8835
8836 /* Now add the annulling, reg, label, and nop. */
8837 if (annul && ! far)
8838 {
8839 strcpy (p, ",a");
8840 p += 2;
8841 }
8842
8843 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8844 {
8845 strcpy (p,
8846 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8847 >= profile_probability::even ()) ^ far)
8848 ? ",pt" : ",pn");
8849 p += 3;
8850 }
8851
8852 *p = p < string + 8 ? '\t' : ' ';
8853 p++;
8854 *p++ = '%';
8855 *p++ = '0' + reg;
8856 *p++ = ',';
8857 *p++ = ' ';
8858 if (far)
8859 {
8860 int veryfar = 1, delta;
8861
8862 if (INSN_ADDRESSES_SET_P ())
8863 {
8864 delta = (INSN_ADDRESSES (INSN_UID (dest))
8865 - INSN_ADDRESSES (INSN_UID (insn)));
8866 /* Leave some instructions for "slop". */
8867 if (delta >= -260000 && delta < 260000)
8868 veryfar = 0;
8869 }
8870
8871 strcpy (p, ".+12\n\t nop\n\t");
8872 /* Skip the next insn if requested or
8873 if we know that it will be a nop. */
8874 if (annul || ! final_sequence)
8875 p[3] = '6';
8876 p += 12;
8877 if (veryfar)
8878 {
8879 strcpy (p, "b\t");
8880 p += 2;
8881 }
8882 else
8883 {
8884 strcpy (p, "ba,pt\t%%xcc, ");
8885 p += 13;
8886 }
8887 }
8888 *p++ = '%';
8889 *p++ = 'l';
8890 *p++ = '0' + label;
8891 *p++ = '%';
8892 *p++ = '#';
8893 *p = '\0';
8894
8895 return string;
8896 }
8897
8898 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8899 Such instructions cannot be used in the delay slot of return insn on v9.
8900 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8901 */
8902
8903 static int
8904 epilogue_renumber (register rtx *where, int test)
8905 {
8906 register const char *fmt;
8907 register int i;
8908 register enum rtx_code code;
8909
8910 if (*where == 0)
8911 return 0;
8912
8913 code = GET_CODE (*where);
8914
8915 switch (code)
8916 {
8917 case REG:
8918 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8919 return 1;
8920 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8921 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8922 /* fallthrough */
8923 case SCRATCH:
8924 case CC0:
8925 case PC:
8926 case CONST_INT:
8927 case CONST_WIDE_INT:
8928 case CONST_DOUBLE:
8929 return 0;
8930
8931 /* Do not replace the frame pointer with the stack pointer because
8932 it can cause the delayed instruction to load below the stack.
8933 This occurs when instructions like:
8934
8935 (set (reg/i:SI 24 %i0)
8936 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8937 (const_int -20 [0xffffffec])) 0))
8938
8939 are in the return delayed slot. */
8940 case PLUS:
8941 if (GET_CODE (XEXP (*where, 0)) == REG
8942 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8943 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8944 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8945 return 1;
8946 break;
8947
8948 case MEM:
8949 if (SPARC_STACK_BIAS
8950 && GET_CODE (XEXP (*where, 0)) == REG
8951 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8952 return 1;
8953 break;
8954
8955 default:
8956 break;
8957 }
8958
8959 fmt = GET_RTX_FORMAT (code);
8960
8961 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8962 {
8963 if (fmt[i] == 'E')
8964 {
8965 register int j;
8966 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8967 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8968 return 1;
8969 }
8970 else if (fmt[i] == 'e'
8971 && epilogue_renumber (&(XEXP (*where, i)), test))
8972 return 1;
8973 }
8974 return 0;
8975 }
8976 \f
8977 /* Leaf functions and non-leaf functions have different needs. */
8978
8979 static const int
8980 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8981
8982 static const int
8983 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8984
8985 static const int *const reg_alloc_orders[] = {
8986 reg_leaf_alloc_order,
8987 reg_nonleaf_alloc_order};
8988
8989 void
8990 order_regs_for_local_alloc (void)
8991 {
8992 static int last_order_nonleaf = 1;
8993
8994 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8995 {
8996 last_order_nonleaf = !last_order_nonleaf;
8997 memcpy ((char *) reg_alloc_order,
8998 (const char *) reg_alloc_orders[last_order_nonleaf],
8999 FIRST_PSEUDO_REGISTER * sizeof (int));
9000 }
9001 }
9002 \f
9003 /* Return 1 if REG and MEM are legitimate enough to allow the various
9004 MEM<-->REG splits to be run. */
9005
9006 int
9007 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9008 {
9009 /* Punt if we are here by mistake. */
9010 gcc_assert (reload_completed);
9011
9012 /* We must have an offsettable memory reference. */
9013 if (!offsettable_memref_p (mem))
9014 return 0;
9015
9016 /* If we have legitimate args for ldd/std, we do not want
9017 the split to happen. */
9018 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9019 return 0;
9020
9021 /* Success. */
9022 return 1;
9023 }
9024
9025 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9026
9027 void
9028 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9029 {
9030 rtx high_part = gen_highpart (mode, dest);
9031 rtx low_part = gen_lowpart (mode, dest);
9032 rtx word0 = adjust_address (src, mode, 0);
9033 rtx word1 = adjust_address (src, mode, 4);
9034
9035 if (reg_overlap_mentioned_p (high_part, word1))
9036 {
9037 emit_move_insn_1 (low_part, word1);
9038 emit_move_insn_1 (high_part, word0);
9039 }
9040 else
9041 {
9042 emit_move_insn_1 (high_part, word0);
9043 emit_move_insn_1 (low_part, word1);
9044 }
9045 }
9046
9047 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9048
9049 void
9050 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9051 {
9052 rtx word0 = adjust_address (dest, mode, 0);
9053 rtx word1 = adjust_address (dest, mode, 4);
9054 rtx high_part = gen_highpart (mode, src);
9055 rtx low_part = gen_lowpart (mode, src);
9056
9057 emit_move_insn_1 (word0, high_part);
9058 emit_move_insn_1 (word1, low_part);
9059 }
9060
9061 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9062
9063 int
9064 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9065 {
9066 /* Punt if we are here by mistake. */
9067 gcc_assert (reload_completed);
9068
9069 if (GET_CODE (reg1) == SUBREG)
9070 reg1 = SUBREG_REG (reg1);
9071 if (GET_CODE (reg1) != REG)
9072 return 0;
9073 const int regno1 = REGNO (reg1);
9074
9075 if (GET_CODE (reg2) == SUBREG)
9076 reg2 = SUBREG_REG (reg2);
9077 if (GET_CODE (reg2) != REG)
9078 return 0;
9079 const int regno2 = REGNO (reg2);
9080
9081 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9082 return 1;
9083
9084 if (TARGET_VIS3)
9085 {
9086 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9087 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9088 return 1;
9089 }
9090
9091 return 0;
9092 }
9093
9094 /* Split a REG <--> REG move into a pair of moves in MODE. */
9095
9096 void
9097 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9098 {
9099 rtx dest1 = gen_highpart (mode, dest);
9100 rtx dest2 = gen_lowpart (mode, dest);
9101 rtx src1 = gen_highpart (mode, src);
9102 rtx src2 = gen_lowpart (mode, src);
9103
9104 /* Now emit using the real source and destination we found, swapping
9105 the order if we detect overlap. */
9106 if (reg_overlap_mentioned_p (dest1, src2))
9107 {
9108 emit_move_insn_1 (dest2, src2);
9109 emit_move_insn_1 (dest1, src1);
9110 }
9111 else
9112 {
9113 emit_move_insn_1 (dest1, src1);
9114 emit_move_insn_1 (dest2, src2);
9115 }
9116 }
9117
9118 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9119 This makes them candidates for using ldd and std insns.
9120
9121 Note reg1 and reg2 *must* be hard registers. */
9122
9123 int
9124 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9125 {
9126 /* We might have been passed a SUBREG. */
9127 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9128 return 0;
9129
9130 if (REGNO (reg1) % 2 != 0)
9131 return 0;
9132
9133 /* Integer ldd is deprecated in SPARC V9 */
9134 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9135 return 0;
9136
9137 return (REGNO (reg1) == REGNO (reg2) - 1);
9138 }
9139
9140 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9141 an ldd or std insn.
9142
9143 This can only happen when addr1 and addr2, the addresses in mem1
9144 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9145 addr1 must also be aligned on a 64-bit boundary.
9146
9147 Also iff dependent_reg_rtx is not null it should not be used to
9148 compute the address for mem1, i.e. we cannot optimize a sequence
9149 like:
9150 ld [%o0], %o0
9151 ld [%o0 + 4], %o1
9152 to
9153 ldd [%o0], %o0
9154 nor:
9155 ld [%g3 + 4], %g3
9156 ld [%g3], %g2
9157 to
9158 ldd [%g3], %g2
9159
9160 But, note that the transformation from:
9161 ld [%g2 + 4], %g3
9162 ld [%g2], %g2
9163 to
9164 ldd [%g2], %g2
9165 is perfectly fine. Thus, the peephole2 patterns always pass us
9166 the destination register of the first load, never the second one.
9167
9168 For stores we don't have a similar problem, so dependent_reg_rtx is
9169 NULL_RTX. */
9170
9171 int
9172 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9173 {
9174 rtx addr1, addr2;
9175 unsigned int reg1;
9176 HOST_WIDE_INT offset1;
9177
9178 /* The mems cannot be volatile. */
9179 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9180 return 0;
9181
9182 /* MEM1 should be aligned on a 64-bit boundary. */
9183 if (MEM_ALIGN (mem1) < 64)
9184 return 0;
9185
9186 addr1 = XEXP (mem1, 0);
9187 addr2 = XEXP (mem2, 0);
9188
9189 /* Extract a register number and offset (if used) from the first addr. */
9190 if (GET_CODE (addr1) == PLUS)
9191 {
9192 /* If not a REG, return zero. */
9193 if (GET_CODE (XEXP (addr1, 0)) != REG)
9194 return 0;
9195 else
9196 {
9197 reg1 = REGNO (XEXP (addr1, 0));
9198 /* The offset must be constant! */
9199 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9200 return 0;
9201 offset1 = INTVAL (XEXP (addr1, 1));
9202 }
9203 }
9204 else if (GET_CODE (addr1) != REG)
9205 return 0;
9206 else
9207 {
9208 reg1 = REGNO (addr1);
9209 /* This was a simple (mem (reg)) expression. Offset is 0. */
9210 offset1 = 0;
9211 }
9212
9213 /* Make sure the second address is a (mem (plus (reg) (const_int). */
9214 if (GET_CODE (addr2) != PLUS)
9215 return 0;
9216
9217 if (GET_CODE (XEXP (addr2, 0)) != REG
9218 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9219 return 0;
9220
9221 if (reg1 != REGNO (XEXP (addr2, 0)))
9222 return 0;
9223
9224 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9225 return 0;
9226
9227 /* The first offset must be evenly divisible by 8 to ensure the
9228 address is 64-bit aligned. */
9229 if (offset1 % 8 != 0)
9230 return 0;
9231
9232 /* The offset for the second addr must be 4 more than the first addr. */
9233 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9234 return 0;
9235
9236 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9237 instructions. */
9238 return 1;
9239 }
9240
9241 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9242
9243 rtx
9244 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9245 {
9246 rtx x = widen_memory_access (mem1, mode, 0);
9247 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9248 return x;
9249 }
9250
9251 /* Return 1 if reg is a pseudo, or is the first register in
9252 a hard register pair. This makes it suitable for use in
9253 ldd and std insns. */
9254
9255 int
9256 register_ok_for_ldd (rtx reg)
9257 {
9258 /* We might have been passed a SUBREG. */
9259 if (!REG_P (reg))
9260 return 0;
9261
9262 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9263 return (REGNO (reg) % 2 == 0);
9264
9265 return 1;
9266 }
9267
9268 /* Return 1 if OP, a MEM, has an address which is known to be
9269 aligned to an 8-byte boundary. */
9270
9271 int
9272 memory_ok_for_ldd (rtx op)
9273 {
9274 /* In 64-bit mode, we assume that the address is word-aligned. */
9275 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9276 return 0;
9277
9278 if (! can_create_pseudo_p ()
9279 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9280 return 0;
9281
9282 return 1;
9283 }
9284 \f
9285 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9286
9287 static bool
9288 sparc_print_operand_punct_valid_p (unsigned char code)
9289 {
9290 if (code == '#'
9291 || code == '*'
9292 || code == '('
9293 || code == ')'
9294 || code == '_'
9295 || code == '&')
9296 return true;
9297
9298 return false;
9299 }
9300
9301 /* Implement TARGET_PRINT_OPERAND.
9302 Print operand X (an rtx) in assembler syntax to file FILE.
9303 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9304 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9305
9306 static void
9307 sparc_print_operand (FILE *file, rtx x, int code)
9308 {
9309 const char *s;
9310
9311 switch (code)
9312 {
9313 case '#':
9314 /* Output an insn in a delay slot. */
9315 if (final_sequence)
9316 sparc_indent_opcode = 1;
9317 else
9318 fputs ("\n\t nop", file);
9319 return;
9320 case '*':
9321 /* Output an annul flag if there's nothing for the delay slot and we
9322 are optimizing. This is always used with '(' below.
9323 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9324 this is a dbx bug. So, we only do this when optimizing.
9325 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9326 Always emit a nop in case the next instruction is a branch. */
9327 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9328 fputs (",a", file);
9329 return;
9330 case '(':
9331 /* Output a 'nop' if there's nothing for the delay slot and we are
9332 not optimizing. This is always used with '*' above. */
9333 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9334 fputs ("\n\t nop", file);
9335 else if (final_sequence)
9336 sparc_indent_opcode = 1;
9337 return;
9338 case ')':
9339 /* Output the right displacement from the saved PC on function return.
9340 The caller may have placed an "unimp" insn immediately after the call
9341 so we have to account for it. This insn is used in the 32-bit ABI
9342 when calling a function that returns a non zero-sized structure. The
9343 64-bit ABI doesn't have it. Be careful to have this test be the same
9344 as that for the call. The exception is when sparc_std_struct_return
9345 is enabled, the psABI is followed exactly and the adjustment is made
9346 by the code in sparc_struct_value_rtx. The call emitted is the same
9347 when sparc_std_struct_return is enabled. */
9348 if (!TARGET_ARCH64
9349 && cfun->returns_struct
9350 && !sparc_std_struct_return
9351 && DECL_SIZE (DECL_RESULT (current_function_decl))
9352 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9353 == INTEGER_CST
9354 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9355 fputs ("12", file);
9356 else
9357 fputc ('8', file);
9358 return;
9359 case '_':
9360 /* Output the Embedded Medium/Anywhere code model base register. */
9361 fputs (EMBMEDANY_BASE_REG, file);
9362 return;
9363 case '&':
9364 /* Print some local dynamic TLS name. */
9365 if (const char *name = get_some_local_dynamic_name ())
9366 assemble_name (file, name);
9367 else
9368 output_operand_lossage ("'%%&' used without any "
9369 "local dynamic TLS references");
9370 return;
9371
9372 case 'Y':
9373 /* Adjust the operand to take into account a RESTORE operation. */
9374 if (GET_CODE (x) == CONST_INT)
9375 break;
9376 else if (GET_CODE (x) != REG)
9377 output_operand_lossage ("invalid %%Y operand");
9378 else if (REGNO (x) < 8)
9379 fputs (reg_names[REGNO (x)], file);
9380 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9381 fputs (reg_names[REGNO (x)-16], file);
9382 else
9383 output_operand_lossage ("invalid %%Y operand");
9384 return;
9385 case 'L':
9386 /* Print out the low order register name of a register pair. */
9387 if (WORDS_BIG_ENDIAN)
9388 fputs (reg_names[REGNO (x)+1], file);
9389 else
9390 fputs (reg_names[REGNO (x)], file);
9391 return;
9392 case 'H':
9393 /* Print out the high order register name of a register pair. */
9394 if (WORDS_BIG_ENDIAN)
9395 fputs (reg_names[REGNO (x)], file);
9396 else
9397 fputs (reg_names[REGNO (x)+1], file);
9398 return;
9399 case 'R':
9400 /* Print out the second register name of a register pair or quad.
9401 I.e., R (%o0) => %o1. */
9402 fputs (reg_names[REGNO (x)+1], file);
9403 return;
9404 case 'S':
9405 /* Print out the third register name of a register quad.
9406 I.e., S (%o0) => %o2. */
9407 fputs (reg_names[REGNO (x)+2], file);
9408 return;
9409 case 'T':
9410 /* Print out the fourth register name of a register quad.
9411 I.e., T (%o0) => %o3. */
9412 fputs (reg_names[REGNO (x)+3], file);
9413 return;
9414 case 'x':
9415 /* Print a condition code register. */
9416 if (REGNO (x) == SPARC_ICC_REG)
9417 {
9418 switch (GET_MODE (x))
9419 {
9420 case E_CCmode:
9421 case E_CCNZmode:
9422 case E_CCCmode:
9423 case E_CCVmode:
9424 s = "%icc";
9425 break;
9426 case E_CCXmode:
9427 case E_CCXNZmode:
9428 case E_CCXCmode:
9429 case E_CCXVmode:
9430 s = "%xcc";
9431 break;
9432 default:
9433 gcc_unreachable ();
9434 }
9435 fputs (s, file);
9436 }
9437 else
9438 /* %fccN register */
9439 fputs (reg_names[REGNO (x)], file);
9440 return;
9441 case 'm':
9442 /* Print the operand's address only. */
9443 output_address (GET_MODE (x), XEXP (x, 0));
9444 return;
9445 case 'r':
9446 /* In this case we need a register. Use %g0 if the
9447 operand is const0_rtx. */
9448 if (x == const0_rtx
9449 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9450 {
9451 fputs ("%g0", file);
9452 return;
9453 }
9454 else
9455 break;
9456
9457 case 'A':
9458 switch (GET_CODE (x))
9459 {
9460 case IOR:
9461 s = "or";
9462 break;
9463 case AND:
9464 s = "and";
9465 break;
9466 case XOR:
9467 s = "xor";
9468 break;
9469 default:
9470 output_operand_lossage ("invalid %%A operand");
9471 s = "";
9472 break;
9473 }
9474 fputs (s, file);
9475 return;
9476
9477 case 'B':
9478 switch (GET_CODE (x))
9479 {
9480 case IOR:
9481 s = "orn";
9482 break;
9483 case AND:
9484 s = "andn";
9485 break;
9486 case XOR:
9487 s = "xnor";
9488 break;
9489 default:
9490 output_operand_lossage ("invalid %%B operand");
9491 s = "";
9492 break;
9493 }
9494 fputs (s, file);
9495 return;
9496
9497 /* This is used by the conditional move instructions. */
9498 case 'C':
9499 {
9500 machine_mode mode = GET_MODE (XEXP (x, 0));
9501 switch (GET_CODE (x))
9502 {
9503 case NE:
9504 if (mode == CCVmode || mode == CCXVmode)
9505 s = "vs";
9506 else
9507 s = "ne";
9508 break;
9509 case EQ:
9510 if (mode == CCVmode || mode == CCXVmode)
9511 s = "vc";
9512 else
9513 s = "e";
9514 break;
9515 case GE:
9516 if (mode == CCNZmode || mode == CCXNZmode)
9517 s = "pos";
9518 else
9519 s = "ge";
9520 break;
9521 case GT:
9522 s = "g";
9523 break;
9524 case LE:
9525 s = "le";
9526 break;
9527 case LT:
9528 if (mode == CCNZmode || mode == CCXNZmode)
9529 s = "neg";
9530 else
9531 s = "l";
9532 break;
9533 case GEU:
9534 s = "geu";
9535 break;
9536 case GTU:
9537 s = "gu";
9538 break;
9539 case LEU:
9540 s = "leu";
9541 break;
9542 case LTU:
9543 s = "lu";
9544 break;
9545 case LTGT:
9546 s = "lg";
9547 break;
9548 case UNORDERED:
9549 s = "u";
9550 break;
9551 case ORDERED:
9552 s = "o";
9553 break;
9554 case UNLT:
9555 s = "ul";
9556 break;
9557 case UNLE:
9558 s = "ule";
9559 break;
9560 case UNGT:
9561 s = "ug";
9562 break;
9563 case UNGE:
9564 s = "uge"
9565 ; break;
9566 case UNEQ:
9567 s = "ue";
9568 break;
9569 default:
9570 output_operand_lossage ("invalid %%C operand");
9571 s = "";
9572 break;
9573 }
9574 fputs (s, file);
9575 return;
9576 }
9577
9578 /* This are used by the movr instruction pattern. */
9579 case 'D':
9580 {
9581 switch (GET_CODE (x))
9582 {
9583 case NE:
9584 s = "ne";
9585 break;
9586 case EQ:
9587 s = "e";
9588 break;
9589 case GE:
9590 s = "gez";
9591 break;
9592 case LT:
9593 s = "lz";
9594 break;
9595 case LE:
9596 s = "lez";
9597 break;
9598 case GT:
9599 s = "gz";
9600 break;
9601 default:
9602 output_operand_lossage ("invalid %%D operand");
9603 s = "";
9604 break;
9605 }
9606 fputs (s, file);
9607 return;
9608 }
9609
9610 case 'b':
9611 {
9612 /* Print a sign-extended character. */
9613 int i = trunc_int_for_mode (INTVAL (x), QImode);
9614 fprintf (file, "%d", i);
9615 return;
9616 }
9617
9618 case 'f':
9619 /* Operand must be a MEM; write its address. */
9620 if (GET_CODE (x) != MEM)
9621 output_operand_lossage ("invalid %%f operand");
9622 output_address (GET_MODE (x), XEXP (x, 0));
9623 return;
9624
9625 case 's':
9626 {
9627 /* Print a sign-extended 32-bit value. */
9628 HOST_WIDE_INT i;
9629 if (GET_CODE(x) == CONST_INT)
9630 i = INTVAL (x);
9631 else
9632 {
9633 output_operand_lossage ("invalid %%s operand");
9634 return;
9635 }
9636 i = trunc_int_for_mode (i, SImode);
9637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9638 return;
9639 }
9640
9641 case 0:
9642 /* Do nothing special. */
9643 break;
9644
9645 default:
9646 /* Undocumented flag. */
9647 output_operand_lossage ("invalid operand output code");
9648 }
9649
9650 if (GET_CODE (x) == REG)
9651 fputs (reg_names[REGNO (x)], file);
9652 else if (GET_CODE (x) == MEM)
9653 {
9654 fputc ('[', file);
9655 /* Poor Sun assembler doesn't understand absolute addressing. */
9656 if (CONSTANT_P (XEXP (x, 0)))
9657 fputs ("%g0+", file);
9658 output_address (GET_MODE (x), XEXP (x, 0));
9659 fputc (']', file);
9660 }
9661 else if (GET_CODE (x) == HIGH)
9662 {
9663 fputs ("%hi(", file);
9664 output_addr_const (file, XEXP (x, 0));
9665 fputc (')', file);
9666 }
9667 else if (GET_CODE (x) == LO_SUM)
9668 {
9669 sparc_print_operand (file, XEXP (x, 0), 0);
9670 if (TARGET_CM_MEDMID)
9671 fputs ("+%l44(", file);
9672 else
9673 fputs ("+%lo(", file);
9674 output_addr_const (file, XEXP (x, 1));
9675 fputc (')', file);
9676 }
9677 else if (GET_CODE (x) == CONST_DOUBLE)
9678 output_operand_lossage ("floating-point constant not a valid immediate operand");
9679 else
9680 output_addr_const (file, x);
9681 }
9682
9683 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9684
9685 static void
9686 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9687 {
9688 register rtx base, index = 0;
9689 int offset = 0;
9690 register rtx addr = x;
9691
9692 if (REG_P (addr))
9693 fputs (reg_names[REGNO (addr)], file);
9694 else if (GET_CODE (addr) == PLUS)
9695 {
9696 if (CONST_INT_P (XEXP (addr, 0)))
9697 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9698 else if (CONST_INT_P (XEXP (addr, 1)))
9699 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9700 else
9701 base = XEXP (addr, 0), index = XEXP (addr, 1);
9702 if (GET_CODE (base) == LO_SUM)
9703 {
9704 gcc_assert (USE_AS_OFFSETABLE_LO10
9705 && TARGET_ARCH64
9706 && ! TARGET_CM_MEDMID);
9707 output_operand (XEXP (base, 0), 0);
9708 fputs ("+%lo(", file);
9709 output_address (VOIDmode, XEXP (base, 1));
9710 fprintf (file, ")+%d", offset);
9711 }
9712 else
9713 {
9714 fputs (reg_names[REGNO (base)], file);
9715 if (index == 0)
9716 fprintf (file, "%+d", offset);
9717 else if (REG_P (index))
9718 fprintf (file, "+%s", reg_names[REGNO (index)]);
9719 else if (GET_CODE (index) == SYMBOL_REF
9720 || GET_CODE (index) == LABEL_REF
9721 || GET_CODE (index) == CONST)
9722 fputc ('+', file), output_addr_const (file, index);
9723 else gcc_unreachable ();
9724 }
9725 }
9726 else if (GET_CODE (addr) == MINUS
9727 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9728 {
9729 output_addr_const (file, XEXP (addr, 0));
9730 fputs ("-(", file);
9731 output_addr_const (file, XEXP (addr, 1));
9732 fputs ("-.)", file);
9733 }
9734 else if (GET_CODE (addr) == LO_SUM)
9735 {
9736 output_operand (XEXP (addr, 0), 0);
9737 if (TARGET_CM_MEDMID)
9738 fputs ("+%l44(", file);
9739 else
9740 fputs ("+%lo(", file);
9741 output_address (VOIDmode, XEXP (addr, 1));
9742 fputc (')', file);
9743 }
9744 else if (flag_pic
9745 && GET_CODE (addr) == CONST
9746 && GET_CODE (XEXP (addr, 0)) == MINUS
9747 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9748 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9749 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9750 {
9751 addr = XEXP (addr, 0);
9752 output_addr_const (file, XEXP (addr, 0));
9753 /* Group the args of the second CONST in parenthesis. */
9754 fputs ("-(", file);
9755 /* Skip past the second CONST--it does nothing for us. */
9756 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9757 /* Close the parenthesis. */
9758 fputc (')', file);
9759 }
9760 else
9761 {
9762 output_addr_const (file, addr);
9763 }
9764 }
9765 \f
9766 /* Target hook for assembling integer objects. The sparc version has
9767 special handling for aligned DI-mode objects. */
9768
9769 static bool
9770 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9771 {
9772 /* ??? We only output .xword's for symbols and only then in environments
9773 where the assembler can handle them. */
9774 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9775 {
9776 if (TARGET_V9)
9777 {
9778 assemble_integer_with_op ("\t.xword\t", x);
9779 return true;
9780 }
9781 else
9782 {
9783 assemble_aligned_integer (4, const0_rtx);
9784 assemble_aligned_integer (4, x);
9785 return true;
9786 }
9787 }
9788 return default_assemble_integer (x, size, aligned_p);
9789 }
9790 \f
9791 /* Return the value of a code used in the .proc pseudo-op that says
9792 what kind of result this function returns. For non-C types, we pick
9793 the closest C type. */
9794
9795 #ifndef SHORT_TYPE_SIZE
9796 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9797 #endif
9798
9799 #ifndef INT_TYPE_SIZE
9800 #define INT_TYPE_SIZE BITS_PER_WORD
9801 #endif
9802
9803 #ifndef LONG_TYPE_SIZE
9804 #define LONG_TYPE_SIZE BITS_PER_WORD
9805 #endif
9806
9807 #ifndef LONG_LONG_TYPE_SIZE
9808 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9809 #endif
9810
9811 #ifndef FLOAT_TYPE_SIZE
9812 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9813 #endif
9814
9815 #ifndef DOUBLE_TYPE_SIZE
9816 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9817 #endif
9818
9819 #ifndef LONG_DOUBLE_TYPE_SIZE
9820 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9821 #endif
9822
9823 unsigned long
9824 sparc_type_code (register tree type)
9825 {
9826 register unsigned long qualifiers = 0;
9827 register unsigned shift;
9828
9829 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9830 setting more, since some assemblers will give an error for this. Also,
9831 we must be careful to avoid shifts of 32 bits or more to avoid getting
9832 unpredictable results. */
9833
9834 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9835 {
9836 switch (TREE_CODE (type))
9837 {
9838 case ERROR_MARK:
9839 return qualifiers;
9840
9841 case ARRAY_TYPE:
9842 qualifiers |= (3 << shift);
9843 break;
9844
9845 case FUNCTION_TYPE:
9846 case METHOD_TYPE:
9847 qualifiers |= (2 << shift);
9848 break;
9849
9850 case POINTER_TYPE:
9851 case REFERENCE_TYPE:
9852 case OFFSET_TYPE:
9853 qualifiers |= (1 << shift);
9854 break;
9855
9856 case RECORD_TYPE:
9857 return (qualifiers | 8);
9858
9859 case UNION_TYPE:
9860 case QUAL_UNION_TYPE:
9861 return (qualifiers | 9);
9862
9863 case ENUMERAL_TYPE:
9864 return (qualifiers | 10);
9865
9866 case VOID_TYPE:
9867 return (qualifiers | 16);
9868
9869 case INTEGER_TYPE:
9870 /* If this is a range type, consider it to be the underlying
9871 type. */
9872 if (TREE_TYPE (type) != 0)
9873 break;
9874
9875 /* Carefully distinguish all the standard types of C,
9876 without messing up if the language is not C. We do this by
9877 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9878 look at both the names and the above fields, but that's redundant.
9879 Any type whose size is between two C types will be considered
9880 to be the wider of the two types. Also, we do not have a
9881 special code to use for "long long", so anything wider than
9882 long is treated the same. Note that we can't distinguish
9883 between "int" and "long" in this code if they are the same
9884 size, but that's fine, since neither can the assembler. */
9885
9886 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9887 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9888
9889 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9890 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9891
9892 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9893 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9894
9895 else
9896 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9897
9898 case REAL_TYPE:
9899 /* If this is a range type, consider it to be the underlying
9900 type. */
9901 if (TREE_TYPE (type) != 0)
9902 break;
9903
9904 /* Carefully distinguish all the standard types of C,
9905 without messing up if the language is not C. */
9906
9907 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9908 return (qualifiers | 6);
9909
9910 else
9911 return (qualifiers | 7);
9912
9913 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9914 /* ??? We need to distinguish between double and float complex types,
9915 but I don't know how yet because I can't reach this code from
9916 existing front-ends. */
9917 return (qualifiers | 7); /* Who knows? */
9918
9919 case VECTOR_TYPE:
9920 case BOOLEAN_TYPE: /* Boolean truth value type. */
9921 case LANG_TYPE:
9922 case NULLPTR_TYPE:
9923 return qualifiers;
9924
9925 default:
9926 gcc_unreachable (); /* Not a type! */
9927 }
9928 }
9929
9930 return qualifiers;
9931 }
9932 \f
9933 /* Nested function support. */
9934
9935 /* Emit RTL insns to initialize the variable parts of a trampoline.
9936 FNADDR is an RTX for the address of the function's pure code.
9937 CXT is an RTX for the static chain value for the function.
9938
9939 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9940 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9941 (to store insns). This is a bit excessive. Perhaps a different
9942 mechanism would be better here.
9943
9944 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9945
9946 static void
9947 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9948 {
9949 /* SPARC 32-bit trampoline:
9950
9951 sethi %hi(fn), %g1
9952 sethi %hi(static), %g2
9953 jmp %g1+%lo(fn)
9954 or %g2, %lo(static), %g2
9955
9956 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9957 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9958 */
9959
9960 emit_move_insn
9961 (adjust_address (m_tramp, SImode, 0),
9962 expand_binop (SImode, ior_optab,
9963 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9964 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9965 NULL_RTX, 1, OPTAB_DIRECT));
9966
9967 emit_move_insn
9968 (adjust_address (m_tramp, SImode, 4),
9969 expand_binop (SImode, ior_optab,
9970 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9971 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9972 NULL_RTX, 1, OPTAB_DIRECT));
9973
9974 emit_move_insn
9975 (adjust_address (m_tramp, SImode, 8),
9976 expand_binop (SImode, ior_optab,
9977 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9978 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9979 NULL_RTX, 1, OPTAB_DIRECT));
9980
9981 emit_move_insn
9982 (adjust_address (m_tramp, SImode, 12),
9983 expand_binop (SImode, ior_optab,
9984 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9985 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9986 NULL_RTX, 1, OPTAB_DIRECT));
9987
9988 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9989 aligned on a 16 byte boundary so one flush clears it all. */
9990 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9991 if (sparc_cpu != PROCESSOR_ULTRASPARC
9992 && sparc_cpu != PROCESSOR_ULTRASPARC3
9993 && sparc_cpu != PROCESSOR_NIAGARA
9994 && sparc_cpu != PROCESSOR_NIAGARA2
9995 && sparc_cpu != PROCESSOR_NIAGARA3
9996 && sparc_cpu != PROCESSOR_NIAGARA4
9997 && sparc_cpu != PROCESSOR_NIAGARA7
9998 && sparc_cpu != PROCESSOR_M8)
9999 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
10000
10001 /* Call __enable_execute_stack after writing onto the stack to make sure
10002 the stack address is accessible. */
10003 #ifdef HAVE_ENABLE_EXECUTE_STACK
10004 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10005 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10006 #endif
10007
10008 }
10009
10010 /* The 64-bit version is simpler because it makes more sense to load the
10011 values as "immediate" data out of the trampoline. It's also easier since
10012 we can read the PC without clobbering a register. */
10013
10014 static void
10015 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10016 {
10017 /* SPARC 64-bit trampoline:
10018
10019 rd %pc, %g1
10020 ldx [%g1+24], %g5
10021 jmp %g5
10022 ldx [%g1+16], %g5
10023 +16 bytes data
10024 */
10025
10026 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10027 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10028 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10029 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10030 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10031 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10032 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10033 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10034 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10035 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10036 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10037
10038 if (sparc_cpu != PROCESSOR_ULTRASPARC
10039 && sparc_cpu != PROCESSOR_ULTRASPARC3
10040 && sparc_cpu != PROCESSOR_NIAGARA
10041 && sparc_cpu != PROCESSOR_NIAGARA2
10042 && sparc_cpu != PROCESSOR_NIAGARA3
10043 && sparc_cpu != PROCESSOR_NIAGARA4
10044 && sparc_cpu != PROCESSOR_NIAGARA7
10045 && sparc_cpu != PROCESSOR_M8)
10046 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10047
10048 /* Call __enable_execute_stack after writing onto the stack to make sure
10049 the stack address is accessible. */
10050 #ifdef HAVE_ENABLE_EXECUTE_STACK
10051 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10052 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10053 #endif
10054 }
10055
10056 /* Worker for TARGET_TRAMPOLINE_INIT. */
10057
10058 static void
10059 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10060 {
10061 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10062 cxt = force_reg (Pmode, cxt);
10063 if (TARGET_ARCH64)
10064 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10065 else
10066 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10067 }
10068 \f
10069 /* Adjust the cost of a scheduling dependency. Return the new cost of
10070 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10071
10072 static int
10073 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10074 int cost)
10075 {
10076 enum attr_type insn_type;
10077
10078 if (recog_memoized (insn) < 0)
10079 return cost;
10080
10081 insn_type = get_attr_type (insn);
10082
10083 if (dep_type == 0)
10084 {
10085 /* Data dependency; DEP_INSN writes a register that INSN reads some
10086 cycles later. */
10087
10088 /* if a load, then the dependence must be on the memory address;
10089 add an extra "cycle". Note that the cost could be two cycles
10090 if the reg was written late in an instruction group; we ca not tell
10091 here. */
10092 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10093 return cost + 3;
10094
10095 /* Get the delay only if the address of the store is the dependence. */
10096 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10097 {
10098 rtx pat = PATTERN(insn);
10099 rtx dep_pat = PATTERN (dep_insn);
10100
10101 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10102 return cost; /* This should not happen! */
10103
10104 /* The dependency between the two instructions was on the data that
10105 is being stored. Assume that this implies that the address of the
10106 store is not dependent. */
10107 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10108 return cost;
10109
10110 return cost + 3; /* An approximation. */
10111 }
10112
10113 /* A shift instruction cannot receive its data from an instruction
10114 in the same cycle; add a one cycle penalty. */
10115 if (insn_type == TYPE_SHIFT)
10116 return cost + 3; /* Split before cascade into shift. */
10117 }
10118 else
10119 {
10120 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10121 INSN writes some cycles later. */
10122
10123 /* These are only significant for the fpu unit; writing a fp reg before
10124 the fpu has finished with it stalls the processor. */
10125
10126 /* Reusing an integer register causes no problems. */
10127 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10128 return 0;
10129 }
10130
10131 return cost;
10132 }
10133
10134 static int
10135 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10136 int cost)
10137 {
10138 enum attr_type insn_type, dep_type;
10139 rtx pat = PATTERN(insn);
10140 rtx dep_pat = PATTERN (dep_insn);
10141
10142 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10143 return cost;
10144
10145 insn_type = get_attr_type (insn);
10146 dep_type = get_attr_type (dep_insn);
10147
10148 switch (dtype)
10149 {
10150 case 0:
10151 /* Data dependency; DEP_INSN writes a register that INSN reads some
10152 cycles later. */
10153
10154 switch (insn_type)
10155 {
10156 case TYPE_STORE:
10157 case TYPE_FPSTORE:
10158 /* Get the delay iff the address of the store is the dependence. */
10159 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10160 return cost;
10161
10162 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10163 return cost;
10164 return cost + 3;
10165
10166 case TYPE_LOAD:
10167 case TYPE_SLOAD:
10168 case TYPE_FPLOAD:
10169 /* If a load, then the dependence must be on the memory address. If
10170 the addresses aren't equal, then it might be a false dependency */
10171 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10172 {
10173 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10174 || GET_CODE (SET_DEST (dep_pat)) != MEM
10175 || GET_CODE (SET_SRC (pat)) != MEM
10176 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10177 XEXP (SET_SRC (pat), 0)))
10178 return cost + 2;
10179
10180 return cost + 8;
10181 }
10182 break;
10183
10184 case TYPE_BRANCH:
10185 /* Compare to branch latency is 0. There is no benefit from
10186 separating compare and branch. */
10187 if (dep_type == TYPE_COMPARE)
10188 return 0;
10189 /* Floating point compare to branch latency is less than
10190 compare to conditional move. */
10191 if (dep_type == TYPE_FPCMP)
10192 return cost - 1;
10193 break;
10194 default:
10195 break;
10196 }
10197 break;
10198
10199 case REG_DEP_ANTI:
10200 /* Anti-dependencies only penalize the fpu unit. */
10201 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10202 return 0;
10203 break;
10204
10205 default:
10206 break;
10207 }
10208
10209 return cost;
10210 }
10211
10212 static int
10213 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10214 unsigned int)
10215 {
10216 switch (sparc_cpu)
10217 {
10218 case PROCESSOR_SUPERSPARC:
10219 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10220 break;
10221 case PROCESSOR_HYPERSPARC:
10222 case PROCESSOR_SPARCLITE86X:
10223 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10224 break;
10225 default:
10226 break;
10227 }
10228 return cost;
10229 }
10230
10231 static void
10232 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10233 int sched_verbose ATTRIBUTE_UNUSED,
10234 int max_ready ATTRIBUTE_UNUSED)
10235 {}
10236
10237 static int
10238 sparc_use_sched_lookahead (void)
10239 {
10240 switch (sparc_cpu)
10241 {
10242 case PROCESSOR_ULTRASPARC:
10243 case PROCESSOR_ULTRASPARC3:
10244 return 4;
10245 case PROCESSOR_SUPERSPARC:
10246 case PROCESSOR_HYPERSPARC:
10247 case PROCESSOR_SPARCLITE86X:
10248 return 3;
10249 case PROCESSOR_NIAGARA4:
10250 case PROCESSOR_NIAGARA7:
10251 case PROCESSOR_M8:
10252 return 2;
10253 case PROCESSOR_NIAGARA:
10254 case PROCESSOR_NIAGARA2:
10255 case PROCESSOR_NIAGARA3:
10256 default:
10257 return 0;
10258 }
10259 }
10260
10261 static int
10262 sparc_issue_rate (void)
10263 {
10264 switch (sparc_cpu)
10265 {
10266 case PROCESSOR_ULTRASPARC:
10267 case PROCESSOR_ULTRASPARC3:
10268 case PROCESSOR_M8:
10269 return 4;
10270 case PROCESSOR_SUPERSPARC:
10271 return 3;
10272 case PROCESSOR_HYPERSPARC:
10273 case PROCESSOR_SPARCLITE86X:
10274 case PROCESSOR_V9:
10275 /* Assume V9 processors are capable of at least dual-issue. */
10276 case PROCESSOR_NIAGARA4:
10277 case PROCESSOR_NIAGARA7:
10278 return 2;
10279 case PROCESSOR_NIAGARA:
10280 case PROCESSOR_NIAGARA2:
10281 case PROCESSOR_NIAGARA3:
10282 default:
10283 return 1;
10284 }
10285 }
10286
10287 int
10288 sparc_branch_cost (bool speed_p, bool predictable_p)
10289 {
10290 if (!speed_p)
10291 return 2;
10292
10293 /* For pre-V9 processors we use a single value (usually 3) to take into
10294 account the potential annulling of the delay slot (which ends up being
10295 a bubble in the pipeline slot) plus a cycle to take into consideration
10296 the instruction cache effects.
10297
10298 On V9 and later processors, which have branch prediction facilities,
10299 we take into account whether the branch is (easily) predictable. */
10300 const int cost = sparc_costs->branch_cost;
10301
10302 switch (sparc_cpu)
10303 {
10304 case PROCESSOR_V9:
10305 case PROCESSOR_ULTRASPARC:
10306 case PROCESSOR_ULTRASPARC3:
10307 case PROCESSOR_NIAGARA:
10308 case PROCESSOR_NIAGARA2:
10309 case PROCESSOR_NIAGARA3:
10310 case PROCESSOR_NIAGARA4:
10311 case PROCESSOR_NIAGARA7:
10312 case PROCESSOR_M8:
10313 return cost + (predictable_p ? 0 : 2);
10314
10315 default:
10316 return cost;
10317 }
10318 }
10319
10320 static int
10321 set_extends (rtx_insn *insn)
10322 {
10323 register rtx pat = PATTERN (insn);
10324
10325 switch (GET_CODE (SET_SRC (pat)))
10326 {
10327 /* Load and some shift instructions zero extend. */
10328 case MEM:
10329 case ZERO_EXTEND:
10330 /* sethi clears the high bits */
10331 case HIGH:
10332 /* LO_SUM is used with sethi. sethi cleared the high
10333 bits and the values used with lo_sum are positive */
10334 case LO_SUM:
10335 /* Store flag stores 0 or 1 */
10336 case LT: case LTU:
10337 case GT: case GTU:
10338 case LE: case LEU:
10339 case GE: case GEU:
10340 case EQ:
10341 case NE:
10342 return 1;
10343 case AND:
10344 {
10345 rtx op0 = XEXP (SET_SRC (pat), 0);
10346 rtx op1 = XEXP (SET_SRC (pat), 1);
10347 if (GET_CODE (op1) == CONST_INT)
10348 return INTVAL (op1) >= 0;
10349 if (GET_CODE (op0) != REG)
10350 return 0;
10351 if (sparc_check_64 (op0, insn) == 1)
10352 return 1;
10353 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10354 }
10355 case IOR:
10356 case XOR:
10357 {
10358 rtx op0 = XEXP (SET_SRC (pat), 0);
10359 rtx op1 = XEXP (SET_SRC (pat), 1);
10360 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10361 return 0;
10362 if (GET_CODE (op1) == CONST_INT)
10363 return INTVAL (op1) >= 0;
10364 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10365 }
10366 case LSHIFTRT:
10367 return GET_MODE (SET_SRC (pat)) == SImode;
10368 /* Positive integers leave the high bits zero. */
10369 case CONST_INT:
10370 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10371 case ASHIFTRT:
10372 case SIGN_EXTEND:
10373 return - (GET_MODE (SET_SRC (pat)) == SImode);
10374 case REG:
10375 return sparc_check_64 (SET_SRC (pat), insn);
10376 default:
10377 return 0;
10378 }
10379 }
10380
10381 /* We _ought_ to have only one kind per function, but... */
10382 static GTY(()) rtx sparc_addr_diff_list;
10383 static GTY(()) rtx sparc_addr_list;
10384
10385 void
10386 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10387 {
10388 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10389 if (diff)
10390 sparc_addr_diff_list
10391 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10392 else
10393 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10394 }
10395
10396 static void
10397 sparc_output_addr_vec (rtx vec)
10398 {
10399 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10400 int idx, vlen = XVECLEN (body, 0);
10401
10402 #ifdef ASM_OUTPUT_ADDR_VEC_START
10403 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10404 #endif
10405
10406 #ifdef ASM_OUTPUT_CASE_LABEL
10407 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10408 NEXT_INSN (lab));
10409 #else
10410 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10411 #endif
10412
10413 for (idx = 0; idx < vlen; idx++)
10414 {
10415 ASM_OUTPUT_ADDR_VEC_ELT
10416 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10417 }
10418
10419 #ifdef ASM_OUTPUT_ADDR_VEC_END
10420 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10421 #endif
10422 }
10423
10424 static void
10425 sparc_output_addr_diff_vec (rtx vec)
10426 {
10427 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10428 rtx base = XEXP (XEXP (body, 0), 0);
10429 int idx, vlen = XVECLEN (body, 1);
10430
10431 #ifdef ASM_OUTPUT_ADDR_VEC_START
10432 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10433 #endif
10434
10435 #ifdef ASM_OUTPUT_CASE_LABEL
10436 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10437 NEXT_INSN (lab));
10438 #else
10439 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10440 #endif
10441
10442 for (idx = 0; idx < vlen; idx++)
10443 {
10444 ASM_OUTPUT_ADDR_DIFF_ELT
10445 (asm_out_file,
10446 body,
10447 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10448 CODE_LABEL_NUMBER (base));
10449 }
10450
10451 #ifdef ASM_OUTPUT_ADDR_VEC_END
10452 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10453 #endif
10454 }
10455
10456 static void
10457 sparc_output_deferred_case_vectors (void)
10458 {
10459 rtx t;
10460 int align;
10461
10462 if (sparc_addr_list == NULL_RTX
10463 && sparc_addr_diff_list == NULL_RTX)
10464 return;
10465
10466 /* Align to cache line in the function's code section. */
10467 switch_to_section (current_function_section ());
10468
10469 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10470 if (align > 0)
10471 ASM_OUTPUT_ALIGN (asm_out_file, align);
10472
10473 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10474 sparc_output_addr_vec (XEXP (t, 0));
10475 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10476 sparc_output_addr_diff_vec (XEXP (t, 0));
10477
10478 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10479 }
10480
10481 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10482 unknown. Return 1 if the high bits are zero, -1 if the register is
10483 sign extended. */
10484 int
10485 sparc_check_64 (rtx x, rtx_insn *insn)
10486 {
10487 /* If a register is set only once it is safe to ignore insns this
10488 code does not know how to handle. The loop will either recognize
10489 the single set and return the correct value or fail to recognize
10490 it and return 0. */
10491 int set_once = 0;
10492 rtx y = x;
10493
10494 gcc_assert (GET_CODE (x) == REG);
10495
10496 if (GET_MODE (x) == DImode)
10497 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10498
10499 if (flag_expensive_optimizations
10500 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10501 set_once = 1;
10502
10503 if (insn == 0)
10504 {
10505 if (set_once)
10506 insn = get_last_insn_anywhere ();
10507 else
10508 return 0;
10509 }
10510
10511 while ((insn = PREV_INSN (insn)))
10512 {
10513 switch (GET_CODE (insn))
10514 {
10515 case JUMP_INSN:
10516 case NOTE:
10517 break;
10518 case CODE_LABEL:
10519 case CALL_INSN:
10520 default:
10521 if (! set_once)
10522 return 0;
10523 break;
10524 case INSN:
10525 {
10526 rtx pat = PATTERN (insn);
10527 if (GET_CODE (pat) != SET)
10528 return 0;
10529 if (rtx_equal_p (x, SET_DEST (pat)))
10530 return set_extends (insn);
10531 if (y && rtx_equal_p (y, SET_DEST (pat)))
10532 return set_extends (insn);
10533 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10534 return 0;
10535 }
10536 }
10537 }
10538 return 0;
10539 }
10540
10541 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10542 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10543
10544 const char *
10545 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10546 {
10547 static char asm_code[60];
10548
10549 /* The scratch register is only required when the destination
10550 register is not a 64-bit global or out register. */
10551 if (which_alternative != 2)
10552 operands[3] = operands[0];
10553
10554 /* We can only shift by constants <= 63. */
10555 if (GET_CODE (operands[2]) == CONST_INT)
10556 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10557
10558 if (GET_CODE (operands[1]) == CONST_INT)
10559 {
10560 output_asm_insn ("mov\t%1, %3", operands);
10561 }
10562 else
10563 {
10564 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10565 if (sparc_check_64 (operands[1], insn) <= 0)
10566 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10567 output_asm_insn ("or\t%L1, %3, %3", operands);
10568 }
10569
10570 strcpy (asm_code, opcode);
10571
10572 if (which_alternative != 2)
10573 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10574 else
10575 return
10576 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10577 }
10578 \f
10579 /* Output rtl to increment the profiler label LABELNO
10580 for profiling a function entry. */
10581
10582 void
10583 sparc_profile_hook (int labelno)
10584 {
10585 char buf[32];
10586 rtx lab, fun;
10587
10588 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10589 if (NO_PROFILE_COUNTERS)
10590 {
10591 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10592 }
10593 else
10594 {
10595 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10596 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10597 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10598 }
10599 }
10600 \f
10601 #ifdef TARGET_SOLARIS
10602 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10603
10604 static void
10605 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10606 tree decl ATTRIBUTE_UNUSED)
10607 {
10608 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10609 {
10610 solaris_elf_asm_comdat_section (name, flags, decl);
10611 return;
10612 }
10613
10614 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10615
10616 if (!(flags & SECTION_DEBUG))
10617 fputs (",#alloc", asm_out_file);
10618 #if HAVE_GAS_SECTION_EXCLUDE
10619 if (flags & SECTION_EXCLUDE)
10620 fputs (",#exclude", asm_out_file);
10621 #endif
10622 if (flags & SECTION_WRITE)
10623 fputs (",#write", asm_out_file);
10624 if (flags & SECTION_TLS)
10625 fputs (",#tls", asm_out_file);
10626 if (flags & SECTION_CODE)
10627 fputs (",#execinstr", asm_out_file);
10628
10629 if (flags & SECTION_NOTYPE)
10630 ;
10631 else if (flags & SECTION_BSS)
10632 fputs (",#nobits", asm_out_file);
10633 else
10634 fputs (",#progbits", asm_out_file);
10635
10636 fputc ('\n', asm_out_file);
10637 }
10638 #endif /* TARGET_SOLARIS */
10639
10640 /* We do not allow indirect calls to be optimized into sibling calls.
10641
10642 We cannot use sibling calls when delayed branches are disabled
10643 because they will likely require the call delay slot to be filled.
10644
10645 Also, on SPARC 32-bit we cannot emit a sibling call when the
10646 current function returns a structure. This is because the "unimp
10647 after call" convention would cause the callee to return to the
10648 wrong place. The generic code already disallows cases where the
10649 function being called returns a structure.
10650
10651 It may seem strange how this last case could occur. Usually there
10652 is code after the call which jumps to epilogue code which dumps the
10653 return value into the struct return area. That ought to invalidate
10654 the sibling call right? Well, in the C++ case we can end up passing
10655 the pointer to the struct return area to a constructor (which returns
10656 void) and then nothing else happens. Such a sibling call would look
10657 valid without the added check here.
10658
10659 VxWorks PIC PLT entries require the global pointer to be initialized
10660 on entry. We therefore can't emit sibling calls to them. */
10661 static bool
10662 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10663 {
10664 return (decl
10665 && flag_delayed_branch
10666 && (TARGET_ARCH64 || ! cfun->returns_struct)
10667 && !(TARGET_VXWORKS_RTP
10668 && flag_pic
10669 && !targetm.binds_local_p (decl)));
10670 }
10671 \f
10672 /* libfunc renaming. */
10673
10674 static void
10675 sparc_init_libfuncs (void)
10676 {
10677 if (TARGET_ARCH32)
10678 {
10679 /* Use the subroutines that Sun's library provides for integer
10680 multiply and divide. The `*' prevents an underscore from
10681 being prepended by the compiler. .umul is a little faster
10682 than .mul. */
10683 set_optab_libfunc (smul_optab, SImode, "*.umul");
10684 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10685 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10686 set_optab_libfunc (smod_optab, SImode, "*.rem");
10687 set_optab_libfunc (umod_optab, SImode, "*.urem");
10688
10689 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10690 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10691 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10692 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10693 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10694 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10695
10696 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10697 is because with soft-float, the SFmode and DFmode sqrt
10698 instructions will be absent, and the compiler will notice and
10699 try to use the TFmode sqrt instruction for calls to the
10700 builtin function sqrt, but this fails. */
10701 if (TARGET_FPU)
10702 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10703
10704 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10705 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10706 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10707 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10708 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10709 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10710
10711 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10712 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10713 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10714 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10715
10716 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10717 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10718 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10719 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10720
10721 if (DITF_CONVERSION_LIBFUNCS)
10722 {
10723 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10724 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10725 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10726 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10727 }
10728
10729 if (SUN_CONVERSION_LIBFUNCS)
10730 {
10731 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10732 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10733 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10734 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10735 }
10736 }
10737 if (TARGET_ARCH64)
10738 {
10739 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10740 do not exist in the library. Make sure the compiler does not
10741 emit calls to them by accident. (It should always use the
10742 hardware instructions.) */
10743 set_optab_libfunc (smul_optab, SImode, 0);
10744 set_optab_libfunc (sdiv_optab, SImode, 0);
10745 set_optab_libfunc (udiv_optab, SImode, 0);
10746 set_optab_libfunc (smod_optab, SImode, 0);
10747 set_optab_libfunc (umod_optab, SImode, 0);
10748
10749 if (SUN_INTEGER_MULTIPLY_64)
10750 {
10751 set_optab_libfunc (smul_optab, DImode, "__mul64");
10752 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10753 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10754 set_optab_libfunc (smod_optab, DImode, "__rem64");
10755 set_optab_libfunc (umod_optab, DImode, "__urem64");
10756 }
10757
10758 if (SUN_CONVERSION_LIBFUNCS)
10759 {
10760 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10761 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10762 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10763 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10764 }
10765 }
10766 }
10767 \f
10768 /* SPARC builtins. */
10769 enum sparc_builtins
10770 {
10771 /* FPU builtins. */
10772 SPARC_BUILTIN_LDFSR,
10773 SPARC_BUILTIN_STFSR,
10774
10775 /* VIS 1.0 builtins. */
10776 SPARC_BUILTIN_FPACK16,
10777 SPARC_BUILTIN_FPACK32,
10778 SPARC_BUILTIN_FPACKFIX,
10779 SPARC_BUILTIN_FEXPAND,
10780 SPARC_BUILTIN_FPMERGE,
10781 SPARC_BUILTIN_FMUL8X16,
10782 SPARC_BUILTIN_FMUL8X16AU,
10783 SPARC_BUILTIN_FMUL8X16AL,
10784 SPARC_BUILTIN_FMUL8SUX16,
10785 SPARC_BUILTIN_FMUL8ULX16,
10786 SPARC_BUILTIN_FMULD8SUX16,
10787 SPARC_BUILTIN_FMULD8ULX16,
10788 SPARC_BUILTIN_FALIGNDATAV4HI,
10789 SPARC_BUILTIN_FALIGNDATAV8QI,
10790 SPARC_BUILTIN_FALIGNDATAV2SI,
10791 SPARC_BUILTIN_FALIGNDATADI,
10792 SPARC_BUILTIN_WRGSR,
10793 SPARC_BUILTIN_RDGSR,
10794 SPARC_BUILTIN_ALIGNADDR,
10795 SPARC_BUILTIN_ALIGNADDRL,
10796 SPARC_BUILTIN_PDIST,
10797 SPARC_BUILTIN_EDGE8,
10798 SPARC_BUILTIN_EDGE8L,
10799 SPARC_BUILTIN_EDGE16,
10800 SPARC_BUILTIN_EDGE16L,
10801 SPARC_BUILTIN_EDGE32,
10802 SPARC_BUILTIN_EDGE32L,
10803 SPARC_BUILTIN_FCMPLE16,
10804 SPARC_BUILTIN_FCMPLE32,
10805 SPARC_BUILTIN_FCMPNE16,
10806 SPARC_BUILTIN_FCMPNE32,
10807 SPARC_BUILTIN_FCMPGT16,
10808 SPARC_BUILTIN_FCMPGT32,
10809 SPARC_BUILTIN_FCMPEQ16,
10810 SPARC_BUILTIN_FCMPEQ32,
10811 SPARC_BUILTIN_FPADD16,
10812 SPARC_BUILTIN_FPADD16S,
10813 SPARC_BUILTIN_FPADD32,
10814 SPARC_BUILTIN_FPADD32S,
10815 SPARC_BUILTIN_FPSUB16,
10816 SPARC_BUILTIN_FPSUB16S,
10817 SPARC_BUILTIN_FPSUB32,
10818 SPARC_BUILTIN_FPSUB32S,
10819 SPARC_BUILTIN_ARRAY8,
10820 SPARC_BUILTIN_ARRAY16,
10821 SPARC_BUILTIN_ARRAY32,
10822
10823 /* VIS 2.0 builtins. */
10824 SPARC_BUILTIN_EDGE8N,
10825 SPARC_BUILTIN_EDGE8LN,
10826 SPARC_BUILTIN_EDGE16N,
10827 SPARC_BUILTIN_EDGE16LN,
10828 SPARC_BUILTIN_EDGE32N,
10829 SPARC_BUILTIN_EDGE32LN,
10830 SPARC_BUILTIN_BMASK,
10831 SPARC_BUILTIN_BSHUFFLEV4HI,
10832 SPARC_BUILTIN_BSHUFFLEV8QI,
10833 SPARC_BUILTIN_BSHUFFLEV2SI,
10834 SPARC_BUILTIN_BSHUFFLEDI,
10835
10836 /* VIS 3.0 builtins. */
10837 SPARC_BUILTIN_CMASK8,
10838 SPARC_BUILTIN_CMASK16,
10839 SPARC_BUILTIN_CMASK32,
10840 SPARC_BUILTIN_FCHKSM16,
10841 SPARC_BUILTIN_FSLL16,
10842 SPARC_BUILTIN_FSLAS16,
10843 SPARC_BUILTIN_FSRL16,
10844 SPARC_BUILTIN_FSRA16,
10845 SPARC_BUILTIN_FSLL32,
10846 SPARC_BUILTIN_FSLAS32,
10847 SPARC_BUILTIN_FSRL32,
10848 SPARC_BUILTIN_FSRA32,
10849 SPARC_BUILTIN_PDISTN,
10850 SPARC_BUILTIN_FMEAN16,
10851 SPARC_BUILTIN_FPADD64,
10852 SPARC_BUILTIN_FPSUB64,
10853 SPARC_BUILTIN_FPADDS16,
10854 SPARC_BUILTIN_FPADDS16S,
10855 SPARC_BUILTIN_FPSUBS16,
10856 SPARC_BUILTIN_FPSUBS16S,
10857 SPARC_BUILTIN_FPADDS32,
10858 SPARC_BUILTIN_FPADDS32S,
10859 SPARC_BUILTIN_FPSUBS32,
10860 SPARC_BUILTIN_FPSUBS32S,
10861 SPARC_BUILTIN_FUCMPLE8,
10862 SPARC_BUILTIN_FUCMPNE8,
10863 SPARC_BUILTIN_FUCMPGT8,
10864 SPARC_BUILTIN_FUCMPEQ8,
10865 SPARC_BUILTIN_FHADDS,
10866 SPARC_BUILTIN_FHADDD,
10867 SPARC_BUILTIN_FHSUBS,
10868 SPARC_BUILTIN_FHSUBD,
10869 SPARC_BUILTIN_FNHADDS,
10870 SPARC_BUILTIN_FNHADDD,
10871 SPARC_BUILTIN_UMULXHI,
10872 SPARC_BUILTIN_XMULX,
10873 SPARC_BUILTIN_XMULXHI,
10874
10875 /* VIS 4.0 builtins. */
10876 SPARC_BUILTIN_FPADD8,
10877 SPARC_BUILTIN_FPADDS8,
10878 SPARC_BUILTIN_FPADDUS8,
10879 SPARC_BUILTIN_FPADDUS16,
10880 SPARC_BUILTIN_FPCMPLE8,
10881 SPARC_BUILTIN_FPCMPGT8,
10882 SPARC_BUILTIN_FPCMPULE16,
10883 SPARC_BUILTIN_FPCMPUGT16,
10884 SPARC_BUILTIN_FPCMPULE32,
10885 SPARC_BUILTIN_FPCMPUGT32,
10886 SPARC_BUILTIN_FPMAX8,
10887 SPARC_BUILTIN_FPMAX16,
10888 SPARC_BUILTIN_FPMAX32,
10889 SPARC_BUILTIN_FPMAXU8,
10890 SPARC_BUILTIN_FPMAXU16,
10891 SPARC_BUILTIN_FPMAXU32,
10892 SPARC_BUILTIN_FPMIN8,
10893 SPARC_BUILTIN_FPMIN16,
10894 SPARC_BUILTIN_FPMIN32,
10895 SPARC_BUILTIN_FPMINU8,
10896 SPARC_BUILTIN_FPMINU16,
10897 SPARC_BUILTIN_FPMINU32,
10898 SPARC_BUILTIN_FPSUB8,
10899 SPARC_BUILTIN_FPSUBS8,
10900 SPARC_BUILTIN_FPSUBUS8,
10901 SPARC_BUILTIN_FPSUBUS16,
10902
10903 /* VIS 4.0B builtins. */
10904
10905 /* Note that all the DICTUNPACK* entries should be kept
10906 contiguous. */
10907 SPARC_BUILTIN_FIRST_DICTUNPACK,
10908 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10909 SPARC_BUILTIN_DICTUNPACK16,
10910 SPARC_BUILTIN_DICTUNPACK32,
10911 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10912
10913 /* Note that all the FPCMP*SHL entries should be kept
10914 contiguous. */
10915 SPARC_BUILTIN_FIRST_FPCMPSHL,
10916 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10917 SPARC_BUILTIN_FPCMPGT8SHL,
10918 SPARC_BUILTIN_FPCMPEQ8SHL,
10919 SPARC_BUILTIN_FPCMPNE8SHL,
10920 SPARC_BUILTIN_FPCMPLE16SHL,
10921 SPARC_BUILTIN_FPCMPGT16SHL,
10922 SPARC_BUILTIN_FPCMPEQ16SHL,
10923 SPARC_BUILTIN_FPCMPNE16SHL,
10924 SPARC_BUILTIN_FPCMPLE32SHL,
10925 SPARC_BUILTIN_FPCMPGT32SHL,
10926 SPARC_BUILTIN_FPCMPEQ32SHL,
10927 SPARC_BUILTIN_FPCMPNE32SHL,
10928 SPARC_BUILTIN_FPCMPULE8SHL,
10929 SPARC_BUILTIN_FPCMPUGT8SHL,
10930 SPARC_BUILTIN_FPCMPULE16SHL,
10931 SPARC_BUILTIN_FPCMPUGT16SHL,
10932 SPARC_BUILTIN_FPCMPULE32SHL,
10933 SPARC_BUILTIN_FPCMPUGT32SHL,
10934 SPARC_BUILTIN_FPCMPDE8SHL,
10935 SPARC_BUILTIN_FPCMPDE16SHL,
10936 SPARC_BUILTIN_FPCMPDE32SHL,
10937 SPARC_BUILTIN_FPCMPUR8SHL,
10938 SPARC_BUILTIN_FPCMPUR16SHL,
10939 SPARC_BUILTIN_FPCMPUR32SHL,
10940 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10941
10942 SPARC_BUILTIN_MAX
10943 };
10944
10945 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10946 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10947
10948 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10949 The instruction should require a constant operand of some sort. The
10950 function prints an error if OPVAL is not valid. */
10951
10952 static int
10953 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10954 {
10955 if (GET_CODE (opval) != CONST_INT)
10956 {
10957 error ("%qs expects a constant argument", insn_data[icode].name);
10958 return false;
10959 }
10960
10961 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10962 {
10963 error ("constant argument out of range for %qs", insn_data[icode].name);
10964 return false;
10965 }
10966 return true;
10967 }
10968
10969 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10970 function decl or NULL_TREE if the builtin was not added. */
10971
10972 static tree
10973 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10974 tree type)
10975 {
10976 tree t
10977 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10978
10979 if (t)
10980 {
10981 sparc_builtins[code] = t;
10982 sparc_builtins_icode[code] = icode;
10983 }
10984
10985 return t;
10986 }
10987
10988 /* Likewise, but also marks the function as "const". */
10989
10990 static tree
10991 def_builtin_const (const char *name, enum insn_code icode,
10992 enum sparc_builtins code, tree type)
10993 {
10994 tree t = def_builtin (name, icode, code, type);
10995
10996 if (t)
10997 TREE_READONLY (t) = 1;
10998
10999 return t;
11000 }
11001
11002 /* Implement the TARGET_INIT_BUILTINS target hook.
11003 Create builtin functions for special SPARC instructions. */
11004
11005 static void
11006 sparc_init_builtins (void)
11007 {
11008 if (TARGET_FPU)
11009 sparc_fpu_init_builtins ();
11010
11011 if (TARGET_VIS)
11012 sparc_vis_init_builtins ();
11013 }
11014
11015 /* Create builtin functions for FPU instructions. */
11016
11017 static void
11018 sparc_fpu_init_builtins (void)
11019 {
11020 tree ftype
11021 = build_function_type_list (void_type_node,
11022 build_pointer_type (unsigned_type_node), 0);
11023 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11024 SPARC_BUILTIN_LDFSR, ftype);
11025 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11026 SPARC_BUILTIN_STFSR, ftype);
11027 }
11028
11029 /* Create builtin functions for VIS instructions. */
11030
11031 static void
11032 sparc_vis_init_builtins (void)
11033 {
11034 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11035 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11036 tree v4hi = build_vector_type (intHI_type_node, 4);
11037 tree v2hi = build_vector_type (intHI_type_node, 2);
11038 tree v2si = build_vector_type (intSI_type_node, 2);
11039 tree v1si = build_vector_type (intSI_type_node, 1);
11040
11041 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11042 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11043 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11044 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11045 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11046 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11047 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11048 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11049 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11050 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11051 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11052 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11053 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11054 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11055 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11056 v8qi, v8qi,
11057 intDI_type_node, 0);
11058 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11059 v8qi, v8qi, 0);
11060 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11061 v8qi, v8qi, 0);
11062 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11063 intSI_type_node, 0);
11064 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11065 intSI_type_node, 0);
11066 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11067 intDI_type_node, 0);
11068 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11069 intDI_type_node,
11070 intDI_type_node, 0);
11071 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11072 intSI_type_node,
11073 intSI_type_node, 0);
11074 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11075 ptr_type_node,
11076 intSI_type_node, 0);
11077 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11078 ptr_type_node,
11079 intDI_type_node, 0);
11080 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11081 ptr_type_node,
11082 ptr_type_node, 0);
11083 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11084 ptr_type_node,
11085 ptr_type_node, 0);
11086 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11087 v4hi, v4hi, 0);
11088 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11089 v2si, v2si, 0);
11090 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11091 v4hi, v4hi, 0);
11092 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11093 v2si, v2si, 0);
11094 tree void_ftype_di = build_function_type_list (void_type_node,
11095 intDI_type_node, 0);
11096 tree di_ftype_void = build_function_type_list (intDI_type_node,
11097 void_type_node, 0);
11098 tree void_ftype_si = build_function_type_list (void_type_node,
11099 intSI_type_node, 0);
11100 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11101 float_type_node,
11102 float_type_node, 0);
11103 tree df_ftype_df_df = build_function_type_list (double_type_node,
11104 double_type_node,
11105 double_type_node, 0);
11106
11107 /* Packing and expanding vectors. */
11108 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11109 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11110 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11111 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11112 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11113 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11114 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11115 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11116 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11117 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11118
11119 /* Multiplications. */
11120 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11121 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11122 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11123 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11124 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11125 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11126 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11127 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11128 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11129 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11130 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11131 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11132 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11133 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11134
11135 /* Data aligning. */
11136 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11137 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11138 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11139 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11140 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11141 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11142 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11143 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11144
11145 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11146 SPARC_BUILTIN_WRGSR, void_ftype_di);
11147 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11148 SPARC_BUILTIN_RDGSR, di_ftype_void);
11149
11150 if (TARGET_ARCH64)
11151 {
11152 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11153 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11154 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11155 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11156 }
11157 else
11158 {
11159 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11160 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11161 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11162 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11163 }
11164
11165 /* Pixel distance. */
11166 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11167 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11168
11169 /* Edge handling. */
11170 if (TARGET_ARCH64)
11171 {
11172 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11173 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11174 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11175 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11176 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11177 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11178 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11179 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11180 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11181 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11182 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11183 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11184 }
11185 else
11186 {
11187 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11188 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11189 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11190 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11191 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11192 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11193 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11194 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11195 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11196 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11197 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11198 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11199 }
11200
11201 /* Pixel compare. */
11202 if (TARGET_ARCH64)
11203 {
11204 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11205 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11206 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11207 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11208 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11209 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11210 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11211 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11212 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11213 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11214 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11215 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11216 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11217 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11218 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11219 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11220 }
11221 else
11222 {
11223 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11224 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11225 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11226 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11227 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11228 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11229 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11230 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11231 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11232 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11233 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11234 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11235 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11236 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11237 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11238 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11239 }
11240
11241 /* Addition and subtraction. */
11242 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11243 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11244 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11245 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11246 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11247 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11248 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11249 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11250 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11251 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11252 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11253 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11254 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11255 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11256 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11257 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11258
11259 /* Three-dimensional array addressing. */
11260 if (TARGET_ARCH64)
11261 {
11262 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11263 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11264 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11265 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11266 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11267 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11268 }
11269 else
11270 {
11271 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11272 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11273 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11274 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11275 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11276 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11277 }
11278
11279 if (TARGET_VIS2)
11280 {
11281 /* Edge handling. */
11282 if (TARGET_ARCH64)
11283 {
11284 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11285 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11286 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11287 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11288 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11289 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11290 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11291 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11292 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11293 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11294 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11295 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11296 }
11297 else
11298 {
11299 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11300 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11301 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11302 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11303 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11304 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11305 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11306 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11307 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11308 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11309 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11310 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11311 }
11312
11313 /* Byte mask and shuffle. */
11314 if (TARGET_ARCH64)
11315 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11316 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11317 else
11318 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11319 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11320 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11321 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11322 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11323 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11324 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11325 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11326 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11327 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11328 }
11329
11330 if (TARGET_VIS3)
11331 {
11332 if (TARGET_ARCH64)
11333 {
11334 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11335 SPARC_BUILTIN_CMASK8, void_ftype_di);
11336 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11337 SPARC_BUILTIN_CMASK16, void_ftype_di);
11338 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11339 SPARC_BUILTIN_CMASK32, void_ftype_di);
11340 }
11341 else
11342 {
11343 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11344 SPARC_BUILTIN_CMASK8, void_ftype_si);
11345 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11346 SPARC_BUILTIN_CMASK16, void_ftype_si);
11347 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11348 SPARC_BUILTIN_CMASK32, void_ftype_si);
11349 }
11350
11351 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11352 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11353
11354 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11355 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11356 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11357 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11358 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11359 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11360 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11361 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11362 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11363 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11364 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11365 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11366 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11367 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11368 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11369 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11370
11371 if (TARGET_ARCH64)
11372 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11373 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11374 else
11375 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11376 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11377
11378 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11379 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11380 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11381 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11382 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11383 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11384
11385 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11386 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11387 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11388 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11389 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11390 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11391 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11392 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11393 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11394 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11395 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11396 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11397 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11398 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11399 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11400 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11401
11402 if (TARGET_ARCH64)
11403 {
11404 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11405 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11406 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11407 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11408 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11409 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11410 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11411 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11412 }
11413 else
11414 {
11415 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11416 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11417 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11418 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11419 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11420 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11421 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11422 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11423 }
11424
11425 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11426 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11427 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11428 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11429 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11430 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11431 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11432 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11433 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11434 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11435 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11436 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11437
11438 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11439 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11440 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11441 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11442 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11443 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11444 }
11445
11446 if (TARGET_VIS4)
11447 {
11448 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11449 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11450 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11451 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11452 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11453 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11454 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11455 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11456
11457
11458 if (TARGET_ARCH64)
11459 {
11460 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11461 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11462 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11463 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11464 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11465 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11466 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11467 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11468 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11469 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11470 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11471 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11472 }
11473 else
11474 {
11475 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11476 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11477 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11478 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11479 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11480 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11481 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11482 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11483 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11484 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11485 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11486 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11487 }
11488
11489 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11490 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11491 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11492 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11493 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11494 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11495 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11496 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11497 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11498 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11499 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11500 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11501 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11502 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11503 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11504 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11505 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11506 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11507 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11508 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11509 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11510 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11511 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11512 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11513 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11514 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11515 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11516 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11517 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11518 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11519 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11520 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11521 }
11522
11523 if (TARGET_VIS4B)
11524 {
11525 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11526 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11527 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11528 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11529 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11530 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11531
11532 if (TARGET_ARCH64)
11533 {
11534 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11535 v8qi, v8qi,
11536 intSI_type_node, 0);
11537 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11538 v4hi, v4hi,
11539 intSI_type_node, 0);
11540 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11541 v2si, v2si,
11542 intSI_type_node, 0);
11543
11544 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11545 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11546 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11547 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11548 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11549 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11550 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11551 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11552
11553 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11554 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11555 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11556 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11557 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11558 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11559 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11560 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11561
11562 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11563 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11564 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11565 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11566 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11567 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11568 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11569 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11570
11571
11572 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11573 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11574 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11575 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11576
11577 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11578 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11579 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11580 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11581
11582 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11583 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11584 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11585 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11586
11587 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11588 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11589 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11590 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11591 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11592 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11593
11594 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11595 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11596 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11597 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11598 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11599 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11600
11601 }
11602 else
11603 {
11604 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11605 v8qi, v8qi,
11606 intSI_type_node, 0);
11607 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11608 v4hi, v4hi,
11609 intSI_type_node, 0);
11610 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11611 v2si, v2si,
11612 intSI_type_node, 0);
11613
11614 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11615 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11616 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11617 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11618 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11619 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11620 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11621 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11622
11623 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11624 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11625 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11626 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11627 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11628 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11629 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11630 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11631
11632 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11633 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11634 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11635 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11636 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11637 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11638 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11639 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11640
11641
11642 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11643 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11644 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11645 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11646
11647 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11648 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11649 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11650 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11651
11652 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11653 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11654 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11655 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11656
11657 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11658 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11659 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11660 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11661 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11662 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11663
11664 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11665 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11666 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11667 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11668 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11669 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11670 }
11671 }
11672 }
11673
11674 /* Implement TARGET_BUILTIN_DECL hook. */
11675
11676 static tree
11677 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11678 {
11679 if (code >= SPARC_BUILTIN_MAX)
11680 return error_mark_node;
11681
11682 return sparc_builtins[code];
11683 }
11684
11685 /* Implemented TARGET_EXPAND_BUILTIN hook. */
11686
11687 static rtx
11688 sparc_expand_builtin (tree exp, rtx target,
11689 rtx subtarget ATTRIBUTE_UNUSED,
11690 machine_mode tmode ATTRIBUTE_UNUSED,
11691 int ignore ATTRIBUTE_UNUSED)
11692 {
11693 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11694 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11695 enum insn_code icode = sparc_builtins_icode[code];
11696 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11697 call_expr_arg_iterator iter;
11698 int arg_count = 0;
11699 rtx pat, op[4];
11700 tree arg;
11701
11702 if (nonvoid)
11703 {
11704 machine_mode tmode = insn_data[icode].operand[0].mode;
11705 if (!target
11706 || GET_MODE (target) != tmode
11707 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11708 op[0] = gen_reg_rtx (tmode);
11709 else
11710 op[0] = target;
11711 }
11712 else
11713 op[0] = NULL_RTX;
11714
11715 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11716 {
11717 const struct insn_operand_data *insn_op;
11718 int idx;
11719
11720 if (arg == error_mark_node)
11721 return NULL_RTX;
11722
11723 arg_count++;
11724 idx = arg_count - !nonvoid;
11725 insn_op = &insn_data[icode].operand[idx];
11726 op[arg_count] = expand_normal (arg);
11727
11728 /* Some of the builtins require constant arguments. We check
11729 for this here. */
11730 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11731 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11732 && arg_count == 3)
11733 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11734 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11735 && arg_count == 2))
11736 {
11737 if (!check_constant_argument (icode, idx, op[arg_count]))
11738 return const0_rtx;
11739 }
11740
11741 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11742 {
11743 if (!address_operand (op[arg_count], SImode))
11744 {
11745 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11746 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11747 }
11748 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11749 }
11750
11751 else if (insn_op->mode == V1DImode
11752 && GET_MODE (op[arg_count]) == DImode)
11753 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11754
11755 else if (insn_op->mode == V1SImode
11756 && GET_MODE (op[arg_count]) == SImode)
11757 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11758
11759 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11760 insn_op->mode))
11761 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11762 }
11763
11764 switch (arg_count)
11765 {
11766 case 0:
11767 pat = GEN_FCN (icode) (op[0]);
11768 break;
11769 case 1:
11770 if (nonvoid)
11771 pat = GEN_FCN (icode) (op[0], op[1]);
11772 else
11773 pat = GEN_FCN (icode) (op[1]);
11774 break;
11775 case 2:
11776 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11777 break;
11778 case 3:
11779 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11780 break;
11781 default:
11782 gcc_unreachable ();
11783 }
11784
11785 if (!pat)
11786 return NULL_RTX;
11787
11788 emit_insn (pat);
11789
11790 return (nonvoid ? op[0] : const0_rtx);
11791 }
11792
11793 /* Return the upper 16 bits of the 8x16 multiplication. */
11794
11795 static int
11796 sparc_vis_mul8x16 (int e8, int e16)
11797 {
11798 return (e8 * e16 + 128) / 256;
11799 }
11800
11801 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11802 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11803
11804 static void
11805 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11806 tree inner_type, tree cst0, tree cst1)
11807 {
11808 unsigned i, num = VECTOR_CST_NELTS (cst0);
11809 int scale;
11810
11811 switch (fncode)
11812 {
11813 case SPARC_BUILTIN_FMUL8X16:
11814 for (i = 0; i < num; ++i)
11815 {
11816 int val
11817 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11818 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11819 n_elts->quick_push (build_int_cst (inner_type, val));
11820 }
11821 break;
11822
11823 case SPARC_BUILTIN_FMUL8X16AU:
11824 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11825
11826 for (i = 0; i < num; ++i)
11827 {
11828 int val
11829 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11830 scale);
11831 n_elts->quick_push (build_int_cst (inner_type, val));
11832 }
11833 break;
11834
11835 case SPARC_BUILTIN_FMUL8X16AL:
11836 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11837
11838 for (i = 0; i < num; ++i)
11839 {
11840 int val
11841 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11842 scale);
11843 n_elts->quick_push (build_int_cst (inner_type, val));
11844 }
11845 break;
11846
11847 default:
11848 gcc_unreachable ();
11849 }
11850 }
11851
11852 /* Implement TARGET_FOLD_BUILTIN hook.
11853
11854 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11855 result of the function call is ignored. NULL_TREE is returned if the
11856 function could not be folded. */
11857
11858 static tree
11859 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11860 tree *args, bool ignore)
11861 {
11862 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11863 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11864 tree arg0, arg1, arg2;
11865
11866 if (ignore)
11867 switch (code)
11868 {
11869 case SPARC_BUILTIN_LDFSR:
11870 case SPARC_BUILTIN_STFSR:
11871 case SPARC_BUILTIN_ALIGNADDR:
11872 case SPARC_BUILTIN_WRGSR:
11873 case SPARC_BUILTIN_BMASK:
11874 case SPARC_BUILTIN_CMASK8:
11875 case SPARC_BUILTIN_CMASK16:
11876 case SPARC_BUILTIN_CMASK32:
11877 break;
11878
11879 default:
11880 return build_zero_cst (rtype);
11881 }
11882
11883 switch (code)
11884 {
11885 case SPARC_BUILTIN_FEXPAND:
11886 arg0 = args[0];
11887 STRIP_NOPS (arg0);
11888
11889 if (TREE_CODE (arg0) == VECTOR_CST)
11890 {
11891 tree inner_type = TREE_TYPE (rtype);
11892 unsigned i;
11893
11894 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11895 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11896 {
11897 unsigned HOST_WIDE_INT val
11898 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11899 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11900 }
11901 return n_elts.build ();
11902 }
11903 break;
11904
11905 case SPARC_BUILTIN_FMUL8X16:
11906 case SPARC_BUILTIN_FMUL8X16AU:
11907 case SPARC_BUILTIN_FMUL8X16AL:
11908 arg0 = args[0];
11909 arg1 = args[1];
11910 STRIP_NOPS (arg0);
11911 STRIP_NOPS (arg1);
11912
11913 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11914 {
11915 tree inner_type = TREE_TYPE (rtype);
11916 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11917 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11918 return n_elts.build ();
11919 }
11920 break;
11921
11922 case SPARC_BUILTIN_FPMERGE:
11923 arg0 = args[0];
11924 arg1 = args[1];
11925 STRIP_NOPS (arg0);
11926 STRIP_NOPS (arg1);
11927
11928 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11929 {
11930 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11931 unsigned i;
11932 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11933 {
11934 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11935 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11936 }
11937
11938 return n_elts.build ();
11939 }
11940 break;
11941
11942 case SPARC_BUILTIN_PDIST:
11943 case SPARC_BUILTIN_PDISTN:
11944 arg0 = args[0];
11945 arg1 = args[1];
11946 STRIP_NOPS (arg0);
11947 STRIP_NOPS (arg1);
11948 if (code == SPARC_BUILTIN_PDIST)
11949 {
11950 arg2 = args[2];
11951 STRIP_NOPS (arg2);
11952 }
11953 else
11954 arg2 = integer_zero_node;
11955
11956 if (TREE_CODE (arg0) == VECTOR_CST
11957 && TREE_CODE (arg1) == VECTOR_CST
11958 && TREE_CODE (arg2) == INTEGER_CST)
11959 {
11960 bool overflow = false;
11961 widest_int result = wi::to_widest (arg2);
11962 widest_int tmp;
11963 unsigned i;
11964
11965 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11966 {
11967 tree e0 = VECTOR_CST_ELT (arg0, i);
11968 tree e1 = VECTOR_CST_ELT (arg1, i);
11969
11970 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11971
11972 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11973 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11974 if (wi::neg_p (tmp))
11975 tmp = wi::neg (tmp, &neg2_ovf);
11976 else
11977 neg2_ovf = wi::OVF_NONE;
11978 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11979 overflow |= ((neg1_ovf != wi::OVF_NONE)
11980 | (neg2_ovf != wi::OVF_NONE)
11981 | (add1_ovf != wi::OVF_NONE)
11982 | (add2_ovf != wi::OVF_NONE));
11983 }
11984
11985 gcc_assert (!overflow);
11986
11987 return wide_int_to_tree (rtype, result);
11988 }
11989
11990 default:
11991 break;
11992 }
11993
11994 return NULL_TREE;
11995 }
11996 \f
11997 /* ??? This duplicates information provided to the compiler by the
11998 ??? scheduler description. Some day, teach genautomata to output
11999 ??? the latencies and then CSE will just use that. */
12000
12001 static bool
12002 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12003 int opno ATTRIBUTE_UNUSED,
12004 int *total, bool speed ATTRIBUTE_UNUSED)
12005 {
12006 int code = GET_CODE (x);
12007 bool float_mode_p = FLOAT_MODE_P (mode);
12008
12009 switch (code)
12010 {
12011 case CONST_INT:
12012 if (SMALL_INT (x))
12013 *total = 0;
12014 else
12015 *total = 2;
12016 return true;
12017
12018 case CONST_WIDE_INT:
12019 *total = 0;
12020 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12021 *total += 2;
12022 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12023 *total += 2;
12024 return true;
12025
12026 case HIGH:
12027 *total = 2;
12028 return true;
12029
12030 case CONST:
12031 case LABEL_REF:
12032 case SYMBOL_REF:
12033 *total = 4;
12034 return true;
12035
12036 case CONST_DOUBLE:
12037 *total = 8;
12038 return true;
12039
12040 case MEM:
12041 /* If outer-code was a sign or zero extension, a cost
12042 of COSTS_N_INSNS (1) was already added in. This is
12043 why we are subtracting it back out. */
12044 if (outer_code == ZERO_EXTEND)
12045 {
12046 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12047 }
12048 else if (outer_code == SIGN_EXTEND)
12049 {
12050 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12051 }
12052 else if (float_mode_p)
12053 {
12054 *total = sparc_costs->float_load;
12055 }
12056 else
12057 {
12058 *total = sparc_costs->int_load;
12059 }
12060
12061 return true;
12062
12063 case PLUS:
12064 case MINUS:
12065 if (float_mode_p)
12066 *total = sparc_costs->float_plusminus;
12067 else
12068 *total = COSTS_N_INSNS (1);
12069 return false;
12070
12071 case FMA:
12072 {
12073 rtx sub;
12074
12075 gcc_assert (float_mode_p);
12076 *total = sparc_costs->float_mul;
12077
12078 sub = XEXP (x, 0);
12079 if (GET_CODE (sub) == NEG)
12080 sub = XEXP (sub, 0);
12081 *total += rtx_cost (sub, mode, FMA, 0, speed);
12082
12083 sub = XEXP (x, 2);
12084 if (GET_CODE (sub) == NEG)
12085 sub = XEXP (sub, 0);
12086 *total += rtx_cost (sub, mode, FMA, 2, speed);
12087 return true;
12088 }
12089
12090 case MULT:
12091 if (float_mode_p)
12092 *total = sparc_costs->float_mul;
12093 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12094 *total = COSTS_N_INSNS (25);
12095 else
12096 {
12097 int bit_cost;
12098
12099 bit_cost = 0;
12100 if (sparc_costs->int_mul_bit_factor)
12101 {
12102 int nbits;
12103
12104 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12105 {
12106 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12107 for (nbits = 0; value != 0; value &= value - 1)
12108 nbits++;
12109 }
12110 else
12111 nbits = 7;
12112
12113 if (nbits < 3)
12114 nbits = 3;
12115 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12116 bit_cost = COSTS_N_INSNS (bit_cost);
12117 }
12118
12119 if (mode == DImode || !TARGET_HARD_MUL)
12120 *total = sparc_costs->int_mulX + bit_cost;
12121 else
12122 *total = sparc_costs->int_mul + bit_cost;
12123 }
12124 return false;
12125
12126 case ASHIFT:
12127 case ASHIFTRT:
12128 case LSHIFTRT:
12129 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12130 return false;
12131
12132 case DIV:
12133 case UDIV:
12134 case MOD:
12135 case UMOD:
12136 if (float_mode_p)
12137 {
12138 if (mode == DFmode)
12139 *total = sparc_costs->float_div_df;
12140 else
12141 *total = sparc_costs->float_div_sf;
12142 }
12143 else
12144 {
12145 if (mode == DImode)
12146 *total = sparc_costs->int_divX;
12147 else
12148 *total = sparc_costs->int_div;
12149 }
12150 return false;
12151
12152 case NEG:
12153 if (! float_mode_p)
12154 {
12155 *total = COSTS_N_INSNS (1);
12156 return false;
12157 }
12158 /* FALLTHRU */
12159
12160 case ABS:
12161 case FLOAT:
12162 case UNSIGNED_FLOAT:
12163 case FIX:
12164 case UNSIGNED_FIX:
12165 case FLOAT_EXTEND:
12166 case FLOAT_TRUNCATE:
12167 *total = sparc_costs->float_move;
12168 return false;
12169
12170 case SQRT:
12171 if (mode == DFmode)
12172 *total = sparc_costs->float_sqrt_df;
12173 else
12174 *total = sparc_costs->float_sqrt_sf;
12175 return false;
12176
12177 case COMPARE:
12178 if (float_mode_p)
12179 *total = sparc_costs->float_cmp;
12180 else
12181 *total = COSTS_N_INSNS (1);
12182 return false;
12183
12184 case IF_THEN_ELSE:
12185 if (float_mode_p)
12186 *total = sparc_costs->float_cmove;
12187 else
12188 *total = sparc_costs->int_cmove;
12189 return false;
12190
12191 case IOR:
12192 /* Handle the NAND vector patterns. */
12193 if (sparc_vector_mode_supported_p (mode)
12194 && GET_CODE (XEXP (x, 0)) == NOT
12195 && GET_CODE (XEXP (x, 1)) == NOT)
12196 {
12197 *total = COSTS_N_INSNS (1);
12198 return true;
12199 }
12200 else
12201 return false;
12202
12203 default:
12204 return false;
12205 }
12206 }
12207
12208 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12209
12210 static inline bool
12211 general_or_i64_p (reg_class_t rclass)
12212 {
12213 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12214 }
12215
12216 /* Implement TARGET_REGISTER_MOVE_COST. */
12217
12218 static int
12219 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12220 reg_class_t from, reg_class_t to)
12221 {
12222 bool need_memory = false;
12223
12224 /* This helps postreload CSE to eliminate redundant comparisons. */
12225 if (from == NO_REGS || to == NO_REGS)
12226 return 100;
12227
12228 if (from == FPCC_REGS || to == FPCC_REGS)
12229 need_memory = true;
12230 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12231 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12232 {
12233 if (TARGET_VIS3)
12234 {
12235 int size = GET_MODE_SIZE (mode);
12236 if (size == 8 || size == 4)
12237 {
12238 if (! TARGET_ARCH32 || size == 4)
12239 return 4;
12240 else
12241 return 6;
12242 }
12243 }
12244 need_memory = true;
12245 }
12246
12247 if (need_memory)
12248 {
12249 if (sparc_cpu == PROCESSOR_ULTRASPARC
12250 || sparc_cpu == PROCESSOR_ULTRASPARC3
12251 || sparc_cpu == PROCESSOR_NIAGARA
12252 || sparc_cpu == PROCESSOR_NIAGARA2
12253 || sparc_cpu == PROCESSOR_NIAGARA3
12254 || sparc_cpu == PROCESSOR_NIAGARA4
12255 || sparc_cpu == PROCESSOR_NIAGARA7
12256 || sparc_cpu == PROCESSOR_M8)
12257 return 12;
12258
12259 return 6;
12260 }
12261
12262 return 2;
12263 }
12264
12265 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12266 This is achieved by means of a manual dynamic stack space allocation in
12267 the current frame. We make the assumption that SEQ doesn't contain any
12268 function calls, with the possible exception of calls to the GOT helper. */
12269
12270 static void
12271 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12272 {
12273 /* We must preserve the lowest 16 words for the register save area. */
12274 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12275 /* We really need only 2 words of fresh stack space. */
12276 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12277
12278 rtx slot
12279 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12280 SPARC_STACK_BIAS + offset));
12281
12282 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12283 emit_insn (gen_rtx_SET (slot, reg));
12284 if (reg2)
12285 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12286 reg2));
12287 emit_insn (seq);
12288 if (reg2)
12289 emit_insn (gen_rtx_SET (reg2,
12290 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12291 emit_insn (gen_rtx_SET (reg, slot));
12292 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12293 }
12294
12295 /* Output the assembler code for a thunk function. THUNK_DECL is the
12296 declaration for the thunk function itself, FUNCTION is the decl for
12297 the target function. DELTA is an immediate constant offset to be
12298 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12299 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12300
12301 static void
12302 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12303 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12304 tree function)
12305 {
12306 rtx this_rtx, funexp;
12307 rtx_insn *insn;
12308 unsigned int int_arg_first;
12309
12310 reload_completed = 1;
12311 epilogue_completed = 1;
12312
12313 emit_note (NOTE_INSN_PROLOGUE_END);
12314
12315 if (TARGET_FLAT)
12316 {
12317 sparc_leaf_function_p = 1;
12318
12319 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12320 }
12321 else if (flag_delayed_branch)
12322 {
12323 /* We will emit a regular sibcall below, so we need to instruct
12324 output_sibcall that we are in a leaf function. */
12325 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12326
12327 /* This will cause final.c to invoke leaf_renumber_regs so we
12328 must behave as if we were in a not-yet-leafified function. */
12329 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12330 }
12331 else
12332 {
12333 /* We will emit the sibcall manually below, so we will need to
12334 manually spill non-leaf registers. */
12335 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12336
12337 /* We really are in a leaf function. */
12338 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12339 }
12340
12341 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12342 returns a structure, the structure return pointer is there instead. */
12343 if (TARGET_ARCH64
12344 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12345 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12346 else
12347 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12348
12349 /* Add DELTA. When possible use a plain add, otherwise load it into
12350 a register first. */
12351 if (delta)
12352 {
12353 rtx delta_rtx = GEN_INT (delta);
12354
12355 if (! SPARC_SIMM13_P (delta))
12356 {
12357 rtx scratch = gen_rtx_REG (Pmode, 1);
12358 emit_move_insn (scratch, delta_rtx);
12359 delta_rtx = scratch;
12360 }
12361
12362 /* THIS_RTX += DELTA. */
12363 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12364 }
12365
12366 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12367 if (vcall_offset)
12368 {
12369 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12370 rtx scratch = gen_rtx_REG (Pmode, 1);
12371
12372 gcc_assert (vcall_offset < 0);
12373
12374 /* SCRATCH = *THIS_RTX. */
12375 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12376
12377 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12378 may not have any available scratch register at this point. */
12379 if (SPARC_SIMM13_P (vcall_offset))
12380 ;
12381 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12382 else if (! fixed_regs[5]
12383 /* The below sequence is made up of at least 2 insns,
12384 while the default method may need only one. */
12385 && vcall_offset < -8192)
12386 {
12387 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12388 emit_move_insn (scratch2, vcall_offset_rtx);
12389 vcall_offset_rtx = scratch2;
12390 }
12391 else
12392 {
12393 rtx increment = GEN_INT (-4096);
12394
12395 /* VCALL_OFFSET is a negative number whose typical range can be
12396 estimated as -32768..0 in 32-bit mode. In almost all cases
12397 it is therefore cheaper to emit multiple add insns than
12398 spilling and loading the constant into a register (at least
12399 6 insns). */
12400 while (! SPARC_SIMM13_P (vcall_offset))
12401 {
12402 emit_insn (gen_add2_insn (scratch, increment));
12403 vcall_offset += 4096;
12404 }
12405 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12406 }
12407
12408 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12409 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12410 gen_rtx_PLUS (Pmode,
12411 scratch,
12412 vcall_offset_rtx)));
12413
12414 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12415 emit_insn (gen_add2_insn (this_rtx, scratch));
12416 }
12417
12418 /* Generate a tail call to the target function. */
12419 if (! TREE_USED (function))
12420 {
12421 assemble_external (function);
12422 TREE_USED (function) = 1;
12423 }
12424 funexp = XEXP (DECL_RTL (function), 0);
12425
12426 if (flag_delayed_branch)
12427 {
12428 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12429 insn = emit_call_insn (gen_sibcall (funexp));
12430 SIBLING_CALL_P (insn) = 1;
12431 }
12432 else
12433 {
12434 /* The hoops we have to jump through in order to generate a sibcall
12435 without using delay slots... */
12436 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12437
12438 if (flag_pic)
12439 {
12440 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12441 start_sequence ();
12442 load_got_register (); /* clobbers %o7 */
12443 if (!TARGET_VXWORKS_RTP)
12444 pic_offset_table_rtx = global_offset_table_rtx;
12445 scratch = sparc_legitimize_pic_address (funexp, scratch);
12446 seq = get_insns ();
12447 end_sequence ();
12448 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12449 }
12450 else if (TARGET_ARCH32)
12451 {
12452 emit_insn (gen_rtx_SET (scratch,
12453 gen_rtx_HIGH (SImode, funexp)));
12454 emit_insn (gen_rtx_SET (scratch,
12455 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12456 }
12457 else /* TARGET_ARCH64 */
12458 {
12459 switch (sparc_cmodel)
12460 {
12461 case CM_MEDLOW:
12462 case CM_MEDMID:
12463 /* The destination can serve as a temporary. */
12464 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12465 break;
12466
12467 case CM_MEDANY:
12468 case CM_EMBMEDANY:
12469 /* The destination cannot serve as a temporary. */
12470 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12471 start_sequence ();
12472 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12473 seq = get_insns ();
12474 end_sequence ();
12475 emit_and_preserve (seq, spill_reg, 0);
12476 break;
12477
12478 default:
12479 gcc_unreachable ();
12480 }
12481 }
12482
12483 emit_jump_insn (gen_indirect_jump (scratch));
12484 }
12485
12486 emit_barrier ();
12487
12488 /* Run just enough of rest_of_compilation to get the insns emitted.
12489 There's not really enough bulk here to make other passes such as
12490 instruction scheduling worth while. Note that use_thunk calls
12491 assemble_start_function and assemble_end_function. */
12492 insn = get_insns ();
12493 shorten_branches (insn);
12494 final_start_function (insn, file, 1);
12495 final (insn, file, 1);
12496 final_end_function ();
12497
12498 reload_completed = 0;
12499 epilogue_completed = 0;
12500 }
12501
12502 /* Return true if sparc_output_mi_thunk would be able to output the
12503 assembler code for the thunk function specified by the arguments
12504 it is passed, and false otherwise. */
12505 static bool
12506 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12507 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12508 HOST_WIDE_INT vcall_offset,
12509 const_tree function ATTRIBUTE_UNUSED)
12510 {
12511 /* Bound the loop used in the default method above. */
12512 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12513 }
12514
12515 /* How to allocate a 'struct machine_function'. */
12516
12517 static struct machine_function *
12518 sparc_init_machine_status (void)
12519 {
12520 return ggc_cleared_alloc<machine_function> ();
12521 }
12522 \f
12523 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12524
12525 static unsigned HOST_WIDE_INT
12526 sparc_asan_shadow_offset (void)
12527 {
12528 return TARGET_ARCH64 ? HOST_WIDE_INT_C (0x7fff8000) : (HOST_WIDE_INT_1 << 29);
12529 }
12530 \f
12531 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12532 We need to emit DTP-relative relocations. */
12533
12534 static void
12535 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12536 {
12537 switch (size)
12538 {
12539 case 4:
12540 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12541 break;
12542 case 8:
12543 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12544 break;
12545 default:
12546 gcc_unreachable ();
12547 }
12548 output_addr_const (file, x);
12549 fputs (")", file);
12550 }
12551
12552 /* Do whatever processing is required at the end of a file. */
12553
12554 static void
12555 sparc_file_end (void)
12556 {
12557 /* If we need to emit the special GOT helper function, do so now. */
12558 if (got_helper_rtx)
12559 {
12560 const char *name = XSTR (got_helper_rtx, 0);
12561 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12562 #ifdef DWARF2_UNWIND_INFO
12563 bool do_cfi;
12564 #endif
12565
12566 if (USE_HIDDEN_LINKONCE)
12567 {
12568 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12569 get_identifier (name),
12570 build_function_type_list (void_type_node,
12571 NULL_TREE));
12572 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12573 NULL_TREE, void_type_node);
12574 TREE_PUBLIC (decl) = 1;
12575 TREE_STATIC (decl) = 1;
12576 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12577 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12578 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12579 resolve_unique_section (decl, 0, flag_function_sections);
12580 allocate_struct_function (decl, true);
12581 cfun->is_thunk = 1;
12582 current_function_decl = decl;
12583 init_varasm_status ();
12584 assemble_start_function (decl, name);
12585 }
12586 else
12587 {
12588 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12589 switch_to_section (text_section);
12590 if (align > 0)
12591 ASM_OUTPUT_ALIGN (asm_out_file, align);
12592 ASM_OUTPUT_LABEL (asm_out_file, name);
12593 }
12594
12595 #ifdef DWARF2_UNWIND_INFO
12596 do_cfi = dwarf2out_do_cfi_asm ();
12597 if (do_cfi)
12598 fprintf (asm_out_file, "\t.cfi_startproc\n");
12599 #endif
12600 if (flag_delayed_branch)
12601 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12602 reg_name, reg_name);
12603 else
12604 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12605 reg_name, reg_name);
12606 #ifdef DWARF2_UNWIND_INFO
12607 if (do_cfi)
12608 fprintf (asm_out_file, "\t.cfi_endproc\n");
12609 #endif
12610 }
12611
12612 if (NEED_INDICATE_EXEC_STACK)
12613 file_end_indicate_exec_stack ();
12614
12615 #ifdef TARGET_SOLARIS
12616 solaris_file_end ();
12617 #endif
12618 }
12619
12620 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12621 /* Implement TARGET_MANGLE_TYPE. */
12622
12623 static const char *
12624 sparc_mangle_type (const_tree type)
12625 {
12626 if (TARGET_ARCH32
12627 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12628 && TARGET_LONG_DOUBLE_128)
12629 return "g";
12630
12631 /* For all other types, use normal C++ mangling. */
12632 return NULL;
12633 }
12634 #endif
12635
12636 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12637 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
12638 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12639
12640 void
12641 sparc_emit_membar_for_model (enum memmodel model,
12642 int load_store, int before_after)
12643 {
12644 /* Bits for the MEMBAR mmask field. */
12645 const int LoadLoad = 1;
12646 const int StoreLoad = 2;
12647 const int LoadStore = 4;
12648 const int StoreStore = 8;
12649
12650 int mm = 0, implied = 0;
12651
12652 switch (sparc_memory_model)
12653 {
12654 case SMM_SC:
12655 /* Sequential Consistency. All memory transactions are immediately
12656 visible in sequential execution order. No barriers needed. */
12657 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12658 break;
12659
12660 case SMM_TSO:
12661 /* Total Store Ordering: all memory transactions with store semantics
12662 are followed by an implied StoreStore. */
12663 implied |= StoreStore;
12664
12665 /* If we're not looking for a raw barrer (before+after), then atomic
12666 operations get the benefit of being both load and store. */
12667 if (load_store == 3 && before_after == 1)
12668 implied |= StoreLoad;
12669 /* FALLTHRU */
12670
12671 case SMM_PSO:
12672 /* Partial Store Ordering: all memory transactions with load semantics
12673 are followed by an implied LoadLoad | LoadStore. */
12674 implied |= LoadLoad | LoadStore;
12675
12676 /* If we're not looking for a raw barrer (before+after), then atomic
12677 operations get the benefit of being both load and store. */
12678 if (load_store == 3 && before_after == 2)
12679 implied |= StoreLoad | StoreStore;
12680 /* FALLTHRU */
12681
12682 case SMM_RMO:
12683 /* Relaxed Memory Ordering: no implicit bits. */
12684 break;
12685
12686 default:
12687 gcc_unreachable ();
12688 }
12689
12690 if (before_after & 1)
12691 {
12692 if (is_mm_release (model) || is_mm_acq_rel (model)
12693 || is_mm_seq_cst (model))
12694 {
12695 if (load_store & 1)
12696 mm |= LoadLoad | StoreLoad;
12697 if (load_store & 2)
12698 mm |= LoadStore | StoreStore;
12699 }
12700 }
12701 if (before_after & 2)
12702 {
12703 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12704 || is_mm_seq_cst (model))
12705 {
12706 if (load_store & 1)
12707 mm |= LoadLoad | LoadStore;
12708 if (load_store & 2)
12709 mm |= StoreLoad | StoreStore;
12710 }
12711 }
12712
12713 /* Remove the bits implied by the system memory model. */
12714 mm &= ~implied;
12715
12716 /* For raw barriers (before+after), always emit a barrier.
12717 This will become a compile-time barrier if needed. */
12718 if (mm || before_after == 3)
12719 emit_insn (gen_membar (GEN_INT (mm)));
12720 }
12721
12722 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
12723 compare and swap on the word containing the byte or half-word. */
12724
12725 static void
12726 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12727 rtx oldval, rtx newval)
12728 {
12729 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12730 rtx addr = gen_reg_rtx (Pmode);
12731 rtx off = gen_reg_rtx (SImode);
12732 rtx oldv = gen_reg_rtx (SImode);
12733 rtx newv = gen_reg_rtx (SImode);
12734 rtx oldvalue = gen_reg_rtx (SImode);
12735 rtx newvalue = gen_reg_rtx (SImode);
12736 rtx res = gen_reg_rtx (SImode);
12737 rtx resv = gen_reg_rtx (SImode);
12738 rtx memsi, val, mask, cc;
12739
12740 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12741
12742 if (Pmode != SImode)
12743 addr1 = gen_lowpart (SImode, addr1);
12744 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12745
12746 memsi = gen_rtx_MEM (SImode, addr);
12747 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12748 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12749
12750 val = copy_to_reg (memsi);
12751
12752 emit_insn (gen_rtx_SET (off,
12753 gen_rtx_XOR (SImode, off,
12754 GEN_INT (GET_MODE (mem) == QImode
12755 ? 3 : 2))));
12756
12757 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12758
12759 if (GET_MODE (mem) == QImode)
12760 mask = force_reg (SImode, GEN_INT (0xff));
12761 else
12762 mask = force_reg (SImode, GEN_INT (0xffff));
12763
12764 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12765
12766 emit_insn (gen_rtx_SET (val,
12767 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12768 val)));
12769
12770 oldval = gen_lowpart (SImode, oldval);
12771 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12772
12773 newval = gen_lowpart_common (SImode, newval);
12774 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12775
12776 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12777
12778 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12779
12780 rtx_code_label *end_label = gen_label_rtx ();
12781 rtx_code_label *loop_label = gen_label_rtx ();
12782 emit_label (loop_label);
12783
12784 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12785
12786 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12787
12788 emit_move_insn (bool_result, const1_rtx);
12789
12790 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12791
12792 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12793
12794 emit_insn (gen_rtx_SET (resv,
12795 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12796 res)));
12797
12798 emit_move_insn (bool_result, const0_rtx);
12799
12800 cc = gen_compare_reg_1 (NE, resv, val);
12801 emit_insn (gen_rtx_SET (val, resv));
12802
12803 /* Use cbranchcc4 to separate the compare and branch! */
12804 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12805 cc, const0_rtx, loop_label));
12806
12807 emit_label (end_label);
12808
12809 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12810
12811 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12812
12813 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12814 }
12815
12816 /* Expand code to perform a compare-and-swap. */
12817
12818 void
12819 sparc_expand_compare_and_swap (rtx operands[])
12820 {
12821 rtx bval, retval, mem, oldval, newval;
12822 machine_mode mode;
12823 enum memmodel model;
12824
12825 bval = operands[0];
12826 retval = operands[1];
12827 mem = operands[2];
12828 oldval = operands[3];
12829 newval = operands[4];
12830 model = (enum memmodel) INTVAL (operands[6]);
12831 mode = GET_MODE (mem);
12832
12833 sparc_emit_membar_for_model (model, 3, 1);
12834
12835 if (reg_overlap_mentioned_p (retval, oldval))
12836 oldval = copy_to_reg (oldval);
12837
12838 if (mode == QImode || mode == HImode)
12839 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12840 else
12841 {
12842 rtx (*gen) (rtx, rtx, rtx, rtx);
12843 rtx x;
12844
12845 if (mode == SImode)
12846 gen = gen_atomic_compare_and_swapsi_1;
12847 else
12848 gen = gen_atomic_compare_and_swapdi_1;
12849 emit_insn (gen (retval, mem, oldval, newval));
12850
12851 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12852 if (x != bval)
12853 convert_move (bval, x, 1);
12854 }
12855
12856 sparc_emit_membar_for_model (model, 3, 2);
12857 }
12858
12859 void
12860 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12861 {
12862 rtx t_1, t_2, t_3;
12863
12864 sel = gen_lowpart (DImode, sel);
12865 switch (vmode)
12866 {
12867 case E_V2SImode:
12868 /* inp = xxxxxxxAxxxxxxxB */
12869 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12870 NULL_RTX, 1, OPTAB_DIRECT);
12871 /* t_1 = ....xxxxxxxAxxx. */
12872 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12873 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12874 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12875 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12876 /* sel = .......B */
12877 /* t_1 = ...A.... */
12878 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12879 /* sel = ...A...B */
12880 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12881 /* sel = AAAABBBB * 4 */
12882 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12883 /* sel = { A*4, A*4+1, A*4+2, ... } */
12884 break;
12885
12886 case E_V4HImode:
12887 /* inp = xxxAxxxBxxxCxxxD */
12888 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12889 NULL_RTX, 1, OPTAB_DIRECT);
12890 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12891 NULL_RTX, 1, OPTAB_DIRECT);
12892 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12893 NULL_RTX, 1, OPTAB_DIRECT);
12894 /* t_1 = ..xxxAxxxBxxxCxx */
12895 /* t_2 = ....xxxAxxxBxxxC */
12896 /* t_3 = ......xxxAxxxBxx */
12897 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12898 GEN_INT (0x07),
12899 NULL_RTX, 1, OPTAB_DIRECT);
12900 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12901 GEN_INT (0x0700),
12902 NULL_RTX, 1, OPTAB_DIRECT);
12903 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12904 GEN_INT (0x070000),
12905 NULL_RTX, 1, OPTAB_DIRECT);
12906 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12907 GEN_INT (0x07000000),
12908 NULL_RTX, 1, OPTAB_DIRECT);
12909 /* sel = .......D */
12910 /* t_1 = .....C.. */
12911 /* t_2 = ...B.... */
12912 /* t_3 = .A...... */
12913 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12914 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12915 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12916 /* sel = .A.B.C.D */
12917 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12918 /* sel = AABBCCDD * 2 */
12919 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12920 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12921 break;
12922
12923 case E_V8QImode:
12924 /* input = xAxBxCxDxExFxGxH */
12925 sel = expand_simple_binop (DImode, AND, sel,
12926 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12927 | 0x0f0f0f0f),
12928 NULL_RTX, 1, OPTAB_DIRECT);
12929 /* sel = .A.B.C.D.E.F.G.H */
12930 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12931 NULL_RTX, 1, OPTAB_DIRECT);
12932 /* t_1 = ..A.B.C.D.E.F.G. */
12933 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12934 NULL_RTX, 1, OPTAB_DIRECT);
12935 /* sel = .AABBCCDDEEFFGGH */
12936 sel = expand_simple_binop (DImode, AND, sel,
12937 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12938 | 0xff00ff),
12939 NULL_RTX, 1, OPTAB_DIRECT);
12940 /* sel = ..AB..CD..EF..GH */
12941 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12942 NULL_RTX, 1, OPTAB_DIRECT);
12943 /* t_1 = ....AB..CD..EF.. */
12944 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12945 NULL_RTX, 1, OPTAB_DIRECT);
12946 /* sel = ..ABABCDCDEFEFGH */
12947 sel = expand_simple_binop (DImode, AND, sel,
12948 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12949 NULL_RTX, 1, OPTAB_DIRECT);
12950 /* sel = ....ABCD....EFGH */
12951 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12952 NULL_RTX, 1, OPTAB_DIRECT);
12953 /* t_1 = ........ABCD.... */
12954 sel = gen_lowpart (SImode, sel);
12955 t_1 = gen_lowpart (SImode, t_1);
12956 break;
12957
12958 default:
12959 gcc_unreachable ();
12960 }
12961
12962 /* Always perform the final addition/merge within the bmask insn. */
12963 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12964 }
12965
12966 /* Implement TARGET_VEC_PERM_CONST. */
12967
12968 static bool
12969 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12970 rtx op1, const vec_perm_indices &sel)
12971 {
12972 if (!TARGET_VIS2)
12973 return false;
12974
12975 /* All permutes are supported. */
12976 if (!target)
12977 return true;
12978
12979 /* Force target-independent code to convert constant permutations on other
12980 modes down to V8QI. Rely on this to avoid the complexity of the byte
12981 order of the permutation. */
12982 if (vmode != V8QImode)
12983 return false;
12984
12985 unsigned int i, mask;
12986 for (i = mask = 0; i < 8; ++i)
12987 mask |= (sel[i] & 0xf) << (28 - i*4);
12988 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12989
12990 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12991 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12992 return true;
12993 }
12994
12995 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12996
12997 static bool
12998 sparc_frame_pointer_required (void)
12999 {
13000 /* If the stack pointer is dynamically modified in the function, it cannot
13001 serve as the frame pointer. */
13002 if (cfun->calls_alloca)
13003 return true;
13004
13005 /* If the function receives nonlocal gotos, it needs to save the frame
13006 pointer in the nonlocal_goto_save_area object. */
13007 if (cfun->has_nonlocal_label)
13008 return true;
13009
13010 /* In flat mode, that's it. */
13011 if (TARGET_FLAT)
13012 return false;
13013
13014 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13015 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13016 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13017 }
13018
13019 /* The way this is structured, we can't eliminate SFP in favor of SP
13020 if the frame pointer is required: we want to use the SFP->HFP elimination
13021 in that case. But the test in update_eliminables doesn't know we are
13022 assuming below that we only do the former elimination. */
13023
13024 static bool
13025 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13026 {
13027 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13028 }
13029
13030 /* Return the hard frame pointer directly to bypass the stack bias. */
13031
13032 static rtx
13033 sparc_builtin_setjmp_frame_value (void)
13034 {
13035 return hard_frame_pointer_rtx;
13036 }
13037
13038 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13039 they won't be allocated. */
13040
13041 static void
13042 sparc_conditional_register_usage (void)
13043 {
13044 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13045 {
13046 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13047 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13048 }
13049 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
13050 /* then honor it. */
13051 if (TARGET_ARCH32 && fixed_regs[5])
13052 fixed_regs[5] = 1;
13053 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13054 fixed_regs[5] = 0;
13055 if (! TARGET_V9)
13056 {
13057 int regno;
13058 for (regno = SPARC_FIRST_V9_FP_REG;
13059 regno <= SPARC_LAST_V9_FP_REG;
13060 regno++)
13061 fixed_regs[regno] = 1;
13062 /* %fcc0 is used by v8 and v9. */
13063 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13064 regno <= SPARC_LAST_V9_FCC_REG;
13065 regno++)
13066 fixed_regs[regno] = 1;
13067 }
13068 if (! TARGET_FPU)
13069 {
13070 int regno;
13071 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13072 fixed_regs[regno] = 1;
13073 }
13074 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
13075 /* then honor it. Likewise with g3 and g4. */
13076 if (fixed_regs[2] == 2)
13077 fixed_regs[2] = ! TARGET_APP_REGS;
13078 if (fixed_regs[3] == 2)
13079 fixed_regs[3] = ! TARGET_APP_REGS;
13080 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13081 fixed_regs[4] = ! TARGET_APP_REGS;
13082 else if (TARGET_CM_EMBMEDANY)
13083 fixed_regs[4] = 1;
13084 else if (fixed_regs[4] == 2)
13085 fixed_regs[4] = 0;
13086 if (TARGET_FLAT)
13087 {
13088 int regno;
13089 /* Disable leaf functions. */
13090 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13091 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13092 leaf_reg_remap [regno] = regno;
13093 }
13094 if (TARGET_VIS)
13095 global_regs[SPARC_GSR_REG] = 1;
13096 }
13097
13098 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13099
13100 static bool
13101 sparc_use_pseudo_pic_reg (void)
13102 {
13103 return !TARGET_VXWORKS_RTP && flag_pic;
13104 }
13105
13106 /* Implement TARGET_INIT_PIC_REG. */
13107
13108 static void
13109 sparc_init_pic_reg (void)
13110 {
13111 edge entry_edge;
13112 rtx_insn *seq;
13113
13114 if (!crtl->uses_pic_offset_table)
13115 return;
13116
13117 start_sequence ();
13118 load_got_register ();
13119 if (!TARGET_VXWORKS_RTP)
13120 emit_move_insn (pic_offset_table_rtx, global_offset_table_rtx);
13121 seq = get_insns ();
13122 end_sequence ();
13123
13124 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13125 insert_insn_on_edge (seq, entry_edge);
13126 commit_one_edge_insertion (entry_edge);
13127 }
13128
13129 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13130
13131 - We can't load constants into FP registers.
13132 - We can't load FP constants into integer registers when soft-float,
13133 because there is no soft-float pattern with a r/F constraint.
13134 - We can't load FP constants into integer registers for TFmode unless
13135 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13136 - Try and reload integer constants (symbolic or otherwise) back into
13137 registers directly, rather than having them dumped to memory. */
13138
13139 static reg_class_t
13140 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13141 {
13142 machine_mode mode = GET_MODE (x);
13143 if (CONSTANT_P (x))
13144 {
13145 if (FP_REG_CLASS_P (rclass)
13146 || rclass == GENERAL_OR_FP_REGS
13147 || rclass == GENERAL_OR_EXTRA_FP_REGS
13148 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13149 || (mode == TFmode && ! const_zero_operand (x, mode)))
13150 return NO_REGS;
13151
13152 if (GET_MODE_CLASS (mode) == MODE_INT)
13153 return GENERAL_REGS;
13154
13155 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13156 {
13157 if (! FP_REG_CLASS_P (rclass)
13158 || !(const_zero_operand (x, mode)
13159 || const_all_ones_operand (x, mode)))
13160 return NO_REGS;
13161 }
13162 }
13163
13164 if (TARGET_VIS3
13165 && ! TARGET_ARCH64
13166 && (rclass == EXTRA_FP_REGS
13167 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13168 {
13169 int regno = true_regnum (x);
13170
13171 if (SPARC_INT_REG_P (regno))
13172 return (rclass == EXTRA_FP_REGS
13173 ? FP_REGS : GENERAL_OR_FP_REGS);
13174 }
13175
13176 return rclass;
13177 }
13178
13179 /* Return true if we use LRA instead of reload pass. */
13180
13181 static bool
13182 sparc_lra_p (void)
13183 {
13184 return TARGET_LRA;
13185 }
13186
13187 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13188 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13189
13190 const char *
13191 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13192 {
13193 char mulstr[32];
13194
13195 gcc_assert (! TARGET_ARCH64);
13196
13197 if (sparc_check_64 (operands[1], insn) <= 0)
13198 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13199 if (which_alternative == 1)
13200 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13201 if (GET_CODE (operands[2]) == CONST_INT)
13202 {
13203 if (which_alternative == 1)
13204 {
13205 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13206 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13207 output_asm_insn (mulstr, operands);
13208 return "srlx\t%L0, 32, %H0";
13209 }
13210 else
13211 {
13212 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13213 output_asm_insn ("or\t%L1, %3, %3", operands);
13214 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13215 output_asm_insn (mulstr, operands);
13216 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13217 return "mov\t%3, %L0";
13218 }
13219 }
13220 else if (rtx_equal_p (operands[1], operands[2]))
13221 {
13222 if (which_alternative == 1)
13223 {
13224 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13225 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13226 output_asm_insn (mulstr, operands);
13227 return "srlx\t%L0, 32, %H0";
13228 }
13229 else
13230 {
13231 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13232 output_asm_insn ("or\t%L1, %3, %3", operands);
13233 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13234 output_asm_insn (mulstr, operands);
13235 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13236 return "mov\t%3, %L0";
13237 }
13238 }
13239 if (sparc_check_64 (operands[2], insn) <= 0)
13240 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13241 if (which_alternative == 1)
13242 {
13243 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13244 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13245 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13246 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13247 output_asm_insn (mulstr, operands);
13248 return "srlx\t%L0, 32, %H0";
13249 }
13250 else
13251 {
13252 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13253 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13254 output_asm_insn ("or\t%L1, %3, %3", operands);
13255 output_asm_insn ("or\t%L2, %4, %4", operands);
13256 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13257 output_asm_insn (mulstr, operands);
13258 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13259 return "mov\t%3, %L0";
13260 }
13261 }
13262
13263 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13264 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13265 and INNER_MODE are the modes describing TARGET. */
13266
13267 static void
13268 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13269 machine_mode inner_mode)
13270 {
13271 rtx t1, final_insn, sel;
13272 int bmask;
13273
13274 t1 = gen_reg_rtx (mode);
13275
13276 elt = convert_modes (SImode, inner_mode, elt, true);
13277 emit_move_insn (gen_lowpart(SImode, t1), elt);
13278
13279 switch (mode)
13280 {
13281 case E_V2SImode:
13282 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13283 bmask = 0x45674567;
13284 break;
13285 case E_V4HImode:
13286 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13287 bmask = 0x67676767;
13288 break;
13289 case E_V8QImode:
13290 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13291 bmask = 0x77777777;
13292 break;
13293 default:
13294 gcc_unreachable ();
13295 }
13296
13297 sel = force_reg (SImode, GEN_INT (bmask));
13298 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13299 emit_insn (final_insn);
13300 }
13301
13302 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13303 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13304
13305 static void
13306 vector_init_fpmerge (rtx target, rtx elt)
13307 {
13308 rtx t1, t2, t2_low, t3, t3_low;
13309
13310 t1 = gen_reg_rtx (V4QImode);
13311 elt = convert_modes (SImode, QImode, elt, true);
13312 emit_move_insn (gen_lowpart (SImode, t1), elt);
13313
13314 t2 = gen_reg_rtx (V8QImode);
13315 t2_low = gen_lowpart (V4QImode, t2);
13316 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13317
13318 t3 = gen_reg_rtx (V8QImode);
13319 t3_low = gen_lowpart (V4QImode, t3);
13320 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13321
13322 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13323 }
13324
13325 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13326 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13327
13328 static void
13329 vector_init_faligndata (rtx target, rtx elt)
13330 {
13331 rtx t1 = gen_reg_rtx (V4HImode);
13332 int i;
13333
13334 elt = convert_modes (SImode, HImode, elt, true);
13335 emit_move_insn (gen_lowpart (SImode, t1), elt);
13336
13337 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13338 force_reg (SImode, GEN_INT (6)),
13339 const0_rtx));
13340
13341 for (i = 0; i < 4; i++)
13342 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13343 }
13344
13345 /* Emit code to initialize TARGET to values for individual fields VALS. */
13346
13347 void
13348 sparc_expand_vector_init (rtx target, rtx vals)
13349 {
13350 const machine_mode mode = GET_MODE (target);
13351 const machine_mode inner_mode = GET_MODE_INNER (mode);
13352 const int n_elts = GET_MODE_NUNITS (mode);
13353 int i, n_var = 0;
13354 bool all_same = true;
13355 rtx mem;
13356
13357 for (i = 0; i < n_elts; i++)
13358 {
13359 rtx x = XVECEXP (vals, 0, i);
13360 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13361 n_var++;
13362
13363 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13364 all_same = false;
13365 }
13366
13367 if (n_var == 0)
13368 {
13369 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13370 return;
13371 }
13372
13373 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13374 {
13375 if (GET_MODE_SIZE (inner_mode) == 4)
13376 {
13377 emit_move_insn (gen_lowpart (SImode, target),
13378 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13379 return;
13380 }
13381 else if (GET_MODE_SIZE (inner_mode) == 8)
13382 {
13383 emit_move_insn (gen_lowpart (DImode, target),
13384 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13385 return;
13386 }
13387 }
13388 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13389 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13390 {
13391 emit_move_insn (gen_highpart (word_mode, target),
13392 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13393 emit_move_insn (gen_lowpart (word_mode, target),
13394 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13395 return;
13396 }
13397
13398 if (all_same && GET_MODE_SIZE (mode) == 8)
13399 {
13400 if (TARGET_VIS2)
13401 {
13402 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13403 return;
13404 }
13405 if (mode == V8QImode)
13406 {
13407 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13408 return;
13409 }
13410 if (mode == V4HImode)
13411 {
13412 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13413 return;
13414 }
13415 }
13416
13417 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13418 for (i = 0; i < n_elts; i++)
13419 emit_move_insn (adjust_address_nv (mem, inner_mode,
13420 i * GET_MODE_SIZE (inner_mode)),
13421 XVECEXP (vals, 0, i));
13422 emit_move_insn (target, mem);
13423 }
13424
13425 /* Implement TARGET_SECONDARY_RELOAD. */
13426
13427 static reg_class_t
13428 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13429 machine_mode mode, secondary_reload_info *sri)
13430 {
13431 enum reg_class rclass = (enum reg_class) rclass_i;
13432
13433 sri->icode = CODE_FOR_nothing;
13434 sri->extra_cost = 0;
13435
13436 /* We need a temporary when loading/storing a HImode/QImode value
13437 between memory and the FPU registers. This can happen when combine puts
13438 a paradoxical subreg in a float/fix conversion insn. */
13439 if (FP_REG_CLASS_P (rclass)
13440 && (mode == HImode || mode == QImode)
13441 && (GET_CODE (x) == MEM
13442 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13443 && true_regnum (x) == -1)))
13444 return GENERAL_REGS;
13445
13446 /* On 32-bit we need a temporary when loading/storing a DFmode value
13447 between unaligned memory and the upper FPU registers. */
13448 if (TARGET_ARCH32
13449 && rclass == EXTRA_FP_REGS
13450 && mode == DFmode
13451 && GET_CODE (x) == MEM
13452 && ! mem_min_alignment (x, 8))
13453 return FP_REGS;
13454
13455 if (((TARGET_CM_MEDANY
13456 && symbolic_operand (x, mode))
13457 || (TARGET_CM_EMBMEDANY
13458 && text_segment_operand (x, mode)))
13459 && ! flag_pic)
13460 {
13461 if (in_p)
13462 sri->icode = direct_optab_handler (reload_in_optab, mode);
13463 else
13464 sri->icode = direct_optab_handler (reload_out_optab, mode);
13465 return NO_REGS;
13466 }
13467
13468 if (TARGET_VIS3 && TARGET_ARCH32)
13469 {
13470 int regno = true_regnum (x);
13471
13472 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13473 to move 8-byte values in 4-byte pieces. This only works via
13474 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13475 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13476 an FP_REGS intermediate move. */
13477 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13478 || ((general_or_i64_p (rclass)
13479 || rclass == GENERAL_OR_FP_REGS)
13480 && SPARC_FP_REG_P (regno)))
13481 {
13482 sri->extra_cost = 2;
13483 return FP_REGS;
13484 }
13485 }
13486
13487 return NO_REGS;
13488 }
13489
13490 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13491
13492 On SPARC when not VIS3 it is not possible to directly move data
13493 between GENERAL_REGS and FP_REGS. */
13494
13495 static bool
13496 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13497 reg_class_t class2)
13498 {
13499 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13500 && (! TARGET_VIS3
13501 || GET_MODE_SIZE (mode) > 8
13502 || GET_MODE_SIZE (mode) < 4));
13503 }
13504
13505 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13506
13507 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13508 because the movsi and movsf patterns don't handle r/f moves.
13509 For v8 we copy the default definition. */
13510
13511 static machine_mode
13512 sparc_secondary_memory_needed_mode (machine_mode mode)
13513 {
13514 if (TARGET_ARCH64)
13515 {
13516 if (GET_MODE_BITSIZE (mode) < 32)
13517 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13518 return mode;
13519 }
13520 else
13521 {
13522 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13523 return mode_for_size (BITS_PER_WORD,
13524 GET_MODE_CLASS (mode), 0).require ();
13525 return mode;
13526 }
13527 }
13528
13529 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13530 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13531
13532 bool
13533 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13534 {
13535 enum rtx_code rc = GET_CODE (operands[1]);
13536 machine_mode cmp_mode;
13537 rtx cc_reg, dst, cmp;
13538
13539 cmp = operands[1];
13540 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13541 return false;
13542
13543 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13544 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13545
13546 cmp_mode = GET_MODE (XEXP (cmp, 0));
13547 rc = GET_CODE (cmp);
13548
13549 dst = operands[0];
13550 if (! rtx_equal_p (operands[2], dst)
13551 && ! rtx_equal_p (operands[3], dst))
13552 {
13553 if (reg_overlap_mentioned_p (dst, cmp))
13554 dst = gen_reg_rtx (mode);
13555
13556 emit_move_insn (dst, operands[3]);
13557 }
13558 else if (operands[2] == dst)
13559 {
13560 operands[2] = operands[3];
13561
13562 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13563 rc = reverse_condition_maybe_unordered (rc);
13564 else
13565 rc = reverse_condition (rc);
13566 }
13567
13568 if (XEXP (cmp, 1) == const0_rtx
13569 && GET_CODE (XEXP (cmp, 0)) == REG
13570 && cmp_mode == DImode
13571 && v9_regcmp_p (rc))
13572 cc_reg = XEXP (cmp, 0);
13573 else
13574 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13575
13576 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13577
13578 emit_insn (gen_rtx_SET (dst,
13579 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13580
13581 if (dst != operands[0])
13582 emit_move_insn (operands[0], dst);
13583
13584 return true;
13585 }
13586
13587 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13588 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13589 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13590 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13591 code to be used for the condition mask. */
13592
13593 void
13594 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13595 {
13596 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13597 enum rtx_code code = GET_CODE (operands[3]);
13598
13599 mask = gen_reg_rtx (Pmode);
13600 cop0 = operands[4];
13601 cop1 = operands[5];
13602 if (code == LT || code == GE)
13603 {
13604 rtx t;
13605
13606 code = swap_condition (code);
13607 t = cop0; cop0 = cop1; cop1 = t;
13608 }
13609
13610 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13611
13612 fcmp = gen_rtx_UNSPEC (Pmode,
13613 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13614 fcode);
13615
13616 cmask = gen_rtx_UNSPEC (DImode,
13617 gen_rtvec (2, mask, gsr),
13618 ccode);
13619
13620 bshuf = gen_rtx_UNSPEC (mode,
13621 gen_rtvec (3, operands[1], operands[2], gsr),
13622 UNSPEC_BSHUFFLE);
13623
13624 emit_insn (gen_rtx_SET (mask, fcmp));
13625 emit_insn (gen_rtx_SET (gsr, cmask));
13626
13627 emit_insn (gen_rtx_SET (operands[0], bshuf));
13628 }
13629
13630 /* On sparc, any mode which naturally allocates into the float
13631 registers should return 4 here. */
13632
13633 unsigned int
13634 sparc_regmode_natural_size (machine_mode mode)
13635 {
13636 int size = UNITS_PER_WORD;
13637
13638 if (TARGET_ARCH64)
13639 {
13640 enum mode_class mclass = GET_MODE_CLASS (mode);
13641
13642 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13643 size = 4;
13644 }
13645
13646 return size;
13647 }
13648
13649 /* Implement TARGET_HARD_REGNO_NREGS.
13650
13651 On SPARC, ordinary registers hold 32 bits worth; this means both
13652 integer and floating point registers. On v9, integer regs hold 64
13653 bits worth; floating point regs hold 32 bits worth (this includes the
13654 new fp regs as even the odd ones are included in the hard register
13655 count). */
13656
13657 static unsigned int
13658 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13659 {
13660 if (regno == SPARC_GSR_REG)
13661 return 1;
13662 if (TARGET_ARCH64)
13663 {
13664 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13665 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13666 return CEIL (GET_MODE_SIZE (mode), 4);
13667 }
13668 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13669 }
13670
13671 /* Implement TARGET_HARD_REGNO_MODE_OK.
13672
13673 ??? Because of the funny way we pass parameters we should allow certain
13674 ??? types of float/complex values to be in integer registers during
13675 ??? RTL generation. This only matters on arch32. */
13676
13677 static bool
13678 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13679 {
13680 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13681 }
13682
13683 /* Implement TARGET_MODES_TIEABLE_P.
13684
13685 For V9 we have to deal with the fact that only the lower 32 floating
13686 point registers are 32-bit addressable. */
13687
13688 static bool
13689 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13690 {
13691 enum mode_class mclass1, mclass2;
13692 unsigned short size1, size2;
13693
13694 if (mode1 == mode2)
13695 return true;
13696
13697 mclass1 = GET_MODE_CLASS (mode1);
13698 mclass2 = GET_MODE_CLASS (mode2);
13699 if (mclass1 != mclass2)
13700 return false;
13701
13702 if (! TARGET_V9)
13703 return true;
13704
13705 /* Classes are the same and we are V9 so we have to deal with upper
13706 vs. lower floating point registers. If one of the modes is a
13707 4-byte mode, and the other is not, we have to mark them as not
13708 tieable because only the lower 32 floating point register are
13709 addressable 32-bits at a time.
13710
13711 We can't just test explicitly for SFmode, otherwise we won't
13712 cover the vector mode cases properly. */
13713
13714 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13715 return true;
13716
13717 size1 = GET_MODE_SIZE (mode1);
13718 size2 = GET_MODE_SIZE (mode2);
13719 if ((size1 > 4 && size2 == 4)
13720 || (size2 > 4 && size1 == 4))
13721 return false;
13722
13723 return true;
13724 }
13725
13726 /* Implement TARGET_CSTORE_MODE. */
13727
13728 static scalar_int_mode
13729 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13730 {
13731 return (TARGET_ARCH64 ? DImode : SImode);
13732 }
13733
13734 /* Return the compound expression made of T1 and T2. */
13735
13736 static inline tree
13737 compound_expr (tree t1, tree t2)
13738 {
13739 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13740 }
13741
13742 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13743
13744 static void
13745 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13746 {
13747 if (!TARGET_FPU)
13748 return;
13749
13750 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13751 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13752
13753 /* We generate the equivalent of feholdexcept (&fenv_var):
13754
13755 unsigned int fenv_var;
13756 __builtin_store_fsr (&fenv_var);
13757
13758 unsigned int tmp1_var;
13759 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13760
13761 __builtin_load_fsr (&tmp1_var); */
13762
13763 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13764 TREE_ADDRESSABLE (fenv_var) = 1;
13765 tree fenv_addr = build_fold_addr_expr (fenv_var);
13766 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13767 tree hold_stfsr
13768 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13769 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13770
13771 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13772 TREE_ADDRESSABLE (tmp1_var) = 1;
13773 tree masked_fenv_var
13774 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13775 build_int_cst (unsigned_type_node,
13776 ~(accrued_exception_mask | trap_enable_mask)));
13777 tree hold_mask
13778 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13779 NULL_TREE, NULL_TREE);
13780
13781 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13782 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13783 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13784
13785 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13786
13787 /* We reload the value of tmp1_var to clear the exceptions:
13788
13789 __builtin_load_fsr (&tmp1_var); */
13790
13791 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13792
13793 /* We generate the equivalent of feupdateenv (&fenv_var):
13794
13795 unsigned int tmp2_var;
13796 __builtin_store_fsr (&tmp2_var);
13797
13798 __builtin_load_fsr (&fenv_var);
13799
13800 if (SPARC_LOW_FE_EXCEPT_VALUES)
13801 tmp2_var >>= 5;
13802 __atomic_feraiseexcept ((int) tmp2_var); */
13803
13804 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13805 TREE_ADDRESSABLE (tmp2_var) = 1;
13806 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13807 tree update_stfsr
13808 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13809 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13810
13811 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13812
13813 tree atomic_feraiseexcept
13814 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13815 tree update_call
13816 = build_call_expr (atomic_feraiseexcept, 1,
13817 fold_convert (integer_type_node, tmp2_var));
13818
13819 if (SPARC_LOW_FE_EXCEPT_VALUES)
13820 {
13821 tree shifted_tmp2_var
13822 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13823 build_int_cst (unsigned_type_node, 5));
13824 tree update_shift
13825 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13826 update_call = compound_expr (update_shift, update_call);
13827 }
13828
13829 *update
13830 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13831 }
13832
13833 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13834
13835 SImode loads to floating-point registers are not zero-extended.
13836 The definition for LOAD_EXTEND_OP specifies that integer loads
13837 narrower than BITS_PER_WORD will be zero-extended. As a result,
13838 we inhibit changes from SImode unless they are to a mode that is
13839 identical in size.
13840
13841 Likewise for SFmode, since word-mode paradoxical subregs are
13842 problematic on big-endian architectures. */
13843
13844 static bool
13845 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13846 reg_class_t rclass)
13847 {
13848 if (TARGET_ARCH64
13849 && GET_MODE_SIZE (from) == 4
13850 && GET_MODE_SIZE (to) != 4)
13851 return !reg_classes_intersect_p (rclass, FP_REGS);
13852 return true;
13853 }
13854
13855 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13856
13857 static HOST_WIDE_INT
13858 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13859 {
13860 if (TREE_CODE (exp) == STRING_CST)
13861 return MAX (align, FASTEST_ALIGNMENT);
13862 return align;
13863 }
13864
13865 #include "gt-sparc.h"