1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2019 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "params.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "tree-vector-builder.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127 A value of zero indicates that the multiply cost is fixed
128 rather than variable. */
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
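/* As a rough illustration of the variable multiply cost above (just the
   formula applied to the ultrasparc entry further down, which has
   int_mul = COSTS_N_INSNS (4) and int_mul_bit_factor = 2): a multiply by
   1000 has its highest set bit at position 9, so it would be costed at
   COSTS_N_INSNS (4) + (9 - 3) / 2, i.e. three units above the base cost,
   whereas a multiply by 7 (highest bit 2, clamped to 3) would be costed
   at just the base COSTS_N_INSNS (4).  */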
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs sparclet_costs = {
274 COSTS_N_INSNS (3), /* int load */
275 COSTS_N_INSNS (3), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (1), /* fdivs */
284 COSTS_N_INSNS (1), /* fdivd */
285 COSTS_N_INSNS (1), /* fsqrts */
286 COSTS_N_INSNS (1), /* fsqrtd */
287 COSTS_N_INSNS (5), /* imul */
288 COSTS_N_INSNS (5), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (5), /* idiv */
291 COSTS_N_INSNS (5), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs ultrasparc_costs = {
299 COSTS_N_INSNS (2), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (2), /* int zeroed load */
302 COSTS_N_INSNS (2), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (4), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (2), /* fmov, fmovr */
307 COSTS_N_INSNS (4), /* fmul */
308 COSTS_N_INSNS (13), /* fdivs */
309 COSTS_N_INSNS (23), /* fdivd */
310 COSTS_N_INSNS (13), /* fsqrts */
311 COSTS_N_INSNS (23), /* fsqrtd */
312 COSTS_N_INSNS (4), /* imul */
313 COSTS_N_INSNS (4), /* imulX */
314 2, /* imul bit factor */
315 COSTS_N_INSNS (37), /* idiv */
316 COSTS_N_INSNS (68), /* idivX */
317 COSTS_N_INSNS (2), /* movcc/movr */
318 2, /* shift penalty */
319 2 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc3_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (3), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (5), /* fcmp */
331 COSTS_N_INSNS (3), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (17), /* fdivs */
334 COSTS_N_INSNS (20), /* fdivd */
335 COSTS_N_INSNS (20), /* fsqrts */
336 COSTS_N_INSNS (29), /* fsqrtd */
337 COSTS_N_INSNS (6), /* imul */
338 COSTS_N_INSNS (6), /* imulX */
339 0, /* imul bit factor */
340 COSTS_N_INSNS (40), /* idiv */
341 COSTS_N_INSNS (71), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 0, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs niagara_costs = {
349 COSTS_N_INSNS (3), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (9), /* float load */
353 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (8), /* fadd, fsub */
355 COSTS_N_INSNS (26), /* fcmp */
356 COSTS_N_INSNS (8), /* fmov, fmovr */
357 COSTS_N_INSNS (29), /* fmul */
358 COSTS_N_INSNS (54), /* fdivs */
359 COSTS_N_INSNS (83), /* fdivd */
360 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
361 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
362 COSTS_N_INSNS (11), /* imul */
363 COSTS_N_INSNS (11), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (72), /* idiv */
366 COSTS_N_INSNS (72), /* idivX */
367 COSTS_N_INSNS (1), /* movcc/movr */
368 0, /* shift penalty */
369 4 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara2_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (3), /* float load */
378 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (6), /* fadd, fsub */
380 COSTS_N_INSNS (6), /* fcmp */
381 COSTS_N_INSNS (6), /* fmov, fmovr */
382 COSTS_N_INSNS (6), /* fmul */
383 COSTS_N_INSNS (19), /* fdivs */
384 COSTS_N_INSNS (33), /* fdivd */
385 COSTS_N_INSNS (19), /* fsqrts */
386 COSTS_N_INSNS (33), /* fsqrtd */
387 COSTS_N_INSNS (5), /* imul */
388 COSTS_N_INSNS (5), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
391 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 5 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara3_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (9), /* fadd, fsub */
405 COSTS_N_INSNS (9), /* fcmp */
406 COSTS_N_INSNS (9), /* fmov, fmovr */
407 COSTS_N_INSNS (9), /* fmul */
408 COSTS_N_INSNS (23), /* fdivs */
409 COSTS_N_INSNS (37), /* fdivd */
410 COSTS_N_INSNS (23), /* fsqrts */
411 COSTS_N_INSNS (37), /* fsqrtd */
412 COSTS_N_INSNS (9), /* imul */
413 COSTS_N_INSNS (9), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
416 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara4_costs = {
424 COSTS_N_INSNS (5), /* int load */
425 COSTS_N_INSNS (5), /* int signed load */
426 COSTS_N_INSNS (5), /* int zeroed load */
427 COSTS_N_INSNS (5), /* float load */
428 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (11), /* fadd, fsub */
430 COSTS_N_INSNS (11), /* fcmp */
431 COSTS_N_INSNS (11), /* fmov, fmovr */
432 COSTS_N_INSNS (11), /* fmul */
433 COSTS_N_INSNS (24), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (24), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (12), /* imul */
438 COSTS_N_INSNS (12), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
441 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 2 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara7_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 1 /* branch cost */
470 };
471
472 static const
473 struct processor_costs m8_costs = {
474 COSTS_N_INSNS (3), /* int load */
475 COSTS_N_INSNS (3), /* int signed load */
476 COSTS_N_INSNS (3), /* int zeroed load */
477 COSTS_N_INSNS (3), /* float load */
478 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (9), /* fadd, fsub */
480 COSTS_N_INSNS (9), /* fcmp */
481 COSTS_N_INSNS (9), /* fmov, fmovr */
482 COSTS_N_INSNS (9), /* fmul */
483 COSTS_N_INSNS (26), /* fdivs */
484 COSTS_N_INSNS (30), /* fdivd */
485 COSTS_N_INSNS (33), /* fsqrts */
486 COSTS_N_INSNS (41), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (10), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (57), /* udiv/sdiv */
491 COSTS_N_INSNS (30), /* udivx/sdivx */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const struct processor_costs *sparc_costs = &cypress_costs;
498
499 #ifdef HAVE_AS_RELAX_OPTION
500 /* If 'as' and 'ld' are relaxing tail call insns into a branch always, always
501 use "or %o7,%g0,X; call Y; or X,%g0,%o7" so that the sequence can be optimized.
502 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
503 anything branches between the sethi and the jmp. */
504 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
505 #else
506 #define LEAF_SIBCALL_SLOT_RESERVED_P \
507 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
508 #endif
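/* As a sketch (not the literal output of output_sibcall), the reserved-slot
   form of a leaf sibcall mentioned above looks roughly like

	or	%o7, %g0, %g1	! stash the return address
	call	target
	 or	%g1, %g0, %o7	! restore it in the delay slot

   which 'as' and 'ld' can safely relax, whereas the unreserved form is
   along the lines of "sethi %hi(target), %g1; jmp %g1 + %lo(target)".  */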
509
510 /* Vector to say how input registers are mapped to output registers.
511 HARD_FRAME_POINTER_REGNUM cannot be remapped here to eliminate it;
512 you must use -fomit-frame-pointer to get that. */
513 char leaf_reg_remap[] =
514 { 0, 1, 2, 3, 4, 5, 6, 7,
515 -1, -1, -1, -1, -1, -1, 14, -1,
516 -1, -1, -1, -1, -1, -1, -1, -1,
517 8, 9, 10, 11, 12, 13, -1, 15,
518
519 32, 33, 34, 35, 36, 37, 38, 39,
520 40, 41, 42, 43, 44, 45, 46, 47,
521 48, 49, 50, 51, 52, 53, 54, 55,
522 56, 57, 58, 59, 60, 61, 62, 63,
523 64, 65, 66, 67, 68, 69, 70, 71,
524 72, 73, 74, 75, 76, 77, 78, 79,
525 80, 81, 82, 83, 84, 85, 86, 87,
526 88, 89, 90, 91, 92, 93, 94, 95,
527 96, 97, 98, 99, 100, 101, 102};
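/* For instance, under this remapping a leaf function that uses %i0 and %i7
   (hard regs 24 and 31) has them rewritten as %o0 and %o7 (hard regs 8 and
   15), %sp (reg 14) stays put, and the remaining out registers, %fp and the
   local registers are all -1, i.e. not available to functions that get the
   leaf treatment (see sparc_leaf_regs below).  */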
528
529 /* Vector, indexed by hard register number, which contains 1
530 for a register that is allowable in a candidate for leaf
531 function treatment. */
532 char sparc_leaf_regs[] =
533 { 1, 1, 1, 1, 1, 1, 1, 1,
534 0, 0, 0, 0, 0, 0, 1, 0,
535 0, 0, 0, 0, 0, 0, 0, 0,
536 1, 1, 1, 1, 1, 1, 0, 1,
537 1, 1, 1, 1, 1, 1, 1, 1,
538 1, 1, 1, 1, 1, 1, 1, 1,
539 1, 1, 1, 1, 1, 1, 1, 1,
540 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1};
546
547 struct GTY(()) machine_function
548 {
549 /* Size of the frame of the function. */
550 HOST_WIDE_INT frame_size;
551
552 /* Size of the frame of the function minus the register window save area
553 and the outgoing argument area. */
554 HOST_WIDE_INT apparent_frame_size;
555
556 /* Register we pretend the frame pointer is allocated to. Normally, this
557 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
558 record "offset" separately as it may be too big for (reg + disp). */
559 rtx frame_base_reg;
560 HOST_WIDE_INT frame_base_offset;
561
562 /* Number of global or FP registers to be saved (as 4-byte quantities). */
563 int n_global_fp_regs;
564
565 /* True if the current function is leaf and uses only leaf regs,
566 so that the SPARC leaf function optimization can be applied.
567 Private version of crtl->uses_only_leaf_regs, see
568 sparc_expand_prologue for the rationale. */
569 int leaf_function_p;
570
571 /* True if the prologue saves local or in registers. */
572 bool save_local_in_regs_p;
573
574 /* True if the data calculated by sparc_expand_prologue are valid. */
575 bool prologue_data_valid_p;
576 };
577
578 #define sparc_frame_size cfun->machine->frame_size
579 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
580 #define sparc_frame_base_reg cfun->machine->frame_base_reg
581 #define sparc_frame_base_offset cfun->machine->frame_base_offset
582 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
583 #define sparc_leaf_function_p cfun->machine->leaf_function_p
584 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
585 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
586
587 /* 1 if the next opcode is to be specially indented. */
588 int sparc_indent_opcode = 0;
589
590 static void sparc_option_override (void);
591 static void sparc_init_modes (void);
592 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
593 const_tree, bool, bool, int *, int *);
594
595 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
596 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
597
598 static void sparc_emit_set_const32 (rtx, rtx);
599 static void sparc_emit_set_const64 (rtx, rtx);
600 static void sparc_output_addr_vec (rtx);
601 static void sparc_output_addr_diff_vec (rtx);
602 static void sparc_output_deferred_case_vectors (void);
603 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
604 static bool sparc_legitimate_constant_p (machine_mode, rtx);
605 static rtx sparc_builtin_saveregs (void);
606 static int epilogue_renumber (rtx *, int);
607 static bool sparc_assemble_integer (rtx, unsigned int, int);
608 static int set_extends (rtx_insn *);
609 static void sparc_asm_function_prologue (FILE *);
610 static void sparc_asm_function_epilogue (FILE *);
611 #ifdef TARGET_SOLARIS
612 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
613 tree) ATTRIBUTE_UNUSED;
614 #endif
615 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
616 static int sparc_issue_rate (void);
617 static void sparc_sched_init (FILE *, int, int);
618 static int sparc_use_sched_lookahead (void);
619
620 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
621 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
622 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
623 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
624 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
625
626 static bool sparc_function_ok_for_sibcall (tree, tree);
627 static void sparc_init_libfuncs (void);
628 static void sparc_init_builtins (void);
629 static void sparc_fpu_init_builtins (void);
630 static void sparc_vis_init_builtins (void);
631 static tree sparc_builtin_decl (unsigned, bool);
632 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
633 static tree sparc_fold_builtin (tree, int, tree *, bool);
634 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
635 HOST_WIDE_INT, tree);
636 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
637 HOST_WIDE_INT, const_tree);
638 static struct machine_function * sparc_init_machine_status (void);
639 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
640 static rtx sparc_tls_get_addr (void);
641 static rtx sparc_tls_got (void);
642 static int sparc_register_move_cost (machine_mode,
643 reg_class_t, reg_class_t);
644 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
645 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
646 int *, const_tree, int);
647 static bool sparc_strict_argument_naming (cumulative_args_t);
648 static void sparc_va_start (tree, rtx);
649 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
650 static bool sparc_vector_mode_supported_p (machine_mode);
651 static bool sparc_tls_referenced_p (rtx);
652 static rtx sparc_legitimize_tls_address (rtx);
653 static rtx sparc_legitimize_pic_address (rtx, rtx);
654 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
655 static rtx sparc_delegitimize_address (rtx);
656 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
657 static bool sparc_pass_by_reference (cumulative_args_t,
658 const function_arg_info &);
659 static void sparc_function_arg_advance (cumulative_args_t,
660 const function_arg_info &);
661 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
662 static rtx sparc_function_incoming_arg (cumulative_args_t,
663 const function_arg_info &);
664 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
665 static unsigned int sparc_function_arg_boundary (machine_mode,
666 const_tree);
667 static int sparc_arg_partial_bytes (cumulative_args_t,
668 const function_arg_info &);
669 static bool sparc_return_in_memory (const_tree, const_tree);
670 static rtx sparc_struct_value_rtx (tree, int);
671 static rtx sparc_function_value (const_tree, const_tree, bool);
672 static rtx sparc_libcall_value (machine_mode, const_rtx);
673 static bool sparc_function_value_regno_p (const unsigned int);
674 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
675 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
676 static void sparc_file_end (void);
677 static bool sparc_frame_pointer_required (void);
678 static bool sparc_can_eliminate (const int, const int);
679 static void sparc_conditional_register_usage (void);
680 static bool sparc_use_pseudo_pic_reg (void);
681 static void sparc_init_pic_reg (void);
682 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
683 static const char *sparc_mangle_type (const_tree);
684 #endif
685 static void sparc_trampoline_init (rtx, tree, rtx);
686 static machine_mode sparc_preferred_simd_mode (scalar_mode);
687 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
688 static bool sparc_lra_p (void);
689 static bool sparc_print_operand_punct_valid_p (unsigned char);
690 static void sparc_print_operand (FILE *, rtx, int);
691 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
692 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
693 machine_mode,
694 secondary_reload_info *);
695 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
696 reg_class_t);
697 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
698 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
699 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
700 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
701 static unsigned int sparc_min_arithmetic_precision (void);
702 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
703 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
704 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
705 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
706 reg_class_t);
707 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
708 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
709 const vec_perm_indices &);
710 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
711 \f
712 #ifdef SUBTARGET_ATTRIBUTE_TABLE
713 /* Table of valid machine attributes. */
714 static const struct attribute_spec sparc_attribute_table[] =
715 {
716 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
717 do_diagnostic, handler, exclude } */
718 SUBTARGET_ATTRIBUTE_TABLE,
719 { NULL, 0, 0, false, false, false, false, NULL, NULL }
720 };
721 #endif
722 \f
723 char sparc_hard_reg_printed[8];
724
725 /* Initialize the GCC target structure. */
726
727 /* The default is to use .half rather than .short for aligned HI objects. */
728 #undef TARGET_ASM_ALIGNED_HI_OP
729 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
730
731 #undef TARGET_ASM_UNALIGNED_HI_OP
732 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
733 #undef TARGET_ASM_UNALIGNED_SI_OP
734 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
735 #undef TARGET_ASM_UNALIGNED_DI_OP
736 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
737
738 /* The target hook has to handle DI-mode values. */
739 #undef TARGET_ASM_INTEGER
740 #define TARGET_ASM_INTEGER sparc_assemble_integer
741
742 #undef TARGET_ASM_FUNCTION_PROLOGUE
743 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
744 #undef TARGET_ASM_FUNCTION_EPILOGUE
745 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
746
747 #undef TARGET_SCHED_ADJUST_COST
748 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
749 #undef TARGET_SCHED_ISSUE_RATE
750 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
751 #undef TARGET_SCHED_INIT
752 #define TARGET_SCHED_INIT sparc_sched_init
753 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
754 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
755
756 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
757 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
758
759 #undef TARGET_INIT_LIBFUNCS
760 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
761
762 #undef TARGET_LEGITIMIZE_ADDRESS
763 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
764 #undef TARGET_DELEGITIMIZE_ADDRESS
765 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
766 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
767 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
768
769 #undef TARGET_INIT_BUILTINS
770 #define TARGET_INIT_BUILTINS sparc_init_builtins
771 #undef TARGET_BUILTIN_DECL
772 #define TARGET_BUILTIN_DECL sparc_builtin_decl
773 #undef TARGET_EXPAND_BUILTIN
774 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
775 #undef TARGET_FOLD_BUILTIN
776 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
777
778 #if TARGET_TLS
779 #undef TARGET_HAVE_TLS
780 #define TARGET_HAVE_TLS true
781 #endif
782
783 #undef TARGET_CANNOT_FORCE_CONST_MEM
784 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
785
786 #undef TARGET_ASM_OUTPUT_MI_THUNK
787 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
788 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
789 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
790
791 #undef TARGET_RTX_COSTS
792 #define TARGET_RTX_COSTS sparc_rtx_costs
793 #undef TARGET_ADDRESS_COST
794 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
795 #undef TARGET_REGISTER_MOVE_COST
796 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
797
798 #undef TARGET_PROMOTE_FUNCTION_MODE
799 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
800 #undef TARGET_STRICT_ARGUMENT_NAMING
801 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
802
803 #undef TARGET_MUST_PASS_IN_STACK
804 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
805 #undef TARGET_PASS_BY_REFERENCE
806 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
807 #undef TARGET_ARG_PARTIAL_BYTES
808 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
809 #undef TARGET_FUNCTION_ARG_ADVANCE
810 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
811 #undef TARGET_FUNCTION_ARG
812 #define TARGET_FUNCTION_ARG sparc_function_arg
813 #undef TARGET_FUNCTION_INCOMING_ARG
814 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
815 #undef TARGET_FUNCTION_ARG_PADDING
816 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
817 #undef TARGET_FUNCTION_ARG_BOUNDARY
818 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
819
820 #undef TARGET_RETURN_IN_MEMORY
821 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
822 #undef TARGET_STRUCT_VALUE_RTX
823 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
824 #undef TARGET_FUNCTION_VALUE
825 #define TARGET_FUNCTION_VALUE sparc_function_value
826 #undef TARGET_LIBCALL_VALUE
827 #define TARGET_LIBCALL_VALUE sparc_libcall_value
828 #undef TARGET_FUNCTION_VALUE_REGNO_P
829 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
830
831 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
832 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
833
834 #undef TARGET_ASAN_SHADOW_OFFSET
835 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
836
837 #undef TARGET_EXPAND_BUILTIN_VA_START
838 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
839 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
840 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
841
842 #undef TARGET_VECTOR_MODE_SUPPORTED_P
843 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
844
845 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
846 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
847
848 #ifdef SUBTARGET_INSERT_ATTRIBUTES
849 #undef TARGET_INSERT_ATTRIBUTES
850 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
851 #endif
852
853 #ifdef SUBTARGET_ATTRIBUTE_TABLE
854 #undef TARGET_ATTRIBUTE_TABLE
855 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
856 #endif
857
858 #undef TARGET_OPTION_OVERRIDE
859 #define TARGET_OPTION_OVERRIDE sparc_option_override
860
861 #ifdef TARGET_THREAD_SSP_OFFSET
862 #undef TARGET_STACK_PROTECT_GUARD
863 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
864 #endif
865
866 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
867 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
868 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
869 #endif
870
871 #undef TARGET_ASM_FILE_END
872 #define TARGET_ASM_FILE_END sparc_file_end
873
874 #undef TARGET_FRAME_POINTER_REQUIRED
875 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
876
877 #undef TARGET_CAN_ELIMINATE
878 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
879
880 #undef TARGET_PREFERRED_RELOAD_CLASS
881 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
882
883 #undef TARGET_SECONDARY_RELOAD
884 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
885 #undef TARGET_SECONDARY_MEMORY_NEEDED
886 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
887 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
888 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
889
890 #undef TARGET_CONDITIONAL_REGISTER_USAGE
891 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
892
893 #undef TARGET_INIT_PIC_REG
894 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
895
896 #undef TARGET_USE_PSEUDO_PIC_REG
897 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
898
899 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
900 #undef TARGET_MANGLE_TYPE
901 #define TARGET_MANGLE_TYPE sparc_mangle_type
902 #endif
903
904 #undef TARGET_LRA_P
905 #define TARGET_LRA_P sparc_lra_p
906
907 #undef TARGET_LEGITIMATE_ADDRESS_P
908 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
909
910 #undef TARGET_LEGITIMATE_CONSTANT_P
911 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
912
913 #undef TARGET_TRAMPOLINE_INIT
914 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
915
916 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
917 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
918 #undef TARGET_PRINT_OPERAND
919 #define TARGET_PRINT_OPERAND sparc_print_operand
920 #undef TARGET_PRINT_OPERAND_ADDRESS
921 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
922
923 /* The value stored by LDSTUB. */
924 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
925 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
926
927 #undef TARGET_CSTORE_MODE
928 #define TARGET_CSTORE_MODE sparc_cstore_mode
929
930 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
931 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
932
933 #undef TARGET_FIXED_CONDITION_CODE_REGS
934 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
935
936 #undef TARGET_MIN_ARITHMETIC_PRECISION
937 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
938
939 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
940 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
941
942 #undef TARGET_HARD_REGNO_NREGS
943 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
944 #undef TARGET_HARD_REGNO_MODE_OK
945 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
946
947 #undef TARGET_MODES_TIEABLE_P
948 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
949
950 #undef TARGET_CAN_CHANGE_MODE_CLASS
951 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
952
953 #undef TARGET_CONSTANT_ALIGNMENT
954 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
955
956 #undef TARGET_VECTORIZE_VEC_PERM_CONST
957 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
958
959 #undef TARGET_CAN_FOLLOW_JUMP
960 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
961
962 struct gcc_target targetm = TARGET_INITIALIZER;
963
964 /* Return the memory reference contained in X if any, zero otherwise. */
965
966 static rtx
967 mem_ref (rtx x)
968 {
969 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
970 x = XEXP (x, 0);
971
972 if (MEM_P (x))
973 return x;
974
975 return NULL_RTX;
976 }
977
978 /* True if any of INSN's source register(s) is REG. */
979
980 static bool
981 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
982 {
983 extract_insn (insn);
984 return ((REG_P (recog_data.operand[1])
985 && REGNO (recog_data.operand[1]) == reg)
986 || (recog_data.n_operands == 3
987 && REG_P (recog_data.operand[2])
988 && REGNO (recog_data.operand[2]) == reg));
989 }
990
991 /* True if INSN is a floating-point division or square-root. */
992
993 static bool
994 div_sqrt_insn_p (rtx_insn *insn)
995 {
996 if (GET_CODE (PATTERN (insn)) != SET)
997 return false;
998
999 switch (get_attr_type (insn))
1000 {
1001 case TYPE_FPDIVS:
1002 case TYPE_FPSQRTS:
1003 case TYPE_FPDIVD:
1004 case TYPE_FPSQRTD:
1005 return true;
1006 default:
1007 return false;
1008 }
1009 }
1010
1011 /* True if INSN is a floating-point instruction. */
1012
1013 static bool
1014 fpop_insn_p (rtx_insn *insn)
1015 {
1016 if (GET_CODE (PATTERN (insn)) != SET)
1017 return false;
1018
1019 switch (get_attr_type (insn))
1020 {
1021 case TYPE_FPMOVE:
1022 case TYPE_FPCMOVE:
1023 case TYPE_FP:
1024 case TYPE_FPCMP:
1025 case TYPE_FPMUL:
1026 case TYPE_FPDIVS:
1027 case TYPE_FPSQRTS:
1028 case TYPE_FPDIVD:
1029 case TYPE_FPSQRTD:
1030 return true;
1031 default:
1032 return false;
1033 }
1034 }
1035
1036 /* True if INSN is an atomic instruction. */
1037
1038 static bool
1039 atomic_insn_for_leon3_p (rtx_insn *insn)
1040 {
1041 switch (INSN_CODE (insn))
1042 {
1043 case CODE_FOR_swapsi:
1044 case CODE_FOR_ldstub:
1045 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1046 return true;
1047 default:
1048 return false;
1049 }
1050 }
1051
1052 /* We use a machine specific pass to enable workarounds for errata.
1053
1054 We need to have the (essentially) final form of the insn stream in order
1055 to properly detect the various hazards. Therefore, this machine specific
1056 pass runs as late as possible. */
1057
1058 /* True if INSN is an MD pattern or an asm statement. */
1059 #define USEFUL_INSN_P(INSN) \
1060 (NONDEBUG_INSN_P (INSN) \
1061 && GET_CODE (PATTERN (INSN)) != USE \
1062 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1063
1064 static unsigned int
1065 sparc_do_work_around_errata (void)
1066 {
1067 rtx_insn *insn, *next;
1068
1069 /* Force all instructions to be split into their final form. */
1070 split_all_insns_noflow ();
1071
1072 /* Now look for specific patterns in the insn stream. */
1073 for (insn = get_insns (); insn; insn = next)
1074 {
1075 bool insert_nop = false;
1076 rtx set;
1077 rtx_insn *jump;
1078 rtx_sequence *seq;
1079
1080 /* Look into the instruction in a delay slot. */
1081 if (NONJUMP_INSN_P (insn)
1082 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1083 {
1084 jump = seq->insn (0);
1085 insn = seq->insn (1);
1086 }
1087 else if (JUMP_P (insn))
1088 jump = insn;
1089 else
1090 jump = NULL;
1091
1092 /* Place a NOP at the branch target of an integer branch if it is a
1093 floating-point operation or a floating-point branch. */
1094 if (sparc_fix_gr712rc
1095 && jump
1096 && jump_to_label_p (jump)
1097 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1098 {
1099 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1100 if (target
1101 && (fpop_insn_p (target)
1102 || (JUMP_P (target)
1103 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1104 emit_insn_before (gen_nop (), target);
1105 }
1106
1107 /* Insert a NOP between a load instruction and an atomic instruction. Also
1108 insert a NOP at the branch target if there is a load in the delay slot and
1109 an atomic instruction at the branch target. */
1110 if (sparc_fix_ut700
1111 && NONJUMP_INSN_P (insn)
1112 && (set = single_set (insn)) != NULL_RTX
1113 && mem_ref (SET_SRC (set))
1114 && REG_P (SET_DEST (set)))
1115 {
1116 if (jump && jump_to_label_p (jump))
1117 {
1118 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1119 if (target && atomic_insn_for_leon3_p (target))
1120 emit_insn_before (gen_nop (), target);
1121 }
1122
1123 next = next_active_insn (insn);
1124 if (!next)
1125 break;
1126
1127 if (atomic_insn_for_leon3_p (next))
1128 insert_nop = true;
1129 }
1130
1131 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1132 ends with another fdiv or fsqrt instruction with no dependencies on
1133 the former, along with an appropriate pattern in between. */
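/* A made-up instance of the problematic window (illustration only):

	fdivd	%f0, %f2, %f4
	faddd	%f6, %f8, %f10	! unrelated FP operation
	ldd	[%o0], %f12	! unrelated FP load
	fsqrtd	%f14, %f16	! second div/sqrt, independent of %f4

   Two FP operations/loads followed by an independent div/sqrt within the
   window is exactly what the code below breaks up with NOPs.  */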
1134 if (sparc_fix_lost_divsqrt
1135 && NONJUMP_INSN_P (insn)
1136 && div_sqrt_insn_p (insn))
1137 {
1138 int i;
1139 int fp_found = 0;
1140 rtx_insn *after;
1141
1142 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1143
1144 next = next_active_insn (insn);
1145 if (!next)
1146 break;
1147
1148 for (after = next, i = 0; i < 4; i++)
1149 {
1150 /* Count floating-point operations. */
1151 if (i != 3 && fpop_insn_p (after))
1152 {
1153 /* If the insn uses the destination register of
1154 the div/sqrt, then it cannot be problematic. */
1155 if (insn_uses_reg_p (after, dest_reg))
1156 break;
1157 fp_found++;
1158 }
1159
1160 /* Count floating-point loads. */
1161 if (i != 3
1162 && (set = single_set (after)) != NULL_RTX
1163 && REG_P (SET_DEST (set))
1164 && REGNO (SET_DEST (set)) > 31)
1165 {
1166 /* If the insn uses the destination register of
1167 the div/sqrt, then it cannot be problematic. */
1168 if (REGNO (SET_DEST (set)) == dest_reg)
1169 break;
1170 fp_found++;
1171 }
1172
1173 /* Check if this is a problematic sequence. */
1174 if (i > 1
1175 && fp_found >= 2
1176 && div_sqrt_insn_p (after))
1177 {
1178 /* If this is the short version of the problematic
1179 sequence we add two NOPs in a row to also prevent
1180 the long version. */
1181 if (i == 2)
1182 emit_insn_before (gen_nop (), next);
1183 insert_nop = true;
1184 break;
1185 }
1186
1187 /* No need to scan past a second div/sqrt. */
1188 if (div_sqrt_insn_p (after))
1189 break;
1190
1191 /* Insert NOP before branch. */
1192 if (i < 3
1193 && (!NONJUMP_INSN_P (after)
1194 || GET_CODE (PATTERN (after)) == SEQUENCE))
1195 {
1196 insert_nop = true;
1197 break;
1198 }
1199
1200 after = next_active_insn (after);
1201 if (!after)
1202 break;
1203 }
1204 }
1205
1206 /* Look for either of these two sequences:
1207
1208 Sequence A:
1209 1. store of word size or less (e.g. st / stb / sth / stf)
1210 2. any single instruction that is not a load or store
1211 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1212
1213 Sequence B:
1214 1. store of double word size (e.g. std / stdf)
1215 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
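/* A made-up instance of sequence A and its fix (illustration only):

	stb	%g1, [%o0]	! 1. sub-word store
	nop			! inserted by this workaround
	add	%o1, 1, %o1	! 2. non-memory instruction
	st	%o2, [%o0+4]	! 3. another store

   The NOP lands right after the first store because insert_nop causes a
   nop to be emitted before NEXT, i.e. before the instruction that follows
   INSN.  */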
1216 if (sparc_fix_b2bst
1217 && NONJUMP_INSN_P (insn)
1218 && (set = single_set (insn)) != NULL_RTX
1219 && MEM_P (SET_DEST (set)))
1220 {
1221 /* Sequence B begins with a double-word store. */
1222 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1223 rtx_insn *after;
1224 int i;
1225
1226 next = next_active_insn (insn);
1227 if (!next)
1228 break;
1229
1230 for (after = next, i = 0; i < 2; i++)
1231 {
1232 /* Skip empty assembly statements. */
1233 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1234 || (USEFUL_INSN_P (after)
1235 && (asm_noperands (PATTERN (after))>=0)
1236 && !strcmp (decode_asm_operands (PATTERN (after),
1237 NULL, NULL, NULL,
1238 NULL, NULL), "")))
1239 after = next_active_insn (after);
1240 if (!after)
1241 break;
1242
1243 /* If the insn is a branch, then it cannot be problematic. */
1244 if (!NONJUMP_INSN_P (after)
1245 || GET_CODE (PATTERN (after)) == SEQUENCE)
1246 break;
1247
1248 /* Sequence B is only two instructions long. */
1249 if (seq_b)
1250 {
1251 /* Add NOP if followed by a store. */
1252 if ((set = single_set (after)) != NULL_RTX
1253 && MEM_P (SET_DEST (set)))
1254 insert_nop = true;
1255
1256 /* Otherwise it is ok. */
1257 break;
1258 }
1259
1260 /* If the second instruction is a load or a store,
1261 then the sequence cannot be problematic. */
1262 if (i == 0)
1263 {
1264 if ((set = single_set (after)) != NULL_RTX
1265 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1266 break;
1267
1268 after = next_active_insn (after);
1269 if (!after)
1270 break;
1271 }
1272
1273 /* Add NOP if third instruction is a store. */
1274 if (i == 1
1275 && (set = single_set (after)) != NULL_RTX
1276 && MEM_P (SET_DEST (set)))
1277 insert_nop = true;
1278 }
1279 }
1280
1281 /* Look for a single-word load into an odd-numbered FP register. */
1282 else if (sparc_fix_at697f
1283 && NONJUMP_INSN_P (insn)
1284 && (set = single_set (insn)) != NULL_RTX
1285 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1286 && mem_ref (SET_SRC (set))
1287 && REG_P (SET_DEST (set))
1288 && REGNO (SET_DEST (set)) > 31
1289 && REGNO (SET_DEST (set)) % 2 != 0)
1290 {
1291 /* The wrong dependency is on the enclosing double register. */
1292 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1293 unsigned int src1, src2, dest;
1294 int code;
1295
1296 next = next_active_insn (insn);
1297 if (!next)
1298 break;
1299 /* If the insn is a branch, then it cannot be problematic. */
1300 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1301 continue;
1302
1303 extract_insn (next);
1304 code = INSN_CODE (next);
1305
1306 switch (code)
1307 {
1308 case CODE_FOR_adddf3:
1309 case CODE_FOR_subdf3:
1310 case CODE_FOR_muldf3:
1311 case CODE_FOR_divdf3:
1312 dest = REGNO (recog_data.operand[0]);
1313 src1 = REGNO (recog_data.operand[1]);
1314 src2 = REGNO (recog_data.operand[2]);
1315 if (src1 != src2)
1316 {
1317 /* Case [1-4]:
1318 ld [address], %fx+1
1319 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1320 if ((src1 == x || src2 == x)
1321 && (dest == src1 || dest == src2))
1322 insert_nop = true;
1323 }
1324 else
1325 {
1326 /* Case 5:
1327 ld [address], %fx+1
1328 FPOPd %fx, %fx, %fx */
1329 if (src1 == x
1330 && dest == src1
1331 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1332 insert_nop = true;
1333 }
1334 break;
1335
1336 case CODE_FOR_sqrtdf2:
1337 dest = REGNO (recog_data.operand[0]);
1338 src1 = REGNO (recog_data.operand[1]);
1339 /* Case 6:
1340 ld [address], %fx+1
1341 fsqrtd %fx, %fx */
1342 if (src1 == x && dest == src1)
1343 insert_nop = true;
1344 break;
1345
1346 default:
1347 break;
1348 }
1349 }
1350
1351 /* Look for a single-word load into an integer register. */
1352 else if (sparc_fix_ut699
1353 && NONJUMP_INSN_P (insn)
1354 && (set = single_set (insn)) != NULL_RTX
1355 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1356 && (mem_ref (SET_SRC (set)) != NULL_RTX
1357 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1358 && REG_P (SET_DEST (set))
1359 && REGNO (SET_DEST (set)) < 32)
1360 {
1361 /* There is no problem if the second memory access has a data
1362 dependency on the first single-cycle load. */
1363 rtx x = SET_DEST (set);
1364
1365 next = next_active_insn (insn);
1366 if (!next)
1367 break;
1368 /* If the insn is a branch, then it cannot be problematic. */
1369 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1370 continue;
1371
1372 /* Look for a second memory access to/from an integer register. */
1373 if ((set = single_set (next)) != NULL_RTX)
1374 {
1375 rtx src = SET_SRC (set);
1376 rtx dest = SET_DEST (set);
1377 rtx mem;
1378
1379 /* LDD is affected. */
1380 if ((mem = mem_ref (src)) != NULL_RTX
1381 && REG_P (dest)
1382 && REGNO (dest) < 32
1383 && !reg_mentioned_p (x, XEXP (mem, 0)))
1384 insert_nop = true;
1385
1386 /* STD is *not* affected. */
1387 else if (MEM_P (dest)
1388 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1389 && (src == CONST0_RTX (GET_MODE (dest))
1390 || (REG_P (src)
1391 && REGNO (src) < 32
1392 && REGNO (src) != REGNO (x)))
1393 && !reg_mentioned_p (x, XEXP (dest, 0)))
1394 insert_nop = true;
1395
1396 /* GOT accesses use LD. */
1397 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1398 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1399 insert_nop = true;
1400 }
1401 }
1402
1403 /* Look for a single-word load/operation into an FP register. */
1404 else if (sparc_fix_ut699
1405 && NONJUMP_INSN_P (insn)
1406 && (set = single_set (insn)) != NULL_RTX
1407 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1408 && REG_P (SET_DEST (set))
1409 && REGNO (SET_DEST (set)) > 31)
1410 {
1411 /* Number of instructions in the problematic window. */
1412 const int n_insns = 4;
1413 /* The problematic combination is with the sibling FP register. */
1414 const unsigned int x = REGNO (SET_DEST (set));
1415 const unsigned int y = x ^ 1;
1416 rtx_insn *after;
1417 int i;
1418
1419 next = next_active_insn (insn);
1420 if (!next)
1421 break;
1422 /* If the insn is a branch, then it cannot be problematic. */
1423 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1424 continue;
1425
1426 /* Look for a second load/operation into the sibling FP register. */
1427 if (!((set = single_set (next)) != NULL_RTX
1428 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1429 && REG_P (SET_DEST (set))
1430 && REGNO (SET_DEST (set)) == y))
1431 continue;
1432
1433 /* Look for a (possible) store from the FP register in the next N
1434 instructions, but bail out if it is again modified or if there
1435 is a store from the sibling FP register before this store. */
1436 for (after = next, i = 0; i < n_insns; i++)
1437 {
1438 bool branch_p;
1439
1440 after = next_active_insn (after);
1441 if (!after)
1442 break;
1443
1444 /* This is a branch with an empty delay slot. */
1445 if (!NONJUMP_INSN_P (after))
1446 {
1447 if (++i == n_insns)
1448 break;
1449 branch_p = true;
1450 after = NULL;
1451 }
1452 /* This is a branch with a filled delay slot. */
1453 else if (rtx_sequence *seq =
1454 dyn_cast <rtx_sequence *> (PATTERN (after)))
1455 {
1456 if (++i == n_insns)
1457 break;
1458 branch_p = true;
1459 after = seq->insn (1);
1460 }
1461 /* This is a regular instruction. */
1462 else
1463 branch_p = false;
1464
1465 if (after && (set = single_set (after)) != NULL_RTX)
1466 {
1467 const rtx src = SET_SRC (set);
1468 const rtx dest = SET_DEST (set);
1469 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1470
1471 /* If the FP register is again modified before the store,
1472 then the store isn't affected. */
1473 if (REG_P (dest)
1474 && (REGNO (dest) == x
1475 || (REGNO (dest) == y && size == 8)))
1476 break;
1477
1478 if (MEM_P (dest) && REG_P (src))
1479 {
1480 /* If there is a store from the sibling FP register
1481 before the store, then the store is not affected. */
1482 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1483 break;
1484
1485 /* Otherwise, the store is affected. */
1486 if (REGNO (src) == x && size == 4)
1487 {
1488 insert_nop = true;
1489 break;
1490 }
1491 }
1492 }
1493
1494 /* If we have a branch in the first M instructions, then we
1495 cannot see the (M+2)th instruction so we play safe. */
1496 if (branch_p && i <= (n_insns - 2))
1497 {
1498 insert_nop = true;
1499 break;
1500 }
1501 }
1502 }
1503
1504 else
1505 next = NEXT_INSN (insn);
1506
1507 if (insert_nop)
1508 emit_insn_before (gen_nop (), next);
1509 }
1510
1511 return 0;
1512 }
1513
1514 namespace {
1515
1516 const pass_data pass_data_work_around_errata =
1517 {
1518 RTL_PASS, /* type */
1519 "errata", /* name */
1520 OPTGROUP_NONE, /* optinfo_flags */
1521 TV_MACH_DEP, /* tv_id */
1522 0, /* properties_required */
1523 0, /* properties_provided */
1524 0, /* properties_destroyed */
1525 0, /* todo_flags_start */
1526 0, /* todo_flags_finish */
1527 };
1528
1529 class pass_work_around_errata : public rtl_opt_pass
1530 {
1531 public:
1532 pass_work_around_errata(gcc::context *ctxt)
1533 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1534 {}
1535
1536 /* opt_pass methods: */
1537 virtual bool gate (function *)
1538 {
1539 return sparc_fix_at697f
1540 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1541 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1542 }
1543
1544 virtual unsigned int execute (function *)
1545 {
1546 return sparc_do_work_around_errata ();
1547 }
1548
1549 }; // class pass_work_around_errata
1550
1551 } // anon namespace
1552
1553 rtl_opt_pass *
1554 make_pass_work_around_errata (gcc::context *ctxt)
1555 {
1556 return new pass_work_around_errata (ctxt);
1557 }
1558
1559 /* Helpers for TARGET_DEBUG_OPTIONS. */
1560 static void
1561 dump_target_flag_bits (const int flags)
1562 {
1563 if (flags & MASK_64BIT)
1564 fprintf (stderr, "64BIT ");
1565 if (flags & MASK_APP_REGS)
1566 fprintf (stderr, "APP_REGS ");
1567 if (flags & MASK_FASTER_STRUCTS)
1568 fprintf (stderr, "FASTER_STRUCTS ");
1569 if (flags & MASK_FLAT)
1570 fprintf (stderr, "FLAT ");
1571 if (flags & MASK_FMAF)
1572 fprintf (stderr, "FMAF ");
1573 if (flags & MASK_FSMULD)
1574 fprintf (stderr, "FSMULD ");
1575 if (flags & MASK_FPU)
1576 fprintf (stderr, "FPU ");
1577 if (flags & MASK_HARD_QUAD)
1578 fprintf (stderr, "HARD_QUAD ");
1579 if (flags & MASK_POPC)
1580 fprintf (stderr, "POPC ");
1581 if (flags & MASK_PTR64)
1582 fprintf (stderr, "PTR64 ");
1583 if (flags & MASK_STACK_BIAS)
1584 fprintf (stderr, "STACK_BIAS ");
1585 if (flags & MASK_UNALIGNED_DOUBLES)
1586 fprintf (stderr, "UNALIGNED_DOUBLES ");
1587 if (flags & MASK_V8PLUS)
1588 fprintf (stderr, "V8PLUS ");
1589 if (flags & MASK_VIS)
1590 fprintf (stderr, "VIS ");
1591 if (flags & MASK_VIS2)
1592 fprintf (stderr, "VIS2 ");
1593 if (flags & MASK_VIS3)
1594 fprintf (stderr, "VIS3 ");
1595 if (flags & MASK_VIS4)
1596 fprintf (stderr, "VIS4 ");
1597 if (flags & MASK_VIS4B)
1598 fprintf (stderr, "VIS4B ");
1599 if (flags & MASK_CBCOND)
1600 fprintf (stderr, "CBCOND ");
1601 if (flags & MASK_DEPRECATED_V8_INSNS)
1602 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1603 if (flags & MASK_SPARCLET)
1604 fprintf (stderr, "SPARCLET ");
1605 if (flags & MASK_SPARCLITE)
1606 fprintf (stderr, "SPARCLITE ");
1607 if (flags & MASK_V8)
1608 fprintf (stderr, "V8 ");
1609 if (flags & MASK_V9)
1610 fprintf (stderr, "V9 ");
1611 }
1612
1613 static void
1614 dump_target_flags (const char *prefix, const int flags)
1615 {
1616 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1617 dump_target_flag_bits (flags);
1618 fprintf(stderr, "]\n");
1619 }
1620
1621 /* Validate and override various options, and do some machine dependent
1622 initialization. */
1623
1624 static void
1625 sparc_option_override (void)
1626 {
1627 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1628 static struct cpu_default {
1629 const int cpu;
1630 const enum sparc_processor_type processor;
1631 } const cpu_default[] = {
1632 /* There must be one entry here for each TARGET_CPU value. */
1633 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1634 { TARGET_CPU_v8, PROCESSOR_V8 },
1635 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1636 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1637 { TARGET_CPU_leon, PROCESSOR_LEON },
1638 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1639 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1640 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1641 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1642 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1643 { TARGET_CPU_v9, PROCESSOR_V9 },
1644 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1645 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1646 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1647 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1648 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1649 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1650 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1651 { TARGET_CPU_m8, PROCESSOR_M8 },
1652 { -1, PROCESSOR_V7 }
1653 };
1654 const struct cpu_default *def;
1655 /* Table of values for -m{cpu,tune}=. This must match the order of
1656 the enum processor_type in sparc-opts.h. */
1657 static struct cpu_table {
1658 const char *const name;
1659 const int disable;
1660 const int enable;
1661 } const cpu_table[] = {
1662 { "v7", MASK_ISA, 0 },
1663 { "cypress", MASK_ISA, 0 },
1664 { "v8", MASK_ISA, MASK_V8 },
1665 /* TI TMS390Z55 supersparc */
1666 { "supersparc", MASK_ISA, MASK_V8 },
1667 { "hypersparc", MASK_ISA, MASK_V8 },
1668 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1669 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1670 { "leon3v7", MASK_ISA, MASK_LEON3 },
1671 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1672 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1673 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1674 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1675 { "f934", MASK_ISA, MASK_SPARCLITE },
1676 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1677 { "sparclet", MASK_ISA, MASK_SPARCLET },
1678 /* TEMIC sparclet */
1679 { "tsc701", MASK_ISA, MASK_SPARCLET },
1680 { "v9", MASK_ISA, MASK_V9 },
1681 /* UltraSPARC I, II, IIi */
1682 { "ultrasparc", MASK_ISA,
1683 /* Although insns using %y are deprecated, it is a clear win. */
1684 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1685 /* UltraSPARC III */
1686 /* ??? Check if %y issue still holds true. */
1687 { "ultrasparc3", MASK_ISA,
1688 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1689 /* UltraSPARC T1 */
1690 { "niagara", MASK_ISA,
1691 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1692 /* UltraSPARC T2 */
1693 { "niagara2", MASK_ISA,
1694 MASK_V9|MASK_POPC|MASK_VIS2 },
1695 /* UltraSPARC T3 */
1696 { "niagara3", MASK_ISA,
1697 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1698 /* UltraSPARC T4 */
1699 { "niagara4", MASK_ISA,
1700 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1701 /* UltraSPARC M7 */
1702 { "niagara7", MASK_ISA,
1703 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1704 /* UltraSPARC M8 */
1705 { "m8", MASK_ISA,
1706 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1707 };
1708 const struct cpu_table *cpu;
1709 unsigned int i;
1710
1711 if (sparc_debug_string != NULL)
1712 {
1713 const char *q;
1714 char *p;
1715
1716 p = ASTRDUP (sparc_debug_string);
1717 while ((q = strtok (p, ",")) != NULL)
1718 {
1719 bool invert;
1720 int mask;
1721
1722 p = NULL;
1723 if (*q == '!')
1724 {
1725 invert = true;
1726 q++;
1727 }
1728 else
1729 invert = false;
1730
1731 if (! strcmp (q, "all"))
1732 mask = MASK_DEBUG_ALL;
1733 else if (! strcmp (q, "options"))
1734 mask = MASK_DEBUG_OPTIONS;
1735 else
1736 error ("unknown %<-mdebug-%s%> switch", q);
1737
1738 if (invert)
1739 sparc_debug &= ~mask;
1740 else
1741 sparc_debug |= mask;
1742 }
1743 }
1744
1745 /* Enable the FsMULd instruction by default if not explicitly specified by
1746      the user.  It may later be disabled by the CPU (explicitly or not).  */
1747 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1748 target_flags |= MASK_FSMULD;
1749
1750 if (TARGET_DEBUG_OPTIONS)
1751 {
1752       dump_target_flags ("Initial target_flags", target_flags);
1753       dump_target_flags ("target_flags_explicit", target_flags_explicit);
1754 }
1755
1756 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1757 SUBTARGET_OVERRIDE_OPTIONS;
1758 #endif
1759
1760 #ifndef SPARC_BI_ARCH
1761 /* Check for unsupported architecture size. */
1762 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1763 error ("%s is not supported by this configuration",
1764 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1765 #endif
1766
1767   /* We force all 64-bit archs to use 128-bit long double.  */
1768 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1769 {
1770 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1771 target_flags |= MASK_LONG_DOUBLE_128;
1772 }
1773
1774 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1775 for (i = 8; i < 16; i++)
1776 if (!call_used_regs [i])
1777 {
1778 error ("%<-fcall-saved-REG%> is not supported for out registers");
1779 call_used_regs [i] = 1;
1780 }
1781
1782 /* Set the default CPU if no -mcpu option was specified. */
1783 if (!global_options_set.x_sparc_cpu_and_features)
1784 {
1785 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1786 if (def->cpu == TARGET_CPU_DEFAULT)
1787 break;
1788 gcc_assert (def->cpu != -1);
1789 sparc_cpu_and_features = def->processor;
1790 }
1791
1792 /* Set the default CPU if no -mtune option was specified. */
1793 if (!global_options_set.x_sparc_cpu)
1794 sparc_cpu = sparc_cpu_and_features;
1795
1796 cpu = &cpu_table[(int) sparc_cpu_and_features];
1797
1798 if (TARGET_DEBUG_OPTIONS)
1799 {
1800 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1801 dump_target_flags ("cpu->disable", cpu->disable);
1802 dump_target_flags ("cpu->enable", cpu->enable);
1803 }
1804
1805 target_flags &= ~cpu->disable;
1806 target_flags |= (cpu->enable
1807 #ifndef HAVE_AS_FMAF_HPC_VIS3
1808 & ~(MASK_FMAF | MASK_VIS3)
1809 #endif
1810 #ifndef HAVE_AS_SPARC4
1811 & ~MASK_CBCOND
1812 #endif
1813 #ifndef HAVE_AS_SPARC5_VIS4
1814 & ~(MASK_VIS4 | MASK_SUBXC)
1815 #endif
1816 #ifndef HAVE_AS_SPARC6
1817 & ~(MASK_VIS4B)
1818 #endif
1819 #ifndef HAVE_AS_LEON
1820 & ~(MASK_LEON | MASK_LEON3)
1821 #endif
1822 & ~(target_flags_explicit & MASK_FEATURES)
1823 );
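  /* For instance, with -mcpu=niagara2 and no explicit ISA options, the
     MASK_ISA bits are cleared above and MASK_V9|MASK_POPC|MASK_VIS2 are
     enabled here, minus any feature the assembler cannot handle and minus
     any feature bit the user already set or cleared explicitly.  */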
1824
1825 /* FsMULd is a V8 instruction. */
1826 if (!TARGET_V8 && !TARGET_V9)
1827 target_flags &= ~MASK_FSMULD;
1828
1829 /* -mvis2 implies -mvis. */
1830 if (TARGET_VIS2)
1831 target_flags |= MASK_VIS;
1832
1833 /* -mvis3 implies -mvis2 and -mvis. */
1834 if (TARGET_VIS3)
1835 target_flags |= MASK_VIS2 | MASK_VIS;
1836
1837 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1838 if (TARGET_VIS4)
1839 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1840
1841   /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis.  */
1842 if (TARGET_VIS4B)
1843 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1844
1845 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1846 FPU is disabled. */
1847 if (!TARGET_FPU)
1848 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1849 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1850
1851 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1852 are available; -m64 also implies v9. */
1853 if (TARGET_VIS || TARGET_ARCH64)
1854 {
1855 target_flags |= MASK_V9;
1856 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1857 }
1858
1859 /* -mvis also implies -mv8plus on 32-bit. */
1860 if (TARGET_VIS && !TARGET_ARCH64)
1861 target_flags |= MASK_V8PLUS;
1862
1863 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1864 if (TARGET_V9 && TARGET_ARCH32)
1865 target_flags |= MASK_DEPRECATED_V8_INSNS;
1866
1867 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1868 if (!TARGET_V9 || TARGET_ARCH64)
1869 target_flags &= ~MASK_V8PLUS;
1870
1871 /* Don't use stack biasing in 32-bit mode. */
1872 if (TARGET_ARCH32)
1873 target_flags &= ~MASK_STACK_BIAS;
1874
1875 /* Use LRA instead of reload, unless otherwise instructed. */
1876 if (!(target_flags_explicit & MASK_LRA))
1877 target_flags |= MASK_LRA;
1878
1879 /* Enable applicable errata workarounds for LEON3FT. */
1880 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1881 {
1882 sparc_fix_b2bst = 1;
1883 sparc_fix_lost_divsqrt = 1;
1884 }
1885
1886 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1887 if (sparc_fix_ut699)
1888 target_flags &= ~MASK_FSMULD;
1889
1890 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1891 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1892 target_flags |= MASK_LONG_DOUBLE_128;
1893 #endif
1894
1895 if (TARGET_DEBUG_OPTIONS)
1896 dump_target_flags ("Final target_flags", target_flags);
1897
1898 /* Set the code model if no -mcmodel option was specified. */
1899 if (global_options_set.x_sparc_code_model)
1900 {
1901 if (TARGET_ARCH32)
1902 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1903 }
1904 else
1905 {
1906 if (TARGET_ARCH32)
1907 sparc_code_model = CM_32;
1908 else
1909 sparc_code_model = SPARC_DEFAULT_CMODEL;
1910 }
1911
1912 /* Set the memory model if no -mmemory-model option was specified. */
1913 if (!global_options_set.x_sparc_memory_model)
1914 {
1915 /* Choose the memory model for the operating system. */
1916 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1917 if (os_default != SMM_DEFAULT)
1918 sparc_memory_model = os_default;
1919 /* Choose the most relaxed model for the processor. */
1920 else if (TARGET_V9)
1921 sparc_memory_model = SMM_RMO;
1922 else if (TARGET_LEON3)
1923 sparc_memory_model = SMM_TSO;
1924 else if (TARGET_LEON)
1925 sparc_memory_model = SMM_SC;
1926 else if (TARGET_V8)
1927 sparc_memory_model = SMM_PSO;
1928 else
1929 sparc_memory_model = SMM_SC;
1930 }
1931
1932 /* Supply a default value for align_functions. */
1933 if (flag_align_functions && !str_align_functions)
1934 {
1935 if (sparc_cpu == PROCESSOR_ULTRASPARC
1936 || sparc_cpu == PROCESSOR_ULTRASPARC3
1937 || sparc_cpu == PROCESSOR_NIAGARA
1938 || sparc_cpu == PROCESSOR_NIAGARA2
1939 || sparc_cpu == PROCESSOR_NIAGARA3
1940 || sparc_cpu == PROCESSOR_NIAGARA4)
1941 str_align_functions = "32";
1942 else if (sparc_cpu == PROCESSOR_NIAGARA7
1943 || sparc_cpu == PROCESSOR_M8)
1944 str_align_functions = "64";
1945 }
1946
1947 /* Validate PCC_STRUCT_RETURN. */
1948 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1949 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1950
1951 /* Only use .uaxword when compiling for a 64-bit target. */
1952 if (!TARGET_ARCH64)
1953 targetm.asm_out.unaligned_op.di = NULL;
1954
1955 /* Set the processor costs. */
1956 switch (sparc_cpu)
1957 {
1958 case PROCESSOR_V7:
1959 case PROCESSOR_CYPRESS:
1960 sparc_costs = &cypress_costs;
1961 break;
1962 case PROCESSOR_V8:
1963 case PROCESSOR_SPARCLITE:
1964 case PROCESSOR_SUPERSPARC:
1965 sparc_costs = &supersparc_costs;
1966 break;
1967 case PROCESSOR_F930:
1968 case PROCESSOR_F934:
1969 case PROCESSOR_HYPERSPARC:
1970 case PROCESSOR_SPARCLITE86X:
1971 sparc_costs = &hypersparc_costs;
1972 break;
1973 case PROCESSOR_LEON:
1974 sparc_costs = &leon_costs;
1975 break;
1976 case PROCESSOR_LEON3:
1977 case PROCESSOR_LEON3V7:
1978 sparc_costs = &leon3_costs;
1979 break;
1980 case PROCESSOR_SPARCLET:
1981 case PROCESSOR_TSC701:
1982 sparc_costs = &sparclet_costs;
1983 break;
1984 case PROCESSOR_V9:
1985 case PROCESSOR_ULTRASPARC:
1986 sparc_costs = &ultrasparc_costs;
1987 break;
1988 case PROCESSOR_ULTRASPARC3:
1989 sparc_costs = &ultrasparc3_costs;
1990 break;
1991 case PROCESSOR_NIAGARA:
1992 sparc_costs = &niagara_costs;
1993 break;
1994 case PROCESSOR_NIAGARA2:
1995 sparc_costs = &niagara2_costs;
1996 break;
1997 case PROCESSOR_NIAGARA3:
1998 sparc_costs = &niagara3_costs;
1999 break;
2000 case PROCESSOR_NIAGARA4:
2001 sparc_costs = &niagara4_costs;
2002 break;
2003 case PROCESSOR_NIAGARA7:
2004 sparc_costs = &niagara7_costs;
2005 break;
2006 case PROCESSOR_M8:
2007 sparc_costs = &m8_costs;
2008 break;
2009 case PROCESSOR_NATIVE:
2010 gcc_unreachable ();
2011 };
2012
2013 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2014 can run at the same time. More important, it is the threshold
2015 defining when additional prefetches will be dropped by the
2016 hardware.
2017
2018 The UltraSPARC-III features a documented prefetch queue with a
2019 size of 8. Additional prefetches issued in the cpu are
2020 dropped.
2021
2022 Niagara processors are different. In these processors prefetches
2023 are handled much like regular loads. The L1 miss buffer is 32
2024 entries, but prefetches start getting affected when 30 entries
2025 become occupied. That occupation could be a mix of regular loads
2026 and prefetches though. And that buffer is shared by all threads.
2027 Once the threshold is reached, if the core is running a single
2028 thread the prefetch will retry. If more than one thread is
2029 running, the prefetch will be dropped.
2030
2031 All this makes it very difficult to determine how many
2032      prefetches can be issued simultaneously, even in a
2033 single-threaded program. Experimental results show that setting
2034 this parameter to 32 works well when the number of threads is not
2035 high. */
2036 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2037 ((sparc_cpu == PROCESSOR_ULTRASPARC
2038 || sparc_cpu == PROCESSOR_NIAGARA
2039 || sparc_cpu == PROCESSOR_NIAGARA2
2040 || sparc_cpu == PROCESSOR_NIAGARA3
2041 || sparc_cpu == PROCESSOR_NIAGARA4)
2042 ? 2
2043 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2044 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2045 || sparc_cpu == PROCESSOR_M8)
2046 ? 32 : 3))),
2047 global_options.x_param_values,
2048 global_options_set.x_param_values);
2049
2050 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2051 bytes.
2052
2053 The Oracle SPARC Architecture (previously the UltraSPARC
2054 Architecture) specification states that when a PREFETCH[A]
2055 instruction is executed an implementation-specific amount of data
2056 is prefetched, and that it is at least 64 bytes long (aligned to
2057 at least 64 bytes).
2058
2059 However, this is not correct. The M7 (and implementations prior
2060 to that) does not guarantee a 64B prefetch into a cache if the
2061 line size is smaller. A single cache line is all that is ever
2062 prefetched. So for the M7, where the L1D$ has 32B lines and the
2063 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2064 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2065 is a read_n prefetch, which is the only type which allocates to
2066 the L1.) */
2067 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2068 (sparc_cpu == PROCESSOR_M8
2069 ? 64 : 32),
2070 global_options.x_param_values,
2071 global_options_set.x_param_values);
2072
2073 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2074      Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
2075      Niagara processors feature an L1D$ of 16KB.  */
2076 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2077 ((sparc_cpu == PROCESSOR_ULTRASPARC
2078 || sparc_cpu == PROCESSOR_ULTRASPARC3
2079 || sparc_cpu == PROCESSOR_NIAGARA
2080 || sparc_cpu == PROCESSOR_NIAGARA2
2081 || sparc_cpu == PROCESSOR_NIAGARA3
2082 || sparc_cpu == PROCESSOR_NIAGARA4
2083 || sparc_cpu == PROCESSOR_NIAGARA7
2084 || sparc_cpu == PROCESSOR_M8)
2085 ? 16 : 64),
2086 global_options.x_param_values,
2087 global_options_set.x_param_values);
2088
2089
2090   /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes.  Note
2091 that 512 is the default in params.def. */
2092 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2093 ((sparc_cpu == PROCESSOR_NIAGARA4
2094 || sparc_cpu == PROCESSOR_M8)
2095 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2096 ? 256 : 512)),
2097 global_options.x_param_values,
2098 global_options_set.x_param_values);
2099
2100
2101 /* Disable save slot sharing for call-clobbered registers by default.
2102 The IRA sharing algorithm works on single registers only and this
2103 pessimizes for double floating-point registers. */
2104 if (!global_options_set.x_flag_ira_share_save_slots)
2105 flag_ira_share_save_slots = 0;
2106
2107 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2108 redundant 32-to-64-bit extensions. */
2109 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2110 flag_ree = 0;
2111
2112 /* Do various machine dependent initializations. */
2113 sparc_init_modes ();
2114
2115 /* Set up function hooks. */
2116 init_machine_status = sparc_init_machine_status;
2117 }
2118 \f
2119 /* Miscellaneous utilities. */
2120
2121 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2122 or branch on register contents instructions. */
2123
2124 int
2125 v9_regcmp_p (enum rtx_code code)
2126 {
2127 return (code == EQ || code == NE || code == GE || code == LT
2128 || code == LE || code == GT);
2129 }
2130
2131 /* Nonzero if OP is a floating point constant which can
2132 be loaded into an integer register using a single
2133 sethi instruction. */
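/* For example, the single-precision image of 1.0f is 0x3f800000: it is
   too large for a simm13, but its low 10 bits are clear, so a single
   sethi can materialize it.  */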
2134
2135 int
2136 fp_sethi_p (rtx op)
2137 {
2138 if (GET_CODE (op) == CONST_DOUBLE)
2139 {
2140 long i;
2141
2142 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2143 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2144 }
2145
2146 return 0;
2147 }
2148
2149 /* Nonzero if OP is a floating point constant which can
2150 be loaded into an integer register using a single
2151 mov instruction. */
2152
2153 int
2154 fp_mov_p (rtx op)
2155 {
2156 if (GET_CODE (op) == CONST_DOUBLE)
2157 {
2158 long i;
2159
2160 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2161 return SPARC_SIMM13_P (i);
2162 }
2163
2164 return 0;
2165 }
2166
2167 /* Nonzero if OP is a floating point constant which can
2168 be loaded into an integer register using a high/losum
2169 instruction sequence. */
2170
2171 int
2172 fp_high_losum_p (rtx op)
2173 {
2174 /* The constraints calling this should only be in
2175 SFmode move insns, so any constant which cannot
2176 be moved using a single insn will do. */
2177 if (GET_CODE (op) == CONST_DOUBLE)
2178 {
2179 long i;
2180
2181 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2182 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2183 }
2184
2185 return 0;
2186 }
2187
2188 /* Return true if the address of LABEL can be loaded by means of the
2189 mov{si,di}_pic_label_ref patterns in PIC mode. */
2190
2191 static bool
2192 can_use_mov_pic_label_ref (rtx label)
2193 {
2194 /* VxWorks does not impose a fixed gap between segments; the run-time
2195 gap can be different from the object-file gap. We therefore can't
2196 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2197 are absolutely sure that X is in the same segment as the GOT.
2198 Unfortunately, the flexibility of linker scripts means that we
2199 can't be sure of that in general, so assume that GOT-relative
2200 accesses are never valid on VxWorks. */
2201 if (TARGET_VXWORKS_RTP)
2202 return false;
2203
2204 /* Similarly, if the label is non-local, it might end up being placed
2205 in a different section than the current one; now mov_pic_label_ref
2206 requires the label and the code to be in the same section. */
2207 if (LABEL_REF_NONLOCAL_P (label))
2208 return false;
2209
2210   /* Finally, if we are reordering basic blocks and partitioning them into hot
2211 and cold sections, this might happen for any label. */
2212 if (flag_reorder_blocks_and_partition)
2213 return false;
2214
2215 return true;
2216 }
2217
2218 /* Expand a move instruction. Return true if all work is done. */
2219
2220 bool
2221 sparc_expand_move (machine_mode mode, rtx *operands)
2222 {
2223 /* Handle sets of MEM first. */
2224 if (GET_CODE (operands[0]) == MEM)
2225 {
2226 /* 0 is a register (or a pair of registers) on SPARC. */
2227 if (register_or_zero_operand (operands[1], mode))
2228 return false;
2229
2230 if (!reload_in_progress)
2231 {
2232 operands[0] = validize_mem (operands[0]);
2233 operands[1] = force_reg (mode, operands[1]);
2234 }
2235 }
2236
2237 /* Fix up TLS cases. */
2238 if (TARGET_HAVE_TLS
2239 && CONSTANT_P (operands[1])
2240 && sparc_tls_referenced_p (operands [1]))
2241 {
2242 operands[1] = sparc_legitimize_tls_address (operands[1]);
2243 return false;
2244 }
2245
2246 /* Fix up PIC cases. */
2247 if (flag_pic && CONSTANT_P (operands[1]))
2248 {
2249 if (pic_address_needs_scratch (operands[1]))
2250 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2251
2252 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2253 if ((GET_CODE (operands[1]) == LABEL_REF
2254 && can_use_mov_pic_label_ref (operands[1]))
2255 || (GET_CODE (operands[1]) == CONST
2256 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2257 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2258 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2259 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2260 {
2261 if (mode == SImode)
2262 {
2263 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2264 return true;
2265 }
2266
2267 if (mode == DImode)
2268 {
2269 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2270 return true;
2271 }
2272 }
2273
2274 if (symbolic_operand (operands[1], mode))
2275 {
2276 operands[1]
2277 = sparc_legitimize_pic_address (operands[1],
2278 reload_in_progress
2279 ? operands[0] : NULL_RTX);
2280 return false;
2281 }
2282 }
2283
2284 /* If we are trying to toss an integer constant into FP registers,
2285      or loading an FP or vector constant, force it into memory.  */
2286 if (CONSTANT_P (operands[1])
2287 && REG_P (operands[0])
2288 && (SPARC_FP_REG_P (REGNO (operands[0]))
2289 || SCALAR_FLOAT_MODE_P (mode)
2290 || VECTOR_MODE_P (mode)))
2291 {
2292 /* emit_group_store will send such bogosity to us when it is
2293 not storing directly into memory. So fix this up to avoid
2294 crashes in output_constant_pool. */
2295 if (operands [1] == const0_rtx)
2296 operands[1] = CONST0_RTX (mode);
2297
2298       /* FP registers can be cleared or set to all ones only if TARGET_VIS;
2299          the other registers always can be.  */
2300 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2301 && (const_zero_operand (operands[1], mode)
2302 || const_all_ones_operand (operands[1], mode)))
2303 return false;
2304
2305 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2306 /* We are able to build any SF constant in integer registers
2307 with at most 2 instructions. */
2308 && (mode == SFmode
2309 /* And any DF constant in integer registers if needed. */
2310 || (mode == DFmode && !can_create_pseudo_p ())))
2311 return false;
2312
2313 operands[1] = force_const_mem (mode, operands[1]);
2314 if (!reload_in_progress)
2315 operands[1] = validize_mem (operands[1]);
2316 return false;
2317 }
2318
2319 /* Accept non-constants and valid constants unmodified. */
2320 if (!CONSTANT_P (operands[1])
2321 || GET_CODE (operands[1]) == HIGH
2322 || input_operand (operands[1], mode))
2323 return false;
2324
2325 switch (mode)
2326 {
2327 case E_QImode:
2328 /* All QImode constants require only one insn, so proceed. */
2329 break;
2330
2331 case E_HImode:
2332 case E_SImode:
2333 sparc_emit_set_const32 (operands[0], operands[1]);
2334 return true;
2335
2336 case E_DImode:
2337 /* input_operand should have filtered out 32-bit mode. */
2338 sparc_emit_set_const64 (operands[0], operands[1]);
2339 return true;
2340
2341 case E_TImode:
2342 {
2343 rtx high, low;
2344 /* TImode isn't available in 32-bit mode. */
2345 split_double (operands[1], &high, &low);
2346 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2347 high));
2348 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2349 low));
2350 }
2351 return true;
2352
2353 default:
2354 gcc_unreachable ();
2355 }
2356
2357 return false;
2358 }
2359
2360 /* Load OP1, a 32-bit constant, into OP0, a register.
2361 We know it can't be done in one insn when we get
2362    here; the move expander guarantees this.  */
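/* For example, 0x12345678 is built as temp = 0x12345400 (the bits a
   sethi can set) followed by op0 = temp | 0x278 (the low 10 bits).  */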
2363
2364 static void
2365 sparc_emit_set_const32 (rtx op0, rtx op1)
2366 {
2367 machine_mode mode = GET_MODE (op0);
2368 rtx temp = op0;
2369
2370 if (can_create_pseudo_p ())
2371 temp = gen_reg_rtx (mode);
2372
2373 if (GET_CODE (op1) == CONST_INT)
2374 {
2375 gcc_assert (!small_int_operand (op1, mode)
2376 && !const_high_operand (op1, mode));
2377
2378 /* Emit them as real moves instead of a HIGH/LO_SUM,
2379 this way CSE can see everything and reuse intermediate
2380 values if it wants. */
2381 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2382 & ~(HOST_WIDE_INT) 0x3ff)));
2383
2384 emit_insn (gen_rtx_SET (op0,
2385 gen_rtx_IOR (mode, temp,
2386 GEN_INT (INTVAL (op1) & 0x3ff))));
2387 }
2388 else
2389 {
2390 /* A symbol, emit in the traditional way. */
2391 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2392 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2393 }
2394 }
2395
2396 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2397 If TEMP is nonzero, we are forbidden to use any other scratch
2398 registers. Otherwise, we are allowed to generate them as needed.
2399
2400 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2401 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2402
2403 void
2404 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2405 {
2406 rtx cst, temp1, temp2, temp3, temp4, temp5;
2407 rtx ti_temp = 0;
2408
2409 /* Deal with too large offsets. */
2410 if (GET_CODE (op1) == CONST
2411 && GET_CODE (XEXP (op1, 0)) == PLUS
2412 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2413 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2414 {
2415 gcc_assert (!temp);
2416 temp1 = gen_reg_rtx (DImode);
2417 temp2 = gen_reg_rtx (DImode);
2418 sparc_emit_set_const64 (temp2, cst);
2419 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2420 NULL_RTX);
2421 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2422 return;
2423 }
2424
2425 if (temp && GET_MODE (temp) == TImode)
2426 {
2427 ti_temp = temp;
2428 temp = gen_rtx_REG (DImode, REGNO (temp));
2429 }
2430
2431 /* SPARC-V9 code model support. */
2432 switch (sparc_code_model)
2433 {
2434 case CM_MEDLOW:
2435 /* The range spanned by all instructions in the object is less
2436 than 2^31 bytes (2GB) and the distance from any instruction
2437 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2438 than 2^31 bytes (2GB).
2439
2440 The executable must be in the low 4TB of the virtual address
2441 space.
2442
2443 sethi %hi(symbol), %temp1
2444 or %temp1, %lo(symbol), %reg */
2445 if (temp)
2446 temp1 = temp; /* op0 is allowed. */
2447 else
2448 temp1 = gen_reg_rtx (DImode);
2449
2450 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2451 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2452 break;
2453
2454 case CM_MEDMID:
2455 /* The range spanned by all instructions in the object is less
2456 than 2^31 bytes (2GB) and the distance from any instruction
2457 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2458 than 2^31 bytes (2GB).
2459
2460 The executable must be in the low 16TB of the virtual address
2461 space.
2462
2463 sethi %h44(symbol), %temp1
2464 or %temp1, %m44(symbol), %temp2
2465 sllx %temp2, 12, %temp3
2466 or %temp3, %l44(symbol), %reg */
2467 if (temp)
2468 {
2469 temp1 = op0;
2470 temp2 = op0;
2471 temp3 = temp; /* op0 is allowed. */
2472 }
2473 else
2474 {
2475 temp1 = gen_reg_rtx (DImode);
2476 temp2 = gen_reg_rtx (DImode);
2477 temp3 = gen_reg_rtx (DImode);
2478 }
2479
2480 emit_insn (gen_seth44 (temp1, op1));
2481 emit_insn (gen_setm44 (temp2, temp1, op1));
2482 emit_insn (gen_rtx_SET (temp3,
2483 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2484 emit_insn (gen_setl44 (op0, temp3, op1));
2485 break;
2486
2487 case CM_MEDANY:
2488 /* The range spanned by all instructions in the object is less
2489 than 2^31 bytes (2GB) and the distance from any instruction
2490 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2491 than 2^31 bytes (2GB).
2492
2493 The executable can be placed anywhere in the virtual address
2494 space.
2495
2496 sethi %hh(symbol), %temp1
2497 sethi %lm(symbol), %temp2
2498 or %temp1, %hm(symbol), %temp3
2499 sllx %temp3, 32, %temp4
2500 or %temp4, %temp2, %temp5
2501 or %temp5, %lo(symbol), %reg */
2502 if (temp)
2503 {
2504 /* It is possible that one of the registers we got for operands[2]
2505 might coincide with that of operands[0] (which is why we made
2506 it TImode). Pick the other one to use as our scratch. */
2507 if (rtx_equal_p (temp, op0))
2508 {
2509 gcc_assert (ti_temp);
2510 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2511 }
2512 temp1 = op0;
2513 temp2 = temp; /* op0 is _not_ allowed, see above. */
2514 temp3 = op0;
2515 temp4 = op0;
2516 temp5 = op0;
2517 }
2518 else
2519 {
2520 temp1 = gen_reg_rtx (DImode);
2521 temp2 = gen_reg_rtx (DImode);
2522 temp3 = gen_reg_rtx (DImode);
2523 temp4 = gen_reg_rtx (DImode);
2524 temp5 = gen_reg_rtx (DImode);
2525 }
2526
2527 emit_insn (gen_sethh (temp1, op1));
2528 emit_insn (gen_setlm (temp2, op1));
2529 emit_insn (gen_sethm (temp3, temp1, op1));
2530 emit_insn (gen_rtx_SET (temp4,
2531 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2532 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2533 emit_insn (gen_setlo (op0, temp5, op1));
2534 break;
2535
2536 case CM_EMBMEDANY:
2537 /* Old old old backwards compatibility kruft here.
2538 Essentially it is MEDLOW with a fixed 64-bit
2539 virtual base added to all data segment addresses.
2540 Text-segment stuff is computed like MEDANY, we can't
2541 reuse the code above because the relocation knobs
2542 look different.
2543
2544 Data segment: sethi %hi(symbol), %temp1
2545 add %temp1, EMBMEDANY_BASE_REG, %temp2
2546 or %temp2, %lo(symbol), %reg */
2547 if (data_segment_operand (op1, GET_MODE (op1)))
2548 {
2549 if (temp)
2550 {
2551 temp1 = temp; /* op0 is allowed. */
2552 temp2 = op0;
2553 }
2554 else
2555 {
2556 temp1 = gen_reg_rtx (DImode);
2557 temp2 = gen_reg_rtx (DImode);
2558 }
2559
2560 emit_insn (gen_embmedany_sethi (temp1, op1));
2561 emit_insn (gen_embmedany_brsum (temp2, temp1));
2562 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2563 }
2564
2565 /* Text segment: sethi %uhi(symbol), %temp1
2566 sethi %hi(symbol), %temp2
2567 or %temp1, %ulo(symbol), %temp3
2568 sllx %temp3, 32, %temp4
2569 or %temp4, %temp2, %temp5
2570 or %temp5, %lo(symbol), %reg */
2571 else
2572 {
2573 if (temp)
2574 {
2575 /* It is possible that one of the registers we got for operands[2]
2576 might coincide with that of operands[0] (which is why we made
2577 it TImode). Pick the other one to use as our scratch. */
2578 if (rtx_equal_p (temp, op0))
2579 {
2580 gcc_assert (ti_temp);
2581 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2582 }
2583 temp1 = op0;
2584 temp2 = temp; /* op0 is _not_ allowed, see above. */
2585 temp3 = op0;
2586 temp4 = op0;
2587 temp5 = op0;
2588 }
2589 else
2590 {
2591 temp1 = gen_reg_rtx (DImode);
2592 temp2 = gen_reg_rtx (DImode);
2593 temp3 = gen_reg_rtx (DImode);
2594 temp4 = gen_reg_rtx (DImode);
2595 temp5 = gen_reg_rtx (DImode);
2596 }
2597
2598 emit_insn (gen_embmedany_textuhi (temp1, op1));
2599 emit_insn (gen_embmedany_texthi (temp2, op1));
2600 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2601 emit_insn (gen_rtx_SET (temp4,
2602 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2603 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2604 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2605 }
2606 break;
2607
2608 default:
2609 gcc_unreachable ();
2610 }
2611 }
2612
2613 /* These avoid problems when cross compiling. If we do not
2614 go through all this hair then the optimizer will see
2615 invalid REG_EQUAL notes or in some cases none at all. */
2616 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2617 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2618 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2619 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2620
2621 /* The optimizer is not to assume anything about exactly
2622    which bits are set for a HIGH; they are unspecified.
2623    Unfortunately this leads to many missed optimizations
2624    during CSE.  We mask out the non-HIGH bits so the result
2625    matches a plain movdi, which alleviates this problem.  */
2626 static rtx
2627 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2628 {
2629 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2630 }
2631
2632 static rtx
2633 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2634 {
2635 return gen_rtx_SET (dest, GEN_INT (val));
2636 }
2637
2638 static rtx
2639 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2640 {
2641 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2642 }
2643
2644 static rtx
2645 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2646 {
2647 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2648 }
2649
2650 /* Worker routines for 64-bit constant formation on arch64.
2651    One of the key things to do in these emissions is
2652    to create as many temp REGs as possible.  This makes it
2653    possible for half-built constants to be reused when
2654    similar values are required later on.
2655 Without doing this, the optimizer cannot see such
2656 opportunities. */
2657
2658 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2659 unsigned HOST_WIDE_INT, int);
2660
2661 static void
2662 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2663 unsigned HOST_WIDE_INT low_bits, int is_neg)
2664 {
2665 unsigned HOST_WIDE_INT high_bits;
2666
2667 if (is_neg)
2668 high_bits = (~low_bits) & 0xffffffff;
2669 else
2670 high_bits = low_bits;
2671
2672 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2673 if (!is_neg)
2674 {
2675 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2676 }
2677 else
2678 {
2679 /* If we are XOR'ing with -1, then we should emit a one's complement
2680 instead. This way the combiner will notice logical operations
2681 such as ANDN later on and substitute. */
2682 if ((low_bits & 0x3ff) == 0x3ff)
2683 {
2684 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2685 }
2686 else
2687 {
2688 emit_insn (gen_rtx_SET (op0,
2689 gen_safe_XOR64 (temp,
2690 (-(HOST_WIDE_INT)0x400
2691 | (low_bits & 0x3ff)))));
2692 }
2693 }
2694 }
2695
2696 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2697 unsigned HOST_WIDE_INT, int);
2698
2699 static void
2700 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2701 unsigned HOST_WIDE_INT high_bits,
2702 unsigned HOST_WIDE_INT low_immediate,
2703 int shift_count)
2704 {
2705 rtx temp2 = op0;
2706
2707 if ((high_bits & 0xfffffc00) != 0)
2708 {
2709 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2710 if ((high_bits & ~0xfffffc00) != 0)
2711 emit_insn (gen_rtx_SET (op0,
2712 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2713 else
2714 temp2 = temp;
2715 }
2716 else
2717 {
2718 emit_insn (gen_safe_SET64 (temp, high_bits));
2719 temp2 = temp;
2720 }
2721
2722 /* Now shift it up into place. */
2723 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2724 GEN_INT (shift_count))));
2725
2726   /* If there is a low immediate piece, finish up by
2727 putting that in as well. */
2728 if (low_immediate != 0)
2729 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2730 }
2731
2732 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2733 unsigned HOST_WIDE_INT);
2734
2735 /* Full 64-bit constant decomposition. Even though this is the
2736 'worst' case, we still optimize a few things away. */
2737 static void
2738 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2739 unsigned HOST_WIDE_INT high_bits,
2740 unsigned HOST_WIDE_INT low_bits)
2741 {
2742 rtx sub_temp = op0;
2743
2744 if (can_create_pseudo_p ())
2745 sub_temp = gen_reg_rtx (DImode);
2746
2747 if ((high_bits & 0xfffffc00) != 0)
2748 {
2749 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2750 if ((high_bits & ~0xfffffc00) != 0)
2751 emit_insn (gen_rtx_SET (sub_temp,
2752 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2753 else
2754 sub_temp = temp;
2755 }
2756 else
2757 {
2758 emit_insn (gen_safe_SET64 (temp, high_bits));
2759 sub_temp = temp;
2760 }
2761
2762 if (can_create_pseudo_p ())
2763 {
2764 rtx temp2 = gen_reg_rtx (DImode);
2765 rtx temp3 = gen_reg_rtx (DImode);
2766 rtx temp4 = gen_reg_rtx (DImode);
2767
2768 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2769 GEN_INT (32))));
2770
2771 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2772 if ((low_bits & ~0xfffffc00) != 0)
2773 {
2774 emit_insn (gen_rtx_SET (temp3,
2775 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2776 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2777 }
2778 else
2779 {
2780 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2781 }
2782 }
2783 else
2784 {
2785 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2786 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2787 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2788 int to_shift = 12;
2789
2790 /* We are in the middle of reload, so this is really
2791 painful. However we do still make an attempt to
2792 avoid emitting truly stupid code. */
2793 if (low1 != const0_rtx)
2794 {
2795 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2796 GEN_INT (to_shift))));
2797 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2798 sub_temp = op0;
2799 to_shift = 12;
2800 }
2801 else
2802 {
2803 to_shift += 12;
2804 }
2805 if (low2 != const0_rtx)
2806 {
2807 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2808 GEN_INT (to_shift))));
2809 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2810 sub_temp = op0;
2811 to_shift = 8;
2812 }
2813 else
2814 {
2815 to_shift += 8;
2816 }
2817 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2818 GEN_INT (to_shift))));
2819 if (low3 != const0_rtx)
2820 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2821 /* phew... */
2822 }
2823 }
2824
2825 /* Analyze a 64-bit constant for certain properties. */
2826 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2827 unsigned HOST_WIDE_INT,
2828 int *, int *, int *);
2829
2830 static void
2831 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2832 unsigned HOST_WIDE_INT low_bits,
2833 int *hbsp, int *lbsp, int *abbasp)
2834 {
2835 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2836 int i;
2837
2838 lowest_bit_set = highest_bit_set = -1;
2839 i = 0;
2840 do
2841 {
2842 if ((lowest_bit_set == -1)
2843 && ((low_bits >> i) & 1))
2844 lowest_bit_set = i;
2845 if ((highest_bit_set == -1)
2846 && ((high_bits >> (32 - i - 1)) & 1))
2847 highest_bit_set = (64 - i - 1);
2848 }
2849 while (++i < 32
2850 && ((highest_bit_set == -1)
2851 || (lowest_bit_set == -1)));
2852 if (i == 32)
2853 {
2854 i = 0;
2855 do
2856 {
2857 if ((lowest_bit_set == -1)
2858 && ((high_bits >> i) & 1))
2859 lowest_bit_set = i + 32;
2860 if ((highest_bit_set == -1)
2861 && ((low_bits >> (32 - i - 1)) & 1))
2862 highest_bit_set = 32 - i - 1;
2863 }
2864 while (++i < 32
2865 && ((highest_bit_set == -1)
2866 || (lowest_bit_set == -1)));
2867 }
2868 /* If there are no bits set this should have gone out
2869 as one instruction! */
2870 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2871 all_bits_between_are_set = 1;
2872 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2873 {
2874 if (i < 32)
2875 {
2876 if ((low_bits & (1 << i)) != 0)
2877 continue;
2878 }
2879 else
2880 {
2881 if ((high_bits & (1 << (i - 32))) != 0)
2882 continue;
2883 }
2884 all_bits_between_are_set = 0;
2885 break;
2886 }
2887 *hbsp = highest_bit_set;
2888 *lbsp = lowest_bit_set;
2889 *abbasp = all_bits_between_are_set;
2890 }
2891
2892 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2893
2894 static int
2895 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2896 unsigned HOST_WIDE_INT low_bits)
2897 {
2898 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2899
2900 if (high_bits == 0
2901 || high_bits == 0xffffffff)
2902 return 1;
2903
2904 analyze_64bit_constant (high_bits, low_bits,
2905 &highest_bit_set, &lowest_bit_set,
2906 &all_bits_between_are_set);
2907
2908 if ((highest_bit_set == 63
2909 || lowest_bit_set == 0)
2910 && all_bits_between_are_set != 0)
2911 return 1;
2912
2913 if ((highest_bit_set - lowest_bit_set) < 21)
2914 return 1;
2915
2916 return 0;
2917 }
2918
2919 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2920 unsigned HOST_WIDE_INT,
2921 int, int);
2922
2923 static unsigned HOST_WIDE_INT
2924 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2925 unsigned HOST_WIDE_INT low_bits,
2926 int lowest_bit_set, int shift)
2927 {
2928 HOST_WIDE_INT hi, lo;
2929
2930 if (lowest_bit_set < 32)
2931 {
2932 lo = (low_bits >> lowest_bit_set) << shift;
2933 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2934 }
2935 else
2936 {
2937 lo = 0;
2938 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2939 }
2940 gcc_assert (! (hi & lo));
2941 return (hi | lo);
2942 }
2943
2944 /* Here we are sure to be arch64 and this is an integer constant
2945 being loaded into a register. Emit the most efficient
2946 insn sequence possible. Detection of all the 1-insn cases
2947 has been done already. */
2948 static void
2949 sparc_emit_set_const64 (rtx op0, rtx op1)
2950 {
2951 unsigned HOST_WIDE_INT high_bits, low_bits;
2952 int lowest_bit_set, highest_bit_set;
2953 int all_bits_between_are_set;
2954 rtx temp = 0;
2955
2956 /* Sanity check that we know what we are working with. */
2957 gcc_assert (TARGET_ARCH64
2958 && (GET_CODE (op0) == SUBREG
2959 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2960
2961 if (! can_create_pseudo_p ())
2962 temp = op0;
2963
2964 if (GET_CODE (op1) != CONST_INT)
2965 {
2966 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2967 return;
2968 }
2969
2970 if (! temp)
2971 temp = gen_reg_rtx (DImode);
2972
2973 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2974 low_bits = (INTVAL (op1) & 0xffffffff);
2975
2976 /* low_bits bits 0 --> 31
2977 high_bits bits 32 --> 63 */
2978
2979 analyze_64bit_constant (high_bits, low_bits,
2980 &highest_bit_set, &lowest_bit_set,
2981 &all_bits_between_are_set);
2982
2983 /* First try for a 2-insn sequence. */
2984
2985 /* These situations are preferred because the optimizer can
2986 * do more things with them:
2987 * 1) mov -1, %reg
2988 * sllx %reg, shift, %reg
2989 * 2) mov -1, %reg
2990 * srlx %reg, shift, %reg
2991 * 3) mov some_small_const, %reg
2992 * sllx %reg, shift, %reg
2993 */
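  /* For example, 0xfffffffffff80000 (bits 19..63 all set) is emitted as
     mov -1, %reg; sllx %reg, 19, %reg.  */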
2994 if (((highest_bit_set == 63
2995 || lowest_bit_set == 0)
2996 && all_bits_between_are_set != 0)
2997 || ((highest_bit_set - lowest_bit_set) < 12))
2998 {
2999 HOST_WIDE_INT the_const = -1;
3000 int shift = lowest_bit_set;
3001
3002 if ((highest_bit_set != 63
3003 && lowest_bit_set != 0)
3004 || all_bits_between_are_set == 0)
3005 {
3006 the_const =
3007 create_simple_focus_bits (high_bits, low_bits,
3008 lowest_bit_set, 0);
3009 }
3010 else if (lowest_bit_set == 0)
3011 shift = -(63 - highest_bit_set);
3012
3013 gcc_assert (SPARC_SIMM13_P (the_const));
3014 gcc_assert (shift != 0);
3015
3016 emit_insn (gen_safe_SET64 (temp, the_const));
3017 if (shift > 0)
3018 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3019 GEN_INT (shift))));
3020 else if (shift < 0)
3021 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3022 GEN_INT (-shift))));
3023 return;
3024 }
3025
3026 /* Now a range of 22 or less bits set somewhere.
3027 * 1) sethi %hi(focus_bits), %reg
3028 * sllx %reg, shift, %reg
3029 * 2) sethi %hi(focus_bits), %reg
3030 * srlx %reg, shift, %reg
3031 */
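  /* For example, 0x000fffff00000000 (20 set bits at positions 32..51)
     becomes a sethi of 0x3ffffc00 followed by a sllx by 22.  */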
3032 if ((highest_bit_set - lowest_bit_set) < 21)
3033 {
3034 unsigned HOST_WIDE_INT focus_bits =
3035 create_simple_focus_bits (high_bits, low_bits,
3036 lowest_bit_set, 10);
3037
3038 gcc_assert (SPARC_SETHI_P (focus_bits));
3039 gcc_assert (lowest_bit_set != 10);
3040
3041 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3042
3043 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3044 if (lowest_bit_set < 10)
3045 emit_insn (gen_rtx_SET (op0,
3046 gen_rtx_LSHIFTRT (DImode, temp,
3047 GEN_INT (10 - lowest_bit_set))));
3048 else if (lowest_bit_set > 10)
3049 emit_insn (gen_rtx_SET (op0,
3050 gen_rtx_ASHIFT (DImode, temp,
3051 GEN_INT (lowest_bit_set - 10))));
3052 return;
3053 }
3054
3055 /* 1) sethi %hi(low_bits), %reg
3056 * or %reg, %lo(low_bits), %reg
3057 * 2) sethi %hi(~low_bits), %reg
3058 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3059 */
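  /* For example, 0xffffffff12345678 is built as a sethi of 0xedcba800
     (the inverted low word with its low 10 bits cleared) followed by an
     xor with -0x400 | 0x278, i.e. 0xfffffffffffffe78.  */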
3060 if (high_bits == 0
3061 || high_bits == 0xffffffff)
3062 {
3063 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3064 (high_bits == 0xffffffff));
3065 return;
3066 }
3067
3068 /* Now, try 3-insn sequences. */
3069
3070 /* 1) sethi %hi(high_bits), %reg
3071 * or %reg, %lo(high_bits), %reg
3072 * sllx %reg, 32, %reg
3073 */
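  /* For example, 0x1234567800000000 is built as temp = 0x12345400,
     then temp |= 0x278 (giving 0x12345678), then a sllx by 32.  */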
3074 if (low_bits == 0)
3075 {
3076 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3077 return;
3078 }
3079
3080 /* We may be able to do something quick
3081 when the constant is negated, so try that. */
3082 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3083 (~low_bits) & 0xfffffc00))
3084 {
3085 /* NOTE: The trailing bits get XOR'd so we need the
3086 non-negated bits, not the negated ones. */
3087 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3088
3089 if ((((~high_bits) & 0xffffffff) == 0
3090 && ((~low_bits) & 0x80000000) == 0)
3091 || (((~high_bits) & 0xffffffff) == 0xffffffff
3092 && ((~low_bits) & 0x80000000) != 0))
3093 {
3094 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3095
3096 if ((SPARC_SETHI_P (fast_int)
3097 && (~high_bits & 0xffffffff) == 0)
3098 || SPARC_SIMM13_P (fast_int))
3099 emit_insn (gen_safe_SET64 (temp, fast_int));
3100 else
3101 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3102 }
3103 else
3104 {
3105 rtx negated_const;
3106 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3107 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3108 sparc_emit_set_const64 (temp, negated_const);
3109 }
3110
3111 /* If we are XOR'ing with -1, then we should emit a one's complement
3112 instead. This way the combiner will notice logical operations
3113 such as ANDN later on and substitute. */
3114 if (trailing_bits == 0x3ff)
3115 {
3116 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3117 }
3118 else
3119 {
3120 emit_insn (gen_rtx_SET (op0,
3121 gen_safe_XOR64 (temp,
3122 (-0x400 | trailing_bits))));
3123 }
3124 return;
3125 }
3126
3127 /* 1) sethi %hi(xxx), %reg
3128 * or %reg, %lo(xxx), %reg
3129 * sllx %reg, yyy, %reg
3130 *
3131 * ??? This is just a generalized version of the low_bits==0
3132 * thing above, FIXME...
3133 */
3134 if ((highest_bit_set - lowest_bit_set) < 32)
3135 {
3136 unsigned HOST_WIDE_INT focus_bits =
3137 create_simple_focus_bits (high_bits, low_bits,
3138 lowest_bit_set, 0);
3139
3140 /* We can't get here in this state. */
3141 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3142
3143 /* So what we know is that the set bits straddle the
3144 middle of the 64-bit word. */
3145 sparc_emit_set_const64_quick2 (op0, temp,
3146 focus_bits, 0,
3147 lowest_bit_set);
3148 return;
3149 }
3150
3151 /* 1) sethi %hi(high_bits), %reg
3152 * or %reg, %lo(high_bits), %reg
3153 * sllx %reg, 32, %reg
3154 * or %reg, low_bits, %reg
3155 */
3156 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3157 {
3158 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3159 return;
3160 }
3161
3162 /* The easiest way when all else fails, is full decomposition. */
3163 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3164 }
3165
3166 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3167
3168 static bool
3169 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3170 {
3171 *p1 = SPARC_ICC_REG;
3172 *p2 = SPARC_FCC_REG;
3173 return true;
3174 }
3175
3176 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3177
3178 static unsigned int
3179 sparc_min_arithmetic_precision (void)
3180 {
3181 return 32;
3182 }
3183
3184 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3185 return the mode to be used for the comparison. For floating-point,
3186 CCFP[E]mode is used. CCNZmode should be used when the first operand
3187 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3188 processing is needed. */
3189
3190 machine_mode
3191 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3192 {
3193 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3194 {
3195 switch (op)
3196 {
3197 case EQ:
3198 case NE:
3199 case UNORDERED:
3200 case ORDERED:
3201 case UNLT:
3202 case UNLE:
3203 case UNGT:
3204 case UNGE:
3205 case UNEQ:
3206 return CCFPmode;
3207
3208 case LT:
3209 case LE:
3210 case GT:
3211 case GE:
3212 case LTGT:
3213 return CCFPEmode;
3214
3215 default:
3216 gcc_unreachable ();
3217 }
3218 }
3219 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3220 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3221 && y == const0_rtx)
3222 {
3223 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3224 return CCXNZmode;
3225 else
3226 return CCNZmode;
3227 }
3228 else
3229 {
3230 /* This is for the cmp<mode>_sne pattern. */
3231 if (GET_CODE (x) == NOT && y == constm1_rtx)
3232 {
3233 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3234 return CCXCmode;
3235 else
3236 return CCCmode;
3237 }
3238
3239 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3240 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3241 {
3242 if (GET_CODE (y) == UNSPEC
3243 && (XINT (y, 1) == UNSPEC_ADDV
3244 || XINT (y, 1) == UNSPEC_SUBV
3245 || XINT (y, 1) == UNSPEC_NEGV))
3246 return CCVmode;
3247 else
3248 return CCCmode;
3249 }
3250
3251 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3252 return CCXmode;
3253 else
3254 return CCmode;
3255 }
3256 }
3257
3258 /* Emit the compare insn and return the CC reg for a CODE comparison
3259 with operands X and Y. */
3260
3261 static rtx
3262 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3263 {
3264 machine_mode mode;
3265 rtx cc_reg;
3266
3267 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3268 return x;
3269
3270 mode = SELECT_CC_MODE (code, x, y);
3271
3272 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3273 fcc regs (cse can't tell they're really call clobbered regs and will
3274 remove a duplicate comparison even if there is an intervening function
3275 call - it will then try to reload the cc reg via an int reg which is why
3276 we need the movcc patterns). It is possible to provide the movcc
3277 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3278 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3279 to tell cse that CCFPE mode registers (even pseudos) are call
3280 clobbered. */
3281
3282 /* ??? This is an experiment. Rather than making changes to cse which may
3283 or may not be easy/clean, we do our own cse. This is possible because
3284 we will generate hard registers. Cse knows they're call clobbered (it
3285 doesn't know the same thing about pseudos). If we guess wrong, no big
3286 deal, but if we win, great! */
3287
3288 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3289 #if 1 /* experiment */
3290 {
3291 int reg;
3292 /* We cycle through the registers to ensure they're all exercised. */
3293 static int next_fcc_reg = 0;
3294 /* Previous x,y for each fcc reg. */
3295 static rtx prev_args[4][2];
3296
3297 /* Scan prev_args for x,y. */
3298 for (reg = 0; reg < 4; reg++)
3299 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3300 break;
3301 if (reg == 4)
3302 {
3303 reg = next_fcc_reg;
3304 prev_args[reg][0] = x;
3305 prev_args[reg][1] = y;
3306 next_fcc_reg = (next_fcc_reg + 1) & 3;
3307 }
3308 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3309 }
3310 #else
3311 cc_reg = gen_reg_rtx (mode);
3312 #endif /* ! experiment */
3313 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3314 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3315 else
3316 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3317
3318   /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
3319      will only result in an unrecognizable insn, so there is no point in asserting.  */
3320 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3321
3322 return cc_reg;
3323 }
3324
3325
3326 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3327
3328 rtx
3329 gen_compare_reg (rtx cmp)
3330 {
3331 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3332 }
3333
3334 /* This function is used for v9 only.
3335 DEST is the target of the Scc insn.
3336 CODE is the code for an Scc's comparison.
3337 X and Y are the values we compare.
3338
3339 This function is needed to turn
3340
3341 (set (reg:SI 110)
3342 (gt (reg:CCX 100 %icc)
3343 (const_int 0)))
3344 into
3345 (set (reg:SI 110)
3346 (gt:DI (reg:CCX 100 %icc)
3347 (const_int 0)))
3348
3349    I.e., the instruction recognizer needs to see the mode of the comparison to
3350 find the right instruction. We could use "gt:DI" right in the
3351 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3352
3353 static int
3354 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3355 {
3356 if (! TARGET_ARCH64
3357 && (GET_MODE (x) == DImode
3358 || GET_MODE (dest) == DImode))
3359 return 0;
3360
3361 /* Try to use the movrCC insns. */
3362 if (TARGET_ARCH64
3363 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3364 && y == const0_rtx
3365 && v9_regcmp_p (compare_code))
3366 {
3367 rtx op0 = x;
3368 rtx temp;
3369
3370 /* Special case for op0 != 0. This can be done with one instruction if
3371 dest == x. */
3372
3373 if (compare_code == NE
3374 && GET_MODE (dest) == DImode
3375 && rtx_equal_p (op0, dest))
3376 {
3377 emit_insn (gen_rtx_SET (dest,
3378 gen_rtx_IF_THEN_ELSE (DImode,
3379 gen_rtx_fmt_ee (compare_code, DImode,
3380 op0, const0_rtx),
3381 const1_rtx,
3382 dest)));
3383 return 1;
3384 }
3385
3386 if (reg_overlap_mentioned_p (dest, op0))
3387 {
3388 /* Handle the case where dest == x.
3389 We "early clobber" the result. */
3390 op0 = gen_reg_rtx (GET_MODE (x));
3391 emit_move_insn (op0, x);
3392 }
3393
3394 emit_insn (gen_rtx_SET (dest, const0_rtx));
3395 if (GET_MODE (op0) != DImode)
3396 {
3397 temp = gen_reg_rtx (DImode);
3398 convert_move (temp, op0, 0);
3399 }
3400 else
3401 temp = op0;
3402 emit_insn (gen_rtx_SET (dest,
3403 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3404 gen_rtx_fmt_ee (compare_code, DImode,
3405 temp, const0_rtx),
3406 const1_rtx,
3407 dest)));
3408 return 1;
3409 }
3410 else
3411 {
3412 x = gen_compare_reg_1 (compare_code, x, y);
3413 y = const0_rtx;
3414
3415 emit_insn (gen_rtx_SET (dest, const0_rtx));
3416 emit_insn (gen_rtx_SET (dest,
3417 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3418 gen_rtx_fmt_ee (compare_code,
3419 GET_MODE (x), x, y),
3420 const1_rtx, dest)));
3421 return 1;
3422 }
3423 }
3424
3425
3426 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3427 without jumps using the addx/subx instructions. */
3428
3429 bool
3430 emit_scc_insn (rtx operands[])
3431 {
3432 rtx tem, x, y;
3433 enum rtx_code code;
3434 machine_mode mode;
3435
3436 /* The quad-word fp compare library routines all return nonzero to indicate
3437 true, which is different from the equivalent libgcc routines, so we must
3438 handle them specially here. */
3439 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3440 {
3441 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3442 GET_CODE (operands[1]));
3443 operands[2] = XEXP (operands[1], 0);
3444 operands[3] = XEXP (operands[1], 1);
3445 }
3446
3447 code = GET_CODE (operands[1]);
3448 x = operands[2];
3449 y = operands[3];
3450 mode = GET_MODE (x);
3451
3452 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3453 more applications). The exception to this is "reg != 0" which can
3454 be done in one instruction on v9 (so we do it). */
3455 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3456 {
3457 if (y != const0_rtx)
3458 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3459
3460 rtx pat = gen_rtx_SET (operands[0],
3461 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3462 x, const0_rtx));
3463
3464 /* If we can use addx/subx or addxc, add a clobber for CC. */
3465 if (mode == SImode || (code == NE && TARGET_VIS3))
3466 {
3467 rtx clobber
3468 = gen_rtx_CLOBBER (VOIDmode,
3469 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3470 SPARC_ICC_REG));
3471 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3472 }
3473
3474 emit_insn (pat);
3475 return true;
3476 }
3477
3478 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3479 if (TARGET_ARCH64
3480 && mode == DImode
3481 && !((code == LTU || code == GTU) && TARGET_VIS3)
3482 && gen_v9_scc (operands[0], code, x, y))
3483 return true;
3484
3485 /* We can do LTU and GEU using the addx/subx instructions too. And
3486      for GTU/LEU, if both operands are registers, swap them and fall
3487 back to the easy case. */
3488 if (code == GTU || code == LEU)
3489 {
3490 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3491 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3492 {
3493 tem = x;
3494 x = y;
3495 y = tem;
3496 code = swap_condition (code);
3497 }
3498 }
3499
3500 if (code == LTU || code == GEU)
3501 {
3502 emit_insn (gen_rtx_SET (operands[0],
3503 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3504 gen_compare_reg_1 (code, x, y),
3505 const0_rtx)));
3506 return true;
3507 }
3508
3509   /* All the possibilities to use addx/subx-based sequences have been
3510      exhausted, so try for a 3-instruction sequence using v9 conditional
3511 moves. */
3512 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3513 return true;
3514
3515 /* Nope, do branches. */
3516 return false;
3517 }
3518
3519 /* Emit a conditional jump insn for the v9 architecture using comparison code
3520 CODE and jump target LABEL.
3521 This function exists to take advantage of the v9 brxx insns. */
3522
3523 static void
3524 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3525 {
3526 emit_jump_insn (gen_rtx_SET (pc_rtx,
3527 gen_rtx_IF_THEN_ELSE (VOIDmode,
3528 gen_rtx_fmt_ee (code, GET_MODE (op0),
3529 op0, const0_rtx),
3530 gen_rtx_LABEL_REF (VOIDmode, label),
3531 pc_rtx)));
3532 }
3533
3534 /* Emit a conditional jump insn for the UA2011 architecture using
3535 comparison code CODE and jump target LABEL. This function exists
3536 to take advantage of the UA2011 Compare and Branch insns. */
3537
3538 static void
3539 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3540 {
3541 rtx if_then_else;
3542
3543 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3544                                        gen_rtx_fmt_ee (code, GET_MODE (op0),
3545 op0, op1),
3546 gen_rtx_LABEL_REF (VOIDmode, label),
3547 pc_rtx);
3548
3549 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3550 }
3551
3552 void
3553 emit_conditional_branch_insn (rtx operands[])
3554 {
3555 /* The quad-word fp compare library routines all return nonzero to indicate
3556 true, which is different from the equivalent libgcc routines, so we must
3557 handle them specially here. */
3558 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3559 {
3560 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3561 GET_CODE (operands[0]));
3562 operands[1] = XEXP (operands[0], 0);
3563 operands[2] = XEXP (operands[0], 1);
3564 }
3565
3566 /* If we can tell early on that the comparison is against a constant
3567 that won't fit in the 5-bit signed immediate field of a cbcond,
3568 use one of the other v9 conditional branch sequences. */
3569 if (TARGET_CBCOND
3570 && GET_CODE (operands[1]) == REG
3571 && (GET_MODE (operands[1]) == SImode
3572 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3573 && (GET_CODE (operands[2]) != CONST_INT
3574 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3575 {
3576 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3577 return;
3578 }
3579
3580 if (TARGET_ARCH64 && operands[2] == const0_rtx
3581 && GET_CODE (operands[1]) == REG
3582 && GET_MODE (operands[1]) == DImode)
3583 {
3584 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3585 return;
3586 }
3587
3588 operands[1] = gen_compare_reg (operands[0]);
3589 operands[2] = const0_rtx;
3590 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3591 operands[1], operands[2]);
3592 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3593 operands[3]));
3594 }
3595
3596
3597 /* Generate a DFmode part of a hard TFmode register.
3598 REG is the TFmode hard register, LOW is 1 for the
3599 low 64 bits of the register and 0 otherwise.
3600 */
3601 rtx
3602 gen_df_reg (rtx reg, int low)
3603 {
3604 int regno = REGNO (reg);
3605
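  /* The two DFmode halves are adjacent register numbers: 1 apart for the
     64-bit integer registers (each of which holds a full DFmode value),
     2 apart for the 32-bit FP registers.  */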
3606 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3607 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3608 return gen_rtx_REG (DFmode, regno);
3609 }
3610 \f
3611 /* Generate a call to FUNC_NAME with OPERANDS. Operand 0 is the return value.
3612 Unlike normal calls, TFmode operands are passed by reference. It is
3613 assumed that no more than 3 operands are required. */
3614
3615 static void
3616 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3617 {
3618 rtx ret_slot = NULL, arg[3], func_sym;
3619 int i;
3620
3621 /* We only expect to be called for conversions, unary, and binary ops. */
3622 gcc_assert (nargs == 2 || nargs == 3);
3623
3624 for (i = 0; i < nargs; ++i)
3625 {
3626 rtx this_arg = operands[i];
3627 rtx this_slot;
3628
3629 /* TFmode arguments and return values are passed by reference. */
3630 if (GET_MODE (this_arg) == TFmode)
3631 {
3632 int force_stack_temp;
3633
3634 force_stack_temp = 0;
3635 if (TARGET_BUGGY_QP_LIB && i == 0)
3636 force_stack_temp = 1;
3637
3638 if (GET_CODE (this_arg) == MEM
3639 && ! force_stack_temp)
3640 {
3641 tree expr = MEM_EXPR (this_arg);
3642 if (expr)
3643 mark_addressable (expr);
3644 this_arg = XEXP (this_arg, 0);
3645 }
3646 else if (CONSTANT_P (this_arg)
3647 && ! force_stack_temp)
3648 {
3649 this_slot = force_const_mem (TFmode, this_arg);
3650 this_arg = XEXP (this_slot, 0);
3651 }
3652 else
3653 {
3654 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3655
3656 /* Operand 0 is the return value. We'll copy it out later. */
3657 if (i > 0)
3658 emit_move_insn (this_slot, this_arg);
3659 else
3660 ret_slot = this_slot;
3661
3662 this_arg = XEXP (this_slot, 0);
3663 }
3664 }
3665
3666 arg[i] = this_arg;
3667 }
3668
3669 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3670
3671 if (GET_MODE (operands[0]) == TFmode)
3672 {
3673 if (nargs == 2)
3674 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3675 arg[0], GET_MODE (arg[0]),
3676 arg[1], GET_MODE (arg[1]));
3677 else
3678 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3679 arg[0], GET_MODE (arg[0]),
3680 arg[1], GET_MODE (arg[1]),
3681 arg[2], GET_MODE (arg[2]));
3682
3683 if (ret_slot)
3684 emit_move_insn (operands[0], ret_slot);
3685 }
3686 else
3687 {
3688 rtx ret;
3689
3690 gcc_assert (nargs == 2);
3691
3692 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3693 GET_MODE (operands[0]),
3694 arg[1], GET_MODE (arg[1]));
3695
3696 if (ret != operands[0])
3697 emit_move_insn (operands[0], ret);
3698 }
3699 }
3700
3701 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3702
3703 static void
3704 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3705 {
3706 const char *func;
3707
3708 switch (code)
3709 {
3710 case PLUS:
3711 func = "_Qp_add";
3712 break;
3713 case MINUS:
3714 func = "_Qp_sub";
3715 break;
3716 case MULT:
3717 func = "_Qp_mul";
3718 break;
3719 case DIV:
3720 func = "_Qp_div";
3721 break;
3722 default:
3723 gcc_unreachable ();
3724 }
3725
3726 emit_soft_tfmode_libcall (func, 3, operands);
3727 }
3728
3729 static void
3730 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3731 {
3732 const char *func;
3733
3734 gcc_assert (code == SQRT);
3735 func = "_Qp_sqrt";
3736
3737 emit_soft_tfmode_libcall (func, 2, operands);
3738 }
3739
3740 static void
3741 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3742 {
3743 const char *func;
3744
3745 switch (code)
3746 {
3747 case FLOAT_EXTEND:
3748 switch (GET_MODE (operands[1]))
3749 {
3750 case E_SFmode:
3751 func = "_Qp_stoq";
3752 break;
3753 case E_DFmode:
3754 func = "_Qp_dtoq";
3755 break;
3756 default:
3757 gcc_unreachable ();
3758 }
3759 break;
3760
3761 case FLOAT_TRUNCATE:
3762 switch (GET_MODE (operands[0]))
3763 {
3764 case E_SFmode:
3765 func = "_Qp_qtos";
3766 break;
3767 case E_DFmode:
3768 func = "_Qp_qtod";
3769 break;
3770 default:
3771 gcc_unreachable ();
3772 }
3773 break;
3774
3775 case FLOAT:
3776 switch (GET_MODE (operands[1]))
3777 {
3778 case E_SImode:
3779 func = "_Qp_itoq";
3780 if (TARGET_ARCH64)
3781 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3782 break;
3783 case E_DImode:
3784 func = "_Qp_xtoq";
3785 break;
3786 default:
3787 gcc_unreachable ();
3788 }
3789 break;
3790
3791 case UNSIGNED_FLOAT:
3792 switch (GET_MODE (operands[1]))
3793 {
3794 case E_SImode:
3795 func = "_Qp_uitoq";
3796 if (TARGET_ARCH64)
3797 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3798 break;
3799 case E_DImode:
3800 func = "_Qp_uxtoq";
3801 break;
3802 default:
3803 gcc_unreachable ();
3804 }
3805 break;
3806
3807 case FIX:
3808 switch (GET_MODE (operands[0]))
3809 {
3810 case E_SImode:
3811 func = "_Qp_qtoi";
3812 break;
3813 case E_DImode:
3814 func = "_Qp_qtox";
3815 break;
3816 default:
3817 gcc_unreachable ();
3818 }
3819 break;
3820
3821 case UNSIGNED_FIX:
3822 switch (GET_MODE (operands[0]))
3823 {
3824 case E_SImode:
3825 func = "_Qp_qtoui";
3826 break;
3827 case E_DImode:
3828 func = "_Qp_qtoux";
3829 break;
3830 default:
3831 gcc_unreachable ();
3832 }
3833 break;
3834
3835 default:
3836 gcc_unreachable ();
3837 }
3838
3839 emit_soft_tfmode_libcall (func, 2, operands);
3840 }
3841
3842 /* Expand a hard-float TFmode operation. All arguments must be in
3843 registers. */
3844
3845 static void
3846 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3847 {
3848 rtx op, dest;
3849
3850 if (GET_RTX_CLASS (code) == RTX_UNARY)
3851 {
3852 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3853 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3854 }
3855 else
3856 {
3857 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3858 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3859 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3860 operands[1], operands[2]);
3861 }
3862
3863 if (register_operand (operands[0], VOIDmode))
3864 dest = operands[0];
3865 else
3866 dest = gen_reg_rtx (GET_MODE (operands[0]));
3867
3868 emit_insn (gen_rtx_SET (dest, op));
3869
3870 if (dest != operands[0])
3871 emit_move_insn (operands[0], dest);
3872 }
3873
3874 void
3875 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3876 {
3877 if (TARGET_HARD_QUAD)
3878 emit_hard_tfmode_operation (code, operands);
3879 else
3880 emit_soft_tfmode_binop (code, operands);
3881 }
3882
3883 void
3884 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3885 {
3886 if (TARGET_HARD_QUAD)
3887 emit_hard_tfmode_operation (code, operands);
3888 else
3889 emit_soft_tfmode_unop (code, operands);
3890 }
3891
3892 void
3893 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3894 {
3895 if (TARGET_HARD_QUAD)
3896 emit_hard_tfmode_operation (code, operands);
3897 else
3898 emit_soft_tfmode_cvt (code, operands);
3899 }
3900 \f
3901 /* Return nonzero if a branch/jump/call instruction will be emitting a
3902 nop into its delay slot. */
3903
3904 int
3905 empty_delay_slot (rtx_insn *insn)
3906 {
3907 rtx seq;
3908
3909 /* If no previous instruction (should not happen), return true. */
3910 if (PREV_INSN (insn) == NULL)
3911 return 1;
3912
3913 seq = NEXT_INSN (PREV_INSN (insn));
3914 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3915 return 0;
3916
3917 return 1;
3918 }
3919
3920 /* Return nonzero if we should emit a nop after a cbcond instruction.
3921 The cbcond instruction does not have a delay slot; however, there is
3922 a severe performance penalty if a control transfer appears right
3923 after a cbcond. Therefore we emit a nop when we detect this
3924 situation. */
3925
3926 int
3927 emit_cbcond_nop (rtx_insn *insn)
3928 {
3929 rtx next = next_active_insn (insn);
3930
3931 if (!next)
3932 return 1;
3933
3934 if (NONJUMP_INSN_P (next)
3935 && GET_CODE (PATTERN (next)) == SEQUENCE)
3936 next = XVECEXP (PATTERN (next), 0, 0);
3937 else if (CALL_P (next)
3938 && GET_CODE (PATTERN (next)) == PARALLEL)
3939 {
3940 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3941
3942 if (GET_CODE (delay) == RETURN)
3943 {
3944 /* It's a sibling call. Do not emit the nop if we're going
3945 to emit something other than the jump itself as the first
3946 instruction of the sibcall sequence. */
3947 if (sparc_leaf_function_p || TARGET_FLAT)
3948 return 0;
3949 }
3950 }
3951
3952 if (NONJUMP_INSN_P (next))
3953 return 0;
3954
3955 return 1;
3956 }
3957
3958 /* Return nonzero if TRIAL can go into the call delay slot. */
3959
3960 int
3961 eligible_for_call_delay (rtx_insn *trial)
3962 {
3963 rtx pat;
3964
3965 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3966 return 0;
3967
3968 /* Binutils allows
3969 call __tls_get_addr, %tgd_call (foo)
3970 add %l7, %o0, %o0, %tgd_add (foo)
3971 while Sun as/ld does not. */
3972 if (TARGET_GNU_TLS || !TARGET_TLS)
3973 return 1;
3974
3975 pat = PATTERN (trial);
3976
3977 /* We must reject tgd_add{32|64}, i.e.
3978 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3979 and tldm_add{32|64}, i.e.
3980 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3981 for Sun as/ld. */
3982 if (GET_CODE (pat) == SET
3983 && GET_CODE (SET_SRC (pat)) == PLUS)
3984 {
3985 rtx unspec = XEXP (SET_SRC (pat), 1);
3986
3987 if (GET_CODE (unspec) == UNSPEC
3988 && (XINT (unspec, 1) == UNSPEC_TLSGD
3989 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3990 return 0;
3991 }
3992
3993 return 1;
3994 }
3995
3996 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3997 instruction. RETURN_P is true if the v9 variant 'return' is to be
3998 considered in the test too.
3999
4000 TRIAL must be a SET whose destination is a REG appropriate for the
4001 'restore' instruction or, if RETURN_P is true, for the 'return'
4002 instruction. */
4003
4004 static int
4005 eligible_for_restore_insn (rtx trial, bool return_p)
4006 {
4007 rtx pat = PATTERN (trial);
4008 rtx src = SET_SRC (pat);
4009 bool src_is_freg = false;
4010 rtx src_reg;
4011
4012 /* Since we can now do moves between float and integer registers when
4013 VIS3 is enabled, we have to catch this case. We can allow such
4014 moves when doing a 'return', however. */
4015 src_reg = src;
4016 if (GET_CODE (src_reg) == SUBREG)
4017 src_reg = SUBREG_REG (src_reg);
4018 if (GET_CODE (src_reg) == REG
4019 && SPARC_FP_REG_P (REGNO (src_reg)))
4020 src_is_freg = true;
4021
4022 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4023 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4024 && arith_operand (src, GET_MODE (src))
4025 && ! src_is_freg)
4026 {
4027 if (TARGET_ARCH64)
4028 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4029 else
4030 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4031 }
4032
4033 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4034 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4035 && arith_double_operand (src, GET_MODE (src))
4036 && ! src_is_freg)
4037 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4038
4039 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4040 else if (! TARGET_FPU && register_operand (src, SFmode))
4041 return 1;
4042
4043 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4044 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4045 return 1;
4046
4047 /* If we have the 'return' instruction, anything that does not use
4048 local or output registers and can go into a delay slot wins. */
4049 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4050 return 1;
4051
4052 /* The 'restore src1,src2,dest' pattern for SImode. */
4053 else if (GET_CODE (src) == PLUS
4054 && register_operand (XEXP (src, 0), SImode)
4055 && arith_operand (XEXP (src, 1), SImode))
4056 return 1;
4057
4058 /* The 'restore src1,src2,dest' pattern for DImode. */
4059 else if (GET_CODE (src) == PLUS
4060 && register_operand (XEXP (src, 0), DImode)
4061 && arith_double_operand (XEXP (src, 1), DImode))
4062 return 1;
4063
4064 /* The 'restore src1,%lo(src2),dest' pattern. */
4065 else if (GET_CODE (src) == LO_SUM
4066 && ! TARGET_CM_MEDMID
4067 && ((register_operand (XEXP (src, 0), SImode)
4068 && immediate_operand (XEXP (src, 1), SImode))
4069 || (TARGET_ARCH64
4070 && register_operand (XEXP (src, 0), DImode)
4071 && immediate_operand (XEXP (src, 1), DImode))))
4072 return 1;
4073
4074 /* The 'restore src,src,dest' pattern. */
4075 else if (GET_CODE (src) == ASHIFT
4076 && (register_operand (XEXP (src, 0), SImode)
4077 || register_operand (XEXP (src, 0), DImode))
4078 && XEXP (src, 1) == const1_rtx)
4079 return 1;
4080
4081 return 0;
4082 }
4083
4084 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4085
4086 int
4087 eligible_for_return_delay (rtx_insn *trial)
4088 {
4089 int regno;
4090 rtx pat;
4091
4092 /* If the function uses __builtin_eh_return, the eh_return machinery
4093 occupies the delay slot. */
4094 if (crtl->calls_eh_return)
4095 return 0;
4096
4097 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4098 return 0;
4099
4100 /* In the case of a leaf or flat function, anything can go into the slot. */
4101 if (sparc_leaf_function_p || TARGET_FLAT)
4102 return 1;
4103
4104 if (!NONJUMP_INSN_P (trial))
4105 return 0;
4106
4107 pat = PATTERN (trial);
4108 if (GET_CODE (pat) == PARALLEL)
4109 {
4110 int i;
4111
4112 if (! TARGET_V9)
4113 return 0;
4114 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4115 {
4116 rtx expr = XVECEXP (pat, 0, i);
4117 if (GET_CODE (expr) != SET)
4118 return 0;
4119 if (GET_CODE (SET_DEST (expr)) != REG)
4120 return 0;
4121 regno = REGNO (SET_DEST (expr));
4122 if (regno >= 8 && regno < 24)
4123 return 0;
4124 }
4125 return !epilogue_renumber (&pat, 1);
4126 }
4127
4128 if (GET_CODE (pat) != SET)
4129 return 0;
4130
4131 if (GET_CODE (SET_DEST (pat)) != REG)
4132 return 0;
4133
4134 regno = REGNO (SET_DEST (pat));
4135
4136 /* Otherwise, only operations which can be done in tandem with
4137 a `restore' or `return' insn can go into the delay slot. */
4138 if (regno >= 8 && regno < 24)
4139 return 0;
4140
4141 /* If this instruction sets up a floating-point register and we have a return
4142 instruction, it can probably go in. But restore will not work
4143 with FP_REGS. */
4144 if (! SPARC_INT_REG_P (regno))
4145 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4146
4147 return eligible_for_restore_insn (trial, true);
4148 }
4149
4150 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4151
4152 int
4153 eligible_for_sibcall_delay (rtx_insn *trial)
4154 {
4155 rtx pat;
4156
4157 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4158 return 0;
4159
4160 if (!NONJUMP_INSN_P (trial))
4161 return 0;
4162
4163 pat = PATTERN (trial);
4164
4165 if (sparc_leaf_function_p || TARGET_FLAT)
4166 {
4167 /* If the tail call is done using the call instruction,
4168 we have to restore %o7 in the delay slot. */
4169 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4170 return 0;
4171
4172 /* %g1 is used to build the function address. */
4173 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4174 return 0;
4175
4176 return 1;
4177 }
4178
4179 if (GET_CODE (pat) != SET)
4180 return 0;
4181
4182 /* Otherwise, only operations which can be done in tandem with
4183 a `restore' insn can go into the delay slot. */
4184 if (GET_CODE (SET_DEST (pat)) != REG
4185 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4186 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4187 return 0;
4188
4189 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4190 in most cases. */
4191 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4192 return 0;
4193
4194 return eligible_for_restore_insn (trial, false);
4195 }
4196 \f
4197 /* Determine if it's legal to put X into the constant pool. This
4198 is not possible if X contains the address of a symbol that is
4199 not constant (TLS) or not known at final link time (PIC). */
4200
4201 static bool
4202 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4203 {
4204 switch (GET_CODE (x))
4205 {
4206 case CONST_INT:
4207 case CONST_WIDE_INT:
4208 case CONST_DOUBLE:
4209 case CONST_VECTOR:
4210 /* Accept all non-symbolic constants. */
4211 return false;
4212
4213 case LABEL_REF:
4214 /* Labels are OK iff we are non-PIC. */
4215 return flag_pic != 0;
4216
4217 case SYMBOL_REF:
4218 /* 'Naked' TLS symbol references are never OK;
4219 non-TLS symbols are OK iff we are non-PIC. */
4220 if (SYMBOL_REF_TLS_MODEL (x))
4221 return true;
4222 else
4223 return flag_pic != 0;
4224
4225 case CONST:
4226 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4227 case PLUS:
4228 case MINUS:
4229 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4230 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4231 case UNSPEC:
4232 return true;
4233 default:
4234 gcc_unreachable ();
4235 }
4236 }
4237 \f
4238 /* Global Offset Table support. */
4239 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4240 static GTY(()) rtx got_register_rtx = NULL_RTX;
4241 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4242
4243 static GTY(()) bool got_helper_needed = false;
4244
4245 /* Return the SYMBOL_REF for the Global Offset Table. */
4246
4247 static rtx
4248 sparc_got (void)
4249 {
4250 if (!got_symbol_rtx)
4251 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4252
4253 return got_symbol_rtx;
4254 }
4255
4256 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4257
4258 static rtx
4259 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4260 {
4261 int orig_flag_pic = flag_pic;
4262 rtx insn;
4263
4264 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4265 flag_pic = 0;
4266 if (TARGET_ARCH64)
4267 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4268 else
4269 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4270 flag_pic = orig_flag_pic;
4271
4272 return insn;
4273 }
4274
4275 /* Output the load_pcrel_sym{si,di} patterns. */
4276
4277 const char *
4278 output_load_pcrel_sym (rtx *operands)
4279 {
4280 if (flag_delayed_branch)
4281 {
4282 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4283 output_asm_insn ("call\t%a2", operands);
4284 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4285 }
4286 else
4287 {
4288 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4289 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4290 output_asm_insn ("call\t%a2", operands);
4291 output_asm_insn (" nop", NULL);
4292 }
4293
4294 if (operands[2] == got_helper_rtx)
4295 got_helper_needed = true;
4296
4297 return "";
4298 }
4299
4300 #ifdef HAVE_GAS_HIDDEN
4301 # define USE_HIDDEN_LINKONCE 1
4302 #else
4303 # define USE_HIDDEN_LINKONCE 0
4304 #endif
4305
4306 /* Emit code to load the GOT register. */
4307
4308 void
4309 load_got_register (void)
4310 {
4311 rtx insn;
4312
4313 if (TARGET_VXWORKS_RTP)
4314 {
4315 if (!got_register_rtx)
4316 got_register_rtx = pic_offset_table_rtx;
4317
4318 insn = gen_vxworks_load_got ();
4319 }
4320 else
4321 {
4322 if (!got_register_rtx)
4323 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4324
4325 /* The GOT symbol is subject to a PC-relative relocation so we need a
4326 helper function to add the PC value and thus get the final value. */
4327 if (!got_helper_rtx)
4328 {
4329 char name[32];
4330
4331 /* Skip the leading '%' as that cannot be used in a symbol name. */
4332 if (USE_HIDDEN_LINKONCE)
4333 sprintf (name, "__sparc_get_pc_thunk.%s",
4334 reg_names[REGNO (got_register_rtx)] + 1);
4335 else
4336 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4337 REGNO (got_register_rtx));
4338
4339 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4340 }
4341
4342 insn
4343 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4344 }
4345
4346 emit_insn (insn);
4347 }
4348
4349 /* Ensure that we are not using patterns that are not OK with PIC. */
4350
4351 int
4352 check_pic (int i)
4353 {
4354 rtx op;
4355
4356 switch (flag_pic)
4357 {
4358 case 1:
4359 op = recog_data.operand[i];
4360 gcc_assert (GET_CODE (op) != SYMBOL_REF
4361 && (GET_CODE (op) != CONST
4362 || (GET_CODE (XEXP (op, 0)) == MINUS
4363 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4364 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4365 /* fallthrough */
4366 case 2:
4367 default:
4368 return 1;
4369 }
4370 }
4371
4372 /* Return true if X is an address which needs a temporary register when
4373 reloaded while generating PIC code. */
4374
4375 int
4376 pic_address_needs_scratch (rtx x)
4377 {
4378 /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
4379 if (GET_CODE (x) == CONST
4380 && GET_CODE (XEXP (x, 0)) == PLUS
4381 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4382 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4383 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4384 return 1;
4385
4386 return 0;
4387 }
4388
4389 /* Determine if a given RTX is a valid constant. We already know this
4390 satisfies CONSTANT_P. */
4391
4392 static bool
4393 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4394 {
4395 switch (GET_CODE (x))
4396 {
4397 case CONST:
4398 case SYMBOL_REF:
4399 if (sparc_tls_referenced_p (x))
4400 return false;
4401 break;
4402
4403 case CONST_DOUBLE:
4404 /* Floating point constants are generally not ok.
4405 The only exception is 0.0 and all-ones in VIS. */
4406 if (TARGET_VIS
4407 && SCALAR_FLOAT_MODE_P (mode)
4408 && (const_zero_operand (x, mode)
4409 || const_all_ones_operand (x, mode)))
4410 return true;
4411
4412 return false;
4413
4414 case CONST_VECTOR:
4415 /* Vector constants are generally not ok.
4416 The only exception is 0 or -1 in VIS. */
4417 if (TARGET_VIS
4418 && (const_zero_operand (x, mode)
4419 || const_all_ones_operand (x, mode)))
4420 return true;
4421
4422 return false;
4423
4424 default:
4425 break;
4426 }
4427
4428 return true;
4429 }
4430
4431 /* Determine if a given RTX is a valid constant address. */
4432
4433 bool
4434 constant_address_p (rtx x)
4435 {
4436 switch (GET_CODE (x))
4437 {
4438 case LABEL_REF:
4439 case CONST_INT:
4440 case HIGH:
4441 return true;
4442
4443 case CONST:
4444 if (flag_pic && pic_address_needs_scratch (x))
4445 return false;
4446 return sparc_legitimate_constant_p (Pmode, x);
4447
4448 case SYMBOL_REF:
4449 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4450
4451 default:
4452 return false;
4453 }
4454 }
4455
4456 /* Nonzero if the constant value X is a legitimate general operand
4457 when generating PIC code. It is given that flag_pic is on and
4458 that X satisfies CONSTANT_P. */
4459
4460 bool
4461 legitimate_pic_operand_p (rtx x)
4462 {
4463 if (pic_address_needs_scratch (x))
4464 return false;
4465 if (sparc_tls_referenced_p (x))
4466 return false;
4467 return true;
4468 }
4469
4470 /* Return true if X is a representation of the PIC register. */
4471
4472 static bool
4473 sparc_pic_register_p (rtx x)
4474 {
4475 if (!REG_P (x) || !pic_offset_table_rtx)
4476 return false;
4477
4478 if (x == pic_offset_table_rtx)
4479 return true;
4480
4481 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4482 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4483 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4484 return true;
4485
4486 return false;
4487 }
4488
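/* The offsets accepted below stay within the signed 13-bit immediate range
   [-0x1000, 0xfff], shrunk so that the last byte of the access still fits.
   The OLO10 variant is presumably tighter in order to leave headroom for
   the 10-bit %lo() part folded into the same immediate field.  */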
4489 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4490 (CONST_INT_P (X) \
4491 && INTVAL (X) >= -0x1000 \
4492 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4493
4494 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4495 (CONST_INT_P (X) \
4496 && INTVAL (X) >= -0x1000 \
4497 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4498
4499 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4500
4501 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4502 ordinarily. This changes a bit when generating PIC. */
4503
4504 static bool
4505 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4506 {
4507 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4508
4509 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4510 rs1 = addr;
4511 else if (GET_CODE (addr) == PLUS)
4512 {
4513 rs1 = XEXP (addr, 0);
4514 rs2 = XEXP (addr, 1);
4515
4516 /* Canonicalize. REG comes first, if there are no regs,
4517 LO_SUM comes first. */
4518 if (!REG_P (rs1)
4519 && GET_CODE (rs1) != SUBREG
4520 && (REG_P (rs2)
4521 || GET_CODE (rs2) == SUBREG
4522 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4523 {
4524 rs1 = XEXP (addr, 1);
4525 rs2 = XEXP (addr, 0);
4526 }
4527
4528 if ((flag_pic == 1
4529 && sparc_pic_register_p (rs1)
4530 && !REG_P (rs2)
4531 && GET_CODE (rs2) != SUBREG
4532 && GET_CODE (rs2) != LO_SUM
4533 && GET_CODE (rs2) != MEM
4534 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4535 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4536 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4537 || ((REG_P (rs1)
4538 || GET_CODE (rs1) == SUBREG)
4539 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4540 {
4541 imm1 = rs2;
4542 rs2 = NULL;
4543 }
4544 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4545 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4546 {
4547 /* We prohibit REG + REG for TFmode when there are no quad move insns
4548 and we consequently need to split. We do this because REG+REG
4549 is not an offsettable address. If we get the situation in reload
4550 where source and destination of a movtf pattern are both MEMs with
4551 REG+REG address, then only one of them gets converted to an
4552 offsettable address. */
4553 if (mode == TFmode
4554 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4555 return 0;
4556
4557 /* Likewise for TImode, but in all cases. */
4558 if (mode == TImode)
4559 return 0;
4560
4561 /* We prohibit REG + REG on ARCH32 if not optimizing for
4562 DFmode/DImode because then mem_min_alignment is likely to be zero
4563 after reload and the forced split would lack a matching splitter
4564 pattern. */
4565 if (TARGET_ARCH32 && !optimize
4566 && (mode == DFmode || mode == DImode))
4567 return 0;
4568 }
4569 else if (USE_AS_OFFSETABLE_LO10
4570 && GET_CODE (rs1) == LO_SUM
4571 && TARGET_ARCH64
4572 && ! TARGET_CM_MEDMID
4573 && RTX_OK_FOR_OLO10_P (rs2, mode))
4574 {
4575 rs2 = NULL;
4576 imm1 = XEXP (rs1, 1);
4577 rs1 = XEXP (rs1, 0);
4578 if (!CONSTANT_P (imm1)
4579 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4580 return 0;
4581 }
4582 }
4583 else if (GET_CODE (addr) == LO_SUM)
4584 {
4585 rs1 = XEXP (addr, 0);
4586 imm1 = XEXP (addr, 1);
4587
4588 if (!CONSTANT_P (imm1)
4589 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4590 return 0;
4591
4592 /* We can't allow TFmode in 32-bit mode, because an offset greater
4593 than the alignment (8) may cause the LO_SUM to overflow. */
4594 if (mode == TFmode && TARGET_ARCH32)
4595 return 0;
4596
4597 /* During reload, accept the HIGH+LO_SUM construct generated by
4598 sparc_legitimize_reload_address. */
4599 if (reload_in_progress
4600 && GET_CODE (rs1) == HIGH
4601 && XEXP (rs1, 0) == imm1)
4602 return 1;
4603 }
4604 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4605 return 1;
4606 else
4607 return 0;
4608
4609 if (GET_CODE (rs1) == SUBREG)
4610 rs1 = SUBREG_REG (rs1);
4611 if (!REG_P (rs1))
4612 return 0;
4613
4614 if (rs2)
4615 {
4616 if (GET_CODE (rs2) == SUBREG)
4617 rs2 = SUBREG_REG (rs2);
4618 if (!REG_P (rs2))
4619 return 0;
4620 }
4621
4622 if (strict)
4623 {
4624 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4625 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4626 return 0;
4627 }
4628 else
4629 {
4630 if ((! SPARC_INT_REG_P (REGNO (rs1))
4631 && REGNO (rs1) != FRAME_POINTER_REGNUM
4632 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4633 || (rs2
4634 && (! SPARC_INT_REG_P (REGNO (rs2))
4635 && REGNO (rs2) != FRAME_POINTER_REGNUM
4636 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4637 return 0;
4638 }
4639 return 1;
4640 }
4641
4642 /* Return the SYMBOL_REF for the tls_get_addr function. */
4643
4644 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4645
4646 static rtx
4647 sparc_tls_get_addr (void)
4648 {
4649 if (!sparc_tls_symbol)
4650 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4651
4652 return sparc_tls_symbol;
4653 }
4654
4655 /* Return the Global Offset Table to be used in TLS mode. */
4656
4657 static rtx
4658 sparc_tls_got (void)
4659 {
4660 /* In PIC mode, this is just the PIC offset table. */
4661 if (flag_pic)
4662 {
4663 crtl->uses_pic_offset_table = 1;
4664 return pic_offset_table_rtx;
4665 }
4666
4667 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4668 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4669 if (TARGET_SUN_TLS && TARGET_ARCH32)
4670 {
4671 load_got_register ();
4672 return got_register_rtx;
4673 }
4674
4675 /* In all other cases, we load a new pseudo with the GOT symbol. */
4676 return copy_to_reg (sparc_got ());
4677 }
4678
4679 /* Return true if X contains a thread-local symbol. */
4680
4681 static bool
4682 sparc_tls_referenced_p (rtx x)
4683 {
4684 if (!TARGET_HAVE_TLS)
4685 return false;
4686
4687 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4688 x = XEXP (XEXP (x, 0), 0);
4689
4690 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4691 return true;
4692
4693 /* That's all we handle in sparc_legitimize_tls_address for now. */
4694 return false;
4695 }
4696
4697 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4698 this (thread-local) address. */
4699
4700 static rtx
4701 sparc_legitimize_tls_address (rtx addr)
4702 {
4703 rtx temp1, temp2, temp3, ret, o0, got;
4704 rtx_insn *insn;
4705
4706 gcc_assert (can_create_pseudo_p ());
4707
4708 if (GET_CODE (addr) == SYMBOL_REF)
4709 /* Although the various sethi/or sequences generate SImode values, many of
4710 them can be transformed by the linker when relaxing and, if relaxing to
4711 local-exec, will become a sethi/xor pair, which is signed and therefore
4712 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4713 values be spilled onto the stack in 64-bit mode. */
4714 switch (SYMBOL_REF_TLS_MODEL (addr))
4715 {
4716 case TLS_MODEL_GLOBAL_DYNAMIC:
4717 start_sequence ();
4718 temp1 = gen_reg_rtx (Pmode);
4719 temp2 = gen_reg_rtx (Pmode);
4720 ret = gen_reg_rtx (Pmode);
4721 o0 = gen_rtx_REG (Pmode, 8);
4722 got = sparc_tls_got ();
4723 if (TARGET_ARCH32)
4724 {
4725 emit_insn (gen_tgd_hi22si (temp1, addr));
4726 emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4727 emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4728 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4729 addr, const1_rtx));
4730 }
4731 else
4732 {
4733 emit_insn (gen_tgd_hi22di (temp1, addr));
4734 emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4735 emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4736 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4737 addr, const1_rtx));
4738 }
4739 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4740 RTL_CONST_CALL_P (insn) = 1;
4741 insn = get_insns ();
4742 end_sequence ();
4743 emit_libcall_block (insn, ret, o0, addr);
4744 break;
4745
4746 case TLS_MODEL_LOCAL_DYNAMIC:
4747 start_sequence ();
4748 temp1 = gen_reg_rtx (Pmode);
4749 temp2 = gen_reg_rtx (Pmode);
4750 temp3 = gen_reg_rtx (Pmode);
4751 ret = gen_reg_rtx (Pmode);
4752 o0 = gen_rtx_REG (Pmode, 8);
4753 got = sparc_tls_got ();
4754 if (TARGET_ARCH32)
4755 {
4756 emit_insn (gen_tldm_hi22si (temp1));
4757 emit_insn (gen_tldm_lo10si (temp2, temp1));
4758 emit_insn (gen_tldm_addsi (o0, got, temp2));
4759 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4760 const1_rtx));
4761 }
4762 else
4763 {
4764 emit_insn (gen_tldm_hi22di (temp1));
4765 emit_insn (gen_tldm_lo10di (temp2, temp1));
4766 emit_insn (gen_tldm_adddi (o0, got, temp2));
4767 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4768 const1_rtx));
4769 }
4770 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4771 RTL_CONST_CALL_P (insn) = 1;
4772 insn = get_insns ();
4773 end_sequence ();
4774 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4775 share the LD_BASE result with other LD model accesses. */
4776 emit_libcall_block (insn, temp3, o0,
4777 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4778 UNSPEC_TLSLD_BASE));
4779 temp1 = gen_reg_rtx (Pmode);
4780 temp2 = gen_reg_rtx (Pmode);
4781 if (TARGET_ARCH32)
4782 {
4783 emit_insn (gen_tldo_hix22si (temp1, addr));
4784 emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4785 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4786 }
4787 else
4788 {
4789 emit_insn (gen_tldo_hix22di (temp1, addr));
4790 emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4791 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4792 }
4793 break;
4794
4795 case TLS_MODEL_INITIAL_EXEC:
4796 temp1 = gen_reg_rtx (Pmode);
4797 temp2 = gen_reg_rtx (Pmode);
4798 temp3 = gen_reg_rtx (Pmode);
4799 got = sparc_tls_got ();
4800 if (TARGET_ARCH32)
4801 {
4802 emit_insn (gen_tie_hi22si (temp1, addr));
4803 emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4804 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4805 }
4806 else
4807 {
4808 emit_insn (gen_tie_hi22di (temp1, addr));
4809 emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4810 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4811 }
4812 if (TARGET_SUN_TLS)
4813 {
4814 ret = gen_reg_rtx (Pmode);
4815 if (TARGET_ARCH32)
4816 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4817 temp3, addr));
4818 else
4819 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4820 temp3, addr));
4821 }
4822 else
4823 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4824 break;
4825
4826 case TLS_MODEL_LOCAL_EXEC:
4827 temp1 = gen_reg_rtx (Pmode);
4828 temp2 = gen_reg_rtx (Pmode);
4829 if (TARGET_ARCH32)
4830 {
4831 emit_insn (gen_tle_hix22si (temp1, addr));
4832 emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4833 }
4834 else
4835 {
4836 emit_insn (gen_tle_hix22di (temp1, addr));
4837 emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4838 }
4839 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4840 break;
4841
4842 default:
4843 gcc_unreachable ();
4844 }
4845
4846 else if (GET_CODE (addr) == CONST)
4847 {
4848 rtx base, offset;
4849
4850 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4851
4852 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4853 offset = XEXP (XEXP (addr, 0), 1);
4854
4855 base = force_operand (base, NULL_RTX);
4856 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4857 offset = force_reg (Pmode, offset);
4858 ret = gen_rtx_PLUS (Pmode, base, offset);
4859 }
4860
4861 else
4862 gcc_unreachable (); /* for now ... */
4863
4864 return ret;
4865 }
4866
4867 /* Legitimize PIC addresses. If the address is already position-independent,
4868 we return ORIG. Newly generated position-independent addresses go into a
4869 reg. This is REG if nonzero, otherwise we allocate register(s) as
4870 necessary. */
4871
4872 static rtx
4873 sparc_legitimize_pic_address (rtx orig, rtx reg)
4874 {
4875 if (GET_CODE (orig) == SYMBOL_REF
4876 /* See the comment in sparc_expand_move. */
4877 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4878 {
4879 bool gotdata_op = false;
4880 rtx pic_ref, address;
4881 rtx_insn *insn;
4882
4883 if (!reg)
4884 {
4885 gcc_assert (can_create_pseudo_p ());
4886 reg = gen_reg_rtx (Pmode);
4887 }
4888
4889 if (flag_pic == 2)
4890 {
4891 /* If not during reload, allocate another temp reg here for loading
4892 in the address, so that these instructions can be optimized
4893 properly. */
4894 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4895
4896 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4897 won't get confused into thinking that these two instructions
4898 are loading in the true address of the symbol. If in the
4899 future a PIC rtx exists, that should be used instead. */
4900 if (TARGET_ARCH64)
4901 {
4902 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4903 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4904 }
4905 else
4906 {
4907 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4908 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4909 }
4910
4911 address = temp_reg;
4912 gotdata_op = true;
4913 }
4914 else
4915 address = orig;
4916
4917 crtl->uses_pic_offset_table = 1;
4918 if (gotdata_op)
4919 {
4920 if (TARGET_ARCH64)
4921 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4922 pic_offset_table_rtx,
4923 address, orig));
4924 else
4925 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4926 pic_offset_table_rtx,
4927 address, orig));
4928 }
4929 else
4930 {
4931 pic_ref
4932 = gen_const_mem (Pmode,
4933 gen_rtx_PLUS (Pmode,
4934 pic_offset_table_rtx, address));
4935 insn = emit_move_insn (reg, pic_ref);
4936 }
4937
4938 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4939 by loop. */
4940 set_unique_reg_note (insn, REG_EQUAL, orig);
4941 return reg;
4942 }
4943 else if (GET_CODE (orig) == CONST)
4944 {
4945 rtx base, offset;
4946
4947 if (GET_CODE (XEXP (orig, 0)) == PLUS
4948 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4949 return orig;
4950
4951 if (!reg)
4952 {
4953 gcc_assert (can_create_pseudo_p ());
4954 reg = gen_reg_rtx (Pmode);
4955 }
4956
4957 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4958 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4959 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4960 base == reg ? NULL_RTX : reg);
4961
4962 if (GET_CODE (offset) == CONST_INT)
4963 {
4964 if (SMALL_INT (offset))
4965 return plus_constant (Pmode, base, INTVAL (offset));
4966 else if (can_create_pseudo_p ())
4967 offset = force_reg (Pmode, offset);
4968 else
4969 /* If we reach here, then something is seriously wrong. */
4970 gcc_unreachable ();
4971 }
4972 return gen_rtx_PLUS (Pmode, base, offset);
4973 }
4974 else if (GET_CODE (orig) == LABEL_REF)
4975 /* ??? We ought to be checking that the register is live instead, in case
4976 it is eliminated. */
4977 crtl->uses_pic_offset_table = 1;
4978
4979 return orig;
4980 }
4981
4982 /* Try machine-dependent ways of modifying an illegitimate address X
4983 to be legitimate. If we find one, return the new, valid address.
4984
4985 OLDX is the address as it was before break_out_memory_refs was called.
4986 In some cases it is useful to look at this to decide what needs to be done.
4987
4988 MODE is the mode of the operand pointed to by X.
4989
4990 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4991
4992 static rtx
4993 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4994 machine_mode mode)
4995 {
4996 rtx orig_x = x;
4997
4998 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4999 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5000 force_operand (XEXP (x, 0), NULL_RTX));
5001 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
5002 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5003 force_operand (XEXP (x, 1), NULL_RTX));
5004 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
5005 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
5006 XEXP (x, 1));
5007 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
5008 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5009 force_operand (XEXP (x, 1), NULL_RTX));
5010
5011 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
5012 return x;
5013
5014 if (sparc_tls_referenced_p (x))
5015 x = sparc_legitimize_tls_address (x);
5016 else if (flag_pic)
5017 x = sparc_legitimize_pic_address (x, NULL_RTX);
5018 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
5019 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5020 copy_to_mode_reg (Pmode, XEXP (x, 1)));
5021 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
5022 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5023 copy_to_mode_reg (Pmode, XEXP (x, 0)));
5024 else if (GET_CODE (x) == SYMBOL_REF
5025 || GET_CODE (x) == CONST
5026 || GET_CODE (x) == LABEL_REF)
5027 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5028
5029 return x;
5030 }
5031
5032 /* Delegitimize an address that was legitimized by the above function. */
5033
5034 static rtx
5035 sparc_delegitimize_address (rtx x)
5036 {
5037 x = delegitimize_mem_from_attrs (x);
5038
5039 if (GET_CODE (x) == LO_SUM)
5040 x = XEXP (x, 1);
5041
5042 if (GET_CODE (x) == UNSPEC)
5043 switch (XINT (x, 1))
5044 {
5045 case UNSPEC_MOVE_PIC:
5046 case UNSPEC_TLSLE:
5047 x = XVECEXP (x, 0, 0);
5048 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5049 break;
5050 case UNSPEC_MOVE_GOTDATA:
5051 x = XVECEXP (x, 0, 2);
5052 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5053 break;
5054 default:
5055 break;
5056 }
5057
5058 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5059 if (GET_CODE (x) == MINUS
5060 && (XEXP (x, 0) == got_register_rtx
5061 || sparc_pic_register_p (XEXP (x, 0))))
5062 {
5063 rtx y = XEXP (x, 1);
5064
5065 if (GET_CODE (y) == LO_SUM)
5066 y = XEXP (y, 1);
5067
5068 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5069 {
5070 x = XVECEXP (y, 0, 0);
5071 gcc_assert (GET_CODE (x) == LABEL_REF
5072 || (GET_CODE (x) == CONST
5073 && GET_CODE (XEXP (x, 0)) == PLUS
5074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5075 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5076 }
5077 }
5078
5079 return x;
5080 }
5081
5082 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5083 replace the input X, or the original X if no replacement is called for.
5084 The output parameter *WIN is 1 if the calling macro should goto WIN,
5085 0 if it should not.
5086
5087 For SPARC, we wish to handle addresses by splitting them into
5088 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5089 This cuts the number of extra insns by one.
5090
5091 Do nothing when generating PIC code and the address is a symbolic
5092 operand or requires a scratch register. */
5093
5094 rtx
5095 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5096 int opnum, int type,
5097 int ind_levels ATTRIBUTE_UNUSED, int *win)
5098 {
5099 /* Decompose SImode constants into HIGH+LO_SUM. */
5100 if (CONSTANT_P (x)
5101 && (mode != TFmode || TARGET_ARCH64)
5102 && GET_MODE (x) == SImode
5103 && GET_CODE (x) != LO_SUM
5104 && GET_CODE (x) != HIGH
5105 && sparc_code_model <= CM_MEDLOW
5106 && !(flag_pic
5107 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5108 {
5109 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5110 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5111 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5112 opnum, (enum reload_type)type);
5113 *win = 1;
5114 return x;
5115 }
5116
5117 /* We have to recognize what we have already generated above. */
5118 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5119 {
5120 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5121 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5122 opnum, (enum reload_type)type);
5123 *win = 1;
5124 return x;
5125 }
5126
5127 *win = 0;
5128 return x;
5129 }
5130
5131 /* Return true if ADDR (a legitimate address expression)
5132 has an effect that depends on the machine mode it is used for.
5133
5134 In PIC mode,
5135
5136 (mem:HI [%l7+a])
5137
5138 is not equivalent to
5139
5140 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5141
5142 because [%l7+a+1] is interpreted as the address of (a+1). */
5143
5144
5145 static bool
5146 sparc_mode_dependent_address_p (const_rtx addr,
5147 addr_space_t as ATTRIBUTE_UNUSED)
5148 {
5149 if (GET_CODE (addr) == PLUS
5150 && sparc_pic_register_p (XEXP (addr, 0))
5151 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5152 return true;
5153
5154 return false;
5155 }
5156
5157 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5158 address of the call target. */
5159
5160 void
5161 sparc_emit_call_insn (rtx pat, rtx addr)
5162 {
5163 rtx_insn *insn;
5164
5165 insn = emit_call_insn (pat);
5166
5167 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5168 if (TARGET_VXWORKS_RTP
5169 && flag_pic
5170 && GET_CODE (addr) == SYMBOL_REF
5171 && (SYMBOL_REF_DECL (addr)
5172 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5173 : !SYMBOL_REF_LOCAL_P (addr)))
5174 {
5175 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5176 crtl->uses_pic_offset_table = 1;
5177 }
5178 }
5179 \f
5180 /* Return 1 if MEM is a memory reference which is known to be aligned
5181 to at least a DESIRED byte boundary. */
5182
5183 int
5184 mem_min_alignment (rtx mem, int desired)
5185 {
5186 rtx addr, base, offset;
5187
5188 /* If it's not a MEM we can't accept it. */
5189 if (GET_CODE (mem) != MEM)
5190 return 0;
5191
5192 /* Obviously... */
5193 if (!TARGET_UNALIGNED_DOUBLES
5194 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5195 return 1;
5196
5197 /* ??? The rest of the function predates MEM_ALIGN so
5198 there is probably a bit of redundancy. */
5199 addr = XEXP (mem, 0);
5200 base = offset = NULL_RTX;
5201 if (GET_CODE (addr) == PLUS)
5202 {
5203 if (GET_CODE (XEXP (addr, 0)) == REG)
5204 {
5205 base = XEXP (addr, 0);
5206
5207 /* What we are saying here is that if the base
5208 REG is aligned properly, the compiler will make
5209 sure any REG based index upon it will be so
5210 as well. */
5211 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5212 offset = XEXP (addr, 1);
5213 else
5214 offset = const0_rtx;
5215 }
5216 }
5217 else if (GET_CODE (addr) == REG)
5218 {
5219 base = addr;
5220 offset = const0_rtx;
5221 }
5222
5223 if (base != NULL_RTX)
5224 {
5225 int regno = REGNO (base);
5226
5227 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5228 {
5229 /* Check if the compiler has recorded some information
5230 about the alignment of the base REG. If reload has
5231 completed, we already matched with proper alignments.
5232 If not running global_alloc, reload might give us an
5233 unaligned pointer to the local stack, though. */
5234 if (((cfun != 0
5235 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5236 || (optimize && reload_completed))
5237 && (INTVAL (offset) & (desired - 1)) == 0)
5238 return 1;
5239 }
5240 else
5241 {
5242 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5243 return 1;
5244 }
5245 }
5246 else if (! TARGET_UNALIGNED_DOUBLES
5247 || CONSTANT_P (addr)
5248 || GET_CODE (addr) == LO_SUM)
5249 {
5250 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5251 is true, in which case we can only assume that an access is aligned if
5252 it is to a constant address, or the address involves a LO_SUM. */
5253 return 1;
5254 }
5255
5256 /* An obviously unaligned address. */
5257 return 0;
5258 }
5259
5260 \f
5261 /* Vectors to keep interesting information about registers where it can easily
5262 be accessed. We used to use the actual mode value as the bit number, but there
5263 are more than 32 modes now. Instead we use two tables: one indexed by
5264 hard register number, and one indexed by mode. */
5265
5266 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5267 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5268 mapped into one sparc_mode_class mode. */
5269
5270 enum sparc_mode_class {
5271 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5272 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5273 CC_MODE, CCFP_MODE
5274 };
5275
5276 /* Modes for single-word and smaller quantities. */
5277 #define S_MODES \
5278 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5279
5280 /* Modes for double-word and smaller quantities. */
5281 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5282
5283 /* Modes for quad-word and smaller quantities. */
5284 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5285
5286 /* Modes for 8-word and smaller quantities. */
5287 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5288
5289 /* Modes for single-float quantities. */
5290 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5291
5292 /* Modes for double-float and smaller quantities. */
5293 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5294
5295 /* Modes for quad-float and smaller quantities. */
5296 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5297
5298 /* Modes for quad-float pairs and smaller quantities. */
5299 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5300
5301 /* Modes for double-float only quantities. */
5302 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5303
5304 /* Modes for quad-float and double-float only quantities. */
5305 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5306
5307 /* Modes for quad-float pairs and double-float only quantities. */
5308 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5309
5310 /* Modes for condition codes. */
5311 #define CC_MODES (1 << (int) CC_MODE)
5312 #define CCFP_MODES (1 << (int) CCFP_MODE)
5313
5314 /* Value is 1 if register/mode pair is acceptable on sparc.
5315
5316 The funny mixture of D and T modes is because integer operations
5317 do not specially operate on tetra quantities, so non-quad-aligned
5318 registers can hold quadword quantities (except %o4 and %i4 because
5319 they cross fixed registers).
5320
5321 ??? Note that, despite the settings, non-double-aligned parameter
5322 registers can hold double-word quantities in 32-bit mode. */
5323
5324 /* This points to either the 32-bit or the 64-bit version. */
5325 static const int *hard_regno_mode_classes;
5326
5327 static const int hard_32bit_mode_classes[] = {
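  /* %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7 (integer registers 0 to 31). */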
5328 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5329 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5330 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5331 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5332
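  /* FP regs f0 to f31. */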
5333 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5334 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5335 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5336 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5337
5338 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5339 and none can hold SFmode/SImode values. */
5340 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5341 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5342 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5343 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5344
5345 /* %fcc[0123] */
5346 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5347
5348 /* %icc, %sfp, %gsr */
5349 CC_MODES, 0, D_MODES
5350 };
5351
5352 static const int hard_64bit_mode_classes[] = {
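  /* %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7 (integer registers 0 to 31). */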
5353 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5354 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5355 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5356 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5357
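  /* FP regs f0 to f31. */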
5358 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5359 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5360 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5361 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5362
5363 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5364 and none can hold SFmode/SImode values. */
5365 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5366 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5367 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5368 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5369
5370 /* %fcc[0123] */
5371 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5372
5373 /* %icc, %sfp, %gsr */
5374 CC_MODES, 0, D_MODES
5375 };
5376
5377 static int sparc_mode_class [NUM_MACHINE_MODES];
5378
5379 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5380
5381 static void
5382 sparc_init_modes (void)
5383 {
5384 int i;
5385
5386 for (i = 0; i < NUM_MACHINE_MODES; i++)
5387 {
5388 machine_mode m = (machine_mode) i;
5389 unsigned int size = GET_MODE_SIZE (m);
5390
5391 switch (GET_MODE_CLASS (m))
5392 {
5393 case MODE_INT:
5394 case MODE_PARTIAL_INT:
5395 case MODE_COMPLEX_INT:
5396 if (size < 4)
5397 sparc_mode_class[i] = 1 << (int) H_MODE;
5398 else if (size == 4)
5399 sparc_mode_class[i] = 1 << (int) S_MODE;
5400 else if (size == 8)
5401 sparc_mode_class[i] = 1 << (int) D_MODE;
5402 else if (size == 16)
5403 sparc_mode_class[i] = 1 << (int) T_MODE;
5404 else if (size == 32)
5405 sparc_mode_class[i] = 1 << (int) O_MODE;
5406 else
5407 sparc_mode_class[i] = 0;
5408 break;
5409 case MODE_VECTOR_INT:
5410 if (size == 4)
5411 sparc_mode_class[i] = 1 << (int) SF_MODE;
5412 else if (size == 8)
5413 sparc_mode_class[i] = 1 << (int) DF_MODE;
5414 else
5415 sparc_mode_class[i] = 0;
5416 break;
5417 case MODE_FLOAT:
5418 case MODE_COMPLEX_FLOAT:
5419 if (size == 4)
5420 sparc_mode_class[i] = 1 << (int) SF_MODE;
5421 else if (size == 8)
5422 sparc_mode_class[i] = 1 << (int) DF_MODE;
5423 else if (size == 16)
5424 sparc_mode_class[i] = 1 << (int) TF_MODE;
5425 else if (size == 32)
5426 sparc_mode_class[i] = 1 << (int) OF_MODE;
5427 else
5428 sparc_mode_class[i] = 0;
5429 break;
5430 case MODE_CC:
5431 if (m == CCFPmode || m == CCFPEmode)
5432 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5433 else
5434 sparc_mode_class[i] = 1 << (int) CC_MODE;
5435 break;
5436 default:
5437 sparc_mode_class[i] = 0;
5438 break;
5439 }
5440 }
5441
5442 if (TARGET_ARCH64)
5443 hard_regno_mode_classes = hard_64bit_mode_classes;
5444 else
5445 hard_regno_mode_classes = hard_32bit_mode_classes;
5446
5447 /* Initialize the array used by REGNO_REG_CLASS. */
5448 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5449 {
5450 if (i < 16 && TARGET_V8PLUS)
5451 sparc_regno_reg_class[i] = I64_REGS;
5452 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5453 sparc_regno_reg_class[i] = GENERAL_REGS;
5454 else if (i < 64)
5455 sparc_regno_reg_class[i] = FP_REGS;
5456 else if (i < 96)
5457 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5458 else if (i < 100)
5459 sparc_regno_reg_class[i] = FPCC_REGS;
5460 else
5461 sparc_regno_reg_class[i] = NO_REGS;
5462 }
5463 }
5464 \f
5465 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5466
5467 static inline bool
5468 save_global_or_fp_reg_p (unsigned int regno,
5469 int leaf_function ATTRIBUTE_UNUSED)
5470 {
5471 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5472 }
5473
5474 /* Return whether the return address register (%i7) is needed. */
5475
5476 static inline bool
5477 return_addr_reg_needed_p (int leaf_function)
5478 {
5479 /* If it is live, for example because of __builtin_return_address (0). */
5480 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5481 return true;
5482
5483 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5484 if (!leaf_function
5485 /* Loading the GOT register clobbers %o7. */
5486 || crtl->uses_pic_offset_table
5487 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5488 return true;
5489
5490 return false;
5491 }
5492
5493 /* Return whether REGNO, a local or in register, must be saved/restored. */
5494
5495 static bool
5496 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5497 {
5498 /* General case: call-saved registers live at some point. */
5499 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5500 return true;
5501
5502 /* Frame pointer register (%fp) if needed. */
5503 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5504 return true;
5505
5506 /* Return address register (%i7) if needed. */
5507 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5508 return true;
5509
5510 /* GOT register (%l7) if needed. */
5511 if (got_register_rtx && regno == REGNO (got_register_rtx))
5512 return true;
5513
5514 /* If the function accesses prior frames, the frame pointer and the return
5515 address of the previous frame must be saved on the stack. */
5516 if (crtl->accesses_prior_frames
5517 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5518 return true;
5519
5520 return false;
5521 }
5522
5523 /* Compute the frame size required by the function. This function is called
5524 during the reload pass and also by sparc_expand_prologue. */
5525
5526 static HOST_WIDE_INT
5527 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5528 {
5529 HOST_WIDE_INT frame_size, apparent_frame_size;
5530 int args_size, n_global_fp_regs = 0;
5531 bool save_local_in_regs_p = false;
5532 unsigned int i;
5533
5534 /* If the function allocates dynamic stack space, the dynamic offset is
5535 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5536 if (leaf_function && !cfun->calls_alloca)
5537 args_size = 0;
5538 else
5539 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5540
5541 /* Calculate space needed for global registers. */
5542 if (TARGET_ARCH64)
5543 {
5544 for (i = 0; i < 8; i++)
5545 if (save_global_or_fp_reg_p (i, 0))
5546 n_global_fp_regs += 2;
5547 }
5548 else
5549 {
5550 for (i = 0; i < 8; i += 2)
5551 if (save_global_or_fp_reg_p (i, 0)
5552 || save_global_or_fp_reg_p (i + 1, 0))
5553 n_global_fp_regs += 2;
5554 }
5555
5556 /* In the flat window model, find out which local and in registers need to
5557 be saved. We don't reserve space in the current frame for them as they
5558 will be spilled into the register window save area of the caller's frame.
5559 However, as soon as we use this register window save area, we must create
5560 that of the current frame to make it the live one. */
5561 if (TARGET_FLAT)
5562 for (i = 16; i < 32; i++)
5563 if (save_local_or_in_reg_p (i, leaf_function))
5564 {
5565 save_local_in_regs_p = true;
5566 break;
5567 }
5568
5569 /* Calculate space needed for FP registers. */
5570 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5571 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5572 n_global_fp_regs += 2;
5573
5574 if (size == 0
5575 && n_global_fp_regs == 0
5576 && args_size == 0
5577 && !save_local_in_regs_p)
5578 frame_size = apparent_frame_size = 0;
5579 else
5580 {
5581 /* Start from the apparent frame size. */
5582 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5583
5584 /* We need to add the size of the outgoing argument area. */
5585 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5586
5587 /* And that of the register window save area. */
5588 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5589
5590 /* Finally, bump to the appropriate alignment. */
5591 frame_size = SPARC_STACK_ALIGN (frame_size);
5592 }
5593
5594 /* Set up values for use in prologue and epilogue. */
5595 sparc_frame_size = frame_size;
5596 sparc_apparent_frame_size = apparent_frame_size;
5597 sparc_n_global_fp_regs = n_global_fp_regs;
5598 sparc_save_local_in_regs_p = save_local_in_regs_p;
5599
5600 return frame_size;
5601 }
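
/* A worked example with made-up numbers: for a function where size == 40,
   n_global_fp_regs == 2 and args_size == 24, the computation above gives

     apparent_frame_size = ROUND_UP (40, 8) + 2 * 4 = 48
     frame_size = 48 + ROUND_UP (24, 8) + FIRST_PARM_OFFSET (cfun->decl)

   and frame_size is then rounded up by SPARC_STACK_ALIGN.  */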
5602
5603 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5604
5605 int
5606 sparc_initial_elimination_offset (int to)
5607 {
5608 int offset;
5609
5610 if (to == STACK_POINTER_REGNUM)
5611 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5612 else
5613 offset = 0;
5614
5615 offset += SPARC_STACK_BIAS;
5616 return offset;
5617 }
5618
5619 /* Output any necessary .register pseudo-ops. */
5620
5621 void
5622 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5623 {
5624 int i;
5625
5626 if (TARGET_ARCH32)
5627 return;
5628
5629 /* Check if %g[2367] were used without
5630 .register being printed for them already. */
5631 for (i = 2; i < 8; i++)
5632 {
5633 if (df_regs_ever_live_p (i)
5634 && ! sparc_hard_reg_printed [i])
5635 {
5636 sparc_hard_reg_printed [i] = 1;
5637 /* %g7 is used as TLS base register, use #ignore
5638 for it instead of #scratch. */
5639 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5640 i == 7 ? "ignore" : "scratch");
5641 }
5642 if (i == 3) i = 5;
5643 }
5644 }
5645
5646 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5647
5648 #if PROBE_INTERVAL > 4096
5649 #error Cannot use indexed addressing mode for stack probing
5650 #endif
5651
5652 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5653 inclusive. These are offsets from the current stack pointer.
5654
5655 Note that we don't use the REG+REG addressing mode for the probes because
5656 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5657 so the advantages of having a single code path win here. */
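
/* For example (hypothetical numbers): with PROBE_INTERVAL == 4096,
   FIRST == 16384 and SIZE == 10000, the second case below probes at
   %sp - 16384 - 4096, %sp - 16384 - 8192 and finally %sp - 16384 - 10000;
   sizes above 4 * PROBE_INTERVAL use the run-time loop instead.  */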
5658
5659 static void
5660 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5661 {
5662 rtx g1 = gen_rtx_REG (Pmode, 1);
5663
5664 /* See if we have a constant small number of probes to generate. If so,
5665 that's the easy case. */
5666 if (size <= PROBE_INTERVAL)
5667 {
5668 emit_move_insn (g1, GEN_INT (first));
5669 emit_insn (gen_rtx_SET (g1,
5670 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5671 emit_stack_probe (plus_constant (Pmode, g1, -size));
5672 }
5673
5674 /* The run-time loop is made up of 9 insns in the generic case while the
5675 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
5676 else if (size <= 4 * PROBE_INTERVAL)
5677 {
5678 HOST_WIDE_INT i;
5679
5680 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5681 emit_insn (gen_rtx_SET (g1,
5682 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5683 emit_stack_probe (g1);
5684
5685 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5686 it exceeds SIZE. If only two probes are needed, this will not
5687 generate any code. Then probe at FIRST + SIZE. */
5688 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5689 {
5690 emit_insn (gen_rtx_SET (g1,
5691 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5692 emit_stack_probe (g1);
5693 }
5694
5695 emit_stack_probe (plus_constant (Pmode, g1,
5696 (i - PROBE_INTERVAL) - size));
5697 }
5698
5699 /* Otherwise, do the same as above, but in a loop. Note that we must be
5700 extra careful with variables wrapping around because we might be at
5701 the very top (or the very bottom) of the address space and we have
5702 to be able to handle this case properly; in particular, we use an
5703 equality test for the loop condition. */
5704 else
5705 {
5706 HOST_WIDE_INT rounded_size;
5707 rtx g4 = gen_rtx_REG (Pmode, 4);
5708
5709 emit_move_insn (g1, GEN_INT (first));
5710
5711
5712 /* Step 1: round SIZE to the previous multiple of the interval. */
5713
5714 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5715 emit_move_insn (g4, GEN_INT (rounded_size));
5716
5717
5718 /* Step 2: compute initial and final value of the loop counter. */
5719
5720 /* TEST_ADDR = SP + FIRST. */
5721 emit_insn (gen_rtx_SET (g1,
5722 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5723
5724 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5725 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5726
5727
5728 /* Step 3: the loop
5729
5730 while (TEST_ADDR != LAST_ADDR)
5731 {
5732 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5733 probe at TEST_ADDR
5734 }
5735
5736 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5737 until it is equal to ROUNDED_SIZE. */
5738
5739 if (TARGET_ARCH64)
5740 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5741 else
5742 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5743
5744
5745 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5746 that SIZE is equal to ROUNDED_SIZE. */
5747
5748 if (size != rounded_size)
5749 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5750 }
5751
5752 /* Make sure nothing is scheduled before we are done. */
5753 emit_insn (gen_blockage ());
5754 }
5755
5756 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5757 absolute addresses. */
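
/* A sketch of the emitted sequence, assuming PROBE_INTERVAL == 4096 and the
   %g1/%g4 registers passed down by sparc_emit_probe_stack_range:

	.LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne	.LPSRL0
	 st	%g0, [%g1+<stack bias>]  */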
5758
5759 const char *
5760 output_probe_stack_range (rtx reg1, rtx reg2)
5761 {
5762 static int labelno = 0;
5763 char loop_lab[32];
5764 rtx xops[2];
5765
5766 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5767
5768 /* Loop. */
5769 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5770
5771 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5772 xops[0] = reg1;
5773 xops[1] = GEN_INT (-PROBE_INTERVAL);
5774 output_asm_insn ("add\t%0, %1, %0", xops);
5775
5776 /* Test if TEST_ADDR == LAST_ADDR. */
5777 xops[1] = reg2;
5778 output_asm_insn ("cmp\t%0, %1", xops);
5779
5780 /* Probe at TEST_ADDR and branch. */
5781 if (TARGET_ARCH64)
5782 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5783 else
5784 fputs ("\tbne\t", asm_out_file);
5785 assemble_name_raw (asm_out_file, loop_lab);
5786 fputc ('\n', asm_out_file);
5787 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5788 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5789
5790 return "";
5791 }
5792
5793 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5794 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5795 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5796 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5797 the action to be performed if it returns false. Return the new offset. */
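/* Note that registers are processed in aligned pairs: when both registers of
   an even/odd pair must be handled, a single DImode or DFmode access covers
   the pair; when only one of them must be handled, an SImode or SFmode access
   is used and the offset is kept double-word aligned afterwards.  */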
5798
5799 typedef bool (*sorr_pred_t) (unsigned int, int);
5800 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5801
5802 static int
5803 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5804 int offset, int leaf_function, sorr_pred_t save_p,
5805 sorr_act_t action_true, sorr_act_t action_false)
5806 {
5807 unsigned int i;
5808 rtx mem;
5809 rtx_insn *insn;
5810
5811 if (TARGET_ARCH64 && high <= 32)
5812 {
5813 int fp_offset = -1;
5814
5815 for (i = low; i < high; i++)
5816 {
5817 if (save_p (i, leaf_function))
5818 {
5819 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5820 base, offset));
5821 if (action_true == SORR_SAVE)
5822 {
5823 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5824 RTX_FRAME_RELATED_P (insn) = 1;
5825 }
5826 else /* action_true == SORR_RESTORE */
5827 {
5828 /* The frame pointer must be restored last since its old
5829 value may be used as base address for the frame. This
5830 is problematic in 64-bit mode only because of the lack
5831 of a double-word load instruction. */
5832 if (i == HARD_FRAME_POINTER_REGNUM)
5833 fp_offset = offset;
5834 else
5835 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5836 }
5837 offset += 8;
5838 }
5839 else if (action_false == SORR_ADVANCE)
5840 offset += 8;
5841 }
5842
5843 if (fp_offset >= 0)
5844 {
5845 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5846 emit_move_insn (hard_frame_pointer_rtx, mem);
5847 }
5848 }
5849 else
5850 {
5851 for (i = low; i < high; i += 2)
5852 {
5853 bool reg0 = save_p (i, leaf_function);
5854 bool reg1 = save_p (i + 1, leaf_function);
5855 machine_mode mode;
5856 int regno;
5857
5858 if (reg0 && reg1)
5859 {
5860 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5861 regno = i;
5862 }
5863 else if (reg0)
5864 {
5865 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5866 regno = i;
5867 }
5868 else if (reg1)
5869 {
5870 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5871 regno = i + 1;
5872 offset += 4;
5873 }
5874 else
5875 {
5876 if (action_false == SORR_ADVANCE)
5877 offset += 8;
5878 continue;
5879 }
5880
5881 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5882 if (action_true == SORR_SAVE)
5883 {
5884 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5885 RTX_FRAME_RELATED_P (insn) = 1;
5886 if (mode == DImode)
5887 {
5888 rtx set1, set2;
5889 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5890 offset));
5891 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5892 RTX_FRAME_RELATED_P (set1) = 1;
5893 mem
5894 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5895 offset + 4));
5896 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5897 RTX_FRAME_RELATED_P (set2) = 1;
5898 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5899 gen_rtx_PARALLEL (VOIDmode,
5900 gen_rtvec (2, set1, set2)));
5901 }
5902 }
5903 else /* action_true == SORR_RESTORE */
5904 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5905
5906 /* Bump and round down to double word
5907 in case we already bumped by 4. */
5908 offset = ROUND_DOWN (offset + 8, 8);
5909 }
5910 }
5911
5912 return offset;
5913 }
5914
5915 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5916
5917 static rtx
5918 emit_adjust_base_to_offset (rtx base, int offset)
5919 {
5920 /* ??? This might be optimized a little as %g1 might already have a
5921 value close enough that a single add insn will do. */
5922 /* ??? Although, all of this is probably only a temporary fix because
5923 if %g1 can hold a function result, then sparc_expand_epilogue will
5924 lose (the result will be clobbered). */
5925 rtx new_base = gen_rtx_REG (Pmode, 1);
5926 emit_move_insn (new_base, GEN_INT (offset));
5927 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5928 return new_base;
5929 }
5930
5931 /* Emit code to save/restore call-saved global and FP registers. */
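/* The -4096/4095 bounds checked below correspond to the 13-bit signed
   immediate displacement of SPARC memory instructions; larger offsets are
   first materialized into %g1 by emit_adjust_base_to_offset.  */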
5932
5933 static void
5934 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5935 {
5936 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5937 {
5938 base = emit_adjust_base_to_offset (base, offset);
5939 offset = 0;
5940 }
5941
5942 offset
5943 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5944 save_global_or_fp_reg_p, action, SORR_NONE);
5945 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5946 save_global_or_fp_reg_p, action, SORR_NONE);
5947 }
5948
5949 /* Emit code to save/restore call-saved local and in registers. */
5950
5951 static void
5952 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5953 {
5954 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5955 {
5956 base = emit_adjust_base_to_offset (base, offset);
5957 offset = 0;
5958 }
5959
5960 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5961 save_local_or_in_reg_p, action, SORR_ADVANCE);
5962 }
5963
5964 /* Emit a window_save insn. */
5965
5966 static rtx_insn *
5967 emit_window_save (rtx increment)
5968 {
5969 rtx_insn *insn = emit_insn (gen_window_save (increment));
5970 RTX_FRAME_RELATED_P (insn) = 1;
5971
5972 /* The incoming return address (%o7) is saved in %i7. */
5973 add_reg_note (insn, REG_CFA_REGISTER,
5974 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5975 gen_rtx_REG (Pmode,
5976 INCOMING_RETURN_ADDR_REGNUM)));
5977
5978 /* The window save event. */
5979 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5980
5981 /* The CFA is %fp, the hard frame pointer. */
5982 add_reg_note (insn, REG_CFA_DEF_CFA,
5983 plus_constant (Pmode, hard_frame_pointer_rtx,
5984 INCOMING_FRAME_SP_OFFSET));
5985
5986 return insn;
5987 }
5988
5989 /* Generate an increment for the stack pointer. */
5990
5991 static rtx
5992 gen_stack_pointer_inc (rtx increment)
5993 {
5994 return gen_rtx_SET (stack_pointer_rtx,
5995 gen_rtx_PLUS (Pmode,
5996 stack_pointer_rtx,
5997 increment));
5998 }
5999
6000 /* Expand the function prologue. The prologue is responsible for reserving
6001 storage for the frame, saving the call-saved registers and loading the
6002 GOT register if needed. */
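
/* The frame allocation below distinguishes three cases: a frame of at most
   4096 bytes fits the 13-bit signed immediate of a single stack-pointer
   adjustment (e.g. "add %sp, -4096, %sp"), a frame of at most 8192 bytes is
   split into two adjustments, and anything larger is first loaded into the
   scratch register %g1.  */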
6003
6004 void
6005 sparc_expand_prologue (void)
6006 {
6007 HOST_WIDE_INT size;
6008 rtx_insn *insn;
6009
6010 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
6011 on the final value of the flag means deferring the prologue/epilogue
6012 expansion until just before the second scheduling pass, which is too
6013 late to emit multiple epilogues or return insns.
6014
6015 Of course we are making the assumption that the value of the flag
6016 will not change between now and its final value. Of the three parts
6017 of the formula, only the last one can reasonably vary. Let's take a
6018 closer look, after assuming that the first two are set to true
6019 (otherwise the last value is effectively silenced).
6020
6021 If only_leaf_regs_used returns false, the global predicate will also
6022 be false so the actual frame size calculated below will be positive.
6023 As a consequence, the save_register_window insn will be emitted in
6024 the instruction stream; now this insn explicitly references %fp
6025 which is not a leaf register so only_leaf_regs_used will always
6026 return false subsequently.
6027
6028 If only_leaf_regs_used returns true, we hope that the subsequent
6029 optimization passes won't cause non-leaf registers to pop up. For
6030 example, the regrename pass has special provisions to not rename to
6031 non-leaf registers in a leaf function. */
6032 sparc_leaf_function_p
6033 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6034
6035 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6036
6037 if (flag_stack_usage_info)
6038 current_function_static_stack_size = size;
6039
6040 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6041 || flag_stack_clash_protection)
6042 {
6043 if (crtl->is_leaf && !cfun->calls_alloca)
6044 {
6045 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6046 sparc_emit_probe_stack_range (get_stack_check_protect (),
6047 size - get_stack_check_protect ());
6048 }
6049 else if (size > 0)
6050 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6051 }
6052
6053 if (size == 0)
6054 ; /* do nothing. */
6055 else if (sparc_leaf_function_p)
6056 {
6057 rtx size_int_rtx = GEN_INT (-size);
6058
6059 if (size <= 4096)
6060 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6061 else if (size <= 8192)
6062 {
6063 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6064 RTX_FRAME_RELATED_P (insn) = 1;
6065
6066 /* %sp is still the CFA register. */
6067 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6068 }
6069 else
6070 {
6071 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6072 emit_move_insn (size_rtx, size_int_rtx);
6073 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6074 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6075 gen_stack_pointer_inc (size_int_rtx));
6076 }
6077
6078 RTX_FRAME_RELATED_P (insn) = 1;
6079 }
6080 else
6081 {
6082 rtx size_int_rtx = GEN_INT (-size);
6083
6084 if (size <= 4096)
6085 emit_window_save (size_int_rtx);
6086 else if (size <= 8192)
6087 {
6088 emit_window_save (GEN_INT (-4096));
6089
6090 /* %sp is not the CFA register anymore. */
6091 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6092
6093 /* Make sure no %fp-based store is issued until after the frame is
6094 established. The offset between the frame pointer and the stack
6095 pointer is calculated relative to the value of the stack pointer
6096 at the end of the function prologue, and moving instructions that
6097 access the stack via the frame pointer between the instructions
6098 that decrement the stack pointer could result in accessing the
6099 register window save area, which is volatile. */
6100 emit_insn (gen_frame_blockage ());
6101 }
6102 else
6103 {
6104 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6105 emit_move_insn (size_rtx, size_int_rtx);
6106 emit_window_save (size_rtx);
6107 }
6108 }
6109
6110 if (sparc_leaf_function_p)
6111 {
6112 sparc_frame_base_reg = stack_pointer_rtx;
6113 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6114 }
6115 else
6116 {
6117 sparc_frame_base_reg = hard_frame_pointer_rtx;
6118 sparc_frame_base_offset = SPARC_STACK_BIAS;
6119 }
6120
6121 if (sparc_n_global_fp_regs > 0)
6122 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6123 sparc_frame_base_offset
6124 - sparc_apparent_frame_size,
6125 SORR_SAVE);
6126
6127 /* Advertise that the data calculated just above are now valid. */
6128 sparc_prologue_data_valid_p = true;
6129 }
6130
6131 /* Expand the function prologue in the flat window model. The prologue is
6132 responsible for reserving storage for the frame, saving the call-saved
6133 registers and loading the GOT register if needed. */
6134
6135 void
6136 sparc_flat_expand_prologue (void)
6137 {
6138 HOST_WIDE_INT size;
6139 rtx_insn *insn;
6140
6141 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6142
6143 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6144
6145 if (flag_stack_usage_info)
6146 current_function_static_stack_size = size;
6147
6148 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6149 || flag_stack_clash_protection)
6150 {
6151 if (crtl->is_leaf && !cfun->calls_alloca)
6152 {
6153 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6154 sparc_emit_probe_stack_range (get_stack_check_protect (),
6155 size - get_stack_check_protect ());
6156 }
6157 else if (size > 0)
6158 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6159 }
6160
6161 if (sparc_save_local_in_regs_p)
6162 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6163 SORR_SAVE);
6164
6165 if (size == 0)
6166 ; /* do nothing. */
6167 else
6168 {
6169 rtx size_int_rtx, size_rtx;
6170
6171 size_rtx = size_int_rtx = GEN_INT (-size);
6172
6173 /* We establish the frame (i.e. decrement the stack pointer) first, even
6174 if we use a frame pointer, because we cannot clobber any call-saved
6175 registers, including the frame pointer, if we haven't created a new
6176 register save area, for the sake of compatibility with the ABI. */
6177 if (size <= 4096)
6178 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6179 else if (size <= 8192 && !frame_pointer_needed)
6180 {
6181 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6182 RTX_FRAME_RELATED_P (insn) = 1;
6183 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6184 }
6185 else
6186 {
6187 size_rtx = gen_rtx_REG (Pmode, 1);
6188 emit_move_insn (size_rtx, size_int_rtx);
6189 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6190 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6191 gen_stack_pointer_inc (size_int_rtx));
6192 }
6193 RTX_FRAME_RELATED_P (insn) = 1;
6194
6195 /* Ensure nothing is scheduled until after the frame is established. */
6196 emit_insn (gen_blockage ());
6197
6198 if (frame_pointer_needed)
6199 {
6200 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6201 gen_rtx_MINUS (Pmode,
6202 stack_pointer_rtx,
6203 size_rtx)));
6204 RTX_FRAME_RELATED_P (insn) = 1;
6205
6206 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6207 gen_rtx_SET (hard_frame_pointer_rtx,
6208 plus_constant (Pmode, stack_pointer_rtx,
6209 size)));
6210 }
6211
6212 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6213 {
6214 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6215 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6216
6217 insn = emit_move_insn (i7, o7);
6218 RTX_FRAME_RELATED_P (insn) = 1;
6219
6220 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6221
6222 /* Prevent this instruction from ever being considered dead,
6223 even if this function has no epilogue. */
6224 emit_use (i7);
6225 }
6226 }
6227
6228 if (frame_pointer_needed)
6229 {
6230 sparc_frame_base_reg = hard_frame_pointer_rtx;
6231 sparc_frame_base_offset = SPARC_STACK_BIAS;
6232 }
6233 else
6234 {
6235 sparc_frame_base_reg = stack_pointer_rtx;
6236 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6237 }
6238
6239 if (sparc_n_global_fp_regs > 0)
6240 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6241 sparc_frame_base_offset
6242 - sparc_apparent_frame_size,
6243 SORR_SAVE);
6244
6245 /* Advertise that the data calculated just above are now valid. */
6246 sparc_prologue_data_valid_p = true;
6247 }
6248
6249 /* This function generates the assembly code for function entry, which boils
6250 down to emitting the necessary .register directives. */
6251
6252 static void
6253 sparc_asm_function_prologue (FILE *file)
6254 {
6255 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6256 if (!TARGET_FLAT)
6257 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6258
6259 sparc_output_scratch_registers (file);
6260 }
6261
6262 /* Expand the function epilogue, either normal or part of a sibcall.
6263 We emit all the instructions except the return or the call. */
6264
6265 void
6266 sparc_expand_epilogue (bool for_eh)
6267 {
6268 HOST_WIDE_INT size = sparc_frame_size;
6269
6270 if (cfun->calls_alloca)
6271 emit_insn (gen_frame_blockage ());
6272
6273 if (sparc_n_global_fp_regs > 0)
6274 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6275 sparc_frame_base_offset
6276 - sparc_apparent_frame_size,
6277 SORR_RESTORE);
6278
6279 if (size == 0 || for_eh)
6280 ; /* do nothing. */
6281 else if (sparc_leaf_function_p)
6282 {
6283 if (size <= 4096)
6284 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6285 else if (size <= 8192)
6286 {
6287 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6288 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6289 }
6290 else
6291 {
6292 rtx reg = gen_rtx_REG (Pmode, 1);
6293 emit_move_insn (reg, GEN_INT (size));
6294 emit_insn (gen_stack_pointer_inc (reg));
6295 }
6296 }
6297 }
6298
6299 /* Expand the function epilogue in the flat window model, either normal
6300 or part of a sibcall, emitting everything except the return or the call. */
6301
6302 void
6303 sparc_flat_expand_epilogue (bool for_eh)
6304 {
6305 HOST_WIDE_INT size = sparc_frame_size;
6306
6307 if (sparc_n_global_fp_regs > 0)
6308 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6309 sparc_frame_base_offset
6310 - sparc_apparent_frame_size,
6311 SORR_RESTORE);
6312
6313 /* If we have a frame pointer, we'll need both to restore it before the
6314 frame is destroyed and to use its current value in destroying the frame.
6315 Since we don't have an atomic way to do that in the flat window model,
6316 we save the current value into a temporary register (%g1). */
6317 if (frame_pointer_needed && !for_eh)
6318 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6319
6320 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6321 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6322 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6323
6324 if (sparc_save_local_in_regs_p)
6325 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6326 sparc_frame_base_offset,
6327 SORR_RESTORE);
6328
6329 if (size == 0 || for_eh)
6330 ; /* do nothing. */
6331 else if (frame_pointer_needed)
6332 {
6333 /* Make sure the frame is destroyed after everything else is done. */
6334 emit_insn (gen_blockage ());
6335
6336 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6337 }
6338 else
6339 {
6340 /* Likewise. */
6341 emit_insn (gen_blockage ());
6342
6343 if (size <= 4096)
6344 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6345 else if (size <= 8192)
6346 {
6347 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6348 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6349 }
6350 else
6351 {
6352 rtx reg = gen_rtx_REG (Pmode, 1);
6353 emit_move_insn (reg, GEN_INT (size));
6354 emit_insn (gen_stack_pointer_inc (reg));
6355 }
6356 }
6357 }
6358
6359 /* Return true if it is appropriate to emit `return' instructions in the
6360 body of a function. */
6361
6362 bool
6363 sparc_can_use_return_insn_p (void)
6364 {
6365 return sparc_prologue_data_valid_p
6366 && sparc_n_global_fp_regs == 0
6367 && TARGET_FLAT
6368 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6369 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6370 }
6371
6372 /* This function generates the assembly code for function exit. */
6373
6374 static void
6375 sparc_asm_function_epilogue (FILE *file)
6376 {
6377 /* If the last two instructions of a function are "call foo; dslot;"
6378 the return address might point to the first instruction in the next
6379 function and we have to output a dummy nop for the sake of sane
6380 backtraces in such cases. This is pointless for sibling calls since
6381 the return address is explicitly adjusted. */
6382
6383 rtx_insn *insn = get_last_insn ();
6384
6385 rtx last_real_insn = prev_real_insn (insn);
6386 if (last_real_insn
6387 && NONJUMP_INSN_P (last_real_insn)
6388 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6389 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6390
6391 if (last_real_insn
6392 && CALL_P (last_real_insn)
6393 && !SIBLING_CALL_P (last_real_insn))
6394 fputs("\tnop\n", file);
6395
6396 sparc_output_deferred_case_vectors ();
6397 }
6398
6399 /* Output a 'restore' instruction. */
6400
6401 static void
6402 output_restore (rtx pat)
6403 {
6404 rtx operands[3];
6405
6406 if (! pat)
6407 {
6408 fputs ("\t restore\n", asm_out_file);
6409 return;
6410 }
6411
6412 gcc_assert (GET_CODE (pat) == SET);
6413
6414 operands[0] = SET_DEST (pat);
6415 pat = SET_SRC (pat);
6416
6417 switch (GET_CODE (pat))
6418 {
6419 case PLUS:
6420 operands[1] = XEXP (pat, 0);
6421 operands[2] = XEXP (pat, 1);
6422 output_asm_insn (" restore %r1, %2, %Y0", operands);
6423 break;
6424 case LO_SUM:
6425 operands[1] = XEXP (pat, 0);
6426 operands[2] = XEXP (pat, 1);
6427 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6428 break;
6429 case ASHIFT:
6430 operands[1] = XEXP (pat, 0);
6431 gcc_assert (XEXP (pat, 1) == const1_rtx);
6432 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6433 break;
6434 default:
6435 operands[1] = pat;
6436 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6437 break;
6438 }
6439 }
6440
6441 /* Output a return. */
6442
6443 const char *
6444 output_return (rtx_insn *insn)
6445 {
6446 if (crtl->calls_eh_return)
6447 {
6448 /* If the function uses __builtin_eh_return, the eh_return
6449 machinery occupies the delay slot. */
6450 gcc_assert (!final_sequence);
6451
6452 if (flag_delayed_branch)
6453 {
6454 if (!TARGET_FLAT && TARGET_V9)
6455 fputs ("\treturn\t%i7+8\n", asm_out_file);
6456 else
6457 {
6458 if (!TARGET_FLAT)
6459 fputs ("\trestore\n", asm_out_file);
6460
6461 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6462 }
6463
6464 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6465 }
6466 else
6467 {
6468 if (!TARGET_FLAT)
6469 fputs ("\trestore\n", asm_out_file);
6470
6471 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6472 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6473 }
6474 }
6475 else if (sparc_leaf_function_p || TARGET_FLAT)
6476 {
6477 /* This is a leaf or flat function so we don't have to bother restoring
6478 the register window, which frees us from dealing with the convoluted
6479 semantics of restore/return. We simply output the jump to the
6480 return address and the insn in the delay slot (if any). */
6481
6482 return "jmp\t%%o7+%)%#";
6483 }
6484 else
6485 {
6486 /* This is a regular function so we have to restore the register window.
6487 We may have a pending insn for the delay slot, which will be either
6488 combined with the 'restore' instruction or put in the delay slot of
6489 the 'return' instruction. */
6490
6491 if (final_sequence)
6492 {
6493 rtx_insn *delay;
6494 rtx pat;
6495
6496 delay = NEXT_INSN (insn);
6497 gcc_assert (delay);
6498
6499 pat = PATTERN (delay);
6500
6501 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6502 {
6503 epilogue_renumber (&pat, 0);
6504 return "return\t%%i7+%)%#";
6505 }
6506 else
6507 {
6508 output_asm_insn ("jmp\t%%i7+%)", NULL);
6509
6510 /* We're going to output the insn in the delay slot manually.
6511 Make sure to output its source location first. */
6512 PATTERN (delay) = gen_blockage ();
6513 INSN_CODE (delay) = -1;
6514 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6515 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6516
6517 output_restore (pat);
6518 }
6519 }
6520 else
6521 {
6522 /* The delay slot is empty. */
6523 if (TARGET_V9)
6524 return "return\t%%i7+%)\n\t nop";
6525 else if (flag_delayed_branch)
6526 return "jmp\t%%i7+%)\n\t restore";
6527 else
6528 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6529 }
6530 }
6531
6532 return "";
6533 }
6534
6535 /* Output a sibling call. */
6536
6537 const char *
6538 output_sibcall (rtx_insn *insn, rtx call_operand)
6539 {
6540 rtx operands[1];
6541
6542 gcc_assert (flag_delayed_branch);
6543
6544 operands[0] = call_operand;
6545
6546 if (sparc_leaf_function_p || TARGET_FLAT)
6547 {
6548 /* This is a leaf or flat function so we don't have to bother restoring
6549 the register window. We simply output the jump to the function and
6550 the insn in the delay slot (if any). */
6551
6552 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6553
6554 if (final_sequence)
6555 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6556 operands);
6557 else
6558 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6559 it into a branch if possible. */
6560 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6561 operands);
6562 }
6563 else
6564 {
6565 /* This is a regular function so we have to restore the register window.
6566 We may have a pending insn for the delay slot, which will be combined
6567 with the 'restore' instruction. */
6568
6569 output_asm_insn ("call\t%a0, 0", operands);
6570
6571 if (final_sequence)
6572 {
6573 rtx_insn *delay;
6574 rtx pat;
6575
6576 delay = NEXT_INSN (insn);
6577 gcc_assert (delay);
6578
6579 pat = PATTERN (delay);
6580
6581 /* We're going to output the insn in the delay slot manually.
6582 Make sure to output its source location first. */
6583 PATTERN (delay) = gen_blockage ();
6584 INSN_CODE (delay) = -1;
6585 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6586 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6587
6588 output_restore (pat);
6589 }
6590 else
6591 output_restore (NULL_RTX);
6592 }
6593
6594 return "";
6595 }
6596 \f
6597 /* Functions for handling argument passing.
6598
6599 For 32-bit, the first 6 args are normally in registers and the rest are
6600 pushed. Any arg that starts within the first 6 words is at least
6601 partially passed in a register unless its data type forbids it.
6602
6603 For 64-bit, the argument registers are laid out as an array of 16 elements
6604 and arguments are added sequentially. The first 6 int args and up to the
6605 first 16 fp args (depending on size) are passed in regs.
6606
6607 Slot Stack Integral Float Float in structure Double Long Double
6608 ---- ----- -------- ----- ------------------ ------ -----------
6609 15 [SP+248] %f31 %f30,%f31 %d30
6610 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6611 13 [SP+232] %f27 %f26,%f27 %d26
6612 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6613 11 [SP+216] %f23 %f22,%f23 %d22
6614 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6615 9 [SP+200] %f19 %f18,%f19 %d18
6616 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6617 7 [SP+184] %f15 %f14,%f15 %d14
6618 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6619 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6620 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6621 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6622 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6623 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6624 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6625
6626 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6627
6628 Integral arguments are always passed as 64-bit quantities appropriately
6629 extended.
6630
6631 Passing of floating point values is handled as follows.
6632 If a prototype is in scope:
6633 If the value is in a named argument (i.e. not a stdarg function or a
6634 value not part of the `...') then the value is passed in the appropriate
6635 fp reg.
6636 If the value is part of the `...' and is passed in one of the first 6
6637 slots then the value is passed in the appropriate int reg.
6638 If the value is part of the `...' and is not passed in one of the first 6
6639 slots then the value is passed in memory.
6640 If a prototype is not in scope:
6641 If the value is one of the first 6 arguments the value is passed in the
6642 appropriate integer reg and the appropriate fp reg.
6643 If the value is not one of the first 6 arguments the value is passed in
6644 the appropriate fp reg and in memory.
6645
6646
6647 Summary of the calling conventions implemented by GCC on the SPARC:
6648
6649 32-bit ABI:
6650 size argument return value
6651
6652 small integer <4 int. reg. int. reg.
6653 word 4 int. reg. int. reg.
6654 double word 8 int. reg. int. reg.
6655
6656 _Complex small integer <8 int. reg. int. reg.
6657 _Complex word 8 int. reg. int. reg.
6658 _Complex double word 16 memory int. reg.
6659
6660 vector integer <=8 int. reg. FP reg.
6661 vector integer >8 memory memory
6662
6663 float 4 int. reg. FP reg.
6664 double 8 int. reg. FP reg.
6665 long double 16 memory memory
6666
6667 _Complex float 8 memory FP reg.
6668 _Complex double 16 memory FP reg.
6669 _Complex long double 32 memory FP reg.
6670
6671 vector float any memory memory
6672
6673 aggregate any memory memory
6674
6675
6676
6677 64-bit ABI:
6678 size argument return value
6679
6680 small integer <8 int. reg. int. reg.
6681 word 8 int. reg. int. reg.
6682 double word 16 int. reg. int. reg.
6683
6684 _Complex small integer <16 int. reg. int. reg.
6685 _Complex word 16 int. reg. int. reg.
6686 _Complex double word 32 memory int. reg.
6687
6688 vector integer <=16 FP reg. FP reg.
6689 vector integer 16<s<=32 memory FP reg.
6690 vector integer >32 memory memory
6691
6692 float 4 FP reg. FP reg.
6693 double 8 FP reg. FP reg.
6694 long double 16 FP reg. FP reg.
6695
6696 _Complex float 8 FP reg. FP reg.
6697 _Complex double 16 FP reg. FP reg.
6698 _Complex long double 32 memory FP reg.
6699
6700 vector float <=16 FP reg. FP reg.
6701 vector float 16<s<=32 memory FP reg.
6702 vector float >32 memory memory
6703
6704 aggregate <=16 reg. reg.
6705 aggregate 16<s<=32 memory reg.
6706 aggregate >32 memory memory
6707
6708
6709
6710 Note #1: complex floating-point types follow the extended SPARC ABIs as
6711 implemented by the Sun compiler.
6712
6713 Note #2: integer vector types follow the scalar floating-point types
6714 conventions to match what is implemented by the Sun VIS SDK.
6715
6716 Note #3: floating-point vector types follow the aggregate types
6717 conventions. */
6718
6719
6720 /* Maximum number of int regs for args. */
6721 #define SPARC_INT_ARG_MAX 6
6722 /* Maximum number of fp regs for args. */
6723 #define SPARC_FP_ARG_MAX 16
6724 /* Number of words (partially) occupied for a given size in units. */
6725 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6726
6727 /* Handle the INIT_CUMULATIVE_ARGS macro.
6728 Initialize a variable CUM of type CUMULATIVE_ARGS
6729 for a call to a function whose data type is FNTYPE.
6730 For a library call, FNTYPE is 0. */
6731
6732 void
6733 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6734 {
6735 cum->words = 0;
6736 cum->prototype_p = fntype && prototype_p (fntype);
6737 cum->libcall_p = !fntype;
6738 }
6739
6740 /* Handle promotion of pointer and integer arguments. */
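/* For example, a 'short' or 'char' argument is widened to word_mode (SImode
   for the 32-bit ABI, DImode for the 64-bit ABI) and pointers are extended
   to Pmode.  */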
6741
6742 static machine_mode
6743 sparc_promote_function_mode (const_tree type, machine_mode mode,
6744 int *punsignedp, const_tree, int)
6745 {
6746 if (type && POINTER_TYPE_P (type))
6747 {
6748 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6749 return Pmode;
6750 }
6751
6752 /* Integral arguments are passed as full words, as per the ABI. */
6753 if (GET_MODE_CLASS (mode) == MODE_INT
6754 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6755 return word_mode;
6756
6757 return mode;
6758 }
6759
6760 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6761
6762 static bool
6763 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6764 {
6765 return TARGET_ARCH64 ? true : false;
6766 }
6767
6768 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6769 Specify whether to pass the argument by reference. */
6770
6771 static bool
6772 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6773 {
6774 tree type = arg.type;
6775 machine_mode mode = arg.mode;
6776 if (TARGET_ARCH32)
6777 /* Original SPARC 32-bit ABI says that structures and unions,
6778 and quad-precision floats are passed by reference.
6779 All other base types are passed in registers.
6780
6781 Extended ABI (as implemented by the Sun compiler) says that all
6782 complex floats are passed by reference. Pass complex integers
6783 in registers up to 8 bytes. More generally, enforce the 2-word
6784 cap for passing arguments in registers.
6785
6786 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6787 vectors are passed like floats of the same size, that is in
6788 registers up to 8 bytes. Pass all vector floats by reference
6789 like structure and unions. */
6790 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6791 || mode == SCmode
6792 /* Catch CDImode, TFmode, DCmode and TCmode. */
6793 || GET_MODE_SIZE (mode) > 8
6794 || (type
6795 && VECTOR_TYPE_P (type)
6796 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6797 else
6798 /* Original SPARC 64-bit ABI says that structures and unions
6799 smaller than 16 bytes are passed in registers, as well as
6800 all other base types.
6801
6802 Extended ABI (as implemented by the Sun compiler) says that
6803 complex floats are passed in registers up to 16 bytes. Pass
6804 all complex integers in registers up to 16 bytes. More generally,
6805 enforce the 2-word cap for passing arguments in registers.
6806
6807 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6808 vectors are passed like floats of the same size, that is in
6809 registers (up to 16 bytes). Pass all vector floats like structure
6810 and unions. */
6811 return ((type
6812 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6813 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6814 /* Catch CTImode and TCmode. */
6815 || GET_MODE_SIZE (mode) > 16);
6816 }
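
/* As a concrete illustration of the rules above: in 32-bit mode 'long double'
   (TFmode), all complex floats and all aggregates are passed by reference,
   while 'long long' and 8-byte complex integers are passed in registers; in
   64-bit mode only objects larger than 16 bytes, e.g. a 24-byte structure or
   '_Complex long double', are passed by reference.  */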
6817
6818 /* Traverse the record TYPE recursively and call FUNC on its fields.
6819 NAMED is true if this is for a named parameter. DATA is passed
6820 to FUNC for each field. OFFSET is the starting position and
6821 PACKED is true if we are inside a packed record. */
6822
6823 template <typename T, void Func (const_tree, int, bool, T*)>
6824 static void
6825 traverse_record_type (const_tree type, bool named, T *data,
6826 int offset = 0, bool packed = false)
6827 {
6828 /* The ABI obviously doesn't specify how packed structures are passed.
6829 These are passed in integer regs if possible, otherwise memory. */
6830 if (!packed)
6831 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6832 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6833 {
6834 packed = true;
6835 break;
6836 }
6837
6838 /* Walk the real fields, but skip those with no size or a zero size.
6839 ??? Fields with variable offset are handled as having zero offset. */
6840 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6841 if (TREE_CODE (field) == FIELD_DECL)
6842 {
6843 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6844 continue;
6845
6846 int bitpos = offset;
6847 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6848 bitpos += int_bit_position (field);
6849
6850 tree field_type = TREE_TYPE (field);
6851 if (TREE_CODE (field_type) == RECORD_TYPE)
6852 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6853 packed);
6854 else
6855 {
6856 const bool fp_type
6857 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6858 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6859 data);
6860 }
6861 }
6862 }
6863
6864 /* Handle recursive register classifying for structure layout. */
6865
6866 typedef struct
6867 {
6868 bool fp_regs; /* true if field eligible to FP registers. */
6869 bool fp_regs_in_first_word; /* true if such field in first word. */
6870 } classify_data_t;
6871
6872 /* A subroutine of function_arg_slotno. Classify the field. */
6873
6874 inline void
6875 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6876 {
6877 if (fp)
6878 {
6879 data->fp_regs = true;
6880 if (bitpos < BITS_PER_WORD)
6881 data->fp_regs_in_first_word = true;
6882 }
6883 }
6884
6885 /* Compute the slot number to pass an argument in.
6886 Return the slot number or -1 if passing on the stack.
6887
6888 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6889 the preceding args and about the function being called.
6890 MODE is the argument's machine mode.
6891 TYPE is the data type of the argument (as a tree).
6892 This is null for libcalls where that information may
6893 not be available.
6894 NAMED is nonzero if this argument is a named parameter
6895 (otherwise it is an extra parameter matching an ellipsis).
6896 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6897 *PREGNO records the register number to use if scalar type.
6898 *PPADDING records the amount of padding needed in words. */
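/* For example, under the 64-bit ABI the third named argument of the
   hypothetical prototype 'void f (int, int, double)' lands in slot 2, so the
   code below yields slotno 2 and regno SPARC_FP_ARG_FIRST + 4 (%d4), in line
   with the argument slot table above; for a 'float' the register would be
   bumped to the odd %f5.  */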
6899
6900 static int
6901 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6902 const_tree type, bool named, bool incoming,
6903 int *pregno, int *ppadding)
6904 {
6905 const int regbase
6906 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6907 int slotno = cum->words, regno;
6908 enum mode_class mclass = GET_MODE_CLASS (mode);
6909
6910 /* Silence warnings in the callers. */
6911 *pregno = -1;
6912 *ppadding = -1;
6913
6914 if (type && TREE_ADDRESSABLE (type))
6915 return -1;
6916
6917 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6918 if (TARGET_ARCH64
6919 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6920 && (slotno & 1) != 0)
6921 {
6922 slotno++;
6923 *ppadding = 1;
6924 }
6925 else
6926 *ppadding = 0;
6927
6928 /* Vector types deserve special treatment because they are polymorphic wrt
6929 their mode, depending upon whether VIS instructions are enabled. */
6930 if (type && VECTOR_TYPE_P (type))
6931 {
6932 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6933 {
6934 /* The SPARC port defines no floating-point vector modes. */
6935 gcc_assert (mode == BLKmode);
6936 }
6937 else
6938 {
6939 /* Integer vector types should either have a vector
6940 mode or an integral mode, because we are guaranteed
6941 by pass_by_reference that their size is not greater
6942 than 16 bytes and TImode is 16-byte wide. */
6943 gcc_assert (mode != BLKmode);
6944
6945 /* Integer vectors are handled like floats as per
6946 the Sun VIS SDK. */
6947 mclass = MODE_FLOAT;
6948 }
6949 }
6950
6951 switch (mclass)
6952 {
6953 case MODE_FLOAT:
6954 case MODE_COMPLEX_FLOAT:
6955 case MODE_VECTOR_INT:
6956 if (TARGET_ARCH64 && TARGET_FPU && named)
6957 {
6958 /* If all arg slots are filled, then must pass on stack. */
6959 if (slotno >= SPARC_FP_ARG_MAX)
6960 return -1;
6961
6962 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6963 /* Arguments filling only one single FP register are
6964 right-justified in the outer double FP register. */
6965 if (GET_MODE_SIZE (mode) <= 4)
6966 regno++;
6967 break;
6968 }
6969 /* fallthrough */
6970
6971 case MODE_INT:
6972 case MODE_COMPLEX_INT:
6973 /* If all arg slots are filled, then must pass on stack. */
6974 if (slotno >= SPARC_INT_ARG_MAX)
6975 return -1;
6976
6977 regno = regbase + slotno;
6978 break;
6979
6980 case MODE_RANDOM:
6981 /* MODE is VOIDmode when generating the actual call. */
6982 if (mode == VOIDmode)
6983 return -1;
6984
6985 if (TARGET_64BIT && TARGET_FPU && named
6986 && type
6987 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6988 {
6989 /* If all arg slots are filled, then must pass on stack. */
6990 if (slotno >= SPARC_FP_ARG_MAX)
6991 return -1;
6992
6993 if (TREE_CODE (type) == RECORD_TYPE)
6994 {
6995 classify_data_t data = { false, false };
6996 traverse_record_type<classify_data_t, classify_registers>
6997 (type, named, &data);
6998
6999 if (data.fp_regs)
7000 {
7001 /* If all FP slots are filled except for the last one and
7002 there is no FP field in the first word, then must pass
7003 on stack. */
7004 if (slotno >= SPARC_FP_ARG_MAX - 1
7005 && !data.fp_regs_in_first_word)
7006 return -1;
7007 }
7008 else
7009 {
7010 /* If all int slots are filled, then must pass on stack. */
7011 if (slotno >= SPARC_INT_ARG_MAX)
7012 return -1;
7013 }
7014
7015 /* PREGNO isn't set since both int and FP regs can be used. */
7016 return slotno;
7017 }
7018
7019 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7020 }
7021 else
7022 {
7023 /* If all arg slots are filled, then must pass on stack. */
7024 if (slotno >= SPARC_INT_ARG_MAX)
7025 return -1;
7026
7027 regno = regbase + slotno;
7028 }
7029 break;
7030
7031 default:
7032 gcc_unreachable ();
7033 }
7034
7035 *pregno = regno;
7036 return slotno;
7037 }
7038
7039 /* Handle recursive register counting/assigning for structure layout. */
7040
7041 typedef struct
7042 {
7043 int slotno; /* slot number of the argument. */
7044 int regbase; /* regno of the base register. */
7045 int intoffset; /* offset of the first pending integer field. */
7046 int nregs; /* number of words passed in registers. */
7047 bool stack; /* true if part of the argument is on the stack. */
7048 rtx ret; /* return expression being built. */
7049 } assign_data_t;
7050
7051 /* A subroutine of function_arg_record_value. Compute the number of integer
7052 registers to be assigned between PARMS->intoffset and BITPOS. Return
7053 true if at least one integer register is assigned or false otherwise. */
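/* For instance, with 64-bit words a pending integer region starting at bit 0
   with BITPOS == 96 spans ROUND_UP (96, 64) / 64 == 2 words and therefore
   needs 2 integer registers, unless this would overrun SPARC_INT_ARG_MAX, in
   which case the excess is flagged as going on the stack.  */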
7054
7055 static bool
7056 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7057 {
7058 if (data->intoffset < 0)
7059 return false;
7060
7061 const int intoffset = data->intoffset;
7062 data->intoffset = -1;
7063
7064 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7065 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7066 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7067 int nregs = (endbit - startbit) / BITS_PER_WORD;
7068
7069 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7070 {
7071 nregs = SPARC_INT_ARG_MAX - this_slotno;
7072
7073 /* We need to pass this field (partly) on the stack. */
7074 data->stack = 1;
7075 }
7076
7077 if (nregs <= 0)
7078 return false;
7079
7080 *pnregs = nregs;
7081 return true;
7082 }
7083
7084 /* A subroutine of function_arg_record_value. Compute the number and the mode
7085 of the FP registers to be assigned for FIELD. Return true if at least one
7086 FP register is assigned or false otherwise. */
7087
7088 static bool
7089 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7090 int *pnregs, machine_mode *pmode)
7091 {
7092 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7093 machine_mode mode = DECL_MODE (field);
7094 int nregs, nslots;
7095
7096 /* Slots are counted as words while regs are counted as having the size of
7097 the (inner) mode. */
7098 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7099 {
7100 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7101 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7102 }
7103 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7104 {
7105 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7106 nregs = 2;
7107 }
7108 else
7109 nregs = 1;
7110
7111 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7112
7113 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7114 {
7115 nslots = SPARC_FP_ARG_MAX - this_slotno;
7116 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7117
7118 /* We need to pass this field (partly) on the stack. */
7119 data->stack = 1;
7120
7121 if (nregs <= 0)
7122 return false;
7123 }
7124
7125 *pnregs = nregs;
7126 *pmode = mode;
7127 return true;
7128 }
7129
7130 /* A subroutine of function_arg_record_value. Count the number of registers
7131 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7132
7133 inline void
7134 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7135 {
7136 if (fp)
7137 {
7138 int nregs;
7139 machine_mode mode;
7140
7141 if (compute_int_layout (bitpos, data, &nregs))
7142 data->nregs += nregs;
7143
7144 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7145 data->nregs += nregs;
7146 }
7147 else
7148 {
7149 if (data->intoffset < 0)
7150 data->intoffset = bitpos;
7151 }
7152 }
7153
7154 /* A subroutine of function_arg_record_value. Assign the bits of the
7155 structure between PARMS->intoffset and BITPOS to integer registers. */
7156
7157 static void
7158 assign_int_registers (int bitpos, assign_data_t *data)
7159 {
7160 int intoffset = data->intoffset;
7161 machine_mode mode;
7162 int nregs;
7163
7164 if (!compute_int_layout (bitpos, data, &nregs))
7165 return;
7166
7167 /* If this is the trailing part of a word, only load that much into
7168 the register. Otherwise load the whole register. Note that in
7169 the latter case we may pick up unwanted bits. It's not a problem
7170 at the moment but we may wish to revisit this. */
7171 if (intoffset % BITS_PER_WORD != 0)
7172 mode = smallest_int_mode_for_size (BITS_PER_WORD
7173 - intoffset % BITS_PER_WORD);
7174 else
7175 mode = word_mode;
7176
7177 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7178 unsigned int regno = data->regbase + this_slotno;
7179 intoffset /= BITS_PER_UNIT;
7180
7181 do
7182 {
7183 rtx reg = gen_rtx_REG (mode, regno);
7184 XVECEXP (data->ret, 0, data->stack + data->nregs)
7185 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7186 data->nregs += 1;
7187 mode = word_mode;
7188 regno += 1;
7189 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7190 }
7191 while (--nregs > 0);
7192 }
7193
7194 /* A subroutine of function_arg_record_value. Assign FIELD at position
7195 BITPOS to FP registers. */
7196
7197 static void
7198 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7199 {
7200 int nregs;
7201 machine_mode mode;
7202
7203 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7204 return;
7205
7206 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7207 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7208 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7209 regno++;
7210 int pos = bitpos / BITS_PER_UNIT;
7211
7212 do
7213 {
7214 rtx reg = gen_rtx_REG (mode, regno);
7215 XVECEXP (data->ret, 0, data->stack + data->nregs)
7216 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7217 data->nregs += 1;
7218 regno += GET_MODE_SIZE (mode) / 4;
7219 pos += GET_MODE_SIZE (mode);
7220 }
7221 while (--nregs > 0);
7222 }
7223
7224 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7225 the structure between PARMS->intoffset and BITPOS to registers. */
7226
7227 inline void
7228 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7229 {
7230 if (fp)
7231 {
7232 assign_int_registers (bitpos, data);
7233
7234 assign_fp_registers (field, bitpos, data);
7235 }
7236 else
7237 {
7238 if (data->intoffset < 0)
7239 data->intoffset = bitpos;
7240 }
7241 }
7242
7243 /* Used by function_arg and function_value to implement the complex
7244 conventions of the 64-bit ABI for passing and returning structures.
7245 Return an expression valid as a return value for the FUNCTION_ARG
7246 and TARGET_FUNCTION_VALUE.
7247
7248 TYPE is the data type of the argument (as a tree).
7249 This is null for libcalls where that information may
7250 not be available.
7251 MODE is the argument's machine mode.
7252 SLOTNO is the index number of the argument's slot in the parameter array.
7253 NAMED is true if this argument is a named parameter
7254 (otherwise it is an extra parameter matching an ellipsis).
7255 REGBASE is the regno of the base register for the parameter array. */
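/* For example, a hypothetical 'struct { long l; double d; }' passed as the
   first named argument under the 64-bit ABI yields a PARALLEL of two pieces:
   the 'long' in the first integer argument register at offset 0 and the
   'double' in %d2 at offset 8, matching the slot table above.  */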
7256
7257 static rtx
7258 function_arg_record_value (const_tree type, machine_mode mode,
7259 int slotno, bool named, int regbase)
7260 {
7261 const int size = int_size_in_bytes (type);
7262 assign_data_t data;
7263 int nregs;
7264
7265 data.slotno = slotno;
7266 data.regbase = regbase;
7267
7268 /* Count how many registers we need. */
7269 data.nregs = 0;
7270 data.intoffset = 0;
7271 data.stack = false;
7272 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7273
7274 /* Take into account pending integer fields. */
7275 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7276 data.nregs += nregs;
7277
7278 /* Allocate the vector and handle some annoying special cases. */
7279 nregs = data.nregs;
7280
7281 if (nregs == 0)
7282 {
7283 /* ??? Empty structure has no value? Duh? */
7284 if (size <= 0)
7285 {
7286 /* Though there's nothing really to store, return a word register
7287 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7288 leads to breakage due to the fact that there are zero bytes to
7289 load. */
7290 return gen_rtx_REG (mode, regbase);
7291 }
7292
7293 /* ??? C++ has structures with no fields, and yet a size. Give up
7294 for now and pass everything back in integer registers. */
7295 nregs = CEIL_NWORDS (size);
7296 if (nregs + slotno > SPARC_INT_ARG_MAX)
7297 nregs = SPARC_INT_ARG_MAX - slotno;
7298 }
7299
7300 gcc_assert (nregs > 0);
7301
7302 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7303
7304 /* If at least one field must be passed on the stack, generate
7305 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7306 also be passed on the stack. We can't do much better because the
7307 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7308 of structures for which the fields passed exclusively in registers
7309 are not at the beginning of the structure. */
7310 if (data.stack)
7311 XVECEXP (data.ret, 0, 0)
7312 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7313
7314 /* Assign the registers. */
7315 data.nregs = 0;
7316 data.intoffset = 0;
7317 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7318
7319 /* Assign pending integer fields. */
7320 assign_int_registers (size * BITS_PER_UNIT, &data);
7321
7322 gcc_assert (data.nregs == nregs);
7323
7324 return data.ret;
7325 }
7326
7327 /* Used by function_arg and function_value to implement the conventions
7328 of the 64-bit ABI for passing and returning unions.
7329 Return an expression valid as a return value for the FUNCTION_ARG
7330 and TARGET_FUNCTION_VALUE.
7331
7332 SIZE is the size in bytes of the union.
7333 MODE is the argument's machine mode.
7334 SLOTNO is the index number of the argument's slot in the parameter array.
7335 REGNO is the hard register the union will be passed in. */
7336
7337 static rtx
7338 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7339 {
7340 unsigned int nwords;
7341
7342 /* See comment in function_arg_record_value for empty structures. */
7343 if (size <= 0)
7344 return gen_rtx_REG (mode, regno);
7345
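/* If the union starts in the last argument slot, only one word can go in
   a register; the remainder is passed in memory. */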
7346 if (slotno == SPARC_INT_ARG_MAX - 1)
7347 nwords = 1;
7348 else
7349 nwords = CEIL_NWORDS (size);
7350
7351 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7352
7353 /* Unions are passed left-justified. */
7354 for (unsigned int i = 0; i < nwords; i++)
7355 XVECEXP (regs, 0, i)
7356 = gen_rtx_EXPR_LIST (VOIDmode,
7357 gen_rtx_REG (word_mode, regno + i),
7358 GEN_INT (UNITS_PER_WORD * i));
7359
7360 return regs;
7361 }
7362
7363 /* Used by function_arg and function_value to implement the conventions
7364 of the 64-bit ABI for passing and returning BLKmode vectors.
7365 Return an expression valid as a return value for the FUNCTION_ARG
7366 and TARGET_FUNCTION_VALUE.
7367
7368 SIZE is the size in bytes of the vector.
7369 SLOTNO is the index number of the argument's slot in the parameter array.
7370 NAMED is true if this argument is a named parameter
7371 (otherwise it is an extra parameter matching an ellipsis).
7372 REGNO is the hard register the vector will be passed in. */
7373
7374 static rtx
7375 function_arg_vector_value (int size, int slotno, bool named, int regno)
7376 {
7377 const int mult = (named ? 2 : 1);
7378 unsigned int nwords;
7379
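/* A named vector goes in floating-point registers, where each word-sized
   piece occupies a register pair, hence the stride of 2 below; an unnamed
   vector goes in consecutive integer registers. */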
7380 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7381 nwords = 1;
7382 else
7383 nwords = CEIL_NWORDS (size);
7384
7385 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7386
7387 if (size < UNITS_PER_WORD)
7388 XVECEXP (regs, 0, 0)
7389 = gen_rtx_EXPR_LIST (VOIDmode,
7390 gen_rtx_REG (SImode, regno),
7391 const0_rtx);
7392 else
7393 for (unsigned int i = 0; i < nwords; i++)
7394 XVECEXP (regs, 0, i)
7395 = gen_rtx_EXPR_LIST (VOIDmode,
7396 gen_rtx_REG (word_mode, regno + i * mult),
7397 GEN_INT (i * UNITS_PER_WORD));
7398
7399 return regs;
7400 }
7401
7402 /* Determine where to put an argument to a function.
7403 Value is zero to push the argument on the stack,
7404 or a hard register in which to store the argument.
7405
7406 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7407 the preceding args and about the function being called.
7408 ARG is a description of the argument.
7409 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7410 TARGET_FUNCTION_INCOMING_ARG. */
7411
7412 static rtx
7413 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7414 bool incoming)
7415 {
7416 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7417 const int regbase
7418 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7419 int slotno, regno, padding;
7420 tree type = arg.type;
7421 machine_mode mode = arg.mode;
7422 enum mode_class mclass = GET_MODE_CLASS (mode);
7423 bool named = arg.named;
7424
7425 slotno
7426 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7427 if (slotno == -1)
7428 return 0;
7429
7430 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7431 if (type && VECTOR_INTEGER_TYPE_P (type))
7432 mclass = MODE_FLOAT;
7433
7434 if (TARGET_ARCH32)
7435 return gen_rtx_REG (mode, regno);
7436
7437 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7438 and are promoted to registers if possible. */
7439 if (type && TREE_CODE (type) == RECORD_TYPE)
7440 {
7441 const int size = int_size_in_bytes (type);
7442 gcc_assert (size <= 16);
7443
7444 return function_arg_record_value (type, mode, slotno, named, regbase);
7445 }
7446
7447 /* Unions up to 16 bytes in size are passed in integer registers. */
7448 else if (type && TREE_CODE (type) == UNION_TYPE)
7449 {
7450 const int size = int_size_in_bytes (type);
7451 gcc_assert (size <= 16);
7452
7453 return function_arg_union_value (size, mode, slotno, regno);
7454 }
7455
7456 /* Floating-point vectors up to 16 bytes are passed in registers. */
7457 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7458 {
7459 const int size = int_size_in_bytes (type);
7460 gcc_assert (size <= 16);
7461
7462 return function_arg_vector_value (size, slotno, named, regno);
7463 }
7464
7465 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7466 but also have the slot allocated for them.
7467 If no prototype is in scope fp values in register slots get passed
7468 in two places, either fp regs and int regs or fp regs and memory. */
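/* For example, an unprototyped double in the first slot is roughly
   described on the caller side as (parallel [(%f0 ...) (%o0 ...)]), so
   the value is available wherever the callee happens to expect it. */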
7469 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7470 && SPARC_FP_REG_P (regno))
7471 {
7472 rtx reg = gen_rtx_REG (mode, regno);
7473 if (cum->prototype_p || cum->libcall_p)
7474 return reg;
7475 else
7476 {
7477 rtx v0, v1;
7478
7479 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7480 {
7481 int intreg;
7482
7483 /* On incoming, we don't need to know that the value
7484 is passed in %f0 and %i0, and it confuses other parts
7485 causing needless spillage even on the simplest cases. */
7486 if (incoming)
7487 return reg;
7488
7489 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7490 + (regno - SPARC_FP_ARG_FIRST) / 2);
7491
7492 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7493 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7494 const0_rtx);
7495 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7496 }
7497 else
7498 {
7499 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7500 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7501 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7502 }
7503 }
7504 }
7505
7506 /* All other aggregate types are passed in an integer register in a mode
7507 corresponding to the size of the type. */
7508 else if (type && AGGREGATE_TYPE_P (type))
7509 {
7510 const int size = int_size_in_bytes (type);
7511 gcc_assert (size <= 16);
7512
7513 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7514 }
7515
7516 return gen_rtx_REG (mode, regno);
7517 }
7518
7519 /* Handle the TARGET_FUNCTION_ARG target hook. */
7520
7521 static rtx
7522 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7523 {
7524 return sparc_function_arg_1 (cum, arg, false);
7525 }
7526
7527 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7528
7529 static rtx
7530 sparc_function_incoming_arg (cumulative_args_t cum,
7531 const function_arg_info &arg)
7532 {
7533 return sparc_function_arg_1 (cum, arg, true);
7534 }
7535
7536 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7537
7538 static unsigned int
7539 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7540 {
7541 return ((TARGET_ARCH64
7542 && (GET_MODE_ALIGNMENT (mode) == 128
7543 || (type && TYPE_ALIGN (type) == 128)))
7544 ? 128
7545 : PARM_BOUNDARY);
7546 }
7547
7548 /* For an arg passed partly in registers and partly in memory,
7549 this is the number of bytes of registers used.
7550 For args passed entirely in registers or entirely in memory, zero.
7551
7552 Any arg that starts in the first 6 regs but won't entirely fit in them
7553 needs partial registers on v8. On v9, structures with integer
7554 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7555 values that begin in the last fp reg [where "last fp reg" varies with the
7556 mode] will be split between that reg and memory. */
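/* For example, a 16-byte aggregate whose first word lands in the last
   integer slot has that word passed in %o5 and the second word in memory,
   so UNITS_PER_WORD is returned for it. */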
7557
7558 static int
7559 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7560 {
7561 int slotno, regno, padding;
7562
7563 /* We pass false for incoming here; it doesn't matter. */
7564 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7565 arg.named, false, &regno, &padding);
7566
7567 if (slotno == -1)
7568 return 0;
7569
7570 if (TARGET_ARCH32)
7571 {
7572 /* We are guaranteed by pass_by_reference that the size of the
7573 argument is not greater than 8 bytes, so we only need to return
7574 one word if the argument is partially passed in registers. */
7575 const int size = GET_MODE_SIZE (arg.mode);
7576
7577 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7578 return UNITS_PER_WORD;
7579 }
7580 else
7581 {
7582 /* We are guaranteed by pass_by_reference that the size of the
7583 argument is not greater than 16 bytes, so we only need to return
7584 one word if the argument is partially passed in registers. */
7585 if (arg.aggregate_type_p ())
7586 {
7587 const int size = int_size_in_bytes (arg.type);
7588
7589 if (size > UNITS_PER_WORD
7590 && (slotno == SPARC_INT_ARG_MAX - 1
7591 || slotno == SPARC_FP_ARG_MAX - 1))
7592 return UNITS_PER_WORD;
7593 }
7594 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7595 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7596 || (arg.type && VECTOR_TYPE_P (arg.type)))
7597 && !(TARGET_FPU && arg.named)))
7598 {
7599 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7600 ? int_size_in_bytes (arg.type)
7601 : GET_MODE_SIZE (arg.mode);
7602
7603 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7604 return UNITS_PER_WORD;
7605 }
7606 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7607 || (arg.type && VECTOR_TYPE_P (arg.type)))
7608 {
7609 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7610 ? int_size_in_bytes (arg.type)
7611 : GET_MODE_SIZE (arg.mode);
7612
7613 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7614 return UNITS_PER_WORD;
7615 }
7616 }
7617
7618 return 0;
7619 }
7620
7621 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7622 Update the data in CUM to advance over argument ARG. */
7623
7624 static void
7625 sparc_function_arg_advance (cumulative_args_t cum_v,
7626 const function_arg_info &arg)
7627 {
7628 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7629 tree type = arg.type;
7630 machine_mode mode = arg.mode;
7631 int regno, padding;
7632
7633 /* We pass false for incoming here; it doesn't matter. */
7634 function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7635
7636 /* If argument requires leading padding, add it. */
7637 cum->words += padding;
7638
7639 if (TARGET_ARCH32)
7640 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7641 else
7642 {
7643 /* For types that can have BLKmode, get the size from the type. */
7644 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7645 {
7646 const int size = int_size_in_bytes (type);
7647
7648 /* See comment in function_arg_record_value for empty structures. */
7649 if (size <= 0)
7650 cum->words++;
7651 else
7652 cum->words += CEIL_NWORDS (size);
7653 }
7654 else
7655 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7656 }
7657 }
7658
7659 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7660 are always stored left shifted in their argument slot. */
7661
7662 static pad_direction
7663 sparc_function_arg_padding (machine_mode mode, const_tree type)
7664 {
7665 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7666 return PAD_UPWARD;
7667
7668 /* Fall back to the default. */
7669 return default_function_arg_padding (mode, type);
7670 }
7671
7672 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7673 Specify whether to return the return value in memory. */
7674
7675 static bool
7676 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7677 {
7678 if (TARGET_ARCH32)
7679 /* Original SPARC 32-bit ABI says that structures and unions, and
7680 quad-precision floats are returned in memory. But note that the
7681 first part is implemented through -fpcc-struct-return being the
7682 default, so here we only implement -freg-struct-return instead.
7683 All other base types are returned in registers.
7684
7685 Extended ABI (as implemented by the Sun compiler) says that
7686 all complex floats are returned in registers (8 FP registers
7687 at most for '_Complex long double'). Return all complex integers
7688 in registers (4 at most for '_Complex long long').
7689
7690 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7691 integers are returned like floats of the same size, that is in
7692 registers up to 8 bytes and in memory otherwise. Return all
7693 vector floats in memory like structure and unions; note that
7694 they always have BLKmode like the latter. */
7695 return (TYPE_MODE (type) == BLKmode
7696 || TYPE_MODE (type) == TFmode
7697 || (TREE_CODE (type) == VECTOR_TYPE
7698 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7699 else
7700 /* Original SPARC 64-bit ABI says that structures and unions
7701 smaller than 32 bytes are returned in registers, as well as
7702 all other base types.
7703
7704 Extended ABI (as implemented by the Sun compiler) says that all
7705 complex floats are returned in registers (8 FP registers at most
7706 for '_Complex long double'). Return all complex integers in
7707 registers (4 at most for '_Complex TItype').
7708
7709 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7710 integers are returned like floats of the same size, that is in
7711 registers. Return all vector floats like structure and unions;
7712 note that they always have BLKmode like the latter. */
7713 return (TYPE_MODE (type) == BLKmode
7714 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7715 }
7716
7717 /* Handle the TARGET_STRUCT_VALUE target hook.
7718 Return where to find the structure return value address. */
7719
7720 static rtx
7721 sparc_struct_value_rtx (tree fndecl, int incoming)
7722 {
7723 if (TARGET_ARCH64)
7724 return NULL_RTX;
7725 else
7726 {
7727 rtx mem;
7728
7729 if (incoming)
7730 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7731 STRUCT_VALUE_OFFSET));
7732 else
7733 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7734 STRUCT_VALUE_OFFSET));
7735
7736 /* Only follow the SPARC ABI for fixed-size structure returns.
7737 Variable size structure returns are handled per the normal
7738 procedures in GCC. This is enabled by -mstd-struct-return. */
7739 if (incoming == 2
7740 && sparc_std_struct_return
7741 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7742 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7743 {
7744 /* We must check and adjust the return address, as it is optional
7745 as to whether the return object is really provided. */
7746 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7747 rtx scratch = gen_reg_rtx (SImode);
7748 rtx_code_label *endlab = gen_label_rtx ();
7749
7750 /* Calculate the return object size. */
7751 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7752 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7753 /* Construct a temporary return value. */
7754 rtx temp_val
7755 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7756
7757 /* Implement SPARC 32-bit psABI callee return struct checking:
7758
7759 Fetch the instruction where we will return to and see if
7760 it's an unimp instruction (the most significant 10 bits
7761 will be zero). */
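/* The immediate field of that unimp insn encodes the expected size of the
   returned object, which is why the word fetched here is compared against
   the size below. */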
7762 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7763 plus_constant (Pmode,
7764 ret_reg, 8)));
7765 /* Assume the size is valid and pre-adjust. */
7766 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7767 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7768 0, endlab);
7769 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7770 /* Write the address of the memory pointed to by temp_val into
7771 the memory pointed to by mem. */
7772 emit_move_insn (mem, XEXP (temp_val, 0));
7773 emit_label (endlab);
7774 }
7775
7776 return mem;
7777 }
7778 }
7779
7780 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7781 For v9, function return values are subject to the same rules as arguments,
7782 except that up to 32 bytes may be returned in registers. */
7783
7784 static rtx
7785 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7786 {
7787 /* Beware that the two values are swapped here wrt function_arg. */
7788 const int regbase
7789 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7790 enum mode_class mclass = GET_MODE_CLASS (mode);
7791 int regno;
7792
7793 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7794 Note that integer vectors larger than 16 bytes have BLKmode so
7795 they need to be handled like floating-point vectors below. */
7796 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7797 mclass = MODE_FLOAT;
7798
7799 if (TARGET_ARCH64 && type)
7800 {
7801 /* Structures up to 32 bytes in size are returned in registers. */
7802 if (TREE_CODE (type) == RECORD_TYPE)
7803 {
7804 const int size = int_size_in_bytes (type);
7805 gcc_assert (size <= 32);
7806
7807 return function_arg_record_value (type, mode, 0, true, regbase);
7808 }
7809
7810 /* Unions up to 32 bytes in size are returned in integer registers. */
7811 else if (TREE_CODE (type) == UNION_TYPE)
7812 {
7813 const int size = int_size_in_bytes (type);
7814 gcc_assert (size <= 32);
7815
7816 return function_arg_union_value (size, mode, 0, regbase);
7817 }
7818
7819 /* Vectors up to 32 bytes are returned in FP registers. */
7820 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7821 {
7822 const int size = int_size_in_bytes (type);
7823 gcc_assert (size <= 32);
7824
7825 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7826 }
7827
7828 /* Objects that require it are returned in FP registers. */
7829 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7830 ;
7831
7832 /* All other aggregate types are returned in an integer register in a
7833 mode corresponding to the size of the type. */
7834 else if (AGGREGATE_TYPE_P (type))
7835 {
7836 /* All other aggregate types are passed in an integer register
7837 in a mode corresponding to the size of the type. */
7838 const int size = int_size_in_bytes (type);
7839 gcc_assert (size <= 32);
7840
7841 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7842
7843 /* ??? We probably should have made the same ABI change in
7844 3.4.0 as the one we made for unions. The latter was
7845 required by the SCD though, while the former is not
7846 specified, so we favored compatibility and efficiency.
7847
7848 Now we're stuck for aggregates larger than 16 bytes,
7849 because OImode vanished in the meantime. Let's not
7850 try to be unduly clever, and simply follow the ABI
7851 for unions in that case. */
7852 if (mode == BLKmode)
7853 return function_arg_union_value (size, mode, 0, regbase);
7854 else
7855 mclass = MODE_INT;
7856 }
7857
7858 /* We should only have pointer and integer types at this point. This
7859 must match sparc_promote_function_mode. */
7860 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7861 mode = word_mode;
7862 }
7863
7864 /* We should only have pointer and integer types at this point, except with
7865 -freg-struct-return. This must match sparc_promote_function_mode. */
7866 else if (TARGET_ARCH32
7867 && !(type && AGGREGATE_TYPE_P (type))
7868 && mclass == MODE_INT
7869 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7870 mode = word_mode;
7871
7872 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7873 regno = SPARC_FP_ARG_FIRST;
7874 else
7875 regno = regbase;
7876
7877 return gen_rtx_REG (mode, regno);
7878 }
7879
7880 /* Handle TARGET_FUNCTION_VALUE.
7881 On the SPARC, the value is found in the first "output" register, but the
7882 called function leaves it in the first "input" register. */
7883
7884 static rtx
7885 sparc_function_value (const_tree valtype,
7886 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7887 bool outgoing)
7888 {
7889 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7890 }
7891
7892 /* Handle TARGET_LIBCALL_VALUE. */
7893
7894 static rtx
7895 sparc_libcall_value (machine_mode mode,
7896 const_rtx fun ATTRIBUTE_UNUSED)
7897 {
7898 return sparc_function_value_1 (NULL_TREE, mode, false);
7899 }
7900
7901 /* Handle FUNCTION_VALUE_REGNO_P.
7902 On the SPARC, the first "output" reg is used for integer values, and the
7903 first floating point register is used for floating point values. */
7904
7905 static bool
7906 sparc_function_value_regno_p (const unsigned int regno)
7907 {
7908 return (regno == 8 || (TARGET_FPU && regno == 32));
7909 }
7910
7911 /* Do what is necessary for `va_start'. We look at the current function
7912 to determine if stdarg or varargs is used and return the address of
7913 the first unnamed parameter. */
7914
7915 static rtx
7916 sparc_builtin_saveregs (void)
7917 {
7918 int first_reg = crtl->args.info.words;
7919 rtx address;
7920 int regno;
7921
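/* Flush the remaining incoming integer argument registers to their
   reserved stack slots so that the variable arguments form a contiguous
   block with the arguments already passed in memory. */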
7922 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7923 emit_move_insn (gen_rtx_MEM (word_mode,
7924 gen_rtx_PLUS (Pmode,
7925 frame_pointer_rtx,
7926 GEN_INT (FIRST_PARM_OFFSET (0)
7927 + (UNITS_PER_WORD
7928 * regno)))),
7929 gen_rtx_REG (word_mode,
7930 SPARC_INCOMING_INT_ARG_FIRST + regno));
7931
7932 address = gen_rtx_PLUS (Pmode,
7933 frame_pointer_rtx,
7934 GEN_INT (FIRST_PARM_OFFSET (0)
7935 + UNITS_PER_WORD * first_reg));
7936
7937 return address;
7938 }
7939
7940 /* Implement `va_start' for stdarg. */
7941
7942 static void
7943 sparc_va_start (tree valist, rtx nextarg)
7944 {
7945 nextarg = expand_builtin_saveregs ();
7946 std_expand_builtin_va_start (valist, nextarg);
7947 }
7948
7949 /* Implement `va_arg' for stdarg. */
7950
7951 static tree
7952 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7953 gimple_seq *post_p)
7954 {
7955 HOST_WIDE_INT size, rsize, align;
7956 tree addr, incr;
7957 bool indirect;
7958 tree ptrtype = build_pointer_type (type);
7959
7960 if (pass_va_arg_by_reference (type))
7961 {
7962 indirect = true;
7963 size = rsize = UNITS_PER_WORD;
7964 align = 0;
7965 }
7966 else
7967 {
7968 indirect = false;
7969 size = int_size_in_bytes (type);
7970 rsize = ROUND_UP (size, UNITS_PER_WORD);
7971 align = 0;
7972
7973 if (TARGET_ARCH64)
7974 {
7975 /* For SPARC64, objects requiring 16-byte alignment get it. */
7976 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7977 align = 2 * UNITS_PER_WORD;
7978
7979 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7980 are left-justified in their slots. */
7981 if (AGGREGATE_TYPE_P (type))
7982 {
7983 if (size == 0)
7984 size = rsize = UNITS_PER_WORD;
7985 else
7986 size = rsize;
7987 }
7988 }
7989 }
7990
7991 incr = valist;
7992 if (align)
7993 {
7994 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7995 incr = fold_convert (sizetype, incr);
7996 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7997 size_int (-align));
7998 incr = fold_convert (ptr_type_node, incr);
7999 }
8000
8001 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
8002 addr = incr;
8003
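/* On this big-endian target, a value narrower than its slot is stored
   right-justified; step past the padding to reach it. */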
8004 if (BYTES_BIG_ENDIAN && size < rsize)
8005 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8006
8007 if (indirect)
8008 {
8009 addr = fold_convert (build_pointer_type (ptrtype), addr);
8010 addr = build_va_arg_indirect_ref (addr);
8011 }
8012
8013 /* If the address isn't aligned properly for the type, we need a temporary.
8014 FIXME: This is inefficient; usually we can do this in registers. */
8015 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8016 {
8017 tree tmp = create_tmp_var (type, "va_arg_tmp");
8018 tree dest_addr = build_fold_addr_expr (tmp);
8019 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8020 3, dest_addr, addr, size_int (rsize));
8021 TREE_ADDRESSABLE (tmp) = 1;
8022 gimplify_and_add (copy, pre_p);
8023 addr = dest_addr;
8024 }
8025
8026 else
8027 addr = fold_convert (ptrtype, addr);
8028
8029 incr = fold_build_pointer_plus_hwi (incr, rsize);
8030 gimplify_assign (valist, incr, post_p);
8031
8032 return build_va_arg_indirect_ref (addr);
8033 }
8034 \f
8035 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8036 Specify whether the vector mode is supported by the hardware. */
8037
8038 static bool
8039 sparc_vector_mode_supported_p (machine_mode mode)
8040 {
8041 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8042 }
8043 \f
8044 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8045
8046 static machine_mode
8047 sparc_preferred_simd_mode (scalar_mode mode)
8048 {
8049 if (TARGET_VIS)
8050 switch (mode)
8051 {
8052 case E_SImode:
8053 return V2SImode;
8054 case E_HImode:
8055 return V4HImode;
8056 case E_QImode:
8057 return V8QImode;
8058
8059 default:;
8060 }
8061
8062 return word_mode;
8063 }
8064 \f
8065 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8066
8067 static bool
8068 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8069 {
8070 /* Do not fold unconditional jumps that have been created for crossing
8071 partition boundaries. */
8072 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8073 return false;
8074
8075 return true;
8076 }
8077
8078 /* Return the string to output an unconditional branch to LABEL, which is
8079 the operand number of the label.
8080
8081 DEST is the destination insn (i.e. the label), INSN is the source. */
8082
8083 const char *
8084 output_ubranch (rtx dest, rtx_insn *insn)
8085 {
8086 static char string[64];
8087 bool v9_form = false;
8088 int delta;
8089 char *p;
8090
8091 /* Even if we are trying to use cbcond for this, evaluate
8092 whether we can use V9 branches as our backup plan. */
8093 delta = 5000000;
8094 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8095 delta = (INSN_ADDRESSES (INSN_UID (dest))
8096 - INSN_ADDRESSES (INSN_UID (insn)));
8097
8098 /* Leave some instructions for "slop". */
8099 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8100 v9_form = true;
8101
8102 if (TARGET_CBCOND)
8103 {
8104 bool emit_nop = emit_cbcond_nop (insn);
8105 bool far = false;
8106 const char *rval;
8107
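/* An unconditional branch is synthesized as cwbe %g0, %g0, label, i.e.
   branch if %g0 equals itself, which is always true. */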
8108 if (delta < -500 || delta > 500)
8109 far = true;
8110
8111 if (far)
8112 {
8113 if (v9_form)
8114 rval = "ba,a,pt\t%%xcc, %l0";
8115 else
8116 rval = "b,a\t%l0";
8117 }
8118 else
8119 {
8120 if (emit_nop)
8121 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8122 else
8123 rval = "cwbe\t%%g0, %%g0, %l0";
8124 }
8125 return rval;
8126 }
8127
8128 if (v9_form)
8129 strcpy (string, "ba%*,pt\t%%xcc, ");
8130 else
8131 strcpy (string, "b%*\t");
8132
8133 p = strchr (string, '\0');
8134 *p++ = '%';
8135 *p++ = 'l';
8136 *p++ = '0';
8137 *p++ = '%';
8138 *p++ = '(';
8139 *p = '\0';
8140
8141 return string;
8142 }
8143
8144 /* Return the string to output a conditional branch to LABEL, which is
8145 the operand number of the label. OP is the conditional expression.
8146 XEXP (OP, 0) is assumed to be a condition code register (integer or
8147 floating point) and its mode specifies what kind of comparison we made.
8148
8149 DEST is the destination insn (i.e. the label), INSN is the source.
8150
8151 REVERSED is nonzero if we should reverse the sense of the comparison.
8152
8153 ANNUL is nonzero if we should generate an annulling branch. */
8154
8155 const char *
8156 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8157 rtx_insn *insn)
8158 {
8159 static char string[64];
8160 enum rtx_code code = GET_CODE (op);
8161 rtx cc_reg = XEXP (op, 0);
8162 machine_mode mode = GET_MODE (cc_reg);
8163 const char *labelno, *branch;
8164 int spaces = 8, far;
8165 char *p;
8166
8167 /* v9 branches are limited to +-1MB. If it is too far away,
8168 change
8169
8170 bne,pt %xcc, .LC30
8171
8172 to
8173
8174 be,pn %xcc, .+12
8175 nop
8176 ba .LC30
8177
8178 and
8179
8180 fbne,a,pn %fcc2, .LC29
8181
8182 to
8183
8184 fbe,pt %fcc2, .+16
8185 nop
8186 ba .LC29 */
8187
8188 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8189 if (reversed ^ far)
8190 {
8191 /* Reversal of FP compares takes care -- an ordered compare
8192 becomes an unordered compare and vice versa. */
8193 if (mode == CCFPmode || mode == CCFPEmode)
8194 code = reverse_condition_maybe_unordered (code);
8195 else
8196 code = reverse_condition (code);
8197 }
8198
8199 /* Start by writing the branch condition. */
8200 if (mode == CCFPmode || mode == CCFPEmode)
8201 {
8202 switch (code)
8203 {
8204 case NE:
8205 branch = "fbne";
8206 break;
8207 case EQ:
8208 branch = "fbe";
8209 break;
8210 case GE:
8211 branch = "fbge";
8212 break;
8213 case GT:
8214 branch = "fbg";
8215 break;
8216 case LE:
8217 branch = "fble";
8218 break;
8219 case LT:
8220 branch = "fbl";
8221 break;
8222 case UNORDERED:
8223 branch = "fbu";
8224 break;
8225 case ORDERED:
8226 branch = "fbo";
8227 break;
8228 case UNGT:
8229 branch = "fbug";
8230 break;
8231 case UNLT:
8232 branch = "fbul";
8233 break;
8234 case UNEQ:
8235 branch = "fbue";
8236 break;
8237 case UNGE:
8238 branch = "fbuge";
8239 break;
8240 case UNLE:
8241 branch = "fbule";
8242 break;
8243 case LTGT:
8244 branch = "fblg";
8245 break;
8246 default:
8247 gcc_unreachable ();
8248 }
8249
8250 /* ??? !v9: FP branches cannot be preceded by another floating point
8251 insn. Because there is currently no concept of pre-delay slots,
8252 we can fix this only by always emitting a nop before a floating
8253 point branch. */
8254
8255 string[0] = '\0';
8256 if (! TARGET_V9)
8257 strcpy (string, "nop\n\t");
8258 strcat (string, branch);
8259 }
8260 else
8261 {
8262 switch (code)
8263 {
8264 case NE:
8265 if (mode == CCVmode || mode == CCXVmode)
8266 branch = "bvs";
8267 else
8268 branch = "bne";
8269 break;
8270 case EQ:
8271 if (mode == CCVmode || mode == CCXVmode)
8272 branch = "bvc";
8273 else
8274 branch = "be";
8275 break;
8276 case GE:
8277 if (mode == CCNZmode || mode == CCXNZmode)
8278 branch = "bpos";
8279 else
8280 branch = "bge";
8281 break;
8282 case GT:
8283 branch = "bg";
8284 break;
8285 case LE:
8286 branch = "ble";
8287 break;
8288 case LT:
8289 if (mode == CCNZmode || mode == CCXNZmode)
8290 branch = "bneg";
8291 else
8292 branch = "bl";
8293 break;
8294 case GEU:
8295 branch = "bgeu";
8296 break;
8297 case GTU:
8298 branch = "bgu";
8299 break;
8300 case LEU:
8301 branch = "bleu";
8302 break;
8303 case LTU:
8304 branch = "blu";
8305 break;
8306 default:
8307 gcc_unreachable ();
8308 }
8309 strcpy (string, branch);
8310 }
8311 spaces -= strlen (branch);
8312 p = strchr (string, '\0');
8313
8314 /* Now add the annulling, the label, and a possible noop. */
8315 if (annul && ! far)
8316 {
8317 strcpy (p, ",a");
8318 p += 2;
8319 spaces -= 2;
8320 }
8321
8322 if (TARGET_V9)
8323 {
8324 rtx note;
8325 int v8 = 0;
8326
8327 if (! far && insn && INSN_ADDRESSES_SET_P ())
8328 {
8329 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8330 - INSN_ADDRESSES (INSN_UID (insn)));
8331 /* Leave some instructions for "slop". */
8332 if (delta < -260000 || delta >= 260000)
8333 v8 = 1;
8334 }
8335
8336 switch (mode)
8337 {
8338 case E_CCmode:
8339 case E_CCNZmode:
8340 case E_CCCmode:
8341 case E_CCVmode:
8342 labelno = "%%icc, ";
8343 if (v8)
8344 labelno = "";
8345 break;
8346 case E_CCXmode:
8347 case E_CCXNZmode:
8348 case E_CCXCmode:
8349 case E_CCXVmode:
8350 labelno = "%%xcc, ";
8351 gcc_assert (!v8);
8352 break;
8353 case E_CCFPmode:
8354 case E_CCFPEmode:
8355 {
8356 static char v9_fcc_labelno[] = "%%fccX, ";
8357 /* Set the char indicating the number of the fcc reg to use. */
8358 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8359 labelno = v9_fcc_labelno;
8360 if (v8)
8361 {
8362 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8363 labelno = "";
8364 }
8365 }
8366 break;
8367 default:
8368 gcc_unreachable ();
8369 }
8370
8371 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8372 {
8373 strcpy (p,
8374 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8375 >= profile_probability::even ()) ^ far)
8376 ? ",pt" : ",pn");
8377 p += 3;
8378 spaces -= 3;
8379 }
8380 }
8381 else
8382 labelno = "";
8383
8384 if (spaces > 0)
8385 *p++ = '\t';
8386 else
8387 *p++ = ' ';
8388 strcpy (p, labelno);
8389 p = strchr (p, '\0');
8390 if (far)
8391 {
8392 strcpy (p, ".+12\n\t nop\n\tb\t");
8393 /* Skip the next insn if requested or
8394 if we know that it will be a nop. */
8395 if (annul || ! final_sequence)
8396 p[3] = '6';
8397 p += 14;
8398 }
8399 *p++ = '%';
8400 *p++ = 'l';
8401 *p++ = label + '0';
8402 *p++ = '%';
8403 *p++ = '#';
8404 *p = '\0';
8405
8406 return string;
8407 }
8408
8409 /* Emit a library call comparison between floating point X and Y.
8410 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8411 Return the new operator to be used in the comparison sequence.
8412
8413 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8414 values as arguments instead of the TFmode registers themselves;
8415 that's why we cannot call emit_float_lib_cmp. */
8416
8417 rtx
8418 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8419 {
8420 const char *qpfunc;
8421 rtx slot0, slot1, result, tem, tem2, libfunc;
8422 machine_mode mode;
8423 enum rtx_code new_comparison;
8424
8425 switch (comparison)
8426 {
8427 case EQ:
8428 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8429 break;
8430
8431 case NE:
8432 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8433 break;
8434
8435 case GT:
8436 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8437 break;
8438
8439 case GE:
8440 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8441 break;
8442
8443 case LT:
8444 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8445 break;
8446
8447 case LE:
8448 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8449 break;
8450
8451 case ORDERED:
8452 case UNORDERED:
8453 case UNGT:
8454 case UNLT:
8455 case UNEQ:
8456 case UNGE:
8457 case UNLE:
8458 case LTGT:
8459 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8460 break;
8461
8462 default:
8463 gcc_unreachable ();
8464 }
8465
8466 if (TARGET_ARCH64)
8467 {
8468 if (MEM_P (x))
8469 {
8470 tree expr = MEM_EXPR (x);
8471 if (expr)
8472 mark_addressable (expr);
8473 slot0 = x;
8474 }
8475 else
8476 {
8477 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8478 emit_move_insn (slot0, x);
8479 }
8480
8481 if (MEM_P (y))
8482 {
8483 tree expr = MEM_EXPR (y);
8484 if (expr)
8485 mark_addressable (expr);
8486 slot1 = y;
8487 }
8488 else
8489 {
8490 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8491 emit_move_insn (slot1, y);
8492 }
8493
8494 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8495 emit_library_call (libfunc, LCT_NORMAL,
8496 DImode,
8497 XEXP (slot0, 0), Pmode,
8498 XEXP (slot1, 0), Pmode);
8499 mode = DImode;
8500 }
8501 else
8502 {
8503 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8504 emit_library_call (libfunc, LCT_NORMAL,
8505 SImode,
8506 x, TFmode, y, TFmode);
8507 mode = SImode;
8508 }
8509
8510
8511 /* Immediately move the result of the libcall into a pseudo
8512 register so reload doesn't clobber the value if it needs
8513 the return register for a spill reg. */
8514 result = gen_reg_rtx (mode);
8515 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8516
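/* The _Q_cmp/_Qp_cmp routines return 0 for equal, 1 for less, 2 for
   greater and 3 for unordered; the decoding below relies on that
   encoding. */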
8517 switch (comparison)
8518 {
8519 default:
8520 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8521 case ORDERED:
8522 case UNORDERED:
8523 new_comparison = (comparison == UNORDERED ? EQ : NE);
8524 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8525 case UNGT:
8526 case UNGE:
8527 new_comparison = (comparison == UNGT ? GT : NE);
8528 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8529 case UNLE:
8530 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8531 case UNLT:
8532 tem = gen_reg_rtx (mode);
8533 if (TARGET_ARCH32)
8534 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8535 else
8536 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8537 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8538 case UNEQ:
8539 case LTGT:
8540 tem = gen_reg_rtx (mode);
8541 if (TARGET_ARCH32)
8542 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8543 else
8544 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8545 tem2 = gen_reg_rtx (mode);
8546 if (TARGET_ARCH32)
8547 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8548 else
8549 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8550 new_comparison = (comparison == UNEQ ? EQ : NE);
8551 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8552 }
8553
8554 gcc_unreachable ();
8555 }
8556
8557 /* Generate an unsigned DImode to FP conversion. This is the same code
8558 optabs would emit if we didn't have TFmode patterns. */
8559
8560 void
8561 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8562 {
8563 rtx i0, i1, f0, in, out;
8564
8565 out = operands[0];
8566 in = force_reg (DImode, operands[1]);
8567 rtx_code_label *neglab = gen_label_rtx ();
8568 rtx_code_label *donelab = gen_label_rtx ();
8569 i0 = gen_reg_rtx (DImode);
8570 i1 = gen_reg_rtx (DImode);
8571 f0 = gen_reg_rtx (mode);
8572
8573 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8574
8575 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8576 emit_jump_insn (gen_jump (donelab));
8577 emit_barrier ();
8578
8579 emit_label (neglab);
8580
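/* The value has its sign bit set, so it cannot be converted directly.
   Halve it, folding the low bit back in as a sticky bit to preserve
   rounding, convert, then double the result. */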
8581 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8582 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8583 emit_insn (gen_iordi3 (i0, i0, i1));
8584 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8585 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8586
8587 emit_label (donelab);
8588 }
8589
8590 /* Generate an FP to unsigned DImode conversion. This is the same code
8591 optabs would emit if we didn't have TFmode patterns. */
8592
8593 void
8594 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8595 {
8596 rtx i0, i1, f0, in, out, limit;
8597
8598 out = operands[0];
8599 in = force_reg (mode, operands[1]);
8600 rtx_code_label *neglab = gen_label_rtx ();
8601 rtx_code_label *donelab = gen_label_rtx ();
8602 i0 = gen_reg_rtx (DImode);
8603 i1 = gen_reg_rtx (DImode);
8604 limit = gen_reg_rtx (mode);
8605 f0 = gen_reg_rtx (mode);
8606
8607 emit_move_insn (limit,
8608 const_double_from_real_value (
8609 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8610 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8611
8612 emit_insn (gen_rtx_SET (out,
8613 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8614 emit_jump_insn (gen_jump (donelab));
8615 emit_barrier ();
8616
8617 emit_label (neglab);
8618
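/* The value is at least 2^63: subtract 2^63 before converting, then add
   it back by setting the sign bit of the integer result with an XOR. */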
8619 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8620 emit_insn (gen_rtx_SET (i0,
8621 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8622 emit_insn (gen_movdi (i1, const1_rtx));
8623 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8624 emit_insn (gen_xordi3 (out, i0, i1));
8625
8626 emit_label (donelab);
8627 }
8628
8629 /* Return the string to output a compare and branch instruction to DEST.
8630 DEST is the destination insn (i.e. the label), INSN is the source,
8631 and OP is the conditional expression. */
8632
8633 const char *
8634 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8635 {
8636 machine_mode mode = GET_MODE (XEXP (op, 0));
8637 enum rtx_code code = GET_CODE (op);
8638 const char *cond_str, *tmpl;
8639 int far, emit_nop, len;
8640 static char string[64];
8641 char size_char;
8642
8643 /* Compare and Branch is limited to +-2KB. If it is too far away,
8644 change
8645
8646 cxbne X, Y, .LC30
8647
8648 to
8649
8650 cxbe X, Y, .+16
8651 nop
8652 ba,pt xcc, .LC30
8653 nop */
8654
8655 len = get_attr_length (insn);
8656
8657 far = len == 4;
8658 emit_nop = len == 2;
8659
8660 if (far)
8661 code = reverse_condition (code);
8662
8663 size_char = ((mode == SImode) ? 'w' : 'x');
8664
8665 switch (code)
8666 {
8667 case NE:
8668 cond_str = "ne";
8669 break;
8670
8671 case EQ:
8672 cond_str = "e";
8673 break;
8674
8675 case GE:
8676 cond_str = "ge";
8677 break;
8678
8679 case GT:
8680 cond_str = "g";
8681 break;
8682
8683 case LE:
8684 cond_str = "le";
8685 break;
8686
8687 case LT:
8688 cond_str = "l";
8689 break;
8690
8691 case GEU:
8692 cond_str = "cc";
8693 break;
8694
8695 case GTU:
8696 cond_str = "gu";
8697 break;
8698
8699 case LEU:
8700 cond_str = "leu";
8701 break;
8702
8703 case LTU:
8704 cond_str = "cs";
8705 break;
8706
8707 default:
8708 gcc_unreachable ();
8709 }
8710
8711 if (far)
8712 {
8713 int veryfar = 1, delta;
8714
8715 if (INSN_ADDRESSES_SET_P ())
8716 {
8717 delta = (INSN_ADDRESSES (INSN_UID (dest))
8718 - INSN_ADDRESSES (INSN_UID (insn)));
8719 /* Leave some instructions for "slop". */
8720 if (delta >= -260000 && delta < 260000)
8721 veryfar = 0;
8722 }
8723
8724 if (veryfar)
8725 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8726 else
8727 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8728 }
8729 else
8730 {
8731 if (emit_nop)
8732 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8733 else
8734 tmpl = "c%cb%s\t%%1, %%2, %%3";
8735 }
8736
8737 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8738
8739 return string;
8740 }
8741
8742 /* Return the string to output a conditional branch to LABEL, testing
8743 register REG. LABEL is the operand number of the label; REG is the
8744 operand number of the reg. OP is the conditional expression. The mode
8745 of REG says what kind of comparison we made.
8746
8747 DEST is the destination insn (i.e. the label), INSN is the source.
8748
8749 REVERSED is nonzero if we should reverse the sense of the comparison.
8750
8751 ANNUL is nonzero if we should generate an annulling branch. */
8752
8753 const char *
8754 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8755 int annul, rtx_insn *insn)
8756 {
8757 static char string[64];
8758 enum rtx_code code = GET_CODE (op);
8759 machine_mode mode = GET_MODE (XEXP (op, 0));
8760 rtx note;
8761 int far;
8762 char *p;
8763
8764 /* Branches on registers are limited to +-128KB. If it is too far away,
8765 change
8766
8767 brnz,pt %g1, .LC30
8768
8769 to
8770
8771 brz,pn %g1, .+12
8772 nop
8773 ba,pt %xcc, .LC30
8774
8775 and
8776
8777 brgez,a,pn %o1, .LC29
8778
8779 to
8780
8781 brlz,pt %o1, .+16
8782 nop
8783 ba,pt %xcc, .LC29 */
8784
8785 far = get_attr_length (insn) >= 3;
8786
8787 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8788 if (reversed ^ far)
8789 code = reverse_condition (code);
8790
8791 /* Only 64-bit versions of these instructions exist. */
8792 gcc_assert (mode == DImode);
8793
8794 /* Start by writing the branch condition. */
8795
8796 switch (code)
8797 {
8798 case NE:
8799 strcpy (string, "brnz");
8800 break;
8801
8802 case EQ:
8803 strcpy (string, "brz");
8804 break;
8805
8806 case GE:
8807 strcpy (string, "brgez");
8808 break;
8809
8810 case LT:
8811 strcpy (string, "brlz");
8812 break;
8813
8814 case LE:
8815 strcpy (string, "brlez");
8816 break;
8817
8818 case GT:
8819 strcpy (string, "brgz");
8820 break;
8821
8822 default:
8823 gcc_unreachable ();
8824 }
8825
8826 p = strchr (string, '\0');
8827
8828 /* Now add the annulling, reg, label, and nop. */
8829 if (annul && ! far)
8830 {
8831 strcpy (p, ",a");
8832 p += 2;
8833 }
8834
8835 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8836 {
8837 strcpy (p,
8838 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8839 >= profile_probability::even ()) ^ far)
8840 ? ",pt" : ",pn");
8841 p += 3;
8842 }
8843
8844 *p = p < string + 8 ? '\t' : ' ';
8845 p++;
8846 *p++ = '%';
8847 *p++ = '0' + reg;
8848 *p++ = ',';
8849 *p++ = ' ';
8850 if (far)
8851 {
8852 int veryfar = 1, delta;
8853
8854 if (INSN_ADDRESSES_SET_P ())
8855 {
8856 delta = (INSN_ADDRESSES (INSN_UID (dest))
8857 - INSN_ADDRESSES (INSN_UID (insn)));
8858 /* Leave some instructions for "slop". */
8859 if (delta >= -260000 && delta < 260000)
8860 veryfar = 0;
8861 }
8862
8863 strcpy (p, ".+12\n\t nop\n\t");
8864 /* Skip the next insn if requested or
8865 if we know that it will be a nop. */
8866 if (annul || ! final_sequence)
8867 p[3] = '6';
8868 p += 12;
8869 if (veryfar)
8870 {
8871 strcpy (p, "b\t");
8872 p += 2;
8873 }
8874 else
8875 {
8876 strcpy (p, "ba,pt\t%%xcc, ");
8877 p += 13;
8878 }
8879 }
8880 *p++ = '%';
8881 *p++ = 'l';
8882 *p++ = '0' + label;
8883 *p++ = '%';
8884 *p++ = '#';
8885 *p = '\0';
8886
8887 return string;
8888 }
8889
8890 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8891 Such instructions cannot be used in the delay slot of the return insn on V9.
8892 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8893 */
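/* The V9 return instruction restores the caller's register window and its
   delay slot executes with that window already in place, so the callee's
   %l and %o registers are no longer accessible there and its %i registers
   must be referenced by their caller-side names %o. */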
8894
8895 static int
8896 epilogue_renumber (register rtx *where, int test)
8897 {
8898 register const char *fmt;
8899 register int i;
8900 register enum rtx_code code;
8901
8902 if (*where == 0)
8903 return 0;
8904
8905 code = GET_CODE (*where);
8906
8907 switch (code)
8908 {
8909 case REG:
8910 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8911 return 1;
8912 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8913 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8914 /* fallthrough */
8915 case SCRATCH:
8916 case CC0:
8917 case PC:
8918 case CONST_INT:
8919 case CONST_WIDE_INT:
8920 case CONST_DOUBLE:
8921 return 0;
8922
8923 /* Do not replace the frame pointer with the stack pointer because
8924 it can cause the delayed instruction to load below the stack.
8925 This occurs when instructions like:
8926
8927 (set (reg/i:SI 24 %i0)
8928 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8929 (const_int -20 [0xffffffec])) 0))
8930
8931 are in the return delayed slot. */
8932 case PLUS:
8933 if (GET_CODE (XEXP (*where, 0)) == REG
8934 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8935 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8936 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8937 return 1;
8938 break;
8939
8940 case MEM:
8941 if (SPARC_STACK_BIAS
8942 && GET_CODE (XEXP (*where, 0)) == REG
8943 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8944 return 1;
8945 break;
8946
8947 default:
8948 break;
8949 }
8950
8951 fmt = GET_RTX_FORMAT (code);
8952
8953 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8954 {
8955 if (fmt[i] == 'E')
8956 {
8957 register int j;
8958 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8959 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8960 return 1;
8961 }
8962 else if (fmt[i] == 'e'
8963 && epilogue_renumber (&(XEXP (*where, i)), test))
8964 return 1;
8965 }
8966 return 0;
8967 }
8968 \f
8969 /* Leaf functions and non-leaf functions have different needs. */
8970
8971 static const int
8972 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8973
8974 static const int
8975 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8976
8977 static const int *const reg_alloc_orders[] = {
8978 reg_leaf_alloc_order,
8979 reg_nonleaf_alloc_order};
8980
8981 void
8982 order_regs_for_local_alloc (void)
8983 {
8984 static int last_order_nonleaf = 1;
8985
8986 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8987 {
8988 last_order_nonleaf = !last_order_nonleaf;
8989 memcpy ((char *) reg_alloc_order,
8990 (const char *) reg_alloc_orders[last_order_nonleaf],
8991 FIRST_PSEUDO_REGISTER * sizeof (int));
8992 }
8993 }
8994 \f
8995 /* Return 1 if REG and MEM are legitimate enough to allow the various
8996 MEM<-->REG splits to be run. */
8997
8998 int
8999 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9000 {
9001 /* Punt if we are here by mistake. */
9002 gcc_assert (reload_completed);
9003
9004 /* We must have an offsettable memory reference. */
9005 if (!offsettable_memref_p (mem))
9006 return 0;
9007
9008 /* If we have legitimate args for ldd/std, we do not want
9009 the split to happen. */
9010 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9011 return 0;
9012
9013 /* Success. */
9014 return 1;
9015 }
9016
9017 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9018
9019 void
9020 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9021 {
9022 rtx high_part = gen_highpart (mode, dest);
9023 rtx low_part = gen_lowpart (mode, dest);
9024 rtx word0 = adjust_address (src, mode, 0);
9025 rtx word1 = adjust_address (src, mode, 4);
9026
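/* If the register holding the high part also appears in the address of
   the second word, loading the high part first would clobber that
   address, so load the low part first in that case. */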
9027 if (reg_overlap_mentioned_p (high_part, word1))
9028 {
9029 emit_move_insn_1 (low_part, word1);
9030 emit_move_insn_1 (high_part, word0);
9031 }
9032 else
9033 {
9034 emit_move_insn_1 (high_part, word0);
9035 emit_move_insn_1 (low_part, word1);
9036 }
9037 }
9038
9039 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9040
9041 void
9042 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9043 {
9044 rtx word0 = adjust_address (dest, mode, 0);
9045 rtx word1 = adjust_address (dest, mode, 4);
9046 rtx high_part = gen_highpart (mode, src);
9047 rtx low_part = gen_lowpart (mode, src);
9048
9049 emit_move_insn_1 (word0, high_part);
9050 emit_move_insn_1 (word1, low_part);
9051 }
9052
9053 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9054
9055 int
9056 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9057 {
9058 /* Punt if we are here by mistake. */
9059 gcc_assert (reload_completed);
9060
9061 if (GET_CODE (reg1) == SUBREG)
9062 reg1 = SUBREG_REG (reg1);
9063 if (GET_CODE (reg1) != REG)
9064 return 0;
9065 const int regno1 = REGNO (reg1);
9066
9067 if (GET_CODE (reg2) == SUBREG)
9068 reg2 = SUBREG_REG (reg2);
9069 if (GET_CODE (reg2) != REG)
9070 return 0;
9071 const int regno2 = REGNO (reg2);
9072
9073 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9074 return 1;
9075
9076 if (TARGET_VIS3)
9077 {
9078 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9079 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9080 return 1;
9081 }
9082
9083 return 0;
9084 }
9085
9086 /* Split a REG <--> REG move into a pair of moves in MODE. */
9087
9088 void
9089 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9090 {
9091 rtx dest1 = gen_highpart (mode, dest);
9092 rtx dest2 = gen_lowpart (mode, dest);
9093 rtx src1 = gen_highpart (mode, src);
9094 rtx src2 = gen_lowpart (mode, src);
9095
9096 /* Now emit using the real source and destination we found, swapping
9097 the order if we detect overlap. */
9098 if (reg_overlap_mentioned_p (dest1, src2))
9099 {
9100 emit_move_insn_1 (dest2, src2);
9101 emit_move_insn_1 (dest1, src1);
9102 }
9103 else
9104 {
9105 emit_move_insn_1 (dest1, src1);
9106 emit_move_insn_1 (dest2, src2);
9107 }
9108 }
9109
9110 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9111 This makes them candidates for using ldd and std insns.
9112
9113 Note reg1 and reg2 *must* be hard registers. */
9114
9115 int
9116 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9117 {
9118 /* We might have been passed a SUBREG. */
9119 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9120 return 0;
9121
9122 if (REGNO (reg1) % 2 != 0)
9123 return 0;
9124
9125 /* Integer ldd is deprecated in SPARC V9. */
9126 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9127 return 0;
9128
9129 return (REGNO (reg1) == REGNO (reg2) - 1);
9130 }
9131
9132 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9133 an ldd or std insn.
9134
9135 This can only happen when addr1 and addr2, the addresses in mem1
9136 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9137 addr1 must also be aligned on a 64-bit boundary.
9138
9139 Also iff dependent_reg_rtx is not null it should not be used to
9140 compute the address for mem1, i.e. we cannot optimize a sequence
9141 like:
9142 ld [%o0], %o0
9143 ld [%o0 + 4], %o1
9144 to
9145 ldd [%o0], %o0
9146 nor:
9147 ld [%g3 + 4], %g3
9148 ld [%g3], %g2
9149 to
9150 ldd [%g3], %g2
9151
9152 But, note that the transformation from:
9153 ld [%g2 + 4], %g3
9154 ld [%g2], %g2
9155 to
9156 ldd [%g2], %g2
9157 is perfectly fine. Thus, the peephole2 patterns always pass us
9158 the destination register of the first load, never the second one.
9159
9160 For stores we don't have a similar problem, so dependent_reg_rtx is
9161 NULL_RTX. */
9162
9163 int
9164 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9165 {
9166 rtx addr1, addr2;
9167 unsigned int reg1;
9168 HOST_WIDE_INT offset1;
9169
9170 /* The mems cannot be volatile. */
9171 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9172 return 0;
9173
9174 /* MEM1 should be aligned on a 64-bit boundary. */
9175 if (MEM_ALIGN (mem1) < 64)
9176 return 0;
9177
9178 addr1 = XEXP (mem1, 0);
9179 addr2 = XEXP (mem2, 0);
9180
9181 /* Extract a register number and offset (if used) from the first addr. */
9182 if (GET_CODE (addr1) == PLUS)
9183 {
9184 /* If not a REG, return zero. */
9185 if (GET_CODE (XEXP (addr1, 0)) != REG)
9186 return 0;
9187 else
9188 {
9189 reg1 = REGNO (XEXP (addr1, 0));
9190 /* The offset must be constant! */
9191 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9192 return 0;
9193 offset1 = INTVAL (XEXP (addr1, 1));
9194 }
9195 }
9196 else if (GET_CODE (addr1) != REG)
9197 return 0;
9198 else
9199 {
9200 reg1 = REGNO (addr1);
9201 /* This was a simple (mem (reg)) expression. Offset is 0. */
9202 offset1 = 0;
9203 }
9204
9205 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9206 if (GET_CODE (addr2) != PLUS)
9207 return 0;
9208
9209 if (GET_CODE (XEXP (addr2, 0)) != REG
9210 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9211 return 0;
9212
9213 if (reg1 != REGNO (XEXP (addr2, 0)))
9214 return 0;
9215
9216 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9217 return 0;
9218
9219 /* The first offset must be evenly divisible by 8 to ensure the
9220 address is 64-bit aligned. */
9221 if (offset1 % 8 != 0)
9222 return 0;
9223
9224 /* The offset for the second addr must be 4 more than the first addr. */
9225 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9226 return 0;
9227
9228 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9229 instructions. */
9230 return 1;
9231 }
9232
9233 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9234
9235 rtx
9236 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9237 {
9238 rtx x = widen_memory_access (mem1, mode, 0);
9239 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9240 return x;
9241 }
9242
9243 /* Return 1 if reg is a pseudo, or is the first register in
9244 a hard register pair. This makes it suitable for use in
9245 ldd and std insns. */
9246
9247 int
9248 register_ok_for_ldd (rtx reg)
9249 {
9250 /* We might have been passed a SUBREG. */
9251 if (!REG_P (reg))
9252 return 0;
9253
9254 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9255 return (REGNO (reg) % 2 == 0);
9256
9257 return 1;
9258 }
9259
9260 /* Return 1 if OP, a MEM, has an address which is known to be
9261 aligned to an 8-byte boundary. */
9262
9263 int
9264 memory_ok_for_ldd (rtx op)
9265 {
9266 /* In 64-bit mode, we assume that the address is word-aligned. */
9267 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9268 return 0;
9269
9270 if (! can_create_pseudo_p ()
9271 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9272 return 0;
9273
9274 return 1;
9275 }
9276 \f
9277 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9278
9279 static bool
9280 sparc_print_operand_punct_valid_p (unsigned char code)
9281 {
9282 if (code == '#'
9283 || code == '*'
9284 || code == '('
9285 || code == ')'
9286 || code == '_'
9287 || code == '&')
9288 return true;
9289
9290 return false;
9291 }
9292
9293 /* Implement TARGET_PRINT_OPERAND.
9294 Print operand X (an rtx) in assembler syntax to file FILE.
9295 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9296 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9297
9298 static void
9299 sparc_print_operand (FILE *file, rtx x, int code)
9300 {
9301 const char *s;
9302
9303 switch (code)
9304 {
9305 case '#':
9306 /* Output an insn in a delay slot. */
9307 if (final_sequence)
9308 sparc_indent_opcode = 1;
9309 else
9310 fputs ("\n\t nop", file);
9311 return;
9312 case '*':
9313 /* Output an annul flag if there's nothing for the delay slot and we
9314 are optimizing. This is always used with '(' below.
9315 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9316 this is a dbx bug. So, we only do this when optimizing.
9317 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9318 Always emit a nop in case the next instruction is a branch. */
9319 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9320 fputs (",a", file);
9321 return;
9322 case '(':
9323 /* Output a 'nop' if there's nothing for the delay slot and we are
9324 not optimizing. This is always used with '*' above. */
9325 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9326 fputs ("\n\t nop", file);
9327 else if (final_sequence)
9328 sparc_indent_opcode = 1;
9329 return;
9330 case ')':
9331 /* Output the right displacement from the saved PC on function return.
9332 The caller may have placed an "unimp" insn immediately after the call
9333 so we have to account for it. This insn is used in the 32-bit ABI
9334 when calling a function that returns a non zero-sized structure. The
9335 64-bit ABI doesn't have it. Be careful to have this test be the same
9336 as that for the call. The exception is when sparc_std_struct_return
9337 is enabled, the psABI is followed exactly and the adjustment is made
9338 by the code in sparc_struct_value_rtx. The call emitted is the same
9339 when sparc_std_struct_return is enabled. */
9340 if (!TARGET_ARCH64
9341 && cfun->returns_struct
9342 && !sparc_std_struct_return
9343 && DECL_SIZE (DECL_RESULT (current_function_decl))
9344 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9345 == INTEGER_CST
9346 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9347 fputs ("12", file);
9348 else
9349 fputc ('8', file);
9350 return;
9351 case '_':
9352 /* Output the Embedded Medium/Anywhere code model base register. */
9353 fputs (EMBMEDANY_BASE_REG, file);
9354 return;
9355 case '&':
9356 /* Print some local dynamic TLS name. */
9357 if (const char *name = get_some_local_dynamic_name ())
9358 assemble_name (file, name);
9359 else
9360 output_operand_lossage ("'%%&' used without any "
9361 "local dynamic TLS references");
9362 return;
9363
9364 case 'Y':
9365 /* Adjust the operand to take into account a RESTORE operation. */
9366 if (GET_CODE (x) == CONST_INT)
9367 break;
9368 else if (GET_CODE (x) != REG)
9369 output_operand_lossage ("invalid %%Y operand");
9370 else if (REGNO (x) < 8)
9371 fputs (reg_names[REGNO (x)], file);
9372 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9373 fputs (reg_names[REGNO (x)-16], file);
9374 else
9375 output_operand_lossage ("invalid %%Y operand");
9376 return;
9377 case 'L':
9378 /* Print out the low order register name of a register pair. */
9379 if (WORDS_BIG_ENDIAN)
9380 fputs (reg_names[REGNO (x)+1], file);
9381 else
9382 fputs (reg_names[REGNO (x)], file);
9383 return;
9384 case 'H':
9385 /* Print out the high order register name of a register pair. */
9386 if (WORDS_BIG_ENDIAN)
9387 fputs (reg_names[REGNO (x)], file);
9388 else
9389 fputs (reg_names[REGNO (x)+1], file);
9390 return;
9391 case 'R':
9392 /* Print out the second register name of a register pair or quad.
9393 I.e., R (%o0) => %o1. */
9394 fputs (reg_names[REGNO (x)+1], file);
9395 return;
9396 case 'S':
9397 /* Print out the third register name of a register quad.
9398 I.e., S (%o0) => %o2. */
9399 fputs (reg_names[REGNO (x)+2], file);
9400 return;
9401 case 'T':
9402 /* Print out the fourth register name of a register quad.
9403 I.e., T (%o0) => %o3. */
9404 fputs (reg_names[REGNO (x)+3], file);
9405 return;
9406 case 'x':
9407 /* Print a condition code register. */
9408 if (REGNO (x) == SPARC_ICC_REG)
9409 {
9410 switch (GET_MODE (x))
9411 {
9412 case E_CCmode:
9413 case E_CCNZmode:
9414 case E_CCCmode:
9415 case E_CCVmode:
9416 s = "%icc";
9417 break;
9418 case E_CCXmode:
9419 case E_CCXNZmode:
9420 case E_CCXCmode:
9421 case E_CCXVmode:
9422 s = "%xcc";
9423 break;
9424 default:
9425 gcc_unreachable ();
9426 }
9427 fputs (s, file);
9428 }
9429 else
9430 /* %fccN register */
9431 fputs (reg_names[REGNO (x)], file);
9432 return;
9433 case 'm':
9434 /* Print the operand's address only. */
9435 output_address (GET_MODE (x), XEXP (x, 0));
9436 return;
9437 case 'r':
9438 /* In this case we need a register. Use %g0 if the
9439 operand is const0_rtx. */
9440 if (x == const0_rtx
9441 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9442 {
9443 fputs ("%g0", file);
9444 return;
9445 }
9446 else
9447 break;
9448
9449 case 'A':
9450 switch (GET_CODE (x))
9451 {
9452 case IOR:
9453 s = "or";
9454 break;
9455 case AND:
9456 s = "and";
9457 break;
9458 case XOR:
9459 s = "xor";
9460 break;
9461 default:
9462 output_operand_lossage ("invalid %%A operand");
9463 s = "";
9464 break;
9465 }
9466 fputs (s, file);
9467 return;
9468
9469 case 'B':
9470 switch (GET_CODE (x))
9471 {
9472 case IOR:
9473 s = "orn";
9474 break;
9475 case AND:
9476 s = "andn";
9477 break;
9478 case XOR:
9479 s = "xnor";
9480 break;
9481 default:
9482 output_operand_lossage ("invalid %%B operand");
9483 s = "";
9484 break;
9485 }
9486 fputs (s, file);
9487 return;
9488
9489 /* This is used by the conditional move instructions. */
9490 case 'C':
9491 {
9492 machine_mode mode = GET_MODE (XEXP (x, 0));
9493 switch (GET_CODE (x))
9494 {
9495 case NE:
9496 if (mode == CCVmode || mode == CCXVmode)
9497 s = "vs";
9498 else
9499 s = "ne";
9500 break;
9501 case EQ:
9502 if (mode == CCVmode || mode == CCXVmode)
9503 s = "vc";
9504 else
9505 s = "e";
9506 break;
9507 case GE:
9508 if (mode == CCNZmode || mode == CCXNZmode)
9509 s = "pos";
9510 else
9511 s = "ge";
9512 break;
9513 case GT:
9514 s = "g";
9515 break;
9516 case LE:
9517 s = "le";
9518 break;
9519 case LT:
9520 if (mode == CCNZmode || mode == CCXNZmode)
9521 s = "neg";
9522 else
9523 s = "l";
9524 break;
9525 case GEU:
9526 s = "geu";
9527 break;
9528 case GTU:
9529 s = "gu";
9530 break;
9531 case LEU:
9532 s = "leu";
9533 break;
9534 case LTU:
9535 s = "lu";
9536 break;
9537 case LTGT:
9538 s = "lg";
9539 break;
9540 case UNORDERED:
9541 s = "u";
9542 break;
9543 case ORDERED:
9544 s = "o";
9545 break;
9546 case UNLT:
9547 s = "ul";
9548 break;
9549 case UNLE:
9550 s = "ule";
9551 break;
9552 case UNGT:
9553 s = "ug";
9554 break;
9555 case UNGE:
9556 s = "uge";
9557 break;
9558 case UNEQ:
9559 s = "ue";
9560 break;
9561 default:
9562 output_operand_lossage ("invalid %%C operand");
9563 s = "";
9564 break;
9565 }
9566 fputs (s, file);
9567 return;
9568 }
9569
9570 /* These are used by the movr instruction pattern. */
9571 case 'D':
9572 {
9573 switch (GET_CODE (x))
9574 {
9575 case NE:
9576 s = "ne";
9577 break;
9578 case EQ:
9579 s = "e";
9580 break;
9581 case GE:
9582 s = "gez";
9583 break;
9584 case LT:
9585 s = "lz";
9586 break;
9587 case LE:
9588 s = "lez";
9589 break;
9590 case GT:
9591 s = "gz";
9592 break;
9593 default:
9594 output_operand_lossage ("invalid %%D operand");
9595 s = "";
9596 break;
9597 }
9598 fputs (s, file);
9599 return;
9600 }
9601
9602 case 'b':
9603 {
9604 /* Print a sign-extended character. */
9605 int i = trunc_int_for_mode (INTVAL (x), QImode);
9606 fprintf (file, "%d", i);
9607 return;
9608 }
9609
9610 case 'f':
9611 /* Operand must be a MEM; write its address. */
9612 if (GET_CODE (x) != MEM)
9613 output_operand_lossage ("invalid %%f operand");
9614 output_address (GET_MODE (x), XEXP (x, 0));
9615 return;
9616
9617 case 's':
9618 {
9619 /* Print a sign-extended 32-bit value. */
9620 HOST_WIDE_INT i;
9621 if (GET_CODE (x) == CONST_INT)
9622 i = INTVAL (x);
9623 else
9624 {
9625 output_operand_lossage ("invalid %%s operand");
9626 return;
9627 }
9628 i = trunc_int_for_mode (i, SImode);
9629 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9630 return;
9631 }
9632
9633 case 0:
9634 /* Do nothing special. */
9635 break;
9636
9637 default:
9638 /* Undocumented flag. */
9639 output_operand_lossage ("invalid operand output code");
9640 }
9641
9642 if (GET_CODE (x) == REG)
9643 fputs (reg_names[REGNO (x)], file);
9644 else if (GET_CODE (x) == MEM)
9645 {
9646 fputc ('[', file);
9647 /* Poor Sun assembler doesn't understand absolute addressing. */
9648 if (CONSTANT_P (XEXP (x, 0)))
9649 fputs ("%g0+", file);
9650 output_address (GET_MODE (x), XEXP (x, 0));
9651 fputc (']', file);
9652 }
9653 else if (GET_CODE (x) == HIGH)
9654 {
9655 fputs ("%hi(", file);
9656 output_addr_const (file, XEXP (x, 0));
9657 fputc (')', file);
9658 }
9659 else if (GET_CODE (x) == LO_SUM)
9660 {
9661 sparc_print_operand (file, XEXP (x, 0), 0);
9662 if (TARGET_CM_MEDMID)
9663 fputs ("+%l44(", file);
9664 else
9665 fputs ("+%lo(", file);
9666 output_addr_const (file, XEXP (x, 1));
9667 fputc (')', file);
9668 }
9669 else if (GET_CODE (x) == CONST_DOUBLE)
9670 output_operand_lossage ("floating-point constant not a valid immediate operand");
9671 else
9672 output_addr_const (file, x);
9673 }
9674
9675 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9676
9677 static void
9678 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9679 {
9680 register rtx base, index = 0;
9681 int offset = 0;
9682 register rtx addr = x;
9683
9684 if (REG_P (addr))
9685 fputs (reg_names[REGNO (addr)], file);
9686 else if (GET_CODE (addr) == PLUS)
9687 {
9688 if (CONST_INT_P (XEXP (addr, 0)))
9689 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9690 else if (CONST_INT_P (XEXP (addr, 1)))
9691 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9692 else
9693 base = XEXP (addr, 0), index = XEXP (addr, 1);
9694 if (GET_CODE (base) == LO_SUM)
9695 {
9696 gcc_assert (USE_AS_OFFSETABLE_LO10
9697 && TARGET_ARCH64
9698 && ! TARGET_CM_MEDMID);
9699 output_operand (XEXP (base, 0), 0);
9700 fputs ("+%lo(", file);
9701 output_address (VOIDmode, XEXP (base, 1));
9702 fprintf (file, ")+%d", offset);
9703 }
9704 else
9705 {
9706 fputs (reg_names[REGNO (base)], file);
9707 if (index == 0)
9708 fprintf (file, "%+d", offset);
9709 else if (REG_P (index))
9710 fprintf (file, "+%s", reg_names[REGNO (index)]);
9711 else if (GET_CODE (index) == SYMBOL_REF
9712 || GET_CODE (index) == LABEL_REF
9713 || GET_CODE (index) == CONST)
9714 fputc ('+', file), output_addr_const (file, index);
9715 else gcc_unreachable ();
9716 }
9717 }
9718 else if (GET_CODE (addr) == MINUS
9719 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9720 {
9721 output_addr_const (file, XEXP (addr, 0));
9722 fputs ("-(", file);
9723 output_addr_const (file, XEXP (addr, 1));
9724 fputs ("-.)", file);
9725 }
9726 else if (GET_CODE (addr) == LO_SUM)
9727 {
9728 output_operand (XEXP (addr, 0), 0);
9729 if (TARGET_CM_MEDMID)
9730 fputs ("+%l44(", file);
9731 else
9732 fputs ("+%lo(", file);
9733 output_address (VOIDmode, XEXP (addr, 1));
9734 fputc (')', file);
9735 }
9736 else if (flag_pic
9737 && GET_CODE (addr) == CONST
9738 && GET_CODE (XEXP (addr, 0)) == MINUS
9739 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9740 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9741 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9742 {
9743 addr = XEXP (addr, 0);
9744 output_addr_const (file, XEXP (addr, 0));
9745 /* Group the args of the second CONST in parenthesis. */
9746 fputs ("-(", file);
9747 /* Skip past the second CONST--it does nothing for us. */
9748 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9749 /* Close the parenthesis. */
9750 fputc (')', file);
9751 }
9752 else
9753 {
9754 output_addr_const (file, addr);
9755 }
9756 }
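/* Illustrative example (added for exposition; not in the original sources):
   given the common address forms, the routine above prints for instance

     (reg %fp)                          ->  %fp
     (plus (reg %fp) (const_int -8))    ->  %fp-8
     (plus (reg %o0) (reg %o1))         ->  %o0+%o1
     (lo_sum (reg %g1) (symbol_ref X))  ->  %g1+%lo(X)   (%l44 for -mcmodel=medmid)

   which matches the operand syntax expected by the SPARC assembler.  */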
9757 \f
9758 /* Target hook for assembling integer objects. The sparc version has
9759 special handling for aligned DI-mode objects. */
9760
9761 static bool
9762 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9763 {
9764 /* ??? We only output .xword's for symbols and only then in environments
9765 where the assembler can handle them. */
9766 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9767 {
9768 if (TARGET_V9)
9769 {
9770 assemble_integer_with_op ("\t.xword\t", x);
9771 return true;
9772 }
9773 else
9774 {
9775 assemble_aligned_integer (4, const0_rtx);
9776 assemble_aligned_integer (4, x);
9777 return true;
9778 }
9779 }
9780 return default_assemble_integer (x, size, aligned_p);
9781 }
9782 \f
9783 /* Return the value of a code used in the .proc pseudo-op that says
9784 what kind of result this function returns. For non-C types, we pick
9785 the closest C type. */
9786
9787 #ifndef SHORT_TYPE_SIZE
9788 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9789 #endif
9790
9791 #ifndef INT_TYPE_SIZE
9792 #define INT_TYPE_SIZE BITS_PER_WORD
9793 #endif
9794
9795 #ifndef LONG_TYPE_SIZE
9796 #define LONG_TYPE_SIZE BITS_PER_WORD
9797 #endif
9798
9799 #ifndef LONG_LONG_TYPE_SIZE
9800 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9801 #endif
9802
9803 #ifndef FLOAT_TYPE_SIZE
9804 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9805 #endif
9806
9807 #ifndef DOUBLE_TYPE_SIZE
9808 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9809 #endif
9810
9811 #ifndef LONG_DOUBLE_TYPE_SIZE
9812 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9813 #endif
9814
9815 unsigned long
9816 sparc_type_code (register tree type)
9817 {
9818 register unsigned long qualifiers = 0;
9819 register unsigned shift;
9820
9821 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9822 setting more, since some assemblers will give an error for this. Also,
9823 we must be careful to avoid shifts of 32 bits or more to avoid getting
9824 unpredictable results. */
9825
9826 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9827 {
9828 switch (TREE_CODE (type))
9829 {
9830 case ERROR_MARK:
9831 return qualifiers;
9832
9833 case ARRAY_TYPE:
9834 qualifiers |= (3 << shift);
9835 break;
9836
9837 case FUNCTION_TYPE:
9838 case METHOD_TYPE:
9839 qualifiers |= (2 << shift);
9840 break;
9841
9842 case POINTER_TYPE:
9843 case REFERENCE_TYPE:
9844 case OFFSET_TYPE:
9845 qualifiers |= (1 << shift);
9846 break;
9847
9848 case RECORD_TYPE:
9849 return (qualifiers | 8);
9850
9851 case UNION_TYPE:
9852 case QUAL_UNION_TYPE:
9853 return (qualifiers | 9);
9854
9855 case ENUMERAL_TYPE:
9856 return (qualifiers | 10);
9857
9858 case VOID_TYPE:
9859 return (qualifiers | 16);
9860
9861 case INTEGER_TYPE:
9862 /* If this is a range type, consider it to be the underlying
9863 type. */
9864 if (TREE_TYPE (type) != 0)
9865 break;
9866
9867 /* Carefully distinguish all the standard types of C,
9868 without messing up if the language is not C. We do this by
9869 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9870 look at both the names and the above fields, but that's redundant.
9871 Any type whose size is between two C types will be considered
9872 to be the wider of the two types. Also, we do not have a
9873 special code to use for "long long", so anything wider than
9874 long is treated the same. Note that we can't distinguish
9875 between "int" and "long" in this code if they are the same
9876 size, but that's fine, since neither can the assembler. */
9877
9878 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9879 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9880
9881 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9882 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9883
9884 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9885 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9886
9887 else
9888 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9889
9890 case REAL_TYPE:
9891 /* If this is a range type, consider it to be the underlying
9892 type. */
9893 if (TREE_TYPE (type) != 0)
9894 break;
9895
9896 /* Carefully distinguish all the standard types of C,
9897 without messing up if the language is not C. */
9898
9899 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9900 return (qualifiers | 6);
9901
9902 else
9903 return (qualifiers | 7);
9904
9905 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9906 /* ??? We need to distinguish between double and float complex types,
9907 but I don't know how yet because I can't reach this code from
9908 existing front-ends. */
9909 return (qualifiers | 7); /* Who knows? */
9910
9911 case VECTOR_TYPE:
9912 case BOOLEAN_TYPE: /* Boolean truth value type. */
9913 case LANG_TYPE:
9914 case NULLPTR_TYPE:
9915 return qualifiers;
9916
9917 default:
9918 gcc_unreachable (); /* Not a type! */
9919 }
9920 }
9921
9922 return qualifiers;
9923 }
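/* Illustrative example (added for exposition; not in the original sources):
   for a type like "unsigned int *", the loop above first sees the
   POINTER_TYPE at shift 6 and sets qualifiers to (1 << 6), then reaches the
   INTEGER_TYPE and returns (1 << 6) | 14 == 0x4e, i.e. "pointer to unsigned
   int" in the .proc encoding.  */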
9924 \f
9925 /* Nested function support. */
9926
9927 /* Emit RTL insns to initialize the variable parts of a trampoline.
9928 FNADDR is an RTX for the address of the function's pure code.
9929 CXT is an RTX for the static chain value for the function.
9930
9931 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9932 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9933 (to store insns). This is a bit excessive. Perhaps a different
9934 mechanism would be better here.
9935
9936 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9937
9938 static void
9939 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9940 {
9941 /* SPARC 32-bit trampoline:
9942
9943 sethi %hi(fn), %g1
9944 sethi %hi(static), %g2
9945 jmp %g1+%lo(fn)
9946 or %g2, %lo(static), %g2
9947
9948 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9949 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9950 */
9951
9952 emit_move_insn
9953 (adjust_address (m_tramp, SImode, 0),
9954 expand_binop (SImode, ior_optab,
9955 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9956 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9957 NULL_RTX, 1, OPTAB_DIRECT));
9958
9959 emit_move_insn
9960 (adjust_address (m_tramp, SImode, 4),
9961 expand_binop (SImode, ior_optab,
9962 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9963 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9964 NULL_RTX, 1, OPTAB_DIRECT));
9965
9966 emit_move_insn
9967 (adjust_address (m_tramp, SImode, 8),
9968 expand_binop (SImode, ior_optab,
9969 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9970 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9971 NULL_RTX, 1, OPTAB_DIRECT));
9972
9973 emit_move_insn
9974 (adjust_address (m_tramp, SImode, 12),
9975 expand_binop (SImode, ior_optab,
9976 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9977 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9978 NULL_RTX, 1, OPTAB_DIRECT));
9979
9980 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9981 aligned on a 16 byte boundary so one flush clears it all. */
9982 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9983 if (sparc_cpu != PROCESSOR_ULTRASPARC
9984 && sparc_cpu != PROCESSOR_ULTRASPARC3
9985 && sparc_cpu != PROCESSOR_NIAGARA
9986 && sparc_cpu != PROCESSOR_NIAGARA2
9987 && sparc_cpu != PROCESSOR_NIAGARA3
9988 && sparc_cpu != PROCESSOR_NIAGARA4
9989 && sparc_cpu != PROCESSOR_NIAGARA7
9990 && sparc_cpu != PROCESSOR_M8)
9991 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9992
9993 /* Call __enable_execute_stack after writing onto the stack to make sure
9994 the stack address is accessible. */
9995 #ifdef HAVE_ENABLE_EXECUTE_STACK
9996 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9997 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9998 #endif
9999
10000 }
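/* Editor's sketch (not in the original sources): the magic constants above
   are the fixed bits of the four instructions listed in the comment at the
   top of this function:

     0x03000000  sethi %hi(0), %g1     ORed with (fnaddr >> 10)
     0x05000000  sethi %hi(0), %g2     ORed with (cxt >> 10)
     0x81c06000  jmp   %g1 + 0         ORed with (fnaddr & 0x3ff)
     0x8410a000  or    %g2, 0, %g2     ORed with (cxt & 0x3ff)

   so each emit_move_insn above stores one complete instruction word of the
   trampoline.  */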
10001
10002 /* The 64-bit version is simpler because it makes more sense to load the
10003 values as "immediate" data out of the trampoline. It's also easier since
10004 we can read the PC without clobbering a register. */
10005
10006 static void
10007 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10008 {
10009 /* SPARC 64-bit trampoline:
10010
10011 rd %pc, %g1
10012 ldx [%g1+24], %g5
10013 jmp %g5
10014 ldx [%g1+16], %g5
10015 +16 bytes data
10016 */
10017
10018 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10019 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10020 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10021 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10022 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10023 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10024 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10025 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10026 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10027 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10028 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10029
10030 if (sparc_cpu != PROCESSOR_ULTRASPARC
10031 && sparc_cpu != PROCESSOR_ULTRASPARC3
10032 && sparc_cpu != PROCESSOR_NIAGARA
10033 && sparc_cpu != PROCESSOR_NIAGARA2
10034 && sparc_cpu != PROCESSOR_NIAGARA3
10035 && sparc_cpu != PROCESSOR_NIAGARA4
10036 && sparc_cpu != PROCESSOR_NIAGARA7
10037 && sparc_cpu != PROCESSOR_M8)
10038 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10039
10040 /* Call __enable_execute_stack after writing onto the stack to make sure
10041 the stack address is accessible. */
10042 #ifdef HAVE_ENABLE_EXECUTE_STACK
10043 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10044 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10045 #endif
10046 }
10047
10048 /* Worker for TARGET_TRAMPOLINE_INIT. */
10049
10050 static void
10051 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10052 {
10053 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10054 cxt = force_reg (Pmode, cxt);
10055 if (TARGET_ARCH64)
10056 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10057 else
10058 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10059 }
10060 \f
10061 /* Adjust the cost of a scheduling dependency. Return the new cost of
10062 a dependency of kind DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
10063
10064 static int
10065 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10066 int cost)
10067 {
10068 enum attr_type insn_type;
10069
10070 if (recog_memoized (insn) < 0)
10071 return cost;
10072
10073 insn_type = get_attr_type (insn);
10074
10075 if (dep_type == 0)
10076 {
10077 /* Data dependency; DEP_INSN writes a register that INSN reads some
10078 cycles later. */
10079
10080 /* If a load, then the dependence must be on the memory address;
10081 add an extra "cycle". Note that the cost could be two cycles
10082 if the reg was written late in an instruction group; we can't tell
10083 here. */
10084 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10085 return cost + 3;
10086
10087 /* Get the delay only if the address of the store is the dependence. */
10088 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10089 {
10090 rtx pat = PATTERN (insn);
10091 rtx dep_pat = PATTERN (dep_insn);
10092
10093 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10094 return cost; /* This should not happen! */
10095
10096 /* The dependency between the two instructions was on the data that
10097 is being stored. Assume that this implies that the address of the
10098 store is not dependent. */
10099 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10100 return cost;
10101
10102 return cost + 3; /* An approximation. */
10103 }
10104
10105 /* A shift instruction cannot receive its data from an instruction
10106 in the same cycle; add a one cycle penalty. */
10107 if (insn_type == TYPE_SHIFT)
10108 return cost + 3; /* Split before cascade into shift. */
10109 }
10110 else
10111 {
10112 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10113 INSN writes some cycles later. */
10114
10115 /* These are only significant for the fpu unit; writing a fp reg before
10116 the fpu has finished with it stalls the processor. */
10117
10118 /* Reusing an integer register causes no problems. */
10119 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10120 return 0;
10121 }
10122
10123 return cost;
10124 }
10125
10126 static int
10127 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10128 int cost)
10129 {
10130 enum attr_type insn_type, dep_type;
10131 rtx pat = PATTERN (insn);
10132 rtx dep_pat = PATTERN (dep_insn);
10133
10134 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10135 return cost;
10136
10137 insn_type = get_attr_type (insn);
10138 dep_type = get_attr_type (dep_insn);
10139
10140 switch (dtype)
10141 {
10142 case 0:
10143 /* Data dependency; DEP_INSN writes a register that INSN reads some
10144 cycles later. */
10145
10146 switch (insn_type)
10147 {
10148 case TYPE_STORE:
10149 case TYPE_FPSTORE:
10150 /* Get the delay iff the address of the store is the dependence. */
10151 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10152 return cost;
10153
10154 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10155 return cost;
10156 return cost + 3;
10157
10158 case TYPE_LOAD:
10159 case TYPE_SLOAD:
10160 case TYPE_FPLOAD:
10161 /* If a load, then the dependence must be on the memory address. If
10162 the addresses aren't equal, then it might be a false dependency. */
10163 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10164 {
10165 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10166 || GET_CODE (SET_DEST (dep_pat)) != MEM
10167 || GET_CODE (SET_SRC (pat)) != MEM
10168 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10169 XEXP (SET_SRC (pat), 0)))
10170 return cost + 2;
10171
10172 return cost + 8;
10173 }
10174 break;
10175
10176 case TYPE_BRANCH:
10177 /* Compare to branch latency is 0. There is no benefit from
10178 separating compare and branch. */
10179 if (dep_type == TYPE_COMPARE)
10180 return 0;
10181 /* Floating point compare to branch latency is less than
10182 compare to conditional move. */
10183 if (dep_type == TYPE_FPCMP)
10184 return cost - 1;
10185 break;
10186 default:
10187 break;
10188 }
10189 break;
10190
10191 case REG_DEP_ANTI:
10192 /* Anti-dependencies only penalize the fpu unit. */
10193 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10194 return 0;
10195 break;
10196
10197 default:
10198 break;
10199 }
10200
10201 return cost;
10202 }
10203
10204 static int
10205 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10206 unsigned int)
10207 {
10208 switch (sparc_cpu)
10209 {
10210 case PROCESSOR_SUPERSPARC:
10211 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10212 break;
10213 case PROCESSOR_HYPERSPARC:
10214 case PROCESSOR_SPARCLITE86X:
10215 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10216 break;
10217 default:
10218 break;
10219 }
10220 return cost;
10221 }
10222
10223 static void
10224 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10225 int sched_verbose ATTRIBUTE_UNUSED,
10226 int max_ready ATTRIBUTE_UNUSED)
10227 {}
10228
10229 static int
10230 sparc_use_sched_lookahead (void)
10231 {
10232 switch (sparc_cpu)
10233 {
10234 case PROCESSOR_ULTRASPARC:
10235 case PROCESSOR_ULTRASPARC3:
10236 return 4;
10237 case PROCESSOR_SUPERSPARC:
10238 case PROCESSOR_HYPERSPARC:
10239 case PROCESSOR_SPARCLITE86X:
10240 return 3;
10241 case PROCESSOR_NIAGARA4:
10242 case PROCESSOR_NIAGARA7:
10243 case PROCESSOR_M8:
10244 return 2;
10245 case PROCESSOR_NIAGARA:
10246 case PROCESSOR_NIAGARA2:
10247 case PROCESSOR_NIAGARA3:
10248 default:
10249 return 0;
10250 }
10251 }
10252
10253 static int
10254 sparc_issue_rate (void)
10255 {
10256 switch (sparc_cpu)
10257 {
10258 case PROCESSOR_ULTRASPARC:
10259 case PROCESSOR_ULTRASPARC3:
10260 case PROCESSOR_M8:
10261 return 4;
10262 case PROCESSOR_SUPERSPARC:
10263 return 3;
10264 case PROCESSOR_HYPERSPARC:
10265 case PROCESSOR_SPARCLITE86X:
10266 case PROCESSOR_V9:
10267 /* Assume V9 processors are capable of at least dual-issue. */
10268 case PROCESSOR_NIAGARA4:
10269 case PROCESSOR_NIAGARA7:
10270 return 2;
10271 case PROCESSOR_NIAGARA:
10272 case PROCESSOR_NIAGARA2:
10273 case PROCESSOR_NIAGARA3:
10274 default:
10275 return 1;
10276 }
10277 }
10278
10279 int
10280 sparc_branch_cost (bool speed_p, bool predictable_p)
10281 {
10282 if (!speed_p)
10283 return 2;
10284
10285 /* For pre-V9 processors we use a single value (usually 3) to take into
10286 account the potential annulling of the delay slot (which ends up being
10287 a bubble in the pipeline slot) plus a cycle to take into consideration
10288 the instruction cache effects.
10289
10290 On V9 and later processors, which have branch prediction facilities,
10291 we take into account whether the branch is (easily) predictable. */
10292 const int cost = sparc_costs->branch_cost;
10293
10294 switch (sparc_cpu)
10295 {
10296 case PROCESSOR_V9:
10297 case PROCESSOR_ULTRASPARC:
10298 case PROCESSOR_ULTRASPARC3:
10299 case PROCESSOR_NIAGARA:
10300 case PROCESSOR_NIAGARA2:
10301 case PROCESSOR_NIAGARA3:
10302 case PROCESSOR_NIAGARA4:
10303 case PROCESSOR_NIAGARA7:
10304 case PROCESSOR_M8:
10305 return cost + (predictable_p ? 0 : 2);
10306
10307 default:
10308 return cost;
10309 }
10310 }
10311
10312 static int
10313 set_extends (rtx_insn *insn)
10314 {
10315 register rtx pat = PATTERN (insn);
10316
10317 switch (GET_CODE (SET_SRC (pat)))
10318 {
10319 /* Load and some shift instructions zero extend. */
10320 case MEM:
10321 case ZERO_EXTEND:
10322 /* sethi clears the high bits */
10323 case HIGH:
10324 /* LO_SUM is used with sethi. sethi cleared the high
10325 bits and the values used with lo_sum are positive */
10326 case LO_SUM:
10327 /* Store flag stores 0 or 1 */
10328 case LT: case LTU:
10329 case GT: case GTU:
10330 case LE: case LEU:
10331 case GE: case GEU:
10332 case EQ:
10333 case NE:
10334 return 1;
10335 case AND:
10336 {
10337 rtx op0 = XEXP (SET_SRC (pat), 0);
10338 rtx op1 = XEXP (SET_SRC (pat), 1);
10339 if (GET_CODE (op1) == CONST_INT)
10340 return INTVAL (op1) >= 0;
10341 if (GET_CODE (op0) != REG)
10342 return 0;
10343 if (sparc_check_64 (op0, insn) == 1)
10344 return 1;
10345 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10346 }
10347 case IOR:
10348 case XOR:
10349 {
10350 rtx op0 = XEXP (SET_SRC (pat), 0);
10351 rtx op1 = XEXP (SET_SRC (pat), 1);
10352 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10353 return 0;
10354 if (GET_CODE (op1) == CONST_INT)
10355 return INTVAL (op1) >= 0;
10356 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10357 }
10358 case LSHIFTRT:
10359 return GET_MODE (SET_SRC (pat)) == SImode;
10360 /* Positive integers leave the high bits zero. */
10361 case CONST_INT:
10362 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10363 case ASHIFTRT:
10364 case SIGN_EXTEND:
10365 return - (GET_MODE (SET_SRC (pat)) == SImode);
10366 case REG:
10367 return sparc_check_64 (SET_SRC (pat), insn);
10368 default:
10369 return 0;
10370 }
10371 }
10372
10373 /* We _ought_ to have only one kind per function, but... */
10374 static GTY(()) rtx sparc_addr_diff_list;
10375 static GTY(()) rtx sparc_addr_list;
10376
10377 void
10378 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10379 {
10380 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10381 if (diff)
10382 sparc_addr_diff_list
10383 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10384 else
10385 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10386 }
10387
10388 static void
10389 sparc_output_addr_vec (rtx vec)
10390 {
10391 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10392 int idx, vlen = XVECLEN (body, 0);
10393
10394 #ifdef ASM_OUTPUT_ADDR_VEC_START
10395 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10396 #endif
10397
10398 #ifdef ASM_OUTPUT_CASE_LABEL
10399 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10400 NEXT_INSN (lab));
10401 #else
10402 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10403 #endif
10404
10405 for (idx = 0; idx < vlen; idx++)
10406 {
10407 ASM_OUTPUT_ADDR_VEC_ELT
10408 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10409 }
10410
10411 #ifdef ASM_OUTPUT_ADDR_VEC_END
10412 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10413 #endif
10414 }
10415
10416 static void
10417 sparc_output_addr_diff_vec (rtx vec)
10418 {
10419 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10420 rtx base = XEXP (XEXP (body, 0), 0);
10421 int idx, vlen = XVECLEN (body, 1);
10422
10423 #ifdef ASM_OUTPUT_ADDR_VEC_START
10424 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10425 #endif
10426
10427 #ifdef ASM_OUTPUT_CASE_LABEL
10428 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10429 NEXT_INSN (lab));
10430 #else
10431 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10432 #endif
10433
10434 for (idx = 0; idx < vlen; idx++)
10435 {
10436 ASM_OUTPUT_ADDR_DIFF_ELT
10437 (asm_out_file,
10438 body,
10439 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10440 CODE_LABEL_NUMBER (base));
10441 }
10442
10443 #ifdef ASM_OUTPUT_ADDR_VEC_END
10444 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10445 #endif
10446 }
10447
10448 static void
10449 sparc_output_deferred_case_vectors (void)
10450 {
10451 rtx t;
10452 int align;
10453
10454 if (sparc_addr_list == NULL_RTX
10455 && sparc_addr_diff_list == NULL_RTX)
10456 return;
10457
10458 /* Align to cache line in the function's code section. */
10459 switch_to_section (current_function_section ());
10460
10461 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10462 if (align > 0)
10463 ASM_OUTPUT_ALIGN (asm_out_file, align);
10464
10465 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10466 sparc_output_addr_vec (XEXP (t, 0));
10467 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10468 sparc_output_addr_diff_vec (XEXP (t, 0));
10469
10470 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10471 }
10472
10473 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10474 unknown. Return 1 if the high bits are zero, -1 if the register is
10475 sign extended. */
10476 int
10477 sparc_check_64 (rtx x, rtx_insn *insn)
10478 {
10479 /* If a register is set only once it is safe to ignore insns this
10480 code does not know how to handle. The loop will either recognize
10481 the single set and return the correct value or fail to recognize
10482 it and return 0. */
10483 int set_once = 0;
10484 rtx y = x;
10485
10486 gcc_assert (GET_CODE (x) == REG);
10487
10488 if (GET_MODE (x) == DImode)
10489 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10490
10491 if (flag_expensive_optimizations
10492 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10493 set_once = 1;
10494
10495 if (insn == 0)
10496 {
10497 if (set_once)
10498 insn = get_last_insn_anywhere ();
10499 else
10500 return 0;
10501 }
10502
10503 while ((insn = PREV_INSN (insn)))
10504 {
10505 switch (GET_CODE (insn))
10506 {
10507 case JUMP_INSN:
10508 case NOTE:
10509 break;
10510 case CODE_LABEL:
10511 case CALL_INSN:
10512 default:
10513 if (! set_once)
10514 return 0;
10515 break;
10516 case INSN:
10517 {
10518 rtx pat = PATTERN (insn);
10519 if (GET_CODE (pat) != SET)
10520 return 0;
10521 if (rtx_equal_p (x, SET_DEST (pat)))
10522 return set_extends (insn);
10523 if (y && rtx_equal_p (y, SET_DEST (pat)))
10524 return set_extends (insn);
10525 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10526 return 0;
10527 }
10528 }
10529 }
10530 return 0;
10531 }
10532
10533 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10534 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10535
10536 const char *
10537 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10538 {
10539 static char asm_code[60];
10540
10541 /* The scratch register is only required when the destination
10542 register is not a 64-bit global or out register. */
10543 if (which_alternative != 2)
10544 operands[3] = operands[0];
10545
10546 /* We can only shift by constants <= 63. */
10547 if (GET_CODE (operands[2]) == CONST_INT)
10548 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10549
10550 if (GET_CODE (operands[1]) == CONST_INT)
10551 {
10552 output_asm_insn ("mov\t%1, %3", operands);
10553 }
10554 else
10555 {
10556 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10557 if (sparc_check_64 (operands[1], insn) <= 0)
10558 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10559 output_asm_insn ("or\t%L1, %3, %3", operands);
10560 }
10561
10562 strcpy (asm_code, opcode);
10563
10564 if (which_alternative != 2)
10565 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10566 else
10567 return
10568 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10569 }
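/* Editor's sketch (not in the original sources): for the common alternative
   where the destination also serves as the scratch (%3 == %0) and the
   64-bit input lives in a register pair, a "sllx" shift comes out roughly as

     sllx  %H1, 32, %0     ! high word into the upper 32 bits
     srl   %L1, 0, %L1     ! zero-extend the low word if not known clean
     or    %L1, %0, %0     ! assemble the full 64-bit value
     sllx  %0, %2, %L0     ! the actual shift
     srlx  %L0, 32, %H0    ! split the result back into the pair  */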
10570 \f
10571 /* Output rtl to increment the profiler label LABELNO
10572 for profiling a function entry. */
10573
10574 void
10575 sparc_profile_hook (int labelno)
10576 {
10577 char buf[32];
10578 rtx lab, fun;
10579
10580 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10581 if (NO_PROFILE_COUNTERS)
10582 {
10583 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10584 }
10585 else
10586 {
10587 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10588 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10589 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10590 }
10591 }
10592 \f
10593 #ifdef TARGET_SOLARIS
10594 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10595
10596 static void
10597 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10598 tree decl ATTRIBUTE_UNUSED)
10599 {
10600 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10601 {
10602 solaris_elf_asm_comdat_section (name, flags, decl);
10603 return;
10604 }
10605
10606 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10607
10608 if (!(flags & SECTION_DEBUG))
10609 fputs (",#alloc", asm_out_file);
10610 #if HAVE_GAS_SECTION_EXCLUDE
10611 if (flags & SECTION_EXCLUDE)
10612 fputs (",#exclude", asm_out_file);
10613 #endif
10614 if (flags & SECTION_WRITE)
10615 fputs (",#write", asm_out_file);
10616 if (flags & SECTION_TLS)
10617 fputs (",#tls", asm_out_file);
10618 if (flags & SECTION_CODE)
10619 fputs (",#execinstr", asm_out_file);
10620
10621 if (flags & SECTION_NOTYPE)
10622 ;
10623 else if (flags & SECTION_BSS)
10624 fputs (",#nobits", asm_out_file);
10625 else
10626 fputs (",#progbits", asm_out_file);
10627
10628 fputc ('\n', asm_out_file);
10629 }
10630 #endif /* TARGET_SOLARIS */
10631
10632 /* We do not allow indirect calls to be optimized into sibling calls.
10633
10634 We cannot use sibling calls when delayed branches are disabled
10635 because they will likely require the call delay slot to be filled.
10636
10637 Also, on SPARC 32-bit we cannot emit a sibling call when the
10638 current function returns a structure. This is because the "unimp
10639 after call" convention would cause the callee to return to the
10640 wrong place. The generic code already disallows cases where the
10641 function being called returns a structure.
10642
10643 It may seem strange how this last case could occur. Usually there
10644 is code after the call which jumps to epilogue code which dumps the
10645 return value into the struct return area. That ought to invalidate
10646 the sibling call right? Well, in the C++ case we can end up passing
10647 the pointer to the struct return area to a constructor (which returns
10648 void) and then nothing else happens. Such a sibling call would look
10649 valid without the added check here.
10650
10651 VxWorks PIC PLT entries require the global pointer to be initialized
10652 on entry. We therefore can't emit sibling calls to them. */
10653 static bool
10654 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10655 {
10656 return (decl
10657 && flag_delayed_branch
10658 && (TARGET_ARCH64 || ! cfun->returns_struct)
10659 && !(TARGET_VXWORKS_RTP
10660 && flag_pic
10661 && !targetm.binds_local_p (decl)));
10662 }
10663 \f
10664 /* libfunc renaming. */
10665
10666 static void
10667 sparc_init_libfuncs (void)
10668 {
10669 if (TARGET_ARCH32)
10670 {
10671 /* Use the subroutines that Sun's library provides for integer
10672 multiply and divide. The `*' prevents an underscore from
10673 being prepended by the compiler. .umul is a little faster
10674 than .mul. */
10675 set_optab_libfunc (smul_optab, SImode, "*.umul");
10676 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10677 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10678 set_optab_libfunc (smod_optab, SImode, "*.rem");
10679 set_optab_libfunc (umod_optab, SImode, "*.urem");
10680
10681 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10682 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10683 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10684 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10685 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10686 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10687
10688 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10689 is because with soft-float, the SFmode and DFmode sqrt
10690 instructions will be absent, and the compiler will notice and
10691 try to use the TFmode sqrt instruction for calls to the
10692 builtin function sqrt, but this fails. */
10693 if (TARGET_FPU)
10694 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10695
10696 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10697 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10698 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10699 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10700 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10701 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10702
10703 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10704 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10705 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10706 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10707
10708 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10709 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10710 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10711 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10712
10713 if (DITF_CONVERSION_LIBFUNCS)
10714 {
10715 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10716 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10717 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10718 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10719 }
10720
10721 if (SUN_CONVERSION_LIBFUNCS)
10722 {
10723 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10724 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10725 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10726 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10727 }
10728 }
10729 if (TARGET_ARCH64)
10730 {
10731 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10732 do not exist in the library. Make sure the compiler does not
10733 emit calls to them by accident. (It should always use the
10734 hardware instructions.) */
10735 set_optab_libfunc (smul_optab, SImode, 0);
10736 set_optab_libfunc (sdiv_optab, SImode, 0);
10737 set_optab_libfunc (udiv_optab, SImode, 0);
10738 set_optab_libfunc (smod_optab, SImode, 0);
10739 set_optab_libfunc (umod_optab, SImode, 0);
10740
10741 if (SUN_INTEGER_MULTIPLY_64)
10742 {
10743 set_optab_libfunc (smul_optab, DImode, "__mul64");
10744 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10745 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10746 set_optab_libfunc (smod_optab, DImode, "__rem64");
10747 set_optab_libfunc (umod_optab, DImode, "__urem64");
10748 }
10749
10750 if (SUN_CONVERSION_LIBFUNCS)
10751 {
10752 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10753 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10754 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10755 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10756 }
10757 }
10758 }
10759 \f
10760 /* SPARC builtins. */
10761 enum sparc_builtins
10762 {
10763 /* FPU builtins. */
10764 SPARC_BUILTIN_LDFSR,
10765 SPARC_BUILTIN_STFSR,
10766
10767 /* VIS 1.0 builtins. */
10768 SPARC_BUILTIN_FPACK16,
10769 SPARC_BUILTIN_FPACK32,
10770 SPARC_BUILTIN_FPACKFIX,
10771 SPARC_BUILTIN_FEXPAND,
10772 SPARC_BUILTIN_FPMERGE,
10773 SPARC_BUILTIN_FMUL8X16,
10774 SPARC_BUILTIN_FMUL8X16AU,
10775 SPARC_BUILTIN_FMUL8X16AL,
10776 SPARC_BUILTIN_FMUL8SUX16,
10777 SPARC_BUILTIN_FMUL8ULX16,
10778 SPARC_BUILTIN_FMULD8SUX16,
10779 SPARC_BUILTIN_FMULD8ULX16,
10780 SPARC_BUILTIN_FALIGNDATAV4HI,
10781 SPARC_BUILTIN_FALIGNDATAV8QI,
10782 SPARC_BUILTIN_FALIGNDATAV2SI,
10783 SPARC_BUILTIN_FALIGNDATADI,
10784 SPARC_BUILTIN_WRGSR,
10785 SPARC_BUILTIN_RDGSR,
10786 SPARC_BUILTIN_ALIGNADDR,
10787 SPARC_BUILTIN_ALIGNADDRL,
10788 SPARC_BUILTIN_PDIST,
10789 SPARC_BUILTIN_EDGE8,
10790 SPARC_BUILTIN_EDGE8L,
10791 SPARC_BUILTIN_EDGE16,
10792 SPARC_BUILTIN_EDGE16L,
10793 SPARC_BUILTIN_EDGE32,
10794 SPARC_BUILTIN_EDGE32L,
10795 SPARC_BUILTIN_FCMPLE16,
10796 SPARC_BUILTIN_FCMPLE32,
10797 SPARC_BUILTIN_FCMPNE16,
10798 SPARC_BUILTIN_FCMPNE32,
10799 SPARC_BUILTIN_FCMPGT16,
10800 SPARC_BUILTIN_FCMPGT32,
10801 SPARC_BUILTIN_FCMPEQ16,
10802 SPARC_BUILTIN_FCMPEQ32,
10803 SPARC_BUILTIN_FPADD16,
10804 SPARC_BUILTIN_FPADD16S,
10805 SPARC_BUILTIN_FPADD32,
10806 SPARC_BUILTIN_FPADD32S,
10807 SPARC_BUILTIN_FPSUB16,
10808 SPARC_BUILTIN_FPSUB16S,
10809 SPARC_BUILTIN_FPSUB32,
10810 SPARC_BUILTIN_FPSUB32S,
10811 SPARC_BUILTIN_ARRAY8,
10812 SPARC_BUILTIN_ARRAY16,
10813 SPARC_BUILTIN_ARRAY32,
10814
10815 /* VIS 2.0 builtins. */
10816 SPARC_BUILTIN_EDGE8N,
10817 SPARC_BUILTIN_EDGE8LN,
10818 SPARC_BUILTIN_EDGE16N,
10819 SPARC_BUILTIN_EDGE16LN,
10820 SPARC_BUILTIN_EDGE32N,
10821 SPARC_BUILTIN_EDGE32LN,
10822 SPARC_BUILTIN_BMASK,
10823 SPARC_BUILTIN_BSHUFFLEV4HI,
10824 SPARC_BUILTIN_BSHUFFLEV8QI,
10825 SPARC_BUILTIN_BSHUFFLEV2SI,
10826 SPARC_BUILTIN_BSHUFFLEDI,
10827
10828 /* VIS 3.0 builtins. */
10829 SPARC_BUILTIN_CMASK8,
10830 SPARC_BUILTIN_CMASK16,
10831 SPARC_BUILTIN_CMASK32,
10832 SPARC_BUILTIN_FCHKSM16,
10833 SPARC_BUILTIN_FSLL16,
10834 SPARC_BUILTIN_FSLAS16,
10835 SPARC_BUILTIN_FSRL16,
10836 SPARC_BUILTIN_FSRA16,
10837 SPARC_BUILTIN_FSLL32,
10838 SPARC_BUILTIN_FSLAS32,
10839 SPARC_BUILTIN_FSRL32,
10840 SPARC_BUILTIN_FSRA32,
10841 SPARC_BUILTIN_PDISTN,
10842 SPARC_BUILTIN_FMEAN16,
10843 SPARC_BUILTIN_FPADD64,
10844 SPARC_BUILTIN_FPSUB64,
10845 SPARC_BUILTIN_FPADDS16,
10846 SPARC_BUILTIN_FPADDS16S,
10847 SPARC_BUILTIN_FPSUBS16,
10848 SPARC_BUILTIN_FPSUBS16S,
10849 SPARC_BUILTIN_FPADDS32,
10850 SPARC_BUILTIN_FPADDS32S,
10851 SPARC_BUILTIN_FPSUBS32,
10852 SPARC_BUILTIN_FPSUBS32S,
10853 SPARC_BUILTIN_FUCMPLE8,
10854 SPARC_BUILTIN_FUCMPNE8,
10855 SPARC_BUILTIN_FUCMPGT8,
10856 SPARC_BUILTIN_FUCMPEQ8,
10857 SPARC_BUILTIN_FHADDS,
10858 SPARC_BUILTIN_FHADDD,
10859 SPARC_BUILTIN_FHSUBS,
10860 SPARC_BUILTIN_FHSUBD,
10861 SPARC_BUILTIN_FNHADDS,
10862 SPARC_BUILTIN_FNHADDD,
10863 SPARC_BUILTIN_UMULXHI,
10864 SPARC_BUILTIN_XMULX,
10865 SPARC_BUILTIN_XMULXHI,
10866
10867 /* VIS 4.0 builtins. */
10868 SPARC_BUILTIN_FPADD8,
10869 SPARC_BUILTIN_FPADDS8,
10870 SPARC_BUILTIN_FPADDUS8,
10871 SPARC_BUILTIN_FPADDUS16,
10872 SPARC_BUILTIN_FPCMPLE8,
10873 SPARC_BUILTIN_FPCMPGT8,
10874 SPARC_BUILTIN_FPCMPULE16,
10875 SPARC_BUILTIN_FPCMPUGT16,
10876 SPARC_BUILTIN_FPCMPULE32,
10877 SPARC_BUILTIN_FPCMPUGT32,
10878 SPARC_BUILTIN_FPMAX8,
10879 SPARC_BUILTIN_FPMAX16,
10880 SPARC_BUILTIN_FPMAX32,
10881 SPARC_BUILTIN_FPMAXU8,
10882 SPARC_BUILTIN_FPMAXU16,
10883 SPARC_BUILTIN_FPMAXU32,
10884 SPARC_BUILTIN_FPMIN8,
10885 SPARC_BUILTIN_FPMIN16,
10886 SPARC_BUILTIN_FPMIN32,
10887 SPARC_BUILTIN_FPMINU8,
10888 SPARC_BUILTIN_FPMINU16,
10889 SPARC_BUILTIN_FPMINU32,
10890 SPARC_BUILTIN_FPSUB8,
10891 SPARC_BUILTIN_FPSUBS8,
10892 SPARC_BUILTIN_FPSUBUS8,
10893 SPARC_BUILTIN_FPSUBUS16,
10894
10895 /* VIS 4.0B builtins. */
10896
10897 /* Note that all the DICTUNPACK* entries should be kept
10898 contiguous. */
10899 SPARC_BUILTIN_FIRST_DICTUNPACK,
10900 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10901 SPARC_BUILTIN_DICTUNPACK16,
10902 SPARC_BUILTIN_DICTUNPACK32,
10903 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10904
10905 /* Note that all the FPCMP*SHL entries should be kept
10906 contiguous. */
10907 SPARC_BUILTIN_FIRST_FPCMPSHL,
10908 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10909 SPARC_BUILTIN_FPCMPGT8SHL,
10910 SPARC_BUILTIN_FPCMPEQ8SHL,
10911 SPARC_BUILTIN_FPCMPNE8SHL,
10912 SPARC_BUILTIN_FPCMPLE16SHL,
10913 SPARC_BUILTIN_FPCMPGT16SHL,
10914 SPARC_BUILTIN_FPCMPEQ16SHL,
10915 SPARC_BUILTIN_FPCMPNE16SHL,
10916 SPARC_BUILTIN_FPCMPLE32SHL,
10917 SPARC_BUILTIN_FPCMPGT32SHL,
10918 SPARC_BUILTIN_FPCMPEQ32SHL,
10919 SPARC_BUILTIN_FPCMPNE32SHL,
10920 SPARC_BUILTIN_FPCMPULE8SHL,
10921 SPARC_BUILTIN_FPCMPUGT8SHL,
10922 SPARC_BUILTIN_FPCMPULE16SHL,
10923 SPARC_BUILTIN_FPCMPUGT16SHL,
10924 SPARC_BUILTIN_FPCMPULE32SHL,
10925 SPARC_BUILTIN_FPCMPUGT32SHL,
10926 SPARC_BUILTIN_FPCMPDE8SHL,
10927 SPARC_BUILTIN_FPCMPDE16SHL,
10928 SPARC_BUILTIN_FPCMPDE32SHL,
10929 SPARC_BUILTIN_FPCMPUR8SHL,
10930 SPARC_BUILTIN_FPCMPUR16SHL,
10931 SPARC_BUILTIN_FPCMPUR32SHL,
10932 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10933
10934 SPARC_BUILTIN_MAX
10935 };
10936
10937 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10938 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10939
10940 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10941 The instruction should require a constant operand of some sort. The
10942 function prints an error if OPVAL is not valid. */
10943
10944 static int
10945 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10946 {
10947 if (GET_CODE (opval) != CONST_INT)
10948 {
10949 error ("%qs expects a constant argument", insn_data[icode].name);
10950 return false;
10951 }
10952
10953 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10954 {
10955 error ("constant argument out of range for %qs", insn_data[icode].name);
10956 return false;
10957 }
10958 return true;
10959 }
10960
10961 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10962 function decl or NULL_TREE if the builtin was not added. */
10963
10964 static tree
10965 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10966 tree type)
10967 {
10968 tree t
10969 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10970
10971 if (t)
10972 {
10973 sparc_builtins[code] = t;
10974 sparc_builtins_icode[code] = icode;
10975 }
10976
10977 return t;
10978 }
10979
10980 /* Likewise, but also marks the function as "const". */
10981
10982 static tree
10983 def_builtin_const (const char *name, enum insn_code icode,
10984 enum sparc_builtins code, tree type)
10985 {
10986 tree t = def_builtin (name, icode, code, type);
10987
10988 if (t)
10989 TREE_READONLY (t) = 1;
10990
10991 return t;
10992 }
10993
10994 /* Implement the TARGET_INIT_BUILTINS target hook.
10995 Create builtin functions for special SPARC instructions. */
10996
10997 static void
10998 sparc_init_builtins (void)
10999 {
11000 if (TARGET_FPU)
11001 sparc_fpu_init_builtins ();
11002
11003 if (TARGET_VIS)
11004 sparc_vis_init_builtins ();
11005 }
11006
11007 /* Create builtin functions for FPU instructions. */
11008
11009 static void
11010 sparc_fpu_init_builtins (void)
11011 {
11012 tree ftype
11013 = build_function_type_list (void_type_node,
11014 build_pointer_type (unsigned_type_node), 0);
11015 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11016 SPARC_BUILTIN_LDFSR, ftype);
11017 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11018 SPARC_BUILTIN_STFSR, ftype);
11019 }
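/* Illustrative usage (added for exposition; "new_bits" below is only a
   placeholder): both builtins take a pointer to a 32-bit word; a typical
   sequence reads the FSR into memory with __builtin_store_fsr (stfsr) and
   writes it back with __builtin_load_fsr (ldfsr):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   // FSR -> fsr
     fsr |= new_bits;
     __builtin_load_fsr (&fsr);    // fsr -> FSR  */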
11020
11021 /* Create builtin functions for VIS instructions. */
11022
11023 static void
11024 sparc_vis_init_builtins (void)
11025 {
11026 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11027 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11028 tree v4hi = build_vector_type (intHI_type_node, 4);
11029 tree v2hi = build_vector_type (intHI_type_node, 2);
11030 tree v2si = build_vector_type (intSI_type_node, 2);
11031 tree v1si = build_vector_type (intSI_type_node, 1);
11032
11033 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11034 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11035 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11036 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11037 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11038 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11039 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11040 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11041 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11042 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11043 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11044 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11045 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11046 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11047 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11048 v8qi, v8qi,
11049 intDI_type_node, 0);
11050 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11051 v8qi, v8qi, 0);
11052 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11053 v8qi, v8qi, 0);
11054 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11055 intSI_type_node, 0);
11056 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11057 intSI_type_node, 0);
11058 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11059 intSI_type_node, 0);
11060 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11061 intDI_type_node,
11062 intDI_type_node, 0);
11063 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11064 intSI_type_node,
11065 intSI_type_node, 0);
11066 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11067 ptr_type_node,
11068 intSI_type_node, 0);
11069 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11070 ptr_type_node,
11071 intDI_type_node, 0);
11072 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11073 ptr_type_node,
11074 ptr_type_node, 0);
11075 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11076 ptr_type_node,
11077 ptr_type_node, 0);
11078 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11079 v4hi, v4hi, 0);
11080 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11081 v2si, v2si, 0);
11082 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11083 v4hi, v4hi, 0);
11084 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11085 v2si, v2si, 0);
11086 tree void_ftype_di = build_function_type_list (void_type_node,
11087 intDI_type_node, 0);
11088 tree di_ftype_void = build_function_type_list (intDI_type_node,
11089 void_type_node, 0);
11090 tree void_ftype_si = build_function_type_list (void_type_node,
11091 intSI_type_node, 0);
11092 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11093 float_type_node,
11094 float_type_node, 0);
11095 tree df_ftype_df_df = build_function_type_list (double_type_node,
11096 double_type_node,
11097 double_type_node, 0);
11098
11099 /* Packing and expanding vectors. */
11100 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11101 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11102 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11103 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11104 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11105 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11106 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11107 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11108 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11109 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11110
11111 /* Multiplications. */
11112 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11113 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11114 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11115 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11116 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11117 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11118 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11119 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11120 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11121 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11122 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11123 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11124 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11125 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11126
11127 /* Data aligning. */
11128 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11129 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11130 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11131 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11132 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11133 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11134 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11135 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11136
11137 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11138 SPARC_BUILTIN_WRGSR, void_ftype_di);
11139 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11140 SPARC_BUILTIN_RDGSR, di_ftype_void);
11141
11142 if (TARGET_ARCH64)
11143 {
11144 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11145 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11146 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11147 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11148 }
11149 else
11150 {
11151 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11152 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11153 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11154 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11155 }
11156
11157 /* Pixel distance. */
11158 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11159 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11160
11161 /* Edge handling. */
11162 if (TARGET_ARCH64)
11163 {
11164 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11165 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11166 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11167 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11168 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11169 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11170 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11171 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11172 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11173 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11174 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11175 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11176 }
11177 else
11178 {
11179 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11180 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11181 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11182 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11183 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11184 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11185 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11186 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11187 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11188 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11189 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11190 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11191 }
11192
11193 /* Pixel compare. */
11194 if (TARGET_ARCH64)
11195 {
11196 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11197 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11198 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11199 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11200 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11201 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11202 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11203 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11204 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11205 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11206 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11207 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11208 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11209 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11210 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11211 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11212 }
11213 else
11214 {
11215 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11216 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11217 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11218 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11219 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11220 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11221 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11222 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11223 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11224 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11225 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11226 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11227 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11228 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11229 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11230 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11231 }
11232
11233 /* Addition and subtraction. */
11234 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11235 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11236 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11237 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11238 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11239 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11240 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11241 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11242 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11243 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11244 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11245 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11246 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11247 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11248 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11249 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11250
11251 /* Three-dimensional array addressing. */
11252 if (TARGET_ARCH64)
11253 {
11254 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11255 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11256 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11257 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11258 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11259 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11260 }
11261 else
11262 {
11263 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11264 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11265 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11266 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11267 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11268 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11269 }
11270
11271 if (TARGET_VIS2)
11272 {
11273 /* Edge handling. */
11274 if (TARGET_ARCH64)
11275 {
11276 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11277 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11278 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11279 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11280 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11281 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11282 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11283 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11284 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11285 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11286 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11287 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11288 }
11289 else
11290 {
11291 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11292 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11293 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11294 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11295 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11296 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11297 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11298 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11299 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11300 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11301 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11302 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11303 }
11304
11305 /* Byte mask and shuffle. */
11306 if (TARGET_ARCH64)
11307 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11308 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11309 else
11310 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11311 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11312 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11313 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11314 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11315 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11316 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11317 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11318 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11319 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11320 }
11321
11322 if (TARGET_VIS3)
11323 {
11324 if (TARGET_ARCH64)
11325 {
11326 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11327 SPARC_BUILTIN_CMASK8, void_ftype_di);
11328 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11329 SPARC_BUILTIN_CMASK16, void_ftype_di);
11330 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11331 SPARC_BUILTIN_CMASK32, void_ftype_di);
11332 }
11333 else
11334 {
11335 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11336 SPARC_BUILTIN_CMASK8, void_ftype_si);
11337 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11338 SPARC_BUILTIN_CMASK16, void_ftype_si);
11339 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11340 SPARC_BUILTIN_CMASK32, void_ftype_si);
11341 }
11342
11343 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11344 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11345
11346 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11347 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11348 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11349 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11350 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11351 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11352 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11353 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11354 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11355 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11356 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11357 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11358 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11359 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11360 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11361 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11362
11363 if (TARGET_ARCH64)
11364 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11365 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11366 else
11367 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11368 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11369
11370 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11371 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11372 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11373 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11374 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11375 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11376
11377 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11378 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11379 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11380 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11381 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11382 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11383 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11384 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11385 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11386 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11387 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11388 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11389 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11390 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11391 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11392 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11393
11394 if (TARGET_ARCH64)
11395 {
11396 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11397 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11398 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11399 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11400 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11401 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11402 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11403 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11404 }
11405 else
11406 {
11407 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11408 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11409 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11410 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11411 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11412 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11413 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11414 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11415 }
11416
11417 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11418 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11419 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11420 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11421 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11422 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11423 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11424 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11425 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11426 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11427 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11428 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11429
11430 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11431 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11432 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11433 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11434 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11435 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11436 }
11437
11438 if (TARGET_VIS4)
11439 {
11440 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11441 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11442 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11443 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11444 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11445 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11446 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11447 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11448
11449
11450 if (TARGET_ARCH64)
11451 {
11452 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11453 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11454 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11455 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11456 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11457 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11458 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11459 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11460 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11461 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11462 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11463 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11464 }
11465 else
11466 {
11467 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11468 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11469 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11470 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11471 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11472 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11473 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11474 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11475 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11476 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11477 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11478 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11479 }
11480
11481 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11482 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11483 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11484 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11485 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11486 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11487 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11488 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11489 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11490 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11491 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11492 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11493 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11494 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11495 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11496 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11497 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11498 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11499 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11500 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11501 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11502 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11503 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11504 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11505 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11506 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11507 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11508 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11509 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11510 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11511 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11512 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11513 }
11514
11515 if (TARGET_VIS4B)
11516 {
11517 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11518 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11519 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11520 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11521 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11522 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11523
11524 if (TARGET_ARCH64)
11525 {
11526 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11527 v8qi, v8qi,
11528 intSI_type_node, 0);
11529 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11530 v4hi, v4hi,
11531 intSI_type_node, 0);
11532 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11533 v2si, v2si,
11534 intSI_type_node, 0);
11535
11536 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11537 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11538 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11539 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11540 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11541 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11542 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11543 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11544
11545 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11546 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11547 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11548 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11549 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11550 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11551 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11552 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11553
11554 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11555 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11556 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11557 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11558 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11559 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11560 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11561 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11562
11563
11564 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11565 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11566 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11567 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11568
11569 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11570 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11571 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11572 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11573
11574 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11575 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11576 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11577 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11578
11579 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11580 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11581 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11582 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11583 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11584 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11585
11586 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11587 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11588 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11589 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11590 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11591 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11592
11593 }
11594 else
11595 {
11596 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11597 v8qi, v8qi,
11598 intSI_type_node, 0);
11599 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11600 v4hi, v4hi,
11601 intSI_type_node, 0);
11602 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11603 v2si, v2si,
11604 intSI_type_node, 0);
11605
11606 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11607 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11608 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11609 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11610 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11611 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11612 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11613 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11614
11615 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11616 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11617 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11618 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11619 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11620 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11621 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11622 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11623
11624 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11625 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11626 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11627 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11628 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11629 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11630 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11631 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11632
11633
11634 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11635 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11636 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11637 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11638
11639 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11640 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11641 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11642 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11643
11644 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11645 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11646 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11647 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11648
11649 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11650 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11651 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11652 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11653 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11654 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11655
11656 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11657 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11658 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11659 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11660 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11661 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11662 }
11663 }
11664 }
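
/* Illustrative usage sketch (added comment, not part of the original
   sources): user code compiled with -mvis can call the builtins
   registered above directly, e.g.

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add_pixels (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);  // partitioned 16-bit add
     }

   which expands through CODE_FOR_addv4hi3 to a single fpadd16
   instruction.  */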
11665
11666 /* Implement TARGET_BUILTIN_DECL hook. */
11667
11668 static tree
11669 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11670 {
11671 if (code >= SPARC_BUILTIN_MAX)
11672 return error_mark_node;
11673
11674 return sparc_builtins[code];
11675 }
11676
11677 /* Implement TARGET_EXPAND_BUILTIN hook. */
11678
11679 static rtx
11680 sparc_expand_builtin (tree exp, rtx target,
11681 rtx subtarget ATTRIBUTE_UNUSED,
11682 machine_mode tmode ATTRIBUTE_UNUSED,
11683 int ignore ATTRIBUTE_UNUSED)
11684 {
11685 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11686 enum sparc_builtins code
11687 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11688 enum insn_code icode = sparc_builtins_icode[code];
11689 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11690 call_expr_arg_iterator iter;
11691 int arg_count = 0;
11692 rtx pat, op[4];
11693 tree arg;
11694
11695 if (nonvoid)
11696 {
11697 machine_mode tmode = insn_data[icode].operand[0].mode;
11698 if (!target
11699 || GET_MODE (target) != tmode
11700 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11701 op[0] = gen_reg_rtx (tmode);
11702 else
11703 op[0] = target;
11704 }
11705 else
11706 op[0] = NULL_RTX;
11707
11708 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11709 {
11710 const struct insn_operand_data *insn_op;
11711 int idx;
11712
11713 if (arg == error_mark_node)
11714 return NULL_RTX;
11715
11716 arg_count++;
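/* Operand 0 of the insn pattern is the output for non-void builtins,
   so the first source argument maps to operand 1 in that case and to
   operand 0 for void builtins.  */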
11717 idx = arg_count - !nonvoid;
11718 insn_op = &insn_data[icode].operand[idx];
11719 op[arg_count] = expand_normal (arg);
11720
11721 /* Some of the builtins require constant arguments. We check
11722 for this here. */
11723 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11724 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11725 && arg_count == 3)
11726 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11727 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11728 && arg_count == 2))
11729 {
11730 if (!check_constant_argument (icode, idx, op[arg_count]))
11731 return const0_rtx;
11732 }
11733
11734 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11735 {
11736 if (!address_operand (op[arg_count], SImode))
11737 {
11738 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11739 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11740 }
11741 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11742 }
11743
11744 else if (insn_op->mode == V1DImode
11745 && GET_MODE (op[arg_count]) == DImode)
11746 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11747
11748 else if (insn_op->mode == V1SImode
11749 && GET_MODE (op[arg_count]) == SImode)
11750 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11751
11752 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11753 insn_op->mode))
11754 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11755 }
11756
11757 switch (arg_count)
11758 {
11759 case 0:
11760 pat = GEN_FCN (icode) (op[0]);
11761 break;
11762 case 1:
11763 if (nonvoid)
11764 pat = GEN_FCN (icode) (op[0], op[1]);
11765 else
11766 pat = GEN_FCN (icode) (op[1]);
11767 break;
11768 case 2:
11769 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11770 break;
11771 case 3:
11772 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11773 break;
11774 default:
11775 gcc_unreachable ();
11776 }
11777
11778 if (!pat)
11779 return NULL_RTX;
11780
11781 emit_insn (pat);
11782
11783 return (nonvoid ? op[0] : const0_rtx);
11784 }
11785
11786 /* Return the upper 16 bits of the 8x16 multiplication. */
11787
11788 static int
11789 sparc_vis_mul8x16 (int e8, int e16)
11790 {
11791 return (e8 * e16 + 128) / 256;
11792 }
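
/* For example, e8 = 200 and e16 = 300 give (200 * 300 + 128) / 256 = 234,
   i.e. the 24-bit product rounded to the nearest multiple of 256 and scaled
   down, which is the 16-bit result the fmul8x16 family produces for that
   element (worked example added for illustration).  */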
11793
11794 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11795 the result into the vector N_ELTS, whose elements are of INNER_TYPE. */
11796
11797 static void
11798 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11799 tree inner_type, tree cst0, tree cst1)
11800 {
11801 unsigned i, num = VECTOR_CST_NELTS (cst0);
11802 int scale;
11803
11804 switch (fncode)
11805 {
11806 case SPARC_BUILTIN_FMUL8X16:
11807 for (i = 0; i < num; ++i)
11808 {
11809 int val
11810 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11811 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11812 n_elts->quick_push (build_int_cst (inner_type, val));
11813 }
11814 break;
11815
11816 case SPARC_BUILTIN_FMUL8X16AU:
11817 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11818
11819 for (i = 0; i < num; ++i)
11820 {
11821 int val
11822 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11823 scale);
11824 n_elts->quick_push (build_int_cst (inner_type, val));
11825 }
11826 break;
11827
11828 case SPARC_BUILTIN_FMUL8X16AL:
11829 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11830
11831 for (i = 0; i < num; ++i)
11832 {
11833 int val
11834 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11835 scale);
11836 n_elts->quick_push (build_int_cst (inner_type, val));
11837 }
11838 break;
11839
11840 default:
11841 gcc_unreachable ();
11842 }
11843 }
11844
11845 /* Implement TARGET_FOLD_BUILTIN hook.
11846
11847 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11848 result of the function call is ignored. NULL_TREE is returned if the
11849 function could not be folded. */
11850
11851 static tree
11852 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11853 tree *args, bool ignore)
11854 {
11855 enum sparc_builtins code
11856 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11857 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11858 tree arg0, arg1, arg2;
11859
11860 if (ignore)
11861 switch (code)
11862 {
11863 case SPARC_BUILTIN_LDFSR:
11864 case SPARC_BUILTIN_STFSR:
11865 case SPARC_BUILTIN_ALIGNADDR:
11866 case SPARC_BUILTIN_WRGSR:
11867 case SPARC_BUILTIN_BMASK:
11868 case SPARC_BUILTIN_CMASK8:
11869 case SPARC_BUILTIN_CMASK16:
11870 case SPARC_BUILTIN_CMASK32:
11871 break;
11872
11873 default:
11874 return build_zero_cst (rtype);
11875 }
11876
11877 switch (code)
11878 {
11879 case SPARC_BUILTIN_FEXPAND:
11880 arg0 = args[0];
11881 STRIP_NOPS (arg0);
11882
11883 if (TREE_CODE (arg0) == VECTOR_CST)
11884 {
11885 tree inner_type = TREE_TYPE (rtype);
11886 unsigned i;
11887
11888 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11889 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11890 {
11891 unsigned HOST_WIDE_INT val
11892 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11893 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11894 }
11895 return n_elts.build ();
11896 }
11897 break;
11898
11899 case SPARC_BUILTIN_FMUL8X16:
11900 case SPARC_BUILTIN_FMUL8X16AU:
11901 case SPARC_BUILTIN_FMUL8X16AL:
11902 arg0 = args[0];
11903 arg1 = args[1];
11904 STRIP_NOPS (arg0);
11905 STRIP_NOPS (arg1);
11906
11907 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11908 {
11909 tree inner_type = TREE_TYPE (rtype);
11910 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11911 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11912 return n_elts.build ();
11913 }
11914 break;
11915
11916 case SPARC_BUILTIN_FPMERGE:
11917 arg0 = args[0];
11918 arg1 = args[1];
11919 STRIP_NOPS (arg0);
11920 STRIP_NOPS (arg1);
11921
11922 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11923 {
11924 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11925 unsigned i;
11926 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11927 {
11928 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11929 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11930 }
11931
11932 return n_elts.build ();
11933 }
11934 break;
11935
11936 case SPARC_BUILTIN_PDIST:
11937 case SPARC_BUILTIN_PDISTN:
11938 arg0 = args[0];
11939 arg1 = args[1];
11940 STRIP_NOPS (arg0);
11941 STRIP_NOPS (arg1);
11942 if (code == SPARC_BUILTIN_PDIST)
11943 {
11944 arg2 = args[2];
11945 STRIP_NOPS (arg2);
11946 }
11947 else
11948 arg2 = integer_zero_node;
11949
11950 if (TREE_CODE (arg0) == VECTOR_CST
11951 && TREE_CODE (arg1) == VECTOR_CST
11952 && TREE_CODE (arg2) == INTEGER_CST)
11953 {
11954 bool overflow = false;
11955 widest_int result = wi::to_widest (arg2);
11956 widest_int tmp;
11957 unsigned i;
11958
11959 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11960 {
11961 tree e0 = VECTOR_CST_ELT (arg0, i);
11962 tree e1 = VECTOR_CST_ELT (arg1, i);
11963
11964 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11965
11966 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11967 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11968 if (wi::neg_p (tmp))
11969 tmp = wi::neg (tmp, &neg2_ovf);
11970 else
11971 neg2_ovf = wi::OVF_NONE;
11972 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11973 overflow |= ((neg1_ovf != wi::OVF_NONE)
11974 | (neg2_ovf != wi::OVF_NONE)
11975 | (add1_ovf != wi::OVF_NONE)
11976 | (add2_ovf != wi::OVF_NONE));
11977 }
11978
11979 gcc_assert (!overflow);
11980
11981 return wide_int_to_tree (rtype, result);
11982 }
11983
11984 default:
11985 break;
11986 }
11987
11988 return NULL_TREE;
11989 }
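
/* Constant-folding example (illustration only): with constant operands,
   a call such as __builtin_vis_fexpand ((v4qi) {1, 2, 3, 4}) is folded
   here to (v4hi) {16, 32, 48, 64}, each 8-bit element being shifted left
   by 4 into the 16-bit fixed-point format, exactly the val << 4
   computation in the FEXPAND case above.  */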
11990 \f
11991 /* ??? This duplicates information provided to the compiler by the
11992 ??? scheduler description. Some day, teach genautomata to output
11993 ??? the latencies and then CSE will just use that. */
11994
11995 static bool
11996 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11997 int opno ATTRIBUTE_UNUSED,
11998 int *total, bool speed ATTRIBUTE_UNUSED)
11999 {
12000 int code = GET_CODE (x);
12001 bool float_mode_p = FLOAT_MODE_P (mode);
12002
12003 switch (code)
12004 {
12005 case CONST_INT:
12006 if (SMALL_INT (x))
12007 *total = 0;
12008 else
12009 *total = 2;
12010 return true;
12011
12012 case CONST_WIDE_INT:
12013 *total = 0;
12014 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12015 *total += 2;
12016 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12017 *total += 2;
12018 return true;
12019
12020 case HIGH:
12021 *total = 2;
12022 return true;
12023
12024 case CONST:
12025 case LABEL_REF:
12026 case SYMBOL_REF:
12027 *total = 4;
12028 return true;
12029
12030 case CONST_DOUBLE:
12031 *total = 8;
12032 return true;
12033
12034 case MEM:
12035 /* If outer-code was a sign or zero extension, a cost
12036 of COSTS_N_INSNS (1) was already added in. This is
12037 why we are subtracting it back out. */
12038 if (outer_code == ZERO_EXTEND)
12039 {
12040 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12041 }
12042 else if (outer_code == SIGN_EXTEND)
12043 {
12044 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12045 }
12046 else if (float_mode_p)
12047 {
12048 *total = sparc_costs->float_load;
12049 }
12050 else
12051 {
12052 *total = sparc_costs->int_load;
12053 }
12054
12055 return true;
12056
12057 case PLUS:
12058 case MINUS:
12059 if (float_mode_p)
12060 *total = sparc_costs->float_plusminus;
12061 else
12062 *total = COSTS_N_INSNS (1);
12063 return false;
12064
12065 case FMA:
12066 {
12067 rtx sub;
12068
12069 gcc_assert (float_mode_p);
12070 *total = sparc_costs->float_mul;
12071
12072 sub = XEXP (x, 0);
12073 if (GET_CODE (sub) == NEG)
12074 sub = XEXP (sub, 0);
12075 *total += rtx_cost (sub, mode, FMA, 0, speed);
12076
12077 sub = XEXP (x, 2);
12078 if (GET_CODE (sub) == NEG)
12079 sub = XEXP (sub, 0);
12080 *total += rtx_cost (sub, mode, FMA, 2, speed);
12081 return true;
12082 }
12083
12084 case MULT:
12085 if (float_mode_p)
12086 *total = sparc_costs->float_mul;
12087 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12088 *total = COSTS_N_INSNS (25);
12089 else
12090 {
12091 int bit_cost;
12092
12093 bit_cost = 0;
12094 if (sparc_costs->int_mul_bit_factor)
12095 {
12096 int nbits;
12097
12098 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12099 {
12100 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
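/* Count the set bits in the constant multiplier; the step
   value &= value - 1 clears the lowest set bit on each
   iteration (Kernighan's population-count idiom).  */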
12101 for (nbits = 0; value != 0; value &= value - 1)
12102 nbits++;
12103 }
12104 else
12105 nbits = 7;
12106
12107 if (nbits < 3)
12108 nbits = 3;
12109 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12110 bit_cost = COSTS_N_INSNS (bit_cost);
12111 }
12112
12113 if (mode == DImode || !TARGET_HARD_MUL)
12114 *total = sparc_costs->int_mulX + bit_cost;
12115 else
12116 *total = sparc_costs->int_mul + bit_cost;
12117 }
12118 return false;
12119
12120 case ASHIFT:
12121 case ASHIFTRT:
12122 case LSHIFTRT:
12123 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12124 return false;
12125
12126 case DIV:
12127 case UDIV:
12128 case MOD:
12129 case UMOD:
12130 if (float_mode_p)
12131 {
12132 if (mode == DFmode)
12133 *total = sparc_costs->float_div_df;
12134 else
12135 *total = sparc_costs->float_div_sf;
12136 }
12137 else
12138 {
12139 if (mode == DImode)
12140 *total = sparc_costs->int_divX;
12141 else
12142 *total = sparc_costs->int_div;
12143 }
12144 return false;
12145
12146 case NEG:
12147 if (! float_mode_p)
12148 {
12149 *total = COSTS_N_INSNS (1);
12150 return false;
12151 }
12152 /* FALLTHRU */
12153
12154 case ABS:
12155 case FLOAT:
12156 case UNSIGNED_FLOAT:
12157 case FIX:
12158 case UNSIGNED_FIX:
12159 case FLOAT_EXTEND:
12160 case FLOAT_TRUNCATE:
12161 *total = sparc_costs->float_move;
12162 return false;
12163
12164 case SQRT:
12165 if (mode == DFmode)
12166 *total = sparc_costs->float_sqrt_df;
12167 else
12168 *total = sparc_costs->float_sqrt_sf;
12169 return false;
12170
12171 case COMPARE:
12172 if (float_mode_p)
12173 *total = sparc_costs->float_cmp;
12174 else
12175 *total = COSTS_N_INSNS (1);
12176 return false;
12177
12178 case IF_THEN_ELSE:
12179 if (float_mode_p)
12180 *total = sparc_costs->float_cmove;
12181 else
12182 *total = sparc_costs->int_cmove;
12183 return false;
12184
12185 case IOR:
12186 /* Handle the NAND vector patterns. */
12187 if (sparc_vector_mode_supported_p (mode)
12188 && GET_CODE (XEXP (x, 0)) == NOT
12189 && GET_CODE (XEXP (x, 1)) == NOT)
12190 {
12191 *total = COSTS_N_INSNS (1);
12192 return true;
12193 }
12194 else
12195 return false;
12196
12197 default:
12198 return false;
12199 }
12200 }
12201
12202 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12203
12204 static inline bool
12205 general_or_i64_p (reg_class_t rclass)
12206 {
12207 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12208 }
12209
12210 /* Implement TARGET_REGISTER_MOVE_COST. */
12211
12212 static int
12213 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12214 reg_class_t from, reg_class_t to)
12215 {
12216 bool need_memory = false;
12217
12218 /* This helps postreload CSE to eliminate redundant comparisons. */
12219 if (from == NO_REGS || to == NO_REGS)
12220 return 100;
12221
12222 if (from == FPCC_REGS || to == FPCC_REGS)
12223 need_memory = true;
12224 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12225 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12226 {
12227 if (TARGET_VIS3)
12228 {
12229 int size = GET_MODE_SIZE (mode);
12230 if (size == 8 || size == 4)
12231 {
12232 if (! TARGET_ARCH32 || size == 4)
12233 return 4;
12234 else
12235 return 6;
12236 }
12237 }
12238 need_memory = true;
12239 }
12240
12241 if (need_memory)
12242 {
12243 if (sparc_cpu == PROCESSOR_ULTRASPARC
12244 || sparc_cpu == PROCESSOR_ULTRASPARC3
12245 || sparc_cpu == PROCESSOR_NIAGARA
12246 || sparc_cpu == PROCESSOR_NIAGARA2
12247 || sparc_cpu == PROCESSOR_NIAGARA3
12248 || sparc_cpu == PROCESSOR_NIAGARA4
12249 || sparc_cpu == PROCESSOR_NIAGARA7
12250 || sparc_cpu == PROCESSOR_M8)
12251 return 12;
12252
12253 return 6;
12254 }
12255
12256 return 2;
12257 }
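
/* Example reading of the costs above (added for illustration): with VIS3,
   a 32-bit move between the integer and FP register files is costed at 4,
   and a 64-bit move at 4 in 64-bit mode but 6 in 32-bit mode; without VIS3
   such moves must go through memory, costed at 12 on the UltraSPARC/Niagara
   family listed above and 6 otherwise.  */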
12258
12259 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12260 This is achieved by means of a manual dynamic stack space allocation in
12261 the current frame. We make the assumption that SEQ doesn't contain any
12262 function calls, with the possible exception of calls to the GOT helper. */
12263
12264 static void
12265 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12266 {
12267 /* We must preserve the lowest 16 words for the register save area. */
12268 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12269 /* We really need only 2 words of fresh stack space. */
12270 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12271
12272 rtx slot
12273 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12274 SPARC_STACK_BIAS + offset));
12275
12276 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12277 emit_insn (gen_rtx_SET (slot, reg));
12278 if (reg2)
12279 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12280 reg2));
12281 emit_insn (seq);
12282 if (reg2)
12283 emit_insn (gen_rtx_SET (reg2,
12284 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12285 emit_insn (gen_rtx_SET (reg, slot));
12286 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12287 }
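
/* Sketch of the resulting sequence (illustration, 64-bit registers assumed):

     add   %sp, -size, %sp
     stx   reg,  [%sp + SPARC_STACK_BIAS + 16*UNITS_PER_WORD]
     stx   reg2, [%sp + SPARC_STACK_BIAS + 17*UNITS_PER_WORD]  ! if reg2
     ... SEQ ...
     ldx   [%sp + SPARC_STACK_BIAS + 17*UNITS_PER_WORD], reg2  ! if reg2
     ldx   [%sp + SPARC_STACK_BIAS + 16*UNITS_PER_WORD], reg
     add   %sp,  size, %sp
*/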
12288
12289 /* Output the assembler code for a thunk function. THUNK_DECL is the
12290 declaration for the thunk function itself, FUNCTION is the decl for
12291 the target function. DELTA is an immediate constant offset to be
12292 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12293 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12294
12295 static void
12296 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12297 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12298 tree function)
12299 {
12300 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12301 rtx this_rtx, funexp;
12302 rtx_insn *insn;
12303 unsigned int int_arg_first;
12304
12305 reload_completed = 1;
12306 epilogue_completed = 1;
12307
12308 emit_note (NOTE_INSN_PROLOGUE_END);
12309
12310 if (TARGET_FLAT)
12311 {
12312 sparc_leaf_function_p = 1;
12313
12314 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12315 }
12316 else if (flag_delayed_branch)
12317 {
12318 /* We will emit a regular sibcall below, so we need to instruct
12319 output_sibcall that we are in a leaf function. */
12320 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12321
12322 /* This will cause final.c to invoke leaf_renumber_regs so we
12323 must behave as if we were in a not-yet-leafified function. */
12324 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12325 }
12326 else
12327 {
12328 /* We will emit the sibcall manually below, so we will need to
12329 manually spill non-leaf registers. */
12330 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12331
12332 /* We really are in a leaf function. */
12333 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12334 }
12335
12336 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12337 returns a structure, the structure return pointer is there instead. */
12338 if (TARGET_ARCH64
12339 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12340 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12341 else
12342 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12343
12344 /* Add DELTA. When possible use a plain add, otherwise load it into
12345 a register first. */
12346 if (delta)
12347 {
12348 rtx delta_rtx = GEN_INT (delta);
12349
12350 if (! SPARC_SIMM13_P (delta))
12351 {
12352 rtx scratch = gen_rtx_REG (Pmode, 1);
12353 emit_move_insn (scratch, delta_rtx);
12354 delta_rtx = scratch;
12355 }
12356
12357 /* THIS_RTX += DELTA. */
12358 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12359 }
12360
12361 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12362 if (vcall_offset)
12363 {
12364 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12365 rtx scratch = gen_rtx_REG (Pmode, 1);
12366
12367 gcc_assert (vcall_offset < 0);
12368
12369 /* SCRATCH = *THIS_RTX. */
12370 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12371
12372 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12373 may not have any available scratch register at this point. */
12374 if (SPARC_SIMM13_P (vcall_offset))
12375 ;
12376 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12377 else if (! fixed_regs[5]
12378 /* The below sequence is made up of at least 2 insns,
12379 while the default method may need only one. */
12380 && vcall_offset < -8192)
12381 {
12382 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12383 emit_move_insn (scratch2, vcall_offset_rtx);
12384 vcall_offset_rtx = scratch2;
12385 }
12386 else
12387 {
12388 rtx increment = GEN_INT (-4096);
12389
12390 /* VCALL_OFFSET is a negative number whose typical range can be
12391 estimated as -32768..0 in 32-bit mode. In almost all cases
12392 it is therefore cheaper to emit multiple add insns than
12393 spilling and loading the constant into a register (at least
12394 6 insns). */
12395 while (! SPARC_SIMM13_P (vcall_offset))
12396 {
12397 emit_insn (gen_add2_insn (scratch, increment));
12398 vcall_offset += 4096;
12399 }
12400 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12401 }
12402
12403 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12404 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12405 gen_rtx_PLUS (Pmode,
12406 scratch,
12407 vcall_offset_rtx)));
12408
12409 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12410 emit_insn (gen_add2_insn (this_rtx, scratch));
12411 }
12412
12413 /* Generate a tail call to the target function. */
12414 if (! TREE_USED (function))
12415 {
12416 assemble_external (function);
12417 TREE_USED (function) = 1;
12418 }
12419 funexp = XEXP (DECL_RTL (function), 0);
12420
12421 if (flag_delayed_branch)
12422 {
12423 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12424 insn = emit_call_insn (gen_sibcall (funexp));
12425 SIBLING_CALL_P (insn) = 1;
12426 }
12427 else
12428 {
12429 /* The hoops we have to jump through in order to generate a sibcall
12430 without using delay slots... */
12431 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12432
12433 if (flag_pic)
12434 {
12435 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12436 start_sequence ();
12437 load_got_register (); /* clobbers %o7 */
12438 if (!TARGET_VXWORKS_RTP)
12439 pic_offset_table_rtx = got_register_rtx;
12440 scratch = sparc_legitimize_pic_address (funexp, scratch);
12441 seq = get_insns ();
12442 end_sequence ();
12443 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12444 }
12445 else if (TARGET_ARCH32)
12446 {
12447 emit_insn (gen_rtx_SET (scratch,
12448 gen_rtx_HIGH (SImode, funexp)));
12449 emit_insn (gen_rtx_SET (scratch,
12450 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12451 }
12452 else /* TARGET_ARCH64 */
12453 {
12454 switch (sparc_code_model)
12455 {
12456 case CM_MEDLOW:
12457 case CM_MEDMID:
12458 /* The destination can serve as a temporary. */
12459 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12460 break;
12461
12462 case CM_MEDANY:
12463 case CM_EMBMEDANY:
12464 /* The destination cannot serve as a temporary. */
12465 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12466 start_sequence ();
12467 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12468 seq = get_insns ();
12469 end_sequence ();
12470 emit_and_preserve (seq, spill_reg, 0);
12471 break;
12472
12473 default:
12474 gcc_unreachable ();
12475 }
12476 }
12477
12478 emit_jump_insn (gen_indirect_jump (scratch));
12479 }
12480
12481 emit_barrier ();
12482
12483 /* Run just enough of rest_of_compilation to get the insns emitted.
12484 There's not really enough bulk here to make other passes such as
12485 instruction scheduling worthwhile. */
12486 insn = get_insns ();
12487 shorten_branches (insn);
12488 assemble_start_function (thunk_fndecl, fnname);
12489 final_start_function (insn, file, 1);
12490 final (insn, file, 1);
12491 final_end_function ();
12492 assemble_end_function (thunk_fndecl, fnname);
12493
12494 reload_completed = 0;
12495 epilogue_completed = 0;
12496 }
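
/* In C-like pseudo-code, the thunk emitted above behaves as (illustration
   only; names are placeholders):

     this = (char *) this + DELTA;
     if (VCALL_OFFSET)
       this = (char *) this
	      + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     tail-call FUNCTION with the adjusted this pointer.  */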
12497
12498 /* Return true if sparc_output_mi_thunk would be able to output the
12499 assembler code for the thunk function specified by the arguments
12500 it is passed, and false otherwise. */
12501 static bool
12502 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12503 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12504 HOST_WIDE_INT vcall_offset,
12505 const_tree function ATTRIBUTE_UNUSED)
12506 {
12507 /* Bound the loop used in the default method above. */
12508 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12509 }
12510
12511 /* How to allocate a 'struct machine_function'. */
12512
12513 static struct machine_function *
12514 sparc_init_machine_status (void)
12515 {
12516 return ggc_cleared_alloc<machine_function> ();
12517 }
12518 \f
12519 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12520
12521 static unsigned HOST_WIDE_INT
12522 sparc_asan_shadow_offset (void)
12523 {
12524 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12525 }
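
/* AddressSanitizer forms shadow addresses as (addr >> 3) + offset, so
   64-bit SPARC code uses a shadow base of 1 << 43 and 32-bit code 1 << 29
   (noted for illustration; the generic asan machinery consumes this hook).  */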
12526 \f
12527 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12528 We need to emit DTP-relative relocations. */
12529
12530 static void
12531 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12532 {
12533 switch (size)
12534 {
12535 case 4:
12536 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12537 break;
12538 case 8:
12539 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12540 break;
12541 default:
12542 gcc_unreachable ();
12543 }
12544 output_addr_const (file, x);
12545 fputs (")", file);
12546 }
12547
12548 /* Do whatever processing is required at the end of a file. */
12549
12550 static void
12551 sparc_file_end (void)
12552 {
12553 /* If we need to emit the special GOT helper function, do so now. */
12554 if (got_helper_needed)
12555 {
12556 const char *name = XSTR (got_helper_rtx, 0);
12557 #ifdef DWARF2_UNWIND_INFO
12558 bool do_cfi;
12559 #endif
12560
12561 if (USE_HIDDEN_LINKONCE)
12562 {
12563 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12564 get_identifier (name),
12565 build_function_type_list (void_type_node,
12566 NULL_TREE));
12567 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12568 NULL_TREE, void_type_node);
12569 TREE_PUBLIC (decl) = 1;
12570 TREE_STATIC (decl) = 1;
12571 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12572 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12573 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12574 resolve_unique_section (decl, 0, flag_function_sections);
12575 allocate_struct_function (decl, true);
12576 cfun->is_thunk = 1;
12577 current_function_decl = decl;
12578 init_varasm_status ();
12579 assemble_start_function (decl, name);
12580 }
12581 else
12582 {
12583 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12584 switch_to_section (text_section);
12585 if (align > 0)
12586 ASM_OUTPUT_ALIGN (asm_out_file, align);
12587 ASM_OUTPUT_LABEL (asm_out_file, name);
12588 }
12589
12590 #ifdef DWARF2_UNWIND_INFO
12591 do_cfi = dwarf2out_do_cfi_asm ();
12592 if (do_cfi)
12593 output_asm_insn (".cfi_startproc", NULL);
12594 #endif
12595 if (flag_delayed_branch)
12596 {
12597 output_asm_insn ("jmp\t%%o7+8", NULL);
12598 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12599 }
12600 else
12601 {
12602 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12603 output_asm_insn ("jmp\t%%o7+8", NULL);
12604 output_asm_insn (" nop", NULL);
12605 }
12606 #ifdef DWARF2_UNWIND_INFO
12607 if (do_cfi)
12608 output_asm_insn (".cfi_endproc", NULL);
12609 #endif
12610 }
12611
12612 if (NEED_INDICATE_EXEC_STACK)
12613 file_end_indicate_exec_stack ();
12614
12615 #ifdef TARGET_SOLARIS
12616 solaris_file_end ();
12617 #endif
12618 }
12619
12620 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12621 /* Implement TARGET_MANGLE_TYPE. */
12622
12623 static const char *
12624 sparc_mangle_type (const_tree type)
12625 {
12626 if (TARGET_ARCH32
12627 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12628 && TARGET_LONG_DOUBLE_128)
12629 return "g";
12630
12631 /* For all other types, use normal C++ mangling. */
12632 return NULL;
12633 }
12634 #endif
12635
12636 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12637 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12638 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12639
12640 void
12641 sparc_emit_membar_for_model (enum memmodel model,
12642 int load_store, int before_after)
12643 {
12644 /* Bits for the MEMBAR mmask field. */
12645 const int LoadLoad = 1;
12646 const int StoreLoad = 2;
12647 const int LoadStore = 4;
12648 const int StoreStore = 8;
12649
12650 int mm = 0, implied = 0;
12651
12652 switch (sparc_memory_model)
12653 {
12654 case SMM_SC:
12655 /* Sequential Consistency. All memory transactions are immediately
12656 visible in sequential execution order. No barriers needed. */
12657 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12658 break;
12659
12660 case SMM_TSO:
12661 /* Total Store Ordering: all memory transactions with store semantics
12662 are followed by an implied StoreStore. */
12663 implied |= StoreStore;
12664
12665 /* If we're not looking for a raw barrier (before+after), then atomic
12666 operations get the benefit of being both load and store. */
12667 if (load_store == 3 && before_after == 1)
12668 implied |= StoreLoad;
12669 /* FALLTHRU */
12670
12671 case SMM_PSO:
12672 /* Partial Store Ordering: all memory transactions with load semantics
12673 are followed by an implied LoadLoad | LoadStore. */
12674 implied |= LoadLoad | LoadStore;
12675
12676 /* If we're not looking for a raw barrier (before+after), then atomic
12677 operations get the benefit of being both load and store. */
12678 if (load_store == 3 && before_after == 2)
12679 implied |= StoreLoad | StoreStore;
12680 /* FALLTHRU */
12681
12682 case SMM_RMO:
12683 /* Relaxed Memory Ordering: no implicit bits. */
12684 break;
12685
12686 default:
12687 gcc_unreachable ();
12688 }
12689
12690 if (before_after & 1)
12691 {
12692 if (is_mm_release (model) || is_mm_acq_rel (model)
12693 || is_mm_seq_cst (model))
12694 {
12695 if (load_store & 1)
12696 mm |= LoadLoad | StoreLoad;
12697 if (load_store & 2)
12698 mm |= LoadStore | StoreStore;
12699 }
12700 }
12701 if (before_after & 2)
12702 {
12703 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12704 || is_mm_seq_cst (model))
12705 {
12706 if (load_store & 1)
12707 mm |= LoadLoad | LoadStore;
12708 if (load_store & 2)
12709 mm |= StoreLoad | StoreStore;
12710 }
12711 }
12712
12713 /* Remove the bits implied by the system memory model. */
12714 mm &= ~implied;
12715
12716 /* For raw barriers (before+after), always emit a barrier.
12717 This will become a compile-time barrier if needed. */
12718 if (mm || before_after == 3)
12719 emit_insn (gen_membar (GEN_INT (mm)));
12720 }
12721
12722 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
12723 32-bit compare and swap on the word containing the byte or half-word. */
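/* In C terms the expansion is roughly the loop sketched below, where
   cas32 () stands for the 32-bit atomic compare-and-swap pattern, MASK
   selects the byte or half-word within its naturally aligned word, OFF
   is the corresponding bit offset and BOOL_RESULT is ignored for brevity:

	word = *aligned_addr & ~mask;
	for (;;)
	  {
	    expected = ((oldval << off) & mask) | word;
	    desired  = ((newval << off) & mask) | word;
	    actual   = cas32 (aligned_addr, expected, desired);
	    if (actual == expected)
	      break;
	    if ((actual & ~mask) == word)
	      break;
	    word = actual & ~mask;
	  }
	result = (actual & mask) >> off;  */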
12724
12725 static void
12726 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12727 rtx oldval, rtx newval)
12728 {
12729 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12730 rtx addr = gen_reg_rtx (Pmode);
12731 rtx off = gen_reg_rtx (SImode);
12732 rtx oldv = gen_reg_rtx (SImode);
12733 rtx newv = gen_reg_rtx (SImode);
12734 rtx oldvalue = gen_reg_rtx (SImode);
12735 rtx newvalue = gen_reg_rtx (SImode);
12736 rtx res = gen_reg_rtx (SImode);
12737 rtx resv = gen_reg_rtx (SImode);
12738 rtx memsi, val, mask, cc;
12739
12740 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12741
12742 if (Pmode != SImode)
12743 addr1 = gen_lowpart (SImode, addr1);
12744 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12745
12746 memsi = gen_rtx_MEM (SImode, addr);
12747 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12748 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12749
12750 val = copy_to_reg (memsi);
12751
12752 emit_insn (gen_rtx_SET (off,
12753 gen_rtx_XOR (SImode, off,
12754 GEN_INT (GET_MODE (mem) == QImode
12755 ? 3 : 2))));
12756
12757 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12758
12759 if (GET_MODE (mem) == QImode)
12760 mask = force_reg (SImode, GEN_INT (0xff));
12761 else
12762 mask = force_reg (SImode, GEN_INT (0xffff));
12763
12764 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12765
12766 emit_insn (gen_rtx_SET (val,
12767 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12768 val)));
12769
12770 oldval = gen_lowpart (SImode, oldval);
12771 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12772
12773 newval = gen_lowpart_common (SImode, newval);
12774 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12775
12776 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12777
12778 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12779
12780 rtx_code_label *end_label = gen_label_rtx ();
12781 rtx_code_label *loop_label = gen_label_rtx ();
12782 emit_label (loop_label);
12783
12784 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12785
12786 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12787
12788 emit_move_insn (bool_result, const1_rtx);
12789
12790 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12791
12792 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12793
12794 emit_insn (gen_rtx_SET (resv,
12795 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12796 res)));
12797
12798 emit_move_insn (bool_result, const0_rtx);
12799
12800 cc = gen_compare_reg_1 (NE, resv, val);
12801 emit_insn (gen_rtx_SET (val, resv));
12802
12803 /* Use cbranchcc4 to separate the compare and branch! */
12804 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12805 cc, const0_rtx, loop_label));
12806
12807 emit_label (end_label);
12808
12809 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12810
12811 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12812
12813 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12814 }
12815
12816 /* Expand code to perform a compare-and-swap. */
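/* For example, a function like the following is expanded through this
   routine (a sketch; the exact code depends on the mode and options):

	_Bool
	cas_int (int *p, int *expected, int desired)
	{
	  return __atomic_compare_exchange_n (p, expected, desired, 0,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST);
	}

   OPERANDS[0] receives the boolean result, OPERANDS[1] the value read
   from memory and OPERANDS[6] the success memory model.  */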
12817
12818 void
12819 sparc_expand_compare_and_swap (rtx operands[])
12820 {
12821 rtx bval, retval, mem, oldval, newval;
12822 machine_mode mode;
12823 enum memmodel model;
12824
12825 bval = operands[0];
12826 retval = operands[1];
12827 mem = operands[2];
12828 oldval = operands[3];
12829 newval = operands[4];
12830 model = (enum memmodel) INTVAL (operands[6]);
12831 mode = GET_MODE (mem);
12832
12833 sparc_emit_membar_for_model (model, 3, 1);
12834
12835 if (reg_overlap_mentioned_p (retval, oldval))
12836 oldval = copy_to_reg (oldval);
12837
12838 if (mode == QImode || mode == HImode)
12839 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12840 else
12841 {
12842 rtx (*gen) (rtx, rtx, rtx, rtx);
12843 rtx x;
12844
12845 if (mode == SImode)
12846 gen = gen_atomic_compare_and_swapsi_1;
12847 else
12848 gen = gen_atomic_compare_and_swapdi_1;
12849 emit_insn (gen (retval, mem, oldval, newval));
12850
12851 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12852 if (x != bval)
12853 convert_move (bval, x, 1);
12854 }
12855
12856 sparc_emit_membar_for_model (model, 3, 2);
12857 }
12858
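/* Load the GSR bmask field used by BSHUFFLE from the vector permutation
   selector SEL of mode VMODE.  As a worked illustration (plain arithmetic,
   not taken from a testcase): for V4HImode with the identity selector
   { 0, 1, 2, 3 }, the indices are first packed into the SImode value
   0x00010203, multiplied by 0x22 to give 0x00224466, and the final bmask
   addition of 0x01010101 yields 0x01234567, the identity byte shuffle.  */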
12859 void
12860 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12861 {
12862 rtx t_1, t_2, t_3;
12863
12864 sel = gen_lowpart (DImode, sel);
12865 switch (vmode)
12866 {
12867 case E_V2SImode:
12868 /* inp = xxxxxxxAxxxxxxxB */
12869 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12870 NULL_RTX, 1, OPTAB_DIRECT);
12871 /* t_1 = ....xxxxxxxAxxx. */
12872 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12873 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12874 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12875 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12876 /* sel = .......B */
12877 /* t_1 = ...A.... */
12878 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12879 /* sel = ...A...B */
12880 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12881 /* sel = AAAABBBB * 4 */
12882 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12883 /* sel = { A*4, A*4+1, A*4+2, ... } */
12884 break;
12885
12886 case E_V4HImode:
12887 /* inp = xxxAxxxBxxxCxxxD */
12888 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12889 NULL_RTX, 1, OPTAB_DIRECT);
12890 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12891 NULL_RTX, 1, OPTAB_DIRECT);
12892 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12893 NULL_RTX, 1, OPTAB_DIRECT);
12894 /* t_1 = ..xxxAxxxBxxxCxx */
12895 /* t_2 = ....xxxAxxxBxxxC */
12896 /* t_3 = ......xxxAxxxBxx */
12897 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12898 GEN_INT (0x07),
12899 NULL_RTX, 1, OPTAB_DIRECT);
12900 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12901 GEN_INT (0x0700),
12902 NULL_RTX, 1, OPTAB_DIRECT);
12903 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12904 GEN_INT (0x070000),
12905 NULL_RTX, 1, OPTAB_DIRECT);
12906 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12907 GEN_INT (0x07000000),
12908 NULL_RTX, 1, OPTAB_DIRECT);
12909 /* sel = .......D */
12910 /* t_1 = .....C.. */
12911 /* t_2 = ...B.... */
12912 /* t_3 = .A...... */
12913 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12914 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12915 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12916 /* sel = .A.B.C.D */
12917 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12918 /* sel = AABBCCDD * 2 */
12919 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12920 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12921 break;
12922
12923 case E_V8QImode:
12924 /* input = xAxBxCxDxExFxGxH */
12925 sel = expand_simple_binop (DImode, AND, sel,
12926 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12927 | 0x0f0f0f0f),
12928 NULL_RTX, 1, OPTAB_DIRECT);
12929 /* sel = .A.B.C.D.E.F.G.H */
12930 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12931 NULL_RTX, 1, OPTAB_DIRECT);
12932 /* t_1 = ..A.B.C.D.E.F.G. */
12933 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12934 NULL_RTX, 1, OPTAB_DIRECT);
12935 /* sel = .AABBCCDDEEFFGGH */
12936 sel = expand_simple_binop (DImode, AND, sel,
12937 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12938 | 0xff00ff),
12939 NULL_RTX, 1, OPTAB_DIRECT);
12940 /* sel = ..AB..CD..EF..GH */
12941 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12942 NULL_RTX, 1, OPTAB_DIRECT);
12943 /* t_1 = ....AB..CD..EF.. */
12944 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12945 NULL_RTX, 1, OPTAB_DIRECT);
12946 /* sel = ..ABABCDCDEFEFGH */
12947 sel = expand_simple_binop (DImode, AND, sel,
12948 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12949 NULL_RTX, 1, OPTAB_DIRECT);
12950 /* sel = ....ABCD....EFGH */
12951 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12952 NULL_RTX, 1, OPTAB_DIRECT);
12953 /* t_1 = ........ABCD.... */
12954 sel = gen_lowpart (SImode, sel);
12955 t_1 = gen_lowpart (SImode, t_1);
12956 break;
12957
12958 default:
12959 gcc_unreachable ();
12960 }
12961
12962 /* Always perform the final addition/merge within the bmask insn. */
12963 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12964 }
12965
12966 /* Implement TARGET_VEC_PERM_CONST. */
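/* For instance, the constant identity permutation { 0, 1, ..., 7 } on
   V8QImode is encoded by the loop below as the bmask value 0x01234567,
   i.e. one 4-bit source byte index per result byte, most significant
   byte first.  */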
12967
12968 static bool
12969 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12970 rtx op1, const vec_perm_indices &sel)
12971 {
12972 if (!TARGET_VIS2)
12973 return false;
12974
12975 /* All permutes are supported. */
12976 if (!target)
12977 return true;
12978
12979 /* Force target-independent code to convert constant permutations on other
12980 modes down to V8QI. Rely on this to avoid the complexity of the byte
12981 order of the permutation. */
12982 if (vmode != V8QImode)
12983 return false;
12984
12985 unsigned int i, mask;
12986 for (i = mask = 0; i < 8; ++i)
12987 mask |= (sel[i] & 0xf) << (28 - i*4);
12988 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12989
12990 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12991 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12992 return true;
12993 }
12994
12995 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12996
12997 static bool
12998 sparc_frame_pointer_required (void)
12999 {
13000 /* If the stack pointer is dynamically modified in the function, it cannot
13001 serve as the frame pointer. */
13002 if (cfun->calls_alloca)
13003 return true;
13004
13005 /* If the function receives nonlocal gotos, it needs to save the frame
13006 pointer in the nonlocal_goto_save_area object. */
13007 if (cfun->has_nonlocal_label)
13008 return true;
13009
13010 /* In flat mode, that's it. */
13011 if (TARGET_FLAT)
13012 return false;
13013
13014 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13015 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13016 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13017 }
13018
13019 /* The way this is structured, we can't eliminate SFP in favor of SP
13020 if the frame pointer is required: we want to use the SFP->HFP elimination
13021 in that case. But the test in update_eliminables doesn't know we are
13022 assuming below that we only do the former elimination. */
13023
13024 static bool
13025 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13026 {
13027 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13028 }
13029
13030 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13031 they won't be allocated. */
13032
13033 static void
13034 sparc_conditional_register_usage (void)
13035 {
13036 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13037 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13038 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
13039 then honor it. */
13040 if (TARGET_ARCH32 && fixed_regs[5])
13041 fixed_regs[5] = 1;
13042 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13043 fixed_regs[5] = 0;
13044 if (! TARGET_V9)
13045 {
13046 int regno;
13047 for (regno = SPARC_FIRST_V9_FP_REG;
13048 regno <= SPARC_LAST_V9_FP_REG;
13049 regno++)
13050 fixed_regs[regno] = 1;
13051 /* %fcc0 is used by v8 and v9. */
13052 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13053 regno <= SPARC_LAST_V9_FCC_REG;
13054 regno++)
13055 fixed_regs[regno] = 1;
13056 }
13057 if (! TARGET_FPU)
13058 {
13059 int regno;
13060 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13061 fixed_regs[regno] = 1;
13062 }
13063 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
13064 then honor it. Likewise with g3 and g4. */
13065 if (fixed_regs[2] == 2)
13066 fixed_regs[2] = ! TARGET_APP_REGS;
13067 if (fixed_regs[3] == 2)
13068 fixed_regs[3] = ! TARGET_APP_REGS;
13069 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13070 fixed_regs[4] = ! TARGET_APP_REGS;
13071 else if (TARGET_CM_EMBMEDANY)
13072 fixed_regs[4] = 1;
13073 else if (fixed_regs[4] == 2)
13074 fixed_regs[4] = 0;
13075 if (TARGET_FLAT)
13076 {
13077 int regno;
13078 /* Disable leaf functions. */
13079 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13080 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13081 leaf_reg_remap [regno] = regno;
13082 }
13083 if (TARGET_VIS)
13084 global_regs[SPARC_GSR_REG] = 1;
13085 }
13086
13087 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13088
13089 static bool
13090 sparc_use_pseudo_pic_reg (void)
13091 {
13092 return !TARGET_VXWORKS_RTP && flag_pic;
13093 }
13094
13095 /* Implement TARGET_INIT_PIC_REG. */
13096
13097 static void
13098 sparc_init_pic_reg (void)
13099 {
13100 edge entry_edge;
13101 rtx_insn *seq;
13102
13103 /* In PIC mode, we need to always initialize the PIC register if optimization
13104 is enabled, because we are called from IRA and LRA may later force things
13105 to the constant pool for optimization purposes. */
13106 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13107 return;
13108
13109 start_sequence ();
13110 load_got_register ();
13111 if (!TARGET_VXWORKS_RTP)
13112 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13113 seq = get_insns ();
13114 end_sequence ();
13115
13116 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13117 insert_insn_on_edge (seq, entry_edge);
13118 commit_one_edge_insertion (entry_edge);
13119 }
13120
13121 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13122
13123 - We can't load constants into FP registers.
13124 - We can't load FP constants into integer registers when soft-float,
13125 because there is no soft-float pattern with an r/F constraint.
13126 - We can't load FP constants into integer registers for TFmode unless
13127 it is 0.0L, because there is no movtf pattern with an r/F constraint.
13128 - Try to reload integer constants (symbolic or otherwise) back into
13129 registers directly, rather than having them dumped to memory. */
13130
13131 static reg_class_t
13132 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13133 {
13134 machine_mode mode = GET_MODE (x);
13135 if (CONSTANT_P (x))
13136 {
13137 if (FP_REG_CLASS_P (rclass)
13138 || rclass == GENERAL_OR_FP_REGS
13139 || rclass == GENERAL_OR_EXTRA_FP_REGS
13140 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13141 || (mode == TFmode && ! const_zero_operand (x, mode)))
13142 return NO_REGS;
13143
13144 if (GET_MODE_CLASS (mode) == MODE_INT)
13145 return GENERAL_REGS;
13146
13147 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13148 {
13149 if (! FP_REG_CLASS_P (rclass)
13150 || !(const_zero_operand (x, mode)
13151 || const_all_ones_operand (x, mode)))
13152 return NO_REGS;
13153 }
13154 }
13155
13156 if (TARGET_VIS3
13157 && ! TARGET_ARCH64
13158 && (rclass == EXTRA_FP_REGS
13159 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13160 {
13161 int regno = true_regnum (x);
13162
13163 if (SPARC_INT_REG_P (regno))
13164 return (rclass == EXTRA_FP_REGS
13165 ? FP_REGS : GENERAL_OR_FP_REGS);
13166 }
13167
13168 return rclass;
13169 }
13170
13171 /* Return true if we use LRA instead of the reload pass. */
13172
13173 static bool
13174 sparc_lra_p (void)
13175 {
13176 return TARGET_LRA;
13177 }
13178
13179 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13180 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
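/* For the general register-by-register case with the first alternative,
   the emitted sequence is, schematically (%H/%L denote the high and low
   SImode halves of the 64-bit operands, %3/%4 the scratch operands and
   OPCODE the requested mnemonic):

	srl	%L1, 0, %L1
	srl	%L2, 0, %L2
	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	OPCODE	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0

   with the two srl instructions omitted when the low words are already
   known to be zero-extended.  */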
13181
13182 const char *
13183 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13184 {
13185 char mulstr[32];
13186
13187 gcc_assert (! TARGET_ARCH64);
13188
13189 if (sparc_check_64 (operands[1], insn) <= 0)
13190 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13191 if (which_alternative == 1)
13192 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13193 if (GET_CODE (operands[2]) == CONST_INT)
13194 {
13195 if (which_alternative == 1)
13196 {
13197 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13198 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13199 output_asm_insn (mulstr, operands);
13200 return "srlx\t%L0, 32, %H0";
13201 }
13202 else
13203 {
13204 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13205 output_asm_insn ("or\t%L1, %3, %3", operands);
13206 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13207 output_asm_insn (mulstr, operands);
13208 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13209 return "mov\t%3, %L0";
13210 }
13211 }
13212 else if (rtx_equal_p (operands[1], operands[2]))
13213 {
13214 if (which_alternative == 1)
13215 {
13216 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13217 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13218 output_asm_insn (mulstr, operands);
13219 return "srlx\t%L0, 32, %H0";
13220 }
13221 else
13222 {
13223 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13224 output_asm_insn ("or\t%L1, %3, %3", operands);
13225 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13226 output_asm_insn (mulstr, operands);
13227 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13228 return "mov\t%3, %L0";
13229 }
13230 }
13231 if (sparc_check_64 (operands[2], insn) <= 0)
13232 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13233 if (which_alternative == 1)
13234 {
13235 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13236 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13237 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13238 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13239 output_asm_insn (mulstr, operands);
13240 return "srlx\t%L0, 32, %H0";
13241 }
13242 else
13243 {
13244 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13245 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13246 output_asm_insn ("or\t%L1, %3, %3", operands);
13247 output_asm_insn ("or\t%L2, %4, %4", operands);
13248 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13249 output_asm_insn (mulstr, operands);
13250 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13251 return "mov\t%3, %L0";
13252 }
13253 }
13254
13255 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13256 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
13257 and INNER_MODE are the modes describing TARGET. */
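/* The bmask constants below encode which source bytes BSHUFFLE should
   replicate: the SImode lowpart move puts ELT in the least significant
   bytes of T1, so 0x77777777 selects the low byte eight times for
   V8QImode, 0x67676767 the low half-word four times for V4HImode, and
   0x45674567 the low word twice for V2SImode.  */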
13258
13259 static void
13260 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13261 machine_mode inner_mode)
13262 {
13263 rtx t1, final_insn, sel;
13264 int bmask;
13265
13266 t1 = gen_reg_rtx (mode);
13267
13268 elt = convert_modes (SImode, inner_mode, elt, true);
13269 emit_move_insn (gen_lowpart(SImode, t1), elt);
13270
13271 switch (mode)
13272 {
13273 case E_V2SImode:
13274 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13275 bmask = 0x45674567;
13276 break;
13277 case E_V4HImode:
13278 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13279 bmask = 0x67676767;
13280 break;
13281 case E_V8QImode:
13282 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13283 bmask = 0x77777777;
13284 break;
13285 default:
13286 gcc_unreachable ();
13287 }
13288
13289 sel = force_reg (SImode, GEN_INT (bmask));
13290 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13291 emit_insn (final_insn);
13292 }
13293
13294 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13295 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
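/* Schematically, writing E for the byte to replicate and . for a don't
   care byte, the three FPMERGE steps double the number of copies of E
   each time:

	t1      = . . . E
	t2      = . . . . . . E E	(low half: . . E E)
	t3      = . . . . E E E E	(low half: E E E E)
	target  = E E E E E E E E  */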
13296
13297 static void
13298 vector_init_fpmerge (rtx target, rtx elt)
13299 {
13300 rtx t1, t2, t2_low, t3, t3_low;
13301
13302 t1 = gen_reg_rtx (V4QImode);
13303 elt = convert_modes (SImode, QImode, elt, true);
13304 emit_move_insn (gen_lowpart (SImode, t1), elt);
13305
13306 t2 = gen_reg_rtx (V8QImode);
13307 t2_low = gen_lowpart (V4QImode, t2);
13308 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13309
13310 t3 = gen_reg_rtx (V8QImode);
13311 t3_low = gen_lowpart (V4QImode, t3);
13312 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13313
13314 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13315 }
13316
13317 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13318 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
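/* Informally: the ALIGNADDR below sets the GSR alignment field to 6, so
   each FALIGNDATA extracts bytes 6..13 of the concatenation { T1, TARGET },
   i.e. it prepends the half-word held in the low bytes of T1 and shifts
   the previous contents of TARGET down by one half-word; four iterations
   fill all four fields.  */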
13319
13320 static void
13321 vector_init_faligndata (rtx target, rtx elt)
13322 {
13323 rtx t1 = gen_reg_rtx (V4HImode);
13324 int i;
13325
13326 elt = convert_modes (SImode, HImode, elt, true);
13327 emit_move_insn (gen_lowpart (SImode, t1), elt);
13328
13329 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13330 force_reg (SImode, GEN_INT (6)),
13331 const0_rtx));
13332
13333 for (i = 0; i < 4; i++)
13334 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13335 }
13336
13337 /* Emit code to initialize TARGET to values for individual fields VALS. */
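/* For example (a sketch), a GNU C vector constructor such as

	typedef short v4hi __attribute__ ((vector_size (8)));
	v4hi splat (short x) { return (v4hi) { x, x, x, x }; }

   produces an all_same VALS with a single variable element and is handled
   by one of the VIS replication helpers above when available; constructors
   with several distinct variable elements fall back to the stack temporary
   at the end of this function.  */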
13338
13339 void
13340 sparc_expand_vector_init (rtx target, rtx vals)
13341 {
13342 const machine_mode mode = GET_MODE (target);
13343 const machine_mode inner_mode = GET_MODE_INNER (mode);
13344 const int n_elts = GET_MODE_NUNITS (mode);
13345 int i, n_var = 0;
13346 bool all_same = true;
13347 rtx mem;
13348
13349 for (i = 0; i < n_elts; i++)
13350 {
13351 rtx x = XVECEXP (vals, 0, i);
13352 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13353 n_var++;
13354
13355 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13356 all_same = false;
13357 }
13358
13359 if (n_var == 0)
13360 {
13361 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13362 return;
13363 }
13364
13365 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13366 {
13367 if (GET_MODE_SIZE (inner_mode) == 4)
13368 {
13369 emit_move_insn (gen_lowpart (SImode, target),
13370 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13371 return;
13372 }
13373 else if (GET_MODE_SIZE (inner_mode) == 8)
13374 {
13375 emit_move_insn (gen_lowpart (DImode, target),
13376 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13377 return;
13378 }
13379 }
13380 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13381 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13382 {
13383 emit_move_insn (gen_highpart (word_mode, target),
13384 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13385 emit_move_insn (gen_lowpart (word_mode, target),
13386 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13387 return;
13388 }
13389
13390 if (all_same && GET_MODE_SIZE (mode) == 8)
13391 {
13392 if (TARGET_VIS2)
13393 {
13394 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13395 return;
13396 }
13397 if (mode == V8QImode)
13398 {
13399 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13400 return;
13401 }
13402 if (mode == V4HImode)
13403 {
13404 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13405 return;
13406 }
13407 }
13408
13409 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13410 for (i = 0; i < n_elts; i++)
13411 emit_move_insn (adjust_address_nv (mem, inner_mode,
13412 i * GET_MODE_SIZE (inner_mode)),
13413 XVECEXP (vals, 0, i));
13414 emit_move_insn (target, mem);
13415 }
13416
13417 /* Implement TARGET_SECONDARY_RELOAD. */
13418
13419 static reg_class_t
13420 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13421 machine_mode mode, secondary_reload_info *sri)
13422 {
13423 enum reg_class rclass = (enum reg_class) rclass_i;
13424
13425 sri->icode = CODE_FOR_nothing;
13426 sri->extra_cost = 0;
13427
13428 /* We need a temporary when loading/storing a HImode/QImode value
13429 between memory and the FPU registers. This can happen when combine puts
13430 a paradoxical subreg in a float/fix conversion insn. */
13431 if (FP_REG_CLASS_P (rclass)
13432 && (mode == HImode || mode == QImode)
13433 && (GET_CODE (x) == MEM
13434 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13435 && true_regnum (x) == -1)))
13436 return GENERAL_REGS;
13437
13438 /* On 32-bit we need a temporary when loading/storing a DFmode value
13439 between unaligned memory and the upper FPU registers. */
13440 if (TARGET_ARCH32
13441 && rclass == EXTRA_FP_REGS
13442 && mode == DFmode
13443 && GET_CODE (x) == MEM
13444 && ! mem_min_alignment (x, 8))
13445 return FP_REGS;
13446
13447 if (((TARGET_CM_MEDANY
13448 && symbolic_operand (x, mode))
13449 || (TARGET_CM_EMBMEDANY
13450 && text_segment_operand (x, mode)))
13451 && ! flag_pic)
13452 {
13453 if (in_p)
13454 sri->icode = direct_optab_handler (reload_in_optab, mode);
13455 else
13456 sri->icode = direct_optab_handler (reload_out_optab, mode);
13457 return NO_REGS;
13458 }
13459
13460 if (TARGET_VIS3 && TARGET_ARCH32)
13461 {
13462 int regno = true_regnum (x);
13463
13464 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13465 to move 8-byte values in 4-byte pieces. This only works via
13466 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13467 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13468 an FP_REGS intermediate move. */
13469 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13470 || ((general_or_i64_p (rclass)
13471 || rclass == GENERAL_OR_FP_REGS)
13472 && SPARC_FP_REG_P (regno)))
13473 {
13474 sri->extra_cost = 2;
13475 return FP_REGS;
13476 }
13477 }
13478
13479 return NO_REGS;
13480 }
13481
13482 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13483
13484 On SPARC, without VIS3 it is not possible to move data directly
13485 between GENERAL_REGS and FP_REGS. */
13486
13487 static bool
13488 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13489 reg_class_t class2)
13490 {
13491 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13492 && (! TARGET_VIS3
13493 || GET_MODE_SIZE (mode) > 8
13494 || GET_MODE_SIZE (mode) < 4));
13495 }
13496
13497 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13498
13499 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13500 because the movsi and movsf patterns don't handle r/f moves.
13501 For v8 we copy the default definition. */
13502
13503 static machine_mode
13504 sparc_secondary_memory_needed_mode (machine_mode mode)
13505 {
13506 if (TARGET_ARCH64)
13507 {
13508 if (GET_MODE_BITSIZE (mode) < 32)
13509 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13510 return mode;
13511 }
13512 else
13513 {
13514 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13515 return mode_for_size (BITS_PER_WORD,
13516 GET_MODE_CLASS (mode), 0).require ();
13517 return mode;
13518 }
13519 }
13520
13521 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13522 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13523
13524 bool
13525 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13526 {
13527 enum rtx_code rc = GET_CODE (operands[1]);
13528 machine_mode cmp_mode;
13529 rtx cc_reg, dst, cmp;
13530
13531 cmp = operands[1];
13532 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13533 return false;
13534
13535 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13536 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13537
13538 cmp_mode = GET_MODE (XEXP (cmp, 0));
13539 rc = GET_CODE (cmp);
13540
13541 dst = operands[0];
13542 if (! rtx_equal_p (operands[2], dst)
13543 && ! rtx_equal_p (operands[3], dst))
13544 {
13545 if (reg_overlap_mentioned_p (dst, cmp))
13546 dst = gen_reg_rtx (mode);
13547
13548 emit_move_insn (dst, operands[3]);
13549 }
13550 else if (operands[2] == dst)
13551 {
13552 operands[2] = operands[3];
13553
13554 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13555 rc = reverse_condition_maybe_unordered (rc);
13556 else
13557 rc = reverse_condition (rc);
13558 }
13559
13560 if (XEXP (cmp, 1) == const0_rtx
13561 && GET_CODE (XEXP (cmp, 0)) == REG
13562 && cmp_mode == DImode
13563 && v9_regcmp_p (rc))
13564 cc_reg = XEXP (cmp, 0);
13565 else
13566 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13567
13568 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13569
13570 emit_insn (gen_rtx_SET (dst,
13571 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13572
13573 if (dst != operands[0])
13574 emit_move_insn (operands[0], dst);
13575
13576 return true;
13577 }
13578
13579 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13580 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13581 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13582 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13583 code to be used for the condition mask. */
13584
13585 void
13586 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13587 {
13588 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13589 enum rtx_code code = GET_CODE (operands[3]);
13590
13591 mask = gen_reg_rtx (Pmode);
13592 cop0 = operands[4];
13593 cop1 = operands[5];
13594 if (code == LT || code == GE)
13595 {
13596 rtx t;
13597
13598 code = swap_condition (code);
13599 t = cop0; cop0 = cop1; cop1 = t;
13600 }
13601
13602 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13603
13604 fcmp = gen_rtx_UNSPEC (Pmode,
13605 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13606 fcode);
13607
13608 cmask = gen_rtx_UNSPEC (DImode,
13609 gen_rtvec (2, mask, gsr),
13610 ccode);
13611
13612 bshuf = gen_rtx_UNSPEC (mode,
13613 gen_rtvec (3, operands[1], operands[2], gsr),
13614 UNSPEC_BSHUFFLE);
13615
13616 emit_insn (gen_rtx_SET (mask, fcmp));
13617 emit_insn (gen_rtx_SET (gsr, cmask));
13618
13619 emit_insn (gen_rtx_SET (operands[0], bshuf));
13620 }
13621
13622 /* On SPARC, any mode which naturally allocates into the float
13623 registers should return 4 here. */
13624
13625 unsigned int
13626 sparc_regmode_natural_size (machine_mode mode)
13627 {
13628 int size = UNITS_PER_WORD;
13629
13630 if (TARGET_ARCH64)
13631 {
13632 enum mode_class mclass = GET_MODE_CLASS (mode);
13633
13634 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13635 size = 4;
13636 }
13637
13638 return size;
13639 }
13640
13641 /* Implement TARGET_HARD_REGNO_NREGS.
13642
13643 On SPARC, ordinary registers hold 32 bits worth; this means both
13644 integer and floating point registers. On v9, integer regs hold 64
13645 bits worth; floating point regs hold 32 bits worth (this includes the
13646 new fp regs as even the odd ones are included in the hard register
13647 count). */
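/* For example, in 64-bit mode a DFmode value occupies a single integer
   register (CEIL (8, UNITS_PER_WORD) with UNITS_PER_WORD == 8) but two
   consecutive FP registers (CEIL (8, 4)), which is what the code below
   computes.  */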
13648
13649 static unsigned int
13650 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13651 {
13652 if (regno == SPARC_GSR_REG)
13653 return 1;
13654 if (TARGET_ARCH64)
13655 {
13656 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13657 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13658 return CEIL (GET_MODE_SIZE (mode), 4);
13659 }
13660 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13661 }
13662
13663 /* Implement TARGET_HARD_REGNO_MODE_OK.
13664
13665 ??? Because of the funny way we pass parameters we should allow certain
13666 ??? types of float/complex values to be in integer registers during
13667 ??? RTL generation. This only matters on arch32. */
13668
13669 static bool
13670 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13671 {
13672 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13673 }
13674
13675 /* Implement TARGET_MODES_TIEABLE_P.
13676
13677 For V9 we have to deal with the fact that only the lower 32 floating
13678 point registers are 32-bit addressable. */
13679
13680 static bool
13681 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13682 {
13683 enum mode_class mclass1, mclass2;
13684 unsigned short size1, size2;
13685
13686 if (mode1 == mode2)
13687 return true;
13688
13689 mclass1 = GET_MODE_CLASS (mode1);
13690 mclass2 = GET_MODE_CLASS (mode2);
13691 if (mclass1 != mclass2)
13692 return false;
13693
13694 if (! TARGET_V9)
13695 return true;
13696
13697 /* Classes are the same and we are V9 so we have to deal with upper
13698 vs. lower floating point registers. If one of the modes is a
13699 4-byte mode, and the other is not, we have to mark them as not
13700 tieable because only the lower 32 floating point registers are
13701 addressable 32 bits at a time.
13702
13703 We can't just test explicitly for SFmode, otherwise we won't
13704 cover the vector mode cases properly. */
13705
13706 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13707 return true;
13708
13709 size1 = GET_MODE_SIZE (mode1);
13710 size2 = GET_MODE_SIZE (mode2);
13711 if ((size1 > 4 && size2 == 4)
13712 || (size2 > 4 && size1 == 4))
13713 return false;
13714
13715 return true;
13716 }
13717
13718 /* Implement TARGET_CSTORE_MODE. */
13719
13720 static scalar_int_mode
13721 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13722 {
13723 return (TARGET_ARCH64 ? DImode : SImode);
13724 }
13725
13726 /* Return the compound expression made of T1 and T2. */
13727
13728 static inline tree
13729 compound_expr (tree t1, tree t2)
13730 {
13731 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13732 }
13733
13734 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13735
13736 static void
13737 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13738 {
13739 if (!TARGET_FPU)
13740 return;
13741
13742 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13743 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13744
13745 /* We generate the equivalent of feholdexcept (&fenv_var):
13746
13747 unsigned int fenv_var;
13748 __builtin_store_fsr (&fenv_var);
13749
13750 unsigned int tmp1_var;
13751 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13752
13753 __builtin_load_fsr (&tmp1_var); */
13754
13755 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13756 TREE_ADDRESSABLE (fenv_var) = 1;
13757 tree fenv_addr = build_fold_addr_expr (fenv_var);
13758 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13759 tree hold_stfsr
13760 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13761 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13762
13763 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13764 TREE_ADDRESSABLE (tmp1_var) = 1;
13765 tree masked_fenv_var
13766 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13767 build_int_cst (unsigned_type_node,
13768 ~(accrued_exception_mask | trap_enable_mask)));
13769 tree hold_mask
13770 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13771 NULL_TREE, NULL_TREE);
13772
13773 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13774 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13775 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13776
13777 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13778
13779 /* We reload the value of tmp1_var to clear the exceptions:
13780
13781 __builtin_load_fsr (&tmp1_var); */
13782
13783 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13784
13785 /* We generate the equivalent of feupdateenv (&fenv_var):
13786
13787 unsigned int tmp2_var;
13788 __builtin_store_fsr (&tmp2_var);
13789
13790 __builtin_load_fsr (&fenv_var);
13791
13792 if (SPARC_LOW_FE_EXCEPT_VALUES)
13793 tmp2_var >>= 5;
13794 __atomic_feraiseexcept ((int) tmp2_var); */
13795
13796 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13797 TREE_ADDRESSABLE (tmp2_var) = 1;
13798 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13799 tree update_stfsr
13800 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13801 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13802
13803 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13804
13805 tree atomic_feraiseexcept
13806 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13807 tree update_call
13808 = build_call_expr (atomic_feraiseexcept, 1,
13809 fold_convert (integer_type_node, tmp2_var));
13810
13811 if (SPARC_LOW_FE_EXCEPT_VALUES)
13812 {
13813 tree shifted_tmp2_var
13814 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13815 build_int_cst (unsigned_type_node, 5));
13816 tree update_shift
13817 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13818 update_call = compound_expr (update_shift, update_call);
13819 }
13820
13821 *update
13822 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13823 }
13824
13825 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13826
13827 SImode loads to floating-point registers are not zero-extended.
13828 The definition for LOAD_EXTEND_OP specifies that integer loads
13829 narrower than BITS_PER_WORD will be zero-extended. As a result,
13830 we inhibit changes from SImode unless they are to a mode that is
13831 identical in size.
13832
13833 Likewise for SFmode, since word-mode paradoxical subregs are
13834 problematic on big-endian architectures. */
13835
13836 static bool
13837 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13838 reg_class_t rclass)
13839 {
13840 if (TARGET_ARCH64
13841 && GET_MODE_SIZE (from) == 4
13842 && GET_MODE_SIZE (to) != 4)
13843 return !reg_classes_intersect_p (rclass, FP_REGS);
13844 return true;
13845 }
13846
13847 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13848
13849 static HOST_WIDE_INT
13850 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13851 {
13852 if (TREE_CODE (exp) == STRING_CST)
13853 return MAX (align, FASTEST_ALIGNMENT);
13854 return align;
13855 }
13856
13857 #include "gt-sparc.h"