1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2022 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63 #include "opts.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127        A value of zero indicates that the multiply cost is fixed
128        and not variable.  */
129 const int int_mul_bit_factor;
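
  /* A worked example of the formula above (illustrative, not tied to any
     particular table below): with int_mul = COSTS_N_INSNS (4) and
     int_mul_bit_factor = 2, an operand whose highest set bit is bit 11
     gives cost = COSTS_N_INSNS (4) + (11 - 3) / 2
                = COSTS_N_INSNS (4) + 4.  */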
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
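
/* All latencies in the cost tables below are expressed with COSTS_N_INSNS,
   so e.g. COSTS_N_INSNS (2) models an operation as costing as much as two
   simple instructions; this is a rough scheduling weight, not an exact
   cycle count.  */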
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs leon5_costs = {
274 COSTS_N_INSNS (1), /* int load */
275 COSTS_N_INSNS (1), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (17), /* fdivs */
284 COSTS_N_INSNS (18), /* fdivd */
285 COSTS_N_INSNS (25), /* fsqrts */
286 COSTS_N_INSNS (26), /* fsqrtd */
287 COSTS_N_INSNS (4), /* imul */
288 COSTS_N_INSNS (4), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (35), /* idiv */
291 COSTS_N_INSNS (35), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs sparclet_costs = {
299 COSTS_N_INSNS (3), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (1), /* int zeroed load */
302 COSTS_N_INSNS (1), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (1), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (1), /* fmov, fmovr */
307 COSTS_N_INSNS (1), /* fmul */
308 COSTS_N_INSNS (1), /* fdivs */
309 COSTS_N_INSNS (1), /* fdivd */
310 COSTS_N_INSNS (1), /* fsqrts */
311 COSTS_N_INSNS (1), /* fsqrtd */
312 COSTS_N_INSNS (5), /* imul */
313 COSTS_N_INSNS (5), /* imulX */
314 0, /* imul bit factor */
315 COSTS_N_INSNS (5), /* idiv */
316 COSTS_N_INSNS (5), /* idivX */
317 COSTS_N_INSNS (1), /* movcc/movr */
318 0, /* shift penalty */
319 3 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (2), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (1), /* fcmp */
331 COSTS_N_INSNS (2), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (13), /* fdivs */
334 COSTS_N_INSNS (23), /* fdivd */
335 COSTS_N_INSNS (13), /* fsqrts */
336 COSTS_N_INSNS (23), /* fsqrtd */
337 COSTS_N_INSNS (4), /* imul */
338 COSTS_N_INSNS (4), /* imulX */
339 2, /* imul bit factor */
340 COSTS_N_INSNS (37), /* idiv */
341 COSTS_N_INSNS (68), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 2, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs ultrasparc3_costs = {
349 COSTS_N_INSNS (2), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (2), /* float load */
353 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (4), /* fadd, fsub */
355 COSTS_N_INSNS (5), /* fcmp */
356 COSTS_N_INSNS (3), /* fmov, fmovr */
357 COSTS_N_INSNS (4), /* fmul */
358 COSTS_N_INSNS (17), /* fdivs */
359 COSTS_N_INSNS (20), /* fdivd */
360 COSTS_N_INSNS (20), /* fsqrts */
361 COSTS_N_INSNS (29), /* fsqrtd */
362 COSTS_N_INSNS (6), /* imul */
363 COSTS_N_INSNS (6), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (40), /* idiv */
366 COSTS_N_INSNS (71), /* idivX */
367 COSTS_N_INSNS (2), /* movcc/movr */
368 0, /* shift penalty */
369 2 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (9), /* float load */
378 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (8), /* fadd, fsub */
380 COSTS_N_INSNS (26), /* fcmp */
381 COSTS_N_INSNS (8), /* fmov, fmovr */
382 COSTS_N_INSNS (29), /* fmul */
383 COSTS_N_INSNS (54), /* fdivs */
384 COSTS_N_INSNS (83), /* fdivd */
385 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
386 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
387 COSTS_N_INSNS (11), /* imul */
388 COSTS_N_INSNS (11), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (72), /* idiv */
391 COSTS_N_INSNS (72), /* idivX */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 4 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara2_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (6), /* fadd, fsub */
405 COSTS_N_INSNS (6), /* fcmp */
406 COSTS_N_INSNS (6), /* fmov, fmovr */
407 COSTS_N_INSNS (6), /* fmul */
408 COSTS_N_INSNS (19), /* fdivs */
409 COSTS_N_INSNS (33), /* fdivd */
410 COSTS_N_INSNS (19), /* fsqrts */
411 COSTS_N_INSNS (33), /* fsqrtd */
412 COSTS_N_INSNS (5), /* imul */
413 COSTS_N_INSNS (5), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
416 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara3_costs = {
424 COSTS_N_INSNS (3), /* int load */
425 COSTS_N_INSNS (3), /* int signed load */
426 COSTS_N_INSNS (3), /* int zeroed load */
427 COSTS_N_INSNS (3), /* float load */
428 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (9), /* fadd, fsub */
430 COSTS_N_INSNS (9), /* fcmp */
431 COSTS_N_INSNS (9), /* fmov, fmovr */
432 COSTS_N_INSNS (9), /* fmul */
433 COSTS_N_INSNS (23), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (23), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (9), /* imul */
438 COSTS_N_INSNS (9), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
441 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 5 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara4_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 2 /* branch cost */
470 };
471
472 static const
473 struct processor_costs niagara7_costs = {
474 COSTS_N_INSNS (5), /* int load */
475 COSTS_N_INSNS (5), /* int signed load */
476 COSTS_N_INSNS (5), /* int zeroed load */
477 COSTS_N_INSNS (5), /* float load */
478 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (11), /* fadd, fsub */
480 COSTS_N_INSNS (11), /* fcmp */
481 COSTS_N_INSNS (11), /* fmov, fmovr */
482 COSTS_N_INSNS (11), /* fmul */
483 COSTS_N_INSNS (24), /* fdivs */
484 COSTS_N_INSNS (37), /* fdivd */
485 COSTS_N_INSNS (24), /* fsqrts */
486 COSTS_N_INSNS (37), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (12), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
491 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const
498 struct processor_costs m8_costs = {
499 COSTS_N_INSNS (3), /* int load */
500 COSTS_N_INSNS (3), /* int signed load */
501 COSTS_N_INSNS (3), /* int zeroed load */
502 COSTS_N_INSNS (3), /* float load */
503 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
504 COSTS_N_INSNS (9), /* fadd, fsub */
505 COSTS_N_INSNS (9), /* fcmp */
506 COSTS_N_INSNS (9), /* fmov, fmovr */
507 COSTS_N_INSNS (9), /* fmul */
508 COSTS_N_INSNS (26), /* fdivs */
509 COSTS_N_INSNS (30), /* fdivd */
510 COSTS_N_INSNS (33), /* fsqrts */
511 COSTS_N_INSNS (41), /* fsqrtd */
512 COSTS_N_INSNS (12), /* imul */
513 COSTS_N_INSNS (10), /* imulX */
514 0, /* imul bit factor */
515 COSTS_N_INSNS (57), /* udiv/sdiv */
516 COSTS_N_INSNS (30), /* udivx/sdivx */
517 COSTS_N_INSNS (1), /* movcc/movr */
518 0, /* shift penalty */
519 1 /* branch cost */
520 };
521
522 static const struct processor_costs *sparc_costs = &cypress_costs;
523
524 #ifdef HAVE_AS_RELAX_OPTION
525 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
526 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
527    With sethi/jmp, neither 'as' nor 'ld' has an easy way to verify that
528    nobody branches between the sethi and the jmp.  */
529 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
530 #else
531 #define LEAF_SIBCALL_SLOT_RESERVED_P \
532 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
533 #endif
534
535 /* Vector, indexed by hard register number, which contains 1
536 for a register that is allowable in a candidate for leaf
537 function treatment. */
538 char sparc_leaf_regs[] =
539 { 1, 1, 1, 1, 1, 1, 1, 1,
540 0, 0, 0, 0, 0, 0, 1, 0,
541 0, 0, 0, 0, 0, 0, 0, 0,
542 1, 1, 1, 1, 1, 1, 0, 1,
543 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1, 1,
546 1, 1, 1, 1, 1, 1, 1, 1,
547 1, 1, 1, 1, 1, 1, 1, 1,
548 1, 1, 1, 1, 1, 1, 1, 1,
549 1, 1, 1, 1, 1, 1, 1, 1,
550 1, 1, 1, 1, 1, 1, 1, 1,
551 1, 1, 1, 1, 1, 1, 1};
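
/* As a reading aid (assumed mapping; the authoritative layout is the hard
   register numbering in sparc.h): the first four rows above cover the 32
   integer registers in their %g, %o, %l and %i groups, and the remaining
   rows cover the floating-point and special registers.  */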
552
553 struct GTY(()) machine_function
554 {
555 /* Size of the frame of the function. */
556 HOST_WIDE_INT frame_size;
557
558 /* Size of the frame of the function minus the register window save area
559 and the outgoing argument area. */
560 HOST_WIDE_INT apparent_frame_size;
561
562 /* Register we pretend the frame pointer is allocated to. Normally, this
563 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
564 record "offset" separately as it may be too big for (reg + disp). */
565 rtx frame_base_reg;
566 HOST_WIDE_INT frame_base_offset;
567
568 /* Number of global or FP registers to be saved (as 4-byte quantities). */
569 int n_global_fp_regs;
570
571 /* True if the current function is leaf and uses only leaf regs,
572 so that the SPARC leaf function optimization can be applied.
573 Private version of crtl->uses_only_leaf_regs, see
574 sparc_expand_prologue for the rationale. */
575 int leaf_function_p;
576
577 /* True if the prologue saves local or in registers. */
578 bool save_local_in_regs_p;
579
580 /* True if the data calculated by sparc_expand_prologue are valid. */
581 bool prologue_data_valid_p;
582 };
583
584 #define sparc_frame_size cfun->machine->frame_size
585 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
586 #define sparc_frame_base_reg cfun->machine->frame_base_reg
587 #define sparc_frame_base_offset cfun->machine->frame_base_offset
588 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
589 #define sparc_leaf_function_p cfun->machine->leaf_function_p
590 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
591 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
592
593 /* 1 if the next opcode is to be specially indented. */
594 int sparc_indent_opcode = 0;
595
596 static void sparc_option_override (void);
597 static void sparc_init_modes (void);
598 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
599 const_tree, bool, bool, int *, int *);
600
601 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
602 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
603 static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);
604
605 static void sparc_emit_set_const32 (rtx, rtx);
606 static void sparc_emit_set_const64 (rtx, rtx);
607 static void sparc_output_addr_vec (rtx);
608 static void sparc_output_addr_diff_vec (rtx);
609 static void sparc_output_deferred_case_vectors (void);
610 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
611 static bool sparc_legitimate_constant_p (machine_mode, rtx);
612 static rtx sparc_builtin_saveregs (void);
613 static int epilogue_renumber (rtx *, int);
614 static bool sparc_assemble_integer (rtx, unsigned int, int);
615 static int set_extends (rtx_insn *);
616 static void sparc_asm_function_prologue (FILE *);
617 static void sparc_asm_function_epilogue (FILE *);
618 #ifdef TARGET_SOLARIS
619 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
620 tree) ATTRIBUTE_UNUSED;
621 #endif
622 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
623 static int sparc_issue_rate (void);
624 static void sparc_sched_init (FILE *, int, int);
625 static int sparc_use_sched_lookahead (void);
626
627 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
628 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
629 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
630 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
631 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
632
633 static bool sparc_function_ok_for_sibcall (tree, tree);
634 static void sparc_init_libfuncs (void);
635 static void sparc_init_builtins (void);
636 static void sparc_fpu_init_builtins (void);
637 static void sparc_vis_init_builtins (void);
638 static tree sparc_builtin_decl (unsigned, bool);
639 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
640 static tree sparc_fold_builtin (tree, int, tree *, bool);
641 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
642 HOST_WIDE_INT, tree);
643 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
644 HOST_WIDE_INT, const_tree);
645 static struct machine_function * sparc_init_machine_status (void);
646 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
647 static rtx sparc_tls_get_addr (void);
648 static rtx sparc_tls_got (void);
649 static int sparc_register_move_cost (machine_mode,
650 reg_class_t, reg_class_t);
651 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
652 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
653 int *, const_tree, int);
654 static bool sparc_strict_argument_naming (cumulative_args_t);
655 static void sparc_va_start (tree, rtx);
656 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
657 static bool sparc_vector_mode_supported_p (machine_mode);
658 static bool sparc_tls_referenced_p (rtx);
659 static rtx sparc_legitimize_tls_address (rtx);
660 static rtx sparc_legitimize_pic_address (rtx, rtx);
661 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
662 static rtx sparc_delegitimize_address (rtx);
663 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
664 static bool sparc_pass_by_reference (cumulative_args_t,
665 const function_arg_info &);
666 static void sparc_function_arg_advance (cumulative_args_t,
667 const function_arg_info &);
668 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
669 static rtx sparc_function_incoming_arg (cumulative_args_t,
670 const function_arg_info &);
671 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
672 static unsigned int sparc_function_arg_boundary (machine_mode,
673 const_tree);
674 static int sparc_arg_partial_bytes (cumulative_args_t,
675 const function_arg_info &);
676 static bool sparc_return_in_memory (const_tree, const_tree);
677 static rtx sparc_struct_value_rtx (tree, int);
678 static rtx sparc_function_value (const_tree, const_tree, bool);
679 static rtx sparc_libcall_value (machine_mode, const_rtx);
680 static bool sparc_function_value_regno_p (const unsigned int);
681 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
682 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
683 static void sparc_file_end (void);
684 static bool sparc_frame_pointer_required (void);
685 static bool sparc_can_eliminate (const int, const int);
686 static void sparc_conditional_register_usage (void);
687 static bool sparc_use_pseudo_pic_reg (void);
688 static void sparc_init_pic_reg (void);
689 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
690 static const char *sparc_mangle_type (const_tree);
691 #endif
692 static void sparc_trampoline_init (rtx, tree, rtx);
693 static machine_mode sparc_preferred_simd_mode (scalar_mode);
694 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
695 static bool sparc_lra_p (void);
696 static bool sparc_print_operand_punct_valid_p (unsigned char);
697 static void sparc_print_operand (FILE *, rtx, int);
698 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
699 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
700 machine_mode,
701 secondary_reload_info *);
702 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
703 reg_class_t);
704 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
705 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
706 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
707 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
708 static unsigned int sparc_min_arithmetic_precision (void);
709 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
710 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
711 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
712 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
713 reg_class_t);
714 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
715 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
716 const vec_perm_indices &);
717 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
718 static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
719 \f
720 #ifdef SUBTARGET_ATTRIBUTE_TABLE
721 /* Table of valid machine attributes. */
722 static const struct attribute_spec sparc_attribute_table[] =
723 {
724 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
725 do_diagnostic, handler, exclude } */
726 SUBTARGET_ATTRIBUTE_TABLE,
727 { NULL, 0, 0, false, false, false, false, NULL, NULL }
728 };
729 #endif
730 \f
731 char sparc_hard_reg_printed[8];
732
733 /* Initialize the GCC target structure. */
734
735 /* The default is to use .half rather than .short for aligned HI objects. */
736 #undef TARGET_ASM_ALIGNED_HI_OP
737 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
738
739 #undef TARGET_ASM_UNALIGNED_HI_OP
740 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
741 #undef TARGET_ASM_UNALIGNED_SI_OP
742 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
743 #undef TARGET_ASM_UNALIGNED_DI_OP
744 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
745
746 /* The target hook has to handle DI-mode values. */
747 #undef TARGET_ASM_INTEGER
748 #define TARGET_ASM_INTEGER sparc_assemble_integer
749
750 #undef TARGET_ASM_FUNCTION_PROLOGUE
751 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
752 #undef TARGET_ASM_FUNCTION_EPILOGUE
753 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
754
755 #undef TARGET_SCHED_ADJUST_COST
756 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
757 #undef TARGET_SCHED_ISSUE_RATE
758 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
759 #undef TARGET_SCHED_INIT
760 #define TARGET_SCHED_INIT sparc_sched_init
761 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
762 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
763
764 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
765 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
766
767 #undef TARGET_INIT_LIBFUNCS
768 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
769
770 #undef TARGET_LEGITIMIZE_ADDRESS
771 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
772 #undef TARGET_DELEGITIMIZE_ADDRESS
773 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
774 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
775 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
776
777 #undef TARGET_INIT_BUILTINS
778 #define TARGET_INIT_BUILTINS sparc_init_builtins
779 #undef TARGET_BUILTIN_DECL
780 #define TARGET_BUILTIN_DECL sparc_builtin_decl
781 #undef TARGET_EXPAND_BUILTIN
782 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
783 #undef TARGET_FOLD_BUILTIN
784 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
785
786 #if TARGET_TLS
787 #undef TARGET_HAVE_TLS
788 #define TARGET_HAVE_TLS true
789 #endif
790
791 #undef TARGET_CANNOT_FORCE_CONST_MEM
792 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
793
794 #undef TARGET_ASM_OUTPUT_MI_THUNK
795 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
796 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
797 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
798
799 #undef TARGET_RTX_COSTS
800 #define TARGET_RTX_COSTS sparc_rtx_costs
801 #undef TARGET_ADDRESS_COST
802 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
803 #undef TARGET_REGISTER_MOVE_COST
804 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
805
806 #undef TARGET_PROMOTE_FUNCTION_MODE
807 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
808 #undef TARGET_STRICT_ARGUMENT_NAMING
809 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
810
811 #undef TARGET_MUST_PASS_IN_STACK
812 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
813 #undef TARGET_PASS_BY_REFERENCE
814 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
815 #undef TARGET_ARG_PARTIAL_BYTES
816 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
817 #undef TARGET_FUNCTION_ARG_ADVANCE
818 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
819 #undef TARGET_FUNCTION_ARG
820 #define TARGET_FUNCTION_ARG sparc_function_arg
821 #undef TARGET_FUNCTION_INCOMING_ARG
822 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
823 #undef TARGET_FUNCTION_ARG_PADDING
824 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
825 #undef TARGET_FUNCTION_ARG_BOUNDARY
826 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
827
828 #undef TARGET_RETURN_IN_MEMORY
829 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
830 #undef TARGET_STRUCT_VALUE_RTX
831 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
832 #undef TARGET_FUNCTION_VALUE
833 #define TARGET_FUNCTION_VALUE sparc_function_value
834 #undef TARGET_LIBCALL_VALUE
835 #define TARGET_LIBCALL_VALUE sparc_libcall_value
836 #undef TARGET_FUNCTION_VALUE_REGNO_P
837 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
838
839 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
840 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
841
842 #undef TARGET_ASAN_SHADOW_OFFSET
843 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
844
845 #undef TARGET_EXPAND_BUILTIN_VA_START
846 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
847 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
848 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
849
850 #undef TARGET_VECTOR_MODE_SUPPORTED_P
851 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
852
853 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
854 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
855
856 #ifdef SUBTARGET_INSERT_ATTRIBUTES
857 #undef TARGET_INSERT_ATTRIBUTES
858 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
859 #endif
860
861 #ifdef SUBTARGET_ATTRIBUTE_TABLE
862 #undef TARGET_ATTRIBUTE_TABLE
863 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
864 #endif
865
866 #undef TARGET_OPTION_OVERRIDE
867 #define TARGET_OPTION_OVERRIDE sparc_option_override
868
869 #ifdef TARGET_THREAD_SSP_OFFSET
870 #undef TARGET_STACK_PROTECT_GUARD
871 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
872 #endif
873
874 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
875 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
876 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
877 #endif
878
879 #undef TARGET_ASM_FILE_END
880 #define TARGET_ASM_FILE_END sparc_file_end
881
882 #undef TARGET_FRAME_POINTER_REQUIRED
883 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
884
885 #undef TARGET_CAN_ELIMINATE
886 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
887
888 #undef TARGET_PREFERRED_RELOAD_CLASS
889 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
890
891 #undef TARGET_SECONDARY_RELOAD
892 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
893 #undef TARGET_SECONDARY_MEMORY_NEEDED
894 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
895 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
896 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
897
898 #undef TARGET_CONDITIONAL_REGISTER_USAGE
899 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
900
901 #undef TARGET_INIT_PIC_REG
902 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
903
904 #undef TARGET_USE_PSEUDO_PIC_REG
905 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
906
907 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
908 #undef TARGET_MANGLE_TYPE
909 #define TARGET_MANGLE_TYPE sparc_mangle_type
910 #endif
911
912 #undef TARGET_LRA_P
913 #define TARGET_LRA_P sparc_lra_p
914
915 #undef TARGET_LEGITIMATE_ADDRESS_P
916 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
917
918 #undef TARGET_LEGITIMATE_CONSTANT_P
919 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
920
921 #undef TARGET_TRAMPOLINE_INIT
922 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
923
924 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
925 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
926 #undef TARGET_PRINT_OPERAND
927 #define TARGET_PRINT_OPERAND sparc_print_operand
928 #undef TARGET_PRINT_OPERAND_ADDRESS
929 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
930
931 /* The value stored by LDSTUB. */
932 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
933 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
934
935 #undef TARGET_CSTORE_MODE
936 #define TARGET_CSTORE_MODE sparc_cstore_mode
937
938 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
939 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
940
941 #undef TARGET_FIXED_CONDITION_CODE_REGS
942 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
943
944 #undef TARGET_MIN_ARITHMETIC_PRECISION
945 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
946
947 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
948 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
949
950 #undef TARGET_HARD_REGNO_NREGS
951 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
952 #undef TARGET_HARD_REGNO_MODE_OK
953 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
954
955 #undef TARGET_MODES_TIEABLE_P
956 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
957
958 #undef TARGET_CAN_CHANGE_MODE_CLASS
959 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
960
961 #undef TARGET_CONSTANT_ALIGNMENT
962 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
963
964 #undef TARGET_VECTORIZE_VEC_PERM_CONST
965 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
966
967 #undef TARGET_CAN_FOLLOW_JUMP
968 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
969
970 #undef TARGET_ZERO_CALL_USED_REGS
971 #define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs
972
973 #ifdef SPARC_GCOV_TYPE_SIZE
974 static HOST_WIDE_INT
975 sparc_gcov_type_size (void)
976 {
977 return SPARC_GCOV_TYPE_SIZE;
978 }
979
980 #undef TARGET_GCOV_TYPE_SIZE
981 #define TARGET_GCOV_TYPE_SIZE sparc_gcov_type_size
982 #endif
983
984 struct gcc_target targetm = TARGET_INITIALIZER;
985
986 /* Return the memory reference contained in X, if any, or NULL_RTX otherwise.  */
987
988 static rtx
989 mem_ref (rtx x)
990 {
991 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
992 x = XEXP (x, 0);
993
994 if (MEM_P (x))
995 return x;
996
997 return NULL_RTX;
998 }
999
1000 /* True if any of INSN's source register(s) is REG. */
1001
1002 static bool
1003 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
1004 {
1005 extract_insn (insn);
1006 return ((REG_P (recog_data.operand[1])
1007 && REGNO (recog_data.operand[1]) == reg)
1008 || (recog_data.n_operands == 3
1009 && REG_P (recog_data.operand[2])
1010 && REGNO (recog_data.operand[2]) == reg));
1011 }
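
/* Note that insn_uses_reg_p above only inspects operands 1 and 2: it
   assumes the destination of the insns we care about is operand 0 and
   that their source registers live in those two slots.  */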
1012
1013 /* True if INSN is a floating-point division or square-root. */
1014
1015 static bool
1016 div_sqrt_insn_p (rtx_insn *insn)
1017 {
1018 if (GET_CODE (PATTERN (insn)) != SET)
1019 return false;
1020
1021 switch (get_attr_type (insn))
1022 {
1023 case TYPE_FPDIVS:
1024 case TYPE_FPSQRTS:
1025 case TYPE_FPDIVD:
1026 case TYPE_FPSQRTD:
1027 return true;
1028 default:
1029 return false;
1030 }
1031 }
1032
1033 /* True if INSN is a floating-point instruction. */
1034
1035 static bool
1036 fpop_insn_p (rtx_insn *insn)
1037 {
1038 if (GET_CODE (PATTERN (insn)) != SET)
1039 return false;
1040
1041 switch (get_attr_type (insn))
1042 {
1043 case TYPE_FPMOVE:
1044 case TYPE_FPCMOVE:
1045 case TYPE_FP:
1046 case TYPE_FPCMP:
1047 case TYPE_FPMUL:
1048 case TYPE_FPDIVS:
1049 case TYPE_FPSQRTS:
1050 case TYPE_FPDIVD:
1051 case TYPE_FPSQRTD:
1052 return true;
1053 default:
1054 return false;
1055 }
1056 }
1057
1058 /* True if INSN is an atomic instruction. */
1059
1060 static bool
1061 atomic_insn_for_leon3_p (rtx_insn *insn)
1062 {
1063 switch (INSN_CODE (insn))
1064 {
1065 case CODE_FOR_swapsi:
1066 case CODE_FOR_ldstub:
1067 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1068 return true;
1069 default:
1070 return false;
1071 }
1072 }
1073
1074 /* True if INSN is a store instruction. */
1075
1076 static bool
1077 store_insn_p (rtx_insn *insn)
1078 {
1079 if (GET_CODE (PATTERN (insn)) != SET)
1080 return false;
1081
1082 switch (get_attr_type (insn))
1083 {
1084 case TYPE_STORE:
1085 case TYPE_FPSTORE:
1086 return true;
1087 default:
1088 return false;
1089 }
1090 }
1091
1092 /* True if INSN is a load instruction. */
1093
1094 static bool
1095 load_insn_p (rtx_insn *insn)
1096 {
1097 if (GET_CODE (PATTERN (insn)) != SET)
1098 return false;
1099
1100 switch (get_attr_type (insn))
1101 {
1102 case TYPE_LOAD:
1103 case TYPE_SLOAD:
1104 case TYPE_FPLOAD:
1105 return true;
1106 default:
1107 return false;
1108 }
1109 }
1110
1111 /* We use a machine specific pass to enable workarounds for errata.
1112
1113 We need to have the (essentially) final form of the insn stream in order
1114 to properly detect the various hazards. Therefore, this machine specific
1115 pass runs as late as possible. */
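
/* The workarounds below cover the AT697F, UT699, UT700/GR712RC,
   back-to-back store and lost div/sqrt errata; the gate of
   pass_work_around_errata below enables the pass whenever any of the
   corresponding sparc_fix_* flags is set.  */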
1116
1117 /* True if INSN is an md pattern or asm statement.  */
1118 #define USEFUL_INSN_P(INSN) \
1119 (NONDEBUG_INSN_P (INSN) \
1120 && GET_CODE (PATTERN (INSN)) != USE \
1121 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1122
1123 rtx_insn *
1124 next_active_non_empty_insn (rtx_insn *insn)
1125 {
1126 insn = next_active_insn (insn);
1127
1128 while (insn
1129 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
1130 || GET_CODE (PATTERN (insn)) == ASM_INPUT
1131 || (USEFUL_INSN_P (insn)
1132 && (asm_noperands (PATTERN (insn)) >= 0)
1133 && !strcmp (decode_asm_operands (PATTERN (insn),
1134 NULL, NULL, NULL,
1135 NULL, NULL), ""))))
1136 insn = next_active_insn (insn);
1137
1138 return insn;
1139 }
1140
1141 static unsigned int
1142 sparc_do_work_around_errata (void)
1143 {
1144 rtx_insn *insn, *next;
1145 bool find_first_useful = true;
1146
1147 /* Force all instructions to be split into their final form. */
1148 split_all_insns_noflow ();
1149
1150 /* Now look for specific patterns in the insn stream. */
1151 for (insn = get_insns (); insn; insn = next)
1152 {
1153 bool insert_nop = false;
1154 rtx set;
1155 rtx_insn *jump;
1156 rtx_sequence *seq;
1157
1158 /* Look into the instruction in a delay slot. */
1159 if (NONJUMP_INSN_P (insn)
1160 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1161 {
1162 jump = seq->insn (0);
1163 insn = seq->insn (1);
1164 }
1165 else if (JUMP_P (insn))
1166 jump = insn;
1167 else
1168 jump = NULL;
1169
1170 /* Do not begin function with atomic instruction. */
1171 if (sparc_fix_ut700
1172 && find_first_useful
1173 && USEFUL_INSN_P (insn))
1174 {
1175 find_first_useful = false;
1176 if (atomic_insn_for_leon3_p (insn))
1177 emit_insn_before (gen_nop (), insn);
1178 }
1179
1180       /* Place a NOP at the branch target of an integer branch if the target
1181          is a floating-point operation or a floating-point branch.  */
1182 if (sparc_fix_gr712rc
1183 && jump
1184 && jump_to_label_p (jump)
1185 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1186 {
1187 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1188 if (target
1189 && (fpop_insn_p (target)
1190 || (JUMP_P (target)
1191 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1192 emit_insn_before (gen_nop (), target);
1193 }
1194
1195       /* Insert a NOP between a load instruction and an atomic instruction.
1196          Also insert a NOP at the branch target if there is a load in the
1197          delay slot and an atomic instruction at the branch target.  */
1198 if (sparc_fix_ut700
1199 && NONJUMP_INSN_P (insn)
1200 && load_insn_p (insn))
1201 {
1202 if (jump && jump_to_label_p (jump))
1203 {
1204 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1205 if (target && atomic_insn_for_leon3_p (target))
1206 emit_insn_before (gen_nop (), target);
1207 }
1208
1209 next = next_active_non_empty_insn (insn);
1210 if (!next)
1211 break;
1212
1213 if (atomic_insn_for_leon3_p (next))
1214 insert_nop = true;
1215 }
1216
1217 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1218 ends with another fdiv or fsqrt instruction with no dependencies on
1219 the former, along with an appropriate pattern in between. */
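      /* An illustrative shape of such a sequence (assumed, not taken from
	 an errata document) is:

	     fdivs  %f1, %f2, %f3
	     fadds  ...		! FP operation or FP load
	     fsubs  ...		! FP operation or FP load
	     fsqrts %f4, %f5	! independent div/sqrt

	 in which case NOPs are inserted right after the first fdivs.  */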
1220 if (sparc_fix_lost_divsqrt
1221 && NONJUMP_INSN_P (insn)
1222 && div_sqrt_insn_p (insn))
1223 {
1224 int i;
1225 int fp_found = 0;
1226 rtx_insn *after;
1227
1228 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1229
1230 next = next_active_insn (insn);
1231 if (!next)
1232 break;
1233
1234 for (after = next, i = 0; i < 4; i++)
1235 {
1236 /* Count floating-point operations. */
1237 if (i != 3 && fpop_insn_p (after))
1238 {
1239 /* If the insn uses the destination register of
1240 the div/sqrt, then it cannot be problematic. */
1241 if (insn_uses_reg_p (after, dest_reg))
1242 break;
1243 fp_found++;
1244 }
1245
1246 /* Count floating-point loads. */
1247 if (i != 3
1248 && (set = single_set (after)) != NULL_RTX
1249 && REG_P (SET_DEST (set))
1250 && REGNO (SET_DEST (set)) > 31)
1251 {
1252 /* If the insn uses the destination register of
1253 the div/sqrt, then it cannot be problematic. */
1254 if (REGNO (SET_DEST (set)) == dest_reg)
1255 break;
1256 fp_found++;
1257 }
1258
1259 /* Check if this is a problematic sequence. */
1260 if (i > 1
1261 && fp_found >= 2
1262 && div_sqrt_insn_p (after))
1263 {
1264 /* If this is the short version of the problematic
1265 sequence we add two NOPs in a row to also prevent
1266 the long version. */
1267 if (i == 2)
1268 emit_insn_before (gen_nop (), next);
1269 insert_nop = true;
1270 break;
1271 }
1272
1273 /* No need to scan past a second div/sqrt. */
1274 if (div_sqrt_insn_p (after))
1275 break;
1276
1277 /* Insert NOP before branch. */
1278 if (i < 3
1279 && (!NONJUMP_INSN_P (after)
1280 || GET_CODE (PATTERN (after)) == SEQUENCE))
1281 {
1282 insert_nop = true;
1283 break;
1284 }
1285
1286 after = next_active_insn (after);
1287 if (!after)
1288 break;
1289 }
1290 }
1291
1292 /* Look for either of these two sequences:
1293
1294 Sequence A:
1295 1. store of word size or less (e.g. st / stb / sth / stf)
1296 2. any single instruction that is not a load or store
1297 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1298
1299 Sequence B:
1300 1. store of double word size (e.g. std / stdf)
1301 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
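      /* For instance (illustrative only), sequence B could be:

	     std  %f0, [%o0]	! double-word store
	     st   %g1, [%o1]	! any store

	 in which case a NOP is inserted between the two stores.  */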
1302 if (sparc_fix_b2bst
1303 && NONJUMP_INSN_P (insn)
1304 && (set = single_set (insn)) != NULL_RTX
1305 && store_insn_p (insn))
1306 {
1307 /* Sequence B begins with a double-word store. */
1308 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1309 rtx_insn *after;
1310 int i;
1311
1312 next = next_active_non_empty_insn (insn);
1313 if (!next)
1314 break;
1315
1316 for (after = next, i = 0; i < 2; i++)
1317 {
1318 /* If the insn is a branch, then it cannot be problematic. */
1319 if (!NONJUMP_INSN_P (after)
1320 || GET_CODE (PATTERN (after)) == SEQUENCE)
1321 break;
1322
1323 /* Sequence B is only two instructions long. */
1324 if (seq_b)
1325 {
1326 /* Add NOP if followed by a store. */
1327 if (store_insn_p (after))
1328 insert_nop = true;
1329
1330 /* Otherwise it is ok. */
1331 break;
1332 }
1333
1334 /* If the second instruction is a load or a store,
1335 then the sequence cannot be problematic. */
1336 if (i == 0)
1337 {
1338 if ((set = single_set (after)) != NULL_RTX
1339 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1340 break;
1341
1342 after = next_active_non_empty_insn (after);
1343 if (!after)
1344 break;
1345 }
1346
1347 /* Add NOP if third instruction is a store. */
1348 if (i == 1
1349 && store_insn_p (after))
1350 insert_nop = true;
1351 }
1352 }
1353
1354 /* Look for a single-word load into an odd-numbered FP register. */
1355 else if (sparc_fix_at697f
1356 && NONJUMP_INSN_P (insn)
1357 && (set = single_set (insn)) != NULL_RTX
1358 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1359 && mem_ref (SET_SRC (set))
1360 && REG_P (SET_DEST (set))
1361 && REGNO (SET_DEST (set)) > 31
1362 && REGNO (SET_DEST (set)) % 2 != 0)
1363 {
1364 /* The wrong dependency is on the enclosing double register. */
1365 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1366 unsigned int src1, src2, dest;
1367 int code;
1368
1369 next = next_active_insn (insn);
1370 if (!next)
1371 break;
1372 /* If the insn is a branch, then it cannot be problematic. */
1373 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1374 continue;
1375
1376 extract_insn (next);
1377 code = INSN_CODE (next);
1378
1379 switch (code)
1380 {
1381 case CODE_FOR_adddf3:
1382 case CODE_FOR_subdf3:
1383 case CODE_FOR_muldf3:
1384 case CODE_FOR_divdf3:
1385 dest = REGNO (recog_data.operand[0]);
1386 src1 = REGNO (recog_data.operand[1]);
1387 src2 = REGNO (recog_data.operand[2]);
1388 if (src1 != src2)
1389 {
1390 /* Case [1-4]:
1391 ld [address], %fx+1
1392 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1393 if ((src1 == x || src2 == x)
1394 && (dest == src1 || dest == src2))
1395 insert_nop = true;
1396 }
1397 else
1398 {
1399 /* Case 5:
1400 ld [address], %fx+1
1401 FPOPd %fx, %fx, %fx */
1402 if (src1 == x
1403 && dest == src1
1404 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1405 insert_nop = true;
1406 }
1407 break;
1408
1409 case CODE_FOR_sqrtdf2:
1410 dest = REGNO (recog_data.operand[0]);
1411 src1 = REGNO (recog_data.operand[1]);
1412 /* Case 6:
1413 ld [address], %fx+1
1414 fsqrtd %fx, %fx */
1415 if (src1 == x && dest == src1)
1416 insert_nop = true;
1417 break;
1418
1419 default:
1420 break;
1421 }
1422 }
1423
1424 /* Look for a single-word load into an integer register. */
1425 else if (sparc_fix_ut699
1426 && NONJUMP_INSN_P (insn)
1427 && (set = single_set (insn)) != NULL_RTX
1428 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1429 && (mem_ref (SET_SRC (set)) != NULL_RTX
1430 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1431 && REG_P (SET_DEST (set))
1432 && REGNO (SET_DEST (set)) < 32)
1433 {
1434 /* There is no problem if the second memory access has a data
1435 dependency on the first single-cycle load. */
1436 rtx x = SET_DEST (set);
1437
1438 next = next_active_insn (insn);
1439 if (!next)
1440 break;
1441 /* If the insn is a branch, then it cannot be problematic. */
1442 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1443 continue;
1444
1445 /* Look for a second memory access to/from an integer register. */
1446 if ((set = single_set (next)) != NULL_RTX)
1447 {
1448 rtx src = SET_SRC (set);
1449 rtx dest = SET_DEST (set);
1450 rtx mem;
1451
1452 /* LDD is affected. */
1453 if ((mem = mem_ref (src)) != NULL_RTX
1454 && REG_P (dest)
1455 && REGNO (dest) < 32
1456 && !reg_mentioned_p (x, XEXP (mem, 0)))
1457 insert_nop = true;
1458
1459 /* STD is *not* affected. */
1460 else if (MEM_P (dest)
1461 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1462 && (src == CONST0_RTX (GET_MODE (dest))
1463 || (REG_P (src)
1464 && REGNO (src) < 32
1465 && REGNO (src) != REGNO (x)))
1466 && !reg_mentioned_p (x, XEXP (dest, 0)))
1467 insert_nop = true;
1468
1469               /* GOT accesses use LD.  */
1470 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1471 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1472 insert_nop = true;
1473 }
1474 }
1475
1476 /* Look for a single-word load/operation into an FP register. */
1477 else if (sparc_fix_ut699
1478 && NONJUMP_INSN_P (insn)
1479 && (set = single_set (insn)) != NULL_RTX
1480 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1481 && REG_P (SET_DEST (set))
1482 && REGNO (SET_DEST (set)) > 31)
1483 {
1484 /* Number of instructions in the problematic window. */
1485 const int n_insns = 4;
1486 /* The problematic combination is with the sibling FP register. */
1487 const unsigned int x = REGNO (SET_DEST (set));
1488 const unsigned int y = x ^ 1;
1489 rtx_insn *after;
1490 int i;
1491
1492 next = next_active_insn (insn);
1493 if (!next)
1494 break;
1495 /* If the insn is a branch, then it cannot be problematic. */
1496 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1497 continue;
1498
1499 /* Look for a second load/operation into the sibling FP register. */
1500 if (!((set = single_set (next)) != NULL_RTX
1501 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1502 && REG_P (SET_DEST (set))
1503 && REGNO (SET_DEST (set)) == y))
1504 continue;
1505
1506 /* Look for a (possible) store from the FP register in the next N
1507 instructions, but bail out if it is again modified or if there
1508 is a store from the sibling FP register before this store. */
1509 for (after = next, i = 0; i < n_insns; i++)
1510 {
1511 bool branch_p;
1512
1513 after = next_active_insn (after);
1514 if (!after)
1515 break;
1516
1517 /* This is a branch with an empty delay slot. */
1518 if (!NONJUMP_INSN_P (after))
1519 {
1520 if (++i == n_insns)
1521 break;
1522 branch_p = true;
1523 after = NULL;
1524 }
1525 /* This is a branch with a filled delay slot. */
1526 else if (rtx_sequence *seq =
1527 dyn_cast <rtx_sequence *> (PATTERN (after)))
1528 {
1529 if (++i == n_insns)
1530 break;
1531 branch_p = true;
1532 after = seq->insn (1);
1533 }
1534 /* This is a regular instruction. */
1535 else
1536 branch_p = false;
1537
1538 if (after && (set = single_set (after)) != NULL_RTX)
1539 {
1540 const rtx src = SET_SRC (set);
1541 const rtx dest = SET_DEST (set);
1542 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1543
1544 /* If the FP register is again modified before the store,
1545 then the store isn't affected. */
1546 if (REG_P (dest)
1547 && (REGNO (dest) == x
1548 || (REGNO (dest) == y && size == 8)))
1549 break;
1550
1551 if (MEM_P (dest) && REG_P (src))
1552 {
1553 /* If there is a store from the sibling FP register
1554 before the store, then the store is not affected. */
1555 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1556 break;
1557
1558 /* Otherwise, the store is affected. */
1559 if (REGNO (src) == x && size == 4)
1560 {
1561 insert_nop = true;
1562 break;
1563 }
1564 }
1565 }
1566
1567 /* If we have a branch in the first M instructions, then we
1568 cannot see the (M+2)th instruction so we play safe. */
1569 if (branch_p && i <= (n_insns - 2))
1570 {
1571 insert_nop = true;
1572 break;
1573 }
1574 }
1575 }
1576
1577 else
1578 next = NEXT_INSN (insn);
1579
1580 if (insert_nop)
1581 emit_insn_before (gen_nop (), next);
1582 }
1583
1584 return 0;
1585 }
1586
1587 namespace {
1588
1589 const pass_data pass_data_work_around_errata =
1590 {
1591 RTL_PASS, /* type */
1592 "errata", /* name */
1593 OPTGROUP_NONE, /* optinfo_flags */
1594 TV_MACH_DEP, /* tv_id */
1595 0, /* properties_required */
1596 0, /* properties_provided */
1597 0, /* properties_destroyed */
1598 0, /* todo_flags_start */
1599 0, /* todo_flags_finish */
1600 };
1601
1602 class pass_work_around_errata : public rtl_opt_pass
1603 {
1604 public:
1605 pass_work_around_errata(gcc::context *ctxt)
1606 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1607 {}
1608
1609 /* opt_pass methods: */
1610 virtual bool gate (function *)
1611 {
1612 return sparc_fix_at697f
1613 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1614 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1615 }
1616
1617 virtual unsigned int execute (function *)
1618 {
1619 return sparc_do_work_around_errata ();
1620 }
1621
1622 }; // class pass_work_around_errata
1623
1624 } // anon namespace
1625
1626 rtl_opt_pass *
1627 make_pass_work_around_errata (gcc::context *ctxt)
1628 {
1629 return new pass_work_around_errata (ctxt);
1630 }
1631
1632 /* Helpers for TARGET_DEBUG_OPTIONS. */
1633 static void
1634 dump_target_flag_bits (const int flags)
1635 {
1636 if (flags & MASK_64BIT)
1637 fprintf (stderr, "64BIT ");
1638 if (flags & MASK_APP_REGS)
1639 fprintf (stderr, "APP_REGS ");
1640 if (flags & MASK_FASTER_STRUCTS)
1641 fprintf (stderr, "FASTER_STRUCTS ");
1642 if (flags & MASK_FLAT)
1643 fprintf (stderr, "FLAT ");
1644 if (flags & MASK_FMAF)
1645 fprintf (stderr, "FMAF ");
1646 if (flags & MASK_FSMULD)
1647 fprintf (stderr, "FSMULD ");
1648 if (flags & MASK_FPU)
1649 fprintf (stderr, "FPU ");
1650 if (flags & MASK_HARD_QUAD)
1651 fprintf (stderr, "HARD_QUAD ");
1652 if (flags & MASK_POPC)
1653 fprintf (stderr, "POPC ");
1654 if (flags & MASK_PTR64)
1655 fprintf (stderr, "PTR64 ");
1656 if (flags & MASK_STACK_BIAS)
1657 fprintf (stderr, "STACK_BIAS ");
1658 if (flags & MASK_UNALIGNED_DOUBLES)
1659 fprintf (stderr, "UNALIGNED_DOUBLES ");
1660 if (flags & MASK_V8PLUS)
1661 fprintf (stderr, "V8PLUS ");
1662 if (flags & MASK_VIS)
1663 fprintf (stderr, "VIS ");
1664 if (flags & MASK_VIS2)
1665 fprintf (stderr, "VIS2 ");
1666 if (flags & MASK_VIS3)
1667 fprintf (stderr, "VIS3 ");
1668 if (flags & MASK_VIS4)
1669 fprintf (stderr, "VIS4 ");
1670 if (flags & MASK_VIS4B)
1671 fprintf (stderr, "VIS4B ");
1672 if (flags & MASK_CBCOND)
1673 fprintf (stderr, "CBCOND ");
1674 if (flags & MASK_DEPRECATED_V8_INSNS)
1675 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1676 if (flags & MASK_LEON)
1677 fprintf (stderr, "LEON ");
1678 if (flags & MASK_LEON3)
1679 fprintf (stderr, "LEON3 ");
1680 if (flags & MASK_SPARCLET)
1681 fprintf (stderr, "SPARCLET ");
1682 if (flags & MASK_SPARCLITE)
1683 fprintf (stderr, "SPARCLITE ");
1684 if (flags & MASK_V8)
1685 fprintf (stderr, "V8 ");
1686 if (flags & MASK_V9)
1687 fprintf (stderr, "V9 ");
1688 }
1689
1690 static void
1691 dump_target_flags (const char *prefix, const int flags)
1692 {
1693 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1694 dump_target_flag_bits (flags);
1695   fprintf (stderr, "]\n");
1696 }
1697
1698 /* Validate and override various options, and do some machine dependent
1699 initialization. */
1700
1701 static void
1702 sparc_option_override (void)
1703 {
1704 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1705 static struct cpu_default {
1706 const int cpu;
1707 const enum sparc_processor_type processor;
1708 } const cpu_default[] = {
1709 /* There must be one entry here for each TARGET_CPU value. */
1710 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1711 { TARGET_CPU_v8, PROCESSOR_V8 },
1712 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1713 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1714 { TARGET_CPU_leon, PROCESSOR_LEON },
1715 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1716 { TARGET_CPU_leon5, PROCESSOR_LEON5 },
1717 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1718 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1719 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1720 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1721 { TARGET_CPU_v9, PROCESSOR_V9 },
1722 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1723 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1724 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1725 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1726 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1727 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1728 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1729 { TARGET_CPU_m8, PROCESSOR_M8 },
1730 { -1, PROCESSOR_V7 }
1731 };
1732 const struct cpu_default *def;
1733 /* Table of values for -m{cpu,tune}=. This must match the order of
1734 the enum processor_type in sparc-opts.h. */
1735 static struct cpu_table {
1736 const char *const name;
1737 const int disable;
1738 const int enable;
1739 } const cpu_table[] = {
1740 { "v7", MASK_ISA, 0 },
1741 { "cypress", MASK_ISA, 0 },
1742 { "v8", MASK_ISA, MASK_V8 },
1743 /* TI TMS390Z55 supersparc */
1744 { "supersparc", MASK_ISA, MASK_V8 },
1745 { "hypersparc", MASK_ISA, MASK_V8 },
1746 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1747 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1748 { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 },
1749 { "leon3v7", MASK_ISA, MASK_LEON3 },
1750 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1751 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1752 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1753 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1754 { "f934", MASK_ISA, MASK_SPARCLITE },
1755 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1756 { "sparclet", MASK_ISA, MASK_SPARCLET },
1757 /* TEMIC sparclet */
1758 { "tsc701", MASK_ISA, MASK_SPARCLET },
1759 { "v9", MASK_ISA, MASK_V9 },
1760 /* UltraSPARC I, II, IIi */
1761 { "ultrasparc", MASK_ISA,
1762 /* Although insns using %y are deprecated, it is a clear win. */
1763 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1764 /* UltraSPARC III */
1765 /* ??? Check if %y issue still holds true. */
1766 { "ultrasparc3", MASK_ISA,
1767 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1768 /* UltraSPARC T1 */
1769 { "niagara", MASK_ISA,
1770 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1771 /* UltraSPARC T2 */
1772 { "niagara2", MASK_ISA,
1773 MASK_V9|MASK_POPC|MASK_VIS2 },
1774 /* UltraSPARC T3 */
1775 { "niagara3", MASK_ISA,
1776 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1777 /* UltraSPARC T4 */
1778 { "niagara4", MASK_ISA,
1779 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1780 /* UltraSPARC M7 */
1781 { "niagara7", MASK_ISA,
1782 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1783 /* UltraSPARC M8 */
1784 { "m8", MASK_ISA,
1785 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1786 };
1787 const struct cpu_table *cpu;
1788 unsigned int i;
1789
1790 if (sparc_debug_string != NULL)
1791 {
1792 const char *q;
1793 char *p;
1794
1795 p = ASTRDUP (sparc_debug_string);
1796 while ((q = strtok (p, ",")) != NULL)
1797 {
1798 bool invert;
1799 int mask;
1800
1801 p = NULL;
1802 if (*q == '!')
1803 {
1804 invert = true;
1805 q++;
1806 }
1807 else
1808 invert = false;
1809
1810 if (! strcmp (q, "all"))
1811 mask = MASK_DEBUG_ALL;
1812 else if (! strcmp (q, "options"))
1813 mask = MASK_DEBUG_OPTIONS;
1814 else
1815 error ("unknown %<-mdebug-%s%> switch", q);
1816
1817 if (invert)
1818 sparc_debug &= ~mask;
1819 else
1820 sparc_debug |= mask;
1821 }
1822 }
1823
1824 /* Enable the FsMULd instruction by default if not explicitly specified by
1825 the user. It may later be disabled by the selected CPU (explicitly or not). */
1826 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1827 target_flags |= MASK_FSMULD;
1828
1829 if (TARGET_DEBUG_OPTIONS)
1830 {
1831 dump_target_flags ("Initial target_flags", target_flags);
1832 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1833 }
1834
1835 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1836 SUBTARGET_OVERRIDE_OPTIONS;
1837 #endif
1838
1839 #ifndef SPARC_BI_ARCH
1840 /* Check for unsupported architecture size. */
1841 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1842 error ("%s is not supported by this configuration",
1843 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1844 #endif
1845
1846 /* We force all 64-bit architectures to use a 128-bit long double. */
1847 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1848 {
1849 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1850 target_flags |= MASK_LONG_DOUBLE_128;
1851 }
1852
1853 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1854 for (i = 8; i < 16; i++)
1855 if (!call_used_regs [i])
1856 {
1857 error ("%<-fcall-saved-REG%> is not supported for out registers");
1858 call_used_regs [i] = 1;
1859 }
1860
1861 /* Set the default CPU if no -mcpu option was specified. */
1862 if (!OPTION_SET_P (sparc_cpu_and_features))
1863 {
1864 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1865 if (def->cpu == TARGET_CPU_DEFAULT)
1866 break;
1867 gcc_assert (def->cpu != -1);
1868 sparc_cpu_and_features = def->processor;
1869 }
1870
1871 /* Set the default CPU if no -mtune option was specified. */
1872 if (!OPTION_SET_P (sparc_cpu))
1873 sparc_cpu = sparc_cpu_and_features;
1874
1875 cpu = &cpu_table[(int) sparc_cpu_and_features];
1876
1877 if (TARGET_DEBUG_OPTIONS)
1878 {
1879 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1880 dump_target_flags ("cpu->disable", cpu->disable);
1881 dump_target_flags ("cpu->enable", cpu->enable);
1882 }
1883
1884 target_flags &= ~cpu->disable;
1885 target_flags |= (cpu->enable
1886 #ifndef HAVE_AS_FMAF_HPC_VIS3
1887 & ~(MASK_FMAF | MASK_VIS3)
1888 #endif
1889 #ifndef HAVE_AS_SPARC4
1890 & ~MASK_CBCOND
1891 #endif
1892 #ifndef HAVE_AS_SPARC5_VIS4
1893 & ~(MASK_VIS4 | MASK_SUBXC)
1894 #endif
1895 #ifndef HAVE_AS_SPARC6
1896 & ~(MASK_VIS4B)
1897 #endif
1898 #ifndef HAVE_AS_LEON
1899 & ~(MASK_LEON | MASK_LEON3)
1900 #endif
1901 & ~(target_flags_explicit & MASK_FEATURES)
1902 );
1903
1904 /* FsMULd is a V8 instruction. */
1905 if (!TARGET_V8 && !TARGET_V9)
1906 target_flags &= ~MASK_FSMULD;
1907
1908 /* -mvis2 implies -mvis. */
1909 if (TARGET_VIS2)
1910 target_flags |= MASK_VIS;
1911
1912 /* -mvis3 implies -mvis2 and -mvis. */
1913 if (TARGET_VIS3)
1914 target_flags |= MASK_VIS2 | MASK_VIS;
1915
1916 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1917 if (TARGET_VIS4)
1918 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1919
1920 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1921 if (TARGET_VIS4B)
1922 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1923
1924 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1925 FPU is disabled. */
1926 if (!TARGET_FPU)
1927 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1928 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1929
1930 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1931 are available; -m64 also implies v9. */
1932 if (TARGET_VIS || TARGET_ARCH64)
1933 {
1934 target_flags |= MASK_V9;
1935 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1936 }
1937
1938 /* -mvis also implies -mv8plus on 32-bit. */
1939 if (TARGET_VIS && !TARGET_ARCH64)
1940 target_flags |= MASK_V8PLUS;
1941
1942 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1943 if (TARGET_V9 && TARGET_ARCH32)
1944 target_flags |= MASK_DEPRECATED_V8_INSNS;
1945
1946 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1947 if (!TARGET_V9 || TARGET_ARCH64)
1948 target_flags &= ~MASK_V8PLUS;
1949
1950 /* Don't use stack biasing in 32-bit mode. */
1951 if (TARGET_ARCH32)
1952 target_flags &= ~MASK_STACK_BIAS;
1953
1954 /* Use LRA instead of reload, unless otherwise instructed. */
1955 if (!(target_flags_explicit & MASK_LRA))
1956 target_flags |= MASK_LRA;
1957
1958 /* Enable applicable errata workarounds for LEON3FT. */
1959 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1960 {
1961 sparc_fix_b2bst = 1;
1962 sparc_fix_lost_divsqrt = 1;
1963 }
1964
1965 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1966 if (sparc_fix_ut699)
1967 target_flags &= ~MASK_FSMULD;
1968
1969 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1970 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1971 target_flags |= MASK_LONG_DOUBLE_128;
1972 #endif
1973
1974 if (TARGET_DEBUG_OPTIONS)
1975 dump_target_flags ("Final target_flags", target_flags);
1976
1977 /* Set the code model if no -mcmodel option was specified. */
1978 if (OPTION_SET_P (sparc_code_model))
1979 {
1980 if (TARGET_ARCH32)
1981 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1982 }
1983 else
1984 {
1985 if (TARGET_ARCH32)
1986 sparc_code_model = CM_32;
1987 else
1988 sparc_code_model = SPARC_DEFAULT_CMODEL;
1989 }
1990
1991 /* Set the memory model if no -mmemory-model option was specified. */
1992 if (!OPTION_SET_P (sparc_memory_model))
1993 {
1994 /* Choose the memory model for the operating system. */
1995 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1996 if (os_default != SMM_DEFAULT)
1997 sparc_memory_model = os_default;
1998 /* Choose the most relaxed model for the processor. */
1999 else if (TARGET_V9)
2000 sparc_memory_model = SMM_RMO;
2001 else if (TARGET_LEON3)
2002 sparc_memory_model = SMM_TSO;
2003 else if (TARGET_LEON)
2004 sparc_memory_model = SMM_SC;
2005 else if (TARGET_V8)
2006 sparc_memory_model = SMM_PSO;
2007 else
2008 sparc_memory_model = SMM_SC;
2009 }
2010
2011 /* Supply a default value for align_functions. */
2012 if (flag_align_functions && !str_align_functions)
2013 {
2014 if (sparc_cpu == PROCESSOR_ULTRASPARC
2015 || sparc_cpu == PROCESSOR_ULTRASPARC3
2016 || sparc_cpu == PROCESSOR_NIAGARA
2017 || sparc_cpu == PROCESSOR_NIAGARA2
2018 || sparc_cpu == PROCESSOR_NIAGARA3
2019 || sparc_cpu == PROCESSOR_NIAGARA4)
2020 str_align_functions = "32";
2021 else if (sparc_cpu == PROCESSOR_NIAGARA7
2022 || sparc_cpu == PROCESSOR_M8)
2023 str_align_functions = "64";
2024 }
2025
2026 /* Validate PCC_STRUCT_RETURN. */
2027 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
2028 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
2029
2030 /* Only use .uaxword when compiling for a 64-bit target. */
2031 if (!TARGET_ARCH64)
2032 targetm.asm_out.unaligned_op.di = NULL;
2033
2034 /* Set the processor costs. */
2035 switch (sparc_cpu)
2036 {
2037 case PROCESSOR_V7:
2038 case PROCESSOR_CYPRESS:
2039 sparc_costs = &cypress_costs;
2040 break;
2041 case PROCESSOR_V8:
2042 case PROCESSOR_SPARCLITE:
2043 case PROCESSOR_SUPERSPARC:
2044 sparc_costs = &supersparc_costs;
2045 break;
2046 case PROCESSOR_F930:
2047 case PROCESSOR_F934:
2048 case PROCESSOR_HYPERSPARC:
2049 case PROCESSOR_SPARCLITE86X:
2050 sparc_costs = &hypersparc_costs;
2051 break;
2052 case PROCESSOR_LEON:
2053 sparc_costs = &leon_costs;
2054 break;
2055 case PROCESSOR_LEON3:
2056 case PROCESSOR_LEON3V7:
2057 sparc_costs = &leon3_costs;
2058 break;
2059 case PROCESSOR_LEON5:
2060 sparc_costs = &leon5_costs;
2061 break;
2062 case PROCESSOR_SPARCLET:
2063 case PROCESSOR_TSC701:
2064 sparc_costs = &sparclet_costs;
2065 break;
2066 case PROCESSOR_V9:
2067 case PROCESSOR_ULTRASPARC:
2068 sparc_costs = &ultrasparc_costs;
2069 break;
2070 case PROCESSOR_ULTRASPARC3:
2071 sparc_costs = &ultrasparc3_costs;
2072 break;
2073 case PROCESSOR_NIAGARA:
2074 sparc_costs = &niagara_costs;
2075 break;
2076 case PROCESSOR_NIAGARA2:
2077 sparc_costs = &niagara2_costs;
2078 break;
2079 case PROCESSOR_NIAGARA3:
2080 sparc_costs = &niagara3_costs;
2081 break;
2082 case PROCESSOR_NIAGARA4:
2083 sparc_costs = &niagara4_costs;
2084 break;
2085 case PROCESSOR_NIAGARA7:
2086 sparc_costs = &niagara7_costs;
2087 break;
2088 case PROCESSOR_M8:
2089 sparc_costs = &m8_costs;
2090 break;
2091 case PROCESSOR_NATIVE:
2092 gcc_unreachable ();
2093 }
2094
2095 /* param_simultaneous_prefetches is the number of prefetches that
2096 can run at the same time. More important, it is the threshold
2097 defining when additional prefetches will be dropped by the
2098 hardware.
2099
2100 The UltraSPARC-III features a documented prefetch queue with a
2101 size of 8. Additional prefetches issued in the cpu are
2102 dropped.
2103
2104 Niagara processors are different. In these processors prefetches
2105 are handled much like regular loads. The L1 miss buffer is 32
2106 entries, but prefetches start getting affected when 30 entries
2107 become occupied. That occupation could be a mix of regular loads
2108 and prefetches though. And that buffer is shared by all threads.
2109 Once the threshold is reached, if the core is running a single
2110 thread the prefetch will retry. If more than one thread is
2111 running, the prefetch will be dropped.
2112
2113 All this makes it very difficult to determine how many prefetches
2114 can be issued simultaneously, even in a
2115 single-threaded program. Experimental results show that setting
2116 this parameter to 32 works well when the number of threads is not
2117 high. */
2118 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2119 param_simultaneous_prefetches,
2120 ((sparc_cpu == PROCESSOR_ULTRASPARC
2121 || sparc_cpu == PROCESSOR_NIAGARA
2122 || sparc_cpu == PROCESSOR_NIAGARA2
2123 || sparc_cpu == PROCESSOR_NIAGARA3
2124 || sparc_cpu == PROCESSOR_NIAGARA4)
2125 ? 2
2126 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2127 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2128 || sparc_cpu == PROCESSOR_M8)
2129 ? 32 : 3))));
2130
2131 /* param_l1_cache_line_size is the size of the L1 cache line, in
2132 bytes.
2133
2134 The Oracle SPARC Architecture (previously the UltraSPARC
2135 Architecture) specification states that when a PREFETCH[A]
2136 instruction is executed an implementation-specific amount of data
2137 is prefetched, and that it is at least 64 bytes long (aligned to
2138 at least 64 bytes).
2139
2140 However, this is not correct. The M7 (and implementations prior
2141 to that) does not guarantee a 64B prefetch into a cache if the
2142 line size is smaller. A single cache line is all that is ever
2143 prefetched. So for the M7, where the L1D$ has 32B lines and the
2144 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2145 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2146 is a read_n prefetch, which is the only type which allocates to
2147 the L1.) */
2148 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2149 param_l1_cache_line_size,
2150 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2151
2152 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2153 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2154 Niagara processors feature a L1D$ of 16KB. */
2155 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2156 param_l1_cache_size,
2157 ((sparc_cpu == PROCESSOR_ULTRASPARC
2158 || sparc_cpu == PROCESSOR_ULTRASPARC3
2159 || sparc_cpu == PROCESSOR_NIAGARA
2160 || sparc_cpu == PROCESSOR_NIAGARA2
2161 || sparc_cpu == PROCESSOR_NIAGARA3
2162 || sparc_cpu == PROCESSOR_NIAGARA4
2163 || sparc_cpu == PROCESSOR_NIAGARA7
2164 || sparc_cpu == PROCESSOR_M8)
2165 ? 16 : 64));
2166
2167 /* param_l2_cache_size is the size of the L2 in kilobytes. Note
2168 that 512 is the default in params.def. */
2169 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2170 param_l2_cache_size,
2171 ((sparc_cpu == PROCESSOR_NIAGARA4
2172 || sparc_cpu == PROCESSOR_M8)
2173 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2174 ? 256 : 512)));
2175
2176
2177 /* Disable save slot sharing for call-clobbered registers by default.
2178 The IRA sharing algorithm works on single registers only and this
2179 pessimizes for double floating-point registers. */
2180 if (!OPTION_SET_P (flag_ira_share_save_slots))
2181 flag_ira_share_save_slots = 0;
2182
2183 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2184 redundant 32-to-64-bit extensions. */
2185 if (!OPTION_SET_P (flag_ree) && TARGET_ARCH32)
2186 flag_ree = 0;
2187
2188 /* Do various machine dependent initializations. */
2189 sparc_init_modes ();
2190
2191 /* Set up function hooks. */
2192 init_machine_status = sparc_init_machine_status;
2193 }
2194 \f
2195 /* Miscellaneous utilities. */
2196
2197 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2198 or branch on register contents instructions. */
2199
2200 int
2201 v9_regcmp_p (enum rtx_code code)
2202 {
2203 return (code == EQ || code == NE || code == GE || code == LT
2204 || code == LE || code == GT);
2205 }
2206
2207 /* Nonzero if OP is a floating point constant which can
2208 be loaded into an integer register using a single
2209 sethi instruction. */
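/* For instance (illustrative), 1.0f has the SFmode image 0x3f800000: its
   low 10 bits are clear, so SPARC_SETHI_P accepts it while SPARC_SIMM13_P
   does not, and fp_sethi_p returns nonzero for it. */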
2210
2211 int
2212 fp_sethi_p (rtx op)
2213 {
2214 if (GET_CODE (op) == CONST_DOUBLE)
2215 {
2216 long i;
2217
2218 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2219 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2220 }
2221
2222 return 0;
2223 }
2224
2225 /* Nonzero if OP is a floating point constant which can
2226 be loaded into an integer register using a single
2227 mov instruction. */
2228
2229 int
2230 fp_mov_p (rtx op)
2231 {
2232 if (GET_CODE (op) == CONST_DOUBLE)
2233 {
2234 long i;
2235
2236 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2237 return SPARC_SIMM13_P (i);
2238 }
2239
2240 return 0;
2241 }
2242
2243 /* Nonzero if OP is a floating point constant which can
2244 be loaded into an integer register using a high/losum
2245 instruction sequence. */
2246
2247 int
2248 fp_high_losum_p (rtx op)
2249 {
2250 /* The constraints calling this should only be in
2251 SFmode move insns, so any constant which cannot
2252 be moved using a single insn will do. */
2253 if (GET_CODE (op) == CONST_DOUBLE)
2254 {
2255 long i;
2256
2257 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2258 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2259 }
2260
2261 return 0;
2262 }
2263
2264 /* Return true if the address of LABEL can be loaded by means of the
2265 mov{si,di}_pic_label_ref patterns in PIC mode. */
2266
2267 static bool
2268 can_use_mov_pic_label_ref (rtx label)
2269 {
2270 /* VxWorks does not impose a fixed gap between segments; the run-time
2271 gap can be different from the object-file gap. We therefore can't
2272 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2273 are absolutely sure that X is in the same segment as the GOT.
2274 Unfortunately, the flexibility of linker scripts means that we
2275 can't be sure of that in general, so assume that GOT-relative
2276 accesses are never valid on VxWorks. */
2277 if (TARGET_VXWORKS_RTP)
2278 return false;
2279
2280 /* Similarly, if the label is non-local, it might end up being placed
2281 in a different section from the current one, but mov_pic_label_ref
2282 requires the label and the code to be in the same section. */
2283 if (LABEL_REF_NONLOCAL_P (label))
2284 return false;
2285
2286 /* Finally, if we are reordering basic blocks and partitioning into hot
2287 and cold sections, this might happen for any label. */
2288 if (flag_reorder_blocks_and_partition)
2289 return false;
2290
2291 return true;
2292 }
2293
2294 /* Expand a move instruction. Return true if all work is done. */
2295
2296 bool
2297 sparc_expand_move (machine_mode mode, rtx *operands)
2298 {
2299 /* Handle sets of MEM first. */
2300 if (GET_CODE (operands[0]) == MEM)
2301 {
2302 /* 0 is a register (or a pair of registers) on SPARC. */
2303 if (register_or_zero_operand (operands[1], mode))
2304 return false;
2305
2306 if (!reload_in_progress)
2307 {
2308 operands[0] = validize_mem (operands[0]);
2309 operands[1] = force_reg (mode, operands[1]);
2310 }
2311 }
2312
2313 /* Fix up TLS cases. */
2314 if (TARGET_HAVE_TLS
2315 && CONSTANT_P (operands[1])
2316 && sparc_tls_referenced_p (operands [1]))
2317 {
2318 operands[1] = sparc_legitimize_tls_address (operands[1]);
2319 return false;
2320 }
2321
2322 /* Fix up PIC cases. */
2323 if (flag_pic && CONSTANT_P (operands[1]))
2324 {
2325 if (pic_address_needs_scratch (operands[1]))
2326 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2327
2328 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2329 if ((GET_CODE (operands[1]) == LABEL_REF
2330 && can_use_mov_pic_label_ref (operands[1]))
2331 || (GET_CODE (operands[1]) == CONST
2332 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2333 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2334 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2335 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2336 {
2337 if (mode == SImode)
2338 {
2339 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2340 return true;
2341 }
2342
2343 if (mode == DImode)
2344 {
2345 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2346 return true;
2347 }
2348 }
2349
2350 if (symbolic_operand (operands[1], mode))
2351 {
2352 operands[1]
2353 = sparc_legitimize_pic_address (operands[1],
2354 reload_in_progress
2355 ? operands[0] : NULL_RTX);
2356 return false;
2357 }
2358 }
2359
2360 /* If we are trying to toss an integer constant into FP registers,
2361 or loading a FP or vector constant, force it into memory. */
2362 if (CONSTANT_P (operands[1])
2363 && REG_P (operands[0])
2364 && (SPARC_FP_REG_P (REGNO (operands[0]))
2365 || SCALAR_FLOAT_MODE_P (mode)
2366 || VECTOR_MODE_P (mode)))
2367 {
2368 /* emit_group_store will send such bogosity to us when it is
2369 not storing directly into memory. So fix this up to avoid
2370 crashes in output_constant_pool. */
2371 if (operands [1] == const0_rtx)
2372 operands[1] = CONST0_RTX (mode);
2373
2374 /* We can clear FP registers or set them to all-ones if TARGET_VIS, and
2375 we can always do so for other registers. */
2376 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2377 && (const_zero_operand (operands[1], mode)
2378 || const_all_ones_operand (operands[1], mode)))
2379 return false;
2380
2381 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2382 /* We are able to build any SF constant in integer registers
2383 with at most 2 instructions. */
2384 && (mode == SFmode
2385 /* And any DF constant in integer registers if needed. */
2386 || (mode == DFmode && !can_create_pseudo_p ())))
2387 return false;
2388
2389 operands[1] = force_const_mem (mode, operands[1]);
2390 if (!reload_in_progress)
2391 operands[1] = validize_mem (operands[1]);
2392 return false;
2393 }
2394
2395 /* Accept non-constants and valid constants unmodified. */
2396 if (!CONSTANT_P (operands[1])
2397 || GET_CODE (operands[1]) == HIGH
2398 || input_operand (operands[1], mode))
2399 return false;
2400
2401 switch (mode)
2402 {
2403 case E_QImode:
2404 /* All QImode constants require only one insn, so proceed. */
2405 break;
2406
2407 case E_HImode:
2408 case E_SImode:
2409 sparc_emit_set_const32 (operands[0], operands[1]);
2410 return true;
2411
2412 case E_DImode:
2413 /* input_operand should have filtered out 32-bit mode. */
2414 sparc_emit_set_const64 (operands[0], operands[1]);
2415 return true;
2416
2417 case E_TImode:
2418 {
2419 rtx high, low;
2420 /* TImode isn't available in 32-bit mode. */
2421 split_double (operands[1], &high, &low);
2422 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2423 high));
2424 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2425 low));
2426 }
2427 return true;
2428
2429 default:
2430 gcc_unreachable ();
2431 }
2432
2433 return false;
2434 }
2435
2436 /* Load OP1, a 32-bit constant, into OP0, a register.
2437 We know it can't be done in one insn when we get
2438 here; the move expander guarantees this. */
2439
2440 static void
2441 sparc_emit_set_const32 (rtx op0, rtx op1)
2442 {
2443 machine_mode mode = GET_MODE (op0);
2444 rtx temp = op0;
2445
2446 if (can_create_pseudo_p ())
2447 temp = gen_reg_rtx (mode);
2448
2449 if (GET_CODE (op1) == CONST_INT)
2450 {
2451 gcc_assert (!small_int_operand (op1, mode)
2452 && !const_high_operand (op1, mode));
2453
2454 /* Emit them as real moves instead of a HIGH/LO_SUM,
2455 this way CSE can see everything and reuse intermediate
2456 values if it wants. */
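/* E.g. (illustrative) for op1 == 0x12345678 this emits a set of
   0x12345400 into TEMP followed by an IOR with 0x278 into OP0. */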
2457 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2458 & ~(HOST_WIDE_INT) 0x3ff)));
2459
2460 emit_insn (gen_rtx_SET (op0,
2461 gen_rtx_IOR (mode, temp,
2462 GEN_INT (INTVAL (op1) & 0x3ff))));
2463 }
2464 else
2465 {
2466 /* A symbol, emit in the traditional way. */
2467 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2468 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2469 }
2470 }
2471
2472 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2473 If TEMP is nonzero, we are forbidden to use any other scratch
2474 registers. Otherwise, we are allowed to generate them as needed.
2475
2476 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2477 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2478
2479 void
2480 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2481 {
2482 rtx cst, temp1, temp2, temp3, temp4, temp5;
2483 rtx ti_temp = 0;
2484
2485 /* Deal with offsets that are too large. */
2486 if (GET_CODE (op1) == CONST
2487 && GET_CODE (XEXP (op1, 0)) == PLUS
2488 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2489 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2490 {
2491 gcc_assert (!temp);
2492 temp1 = gen_reg_rtx (DImode);
2493 temp2 = gen_reg_rtx (DImode);
2494 sparc_emit_set_const64 (temp2, cst);
2495 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2496 NULL_RTX);
2497 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2498 return;
2499 }
2500
2501 if (temp && GET_MODE (temp) == TImode)
2502 {
2503 ti_temp = temp;
2504 temp = gen_rtx_REG (DImode, REGNO (temp));
2505 }
2506
2507 /* SPARC-V9 code model support. */
2508 switch (sparc_code_model)
2509 {
2510 case CM_MEDLOW:
2511 /* The range spanned by all instructions in the object is less
2512 than 2^31 bytes (2GB) and the distance from any instruction
2513 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2514 than 2^31 bytes (2GB).
2515
2516 The executable must be in the low 4TB of the virtual address
2517 space.
2518
2519 sethi %hi(symbol), %temp1
2520 or %temp1, %lo(symbol), %reg */
2521 if (temp)
2522 temp1 = temp; /* op0 is allowed. */
2523 else
2524 temp1 = gen_reg_rtx (DImode);
2525
2526 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2527 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2528 break;
2529
2530 case CM_MEDMID:
2531 /* The range spanned by all instructions in the object is less
2532 than 2^31 bytes (2GB) and the distance from any instruction
2533 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2534 than 2^31 bytes (2GB).
2535
2536 The executable must be in the low 16TB of the virtual address
2537 space.
2538
2539 sethi %h44(symbol), %temp1
2540 or %temp1, %m44(symbol), %temp2
2541 sllx %temp2, 12, %temp3
2542 or %temp3, %l44(symbol), %reg */
2543 if (temp)
2544 {
2545 temp1 = op0;
2546 temp2 = op0;
2547 temp3 = temp; /* op0 is allowed. */
2548 }
2549 else
2550 {
2551 temp1 = gen_reg_rtx (DImode);
2552 temp2 = gen_reg_rtx (DImode);
2553 temp3 = gen_reg_rtx (DImode);
2554 }
2555
2556 emit_insn (gen_seth44 (temp1, op1));
2557 emit_insn (gen_setm44 (temp2, temp1, op1));
2558 emit_insn (gen_rtx_SET (temp3,
2559 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2560 emit_insn (gen_setl44 (op0, temp3, op1));
2561 break;
2562
2563 case CM_MEDANY:
2564 /* The range spanned by all instructions in the object is less
2565 than 2^31 bytes (2GB) and the distance from any instruction
2566 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2567 than 2^31 bytes (2GB).
2568
2569 The executable can be placed anywhere in the virtual address
2570 space.
2571
2572 sethi %hh(symbol), %temp1
2573 sethi %lm(symbol), %temp2
2574 or %temp1, %hm(symbol), %temp3
2575 sllx %temp3, 32, %temp4
2576 or %temp4, %temp2, %temp5
2577 or %temp5, %lo(symbol), %reg */
2578 if (temp)
2579 {
2580 /* It is possible that one of the registers we got for operands[2]
2581 might coincide with that of operands[0] (which is why we made
2582 it TImode). Pick the other one to use as our scratch. */
2583 if (rtx_equal_p (temp, op0))
2584 {
2585 gcc_assert (ti_temp);
2586 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2587 }
2588 temp1 = op0;
2589 temp2 = temp; /* op0 is _not_ allowed, see above. */
2590 temp3 = op0;
2591 temp4 = op0;
2592 temp5 = op0;
2593 }
2594 else
2595 {
2596 temp1 = gen_reg_rtx (DImode);
2597 temp2 = gen_reg_rtx (DImode);
2598 temp3 = gen_reg_rtx (DImode);
2599 temp4 = gen_reg_rtx (DImode);
2600 temp5 = gen_reg_rtx (DImode);
2601 }
2602
2603 emit_insn (gen_sethh (temp1, op1));
2604 emit_insn (gen_setlm (temp2, op1));
2605 emit_insn (gen_sethm (temp3, temp1, op1));
2606 emit_insn (gen_rtx_SET (temp4,
2607 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2608 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2609 emit_insn (gen_setlo (op0, temp5, op1));
2610 break;
2611
2612 case CM_EMBMEDANY:
2613 /* Old old old backwards compatibility kruft here.
2614 Essentially it is MEDLOW with a fixed 64-bit
2615 virtual base added to all data segment addresses.
2616 Text-segment stuff is computed like MEDANY, we can't
2617 reuse the code above because the relocation knobs
2618 look different.
2619
2620 Data segment: sethi %hi(symbol), %temp1
2621 add %temp1, EMBMEDANY_BASE_REG, %temp2
2622 or %temp2, %lo(symbol), %reg */
2623 if (data_segment_operand (op1, GET_MODE (op1)))
2624 {
2625 if (temp)
2626 {
2627 temp1 = temp; /* op0 is allowed. */
2628 temp2 = op0;
2629 }
2630 else
2631 {
2632 temp1 = gen_reg_rtx (DImode);
2633 temp2 = gen_reg_rtx (DImode);
2634 }
2635
2636 emit_insn (gen_embmedany_sethi (temp1, op1));
2637 emit_insn (gen_embmedany_brsum (temp2, temp1));
2638 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2639 }
2640
2641 /* Text segment: sethi %uhi(symbol), %temp1
2642 sethi %hi(symbol), %temp2
2643 or %temp1, %ulo(symbol), %temp3
2644 sllx %temp3, 32, %temp4
2645 or %temp4, %temp2, %temp5
2646 or %temp5, %lo(symbol), %reg */
2647 else
2648 {
2649 if (temp)
2650 {
2651 /* It is possible that one of the registers we got for operands[2]
2652 might coincide with that of operands[0] (which is why we made
2653 it TImode). Pick the other one to use as our scratch. */
2654 if (rtx_equal_p (temp, op0))
2655 {
2656 gcc_assert (ti_temp);
2657 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2658 }
2659 temp1 = op0;
2660 temp2 = temp; /* op0 is _not_ allowed, see above. */
2661 temp3 = op0;
2662 temp4 = op0;
2663 temp5 = op0;
2664 }
2665 else
2666 {
2667 temp1 = gen_reg_rtx (DImode);
2668 temp2 = gen_reg_rtx (DImode);
2669 temp3 = gen_reg_rtx (DImode);
2670 temp4 = gen_reg_rtx (DImode);
2671 temp5 = gen_reg_rtx (DImode);
2672 }
2673
2674 emit_insn (gen_embmedany_textuhi (temp1, op1));
2675 emit_insn (gen_embmedany_texthi (temp2, op1));
2676 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2677 emit_insn (gen_rtx_SET (temp4,
2678 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2679 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2680 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2681 }
2682 break;
2683
2684 default:
2685 gcc_unreachable ();
2686 }
2687 }
2688
2689 /* These avoid problems when cross compiling. If we do not
2690 go through all this hair then the optimizer will see
2691 invalid REG_EQUAL notes or in some cases none at all. */
2692 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2693 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2694 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2695 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2696
2697 /* The optimizer is not to assume anything about exactly
2698 which bits are set for a HIGH; they are unspecified.
2699 Unfortunately this leads to many missed optimizations
2700 during CSE. We mask out the non-HIGH bits and match
2701 a plain movdi to alleviate this problem. */
2702 static rtx
2703 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2704 {
2705 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2706 }
2707
2708 static rtx
2709 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2710 {
2711 return gen_rtx_SET (dest, GEN_INT (val));
2712 }
2713
2714 static rtx
2715 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2716 {
2717 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2718 }
2719
2720 static rtx
2721 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2722 {
2723 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2724 }
2725
2726 /* Worker routines for 64-bit constant formation on arch64.
2727 One of the key things to do in these emissions is
2728 to create as many temp REGs as possible. This makes it
2729 possible for half-built constants to be reused when
2730 similar values are required later on.
2731 Without doing this, the optimizer cannot see such
2732 opportunities. */
2733
2734 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2735 unsigned HOST_WIDE_INT, int);
2736
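/* Load into OP0 a 64-bit constant whose upper 32 bits are all zero
   (IS_NEG == 0) or all one (IS_NEG != 0), using TEMP as scratch: the
   top 22 bits of the (possibly complemented) LOW_BITS go in via
   gen_safe_HIGH64, then an OR (respectively an XOR or a NOT) supplies
   the low 10 bits. */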
2737 static void
2738 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2739 unsigned HOST_WIDE_INT low_bits, int is_neg)
2740 {
2741 unsigned HOST_WIDE_INT high_bits;
2742
2743 if (is_neg)
2744 high_bits = (~low_bits) & 0xffffffff;
2745 else
2746 high_bits = low_bits;
2747
2748 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2749 if (!is_neg)
2750 {
2751 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2752 }
2753 else
2754 {
2755 /* If we are XOR'ing with -1, then we should emit a one's complement
2756 instead. This way the combiner will notice logical operations
2757 such as ANDN later on and substitute. */
2758 if ((low_bits & 0x3ff) == 0x3ff)
2759 {
2760 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2761 }
2762 else
2763 {
2764 emit_insn (gen_rtx_SET (op0,
2765 gen_safe_XOR64 (temp,
2766 (-(HOST_WIDE_INT)0x400
2767 | (low_bits & 0x3ff)))));
2768 }
2769 }
2770 }
2771
2772 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2773 unsigned HOST_WIDE_INT, int);
2774
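/* Load HIGH_BITS shifted left by SHIFT_COUNT into OP0, then OR in
   LOW_IMMEDIATE if it is nonzero; TEMP is used as scratch while
   building HIGH_BITS. */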
2775 static void
2776 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2777 unsigned HOST_WIDE_INT high_bits,
2778 unsigned HOST_WIDE_INT low_immediate,
2779 int shift_count)
2780 {
2781 rtx temp2 = op0;
2782
2783 if ((high_bits & 0xfffffc00) != 0)
2784 {
2785 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2786 if ((high_bits & ~0xfffffc00) != 0)
2787 emit_insn (gen_rtx_SET (op0,
2788 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2789 else
2790 temp2 = temp;
2791 }
2792 else
2793 {
2794 emit_insn (gen_safe_SET64 (temp, high_bits));
2795 temp2 = temp;
2796 }
2797
2798 /* Now shift it up into place. */
2799 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2800 GEN_INT (shift_count))));
2801
2802 /* If there is a low immediate piece, finish up by
2803 putting that in as well. */
2804 if (low_immediate != 0)
2805 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2806 }
2807
2808 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2809 unsigned HOST_WIDE_INT);
2810
2811 /* Full 64-bit constant decomposition. Even though this is the
2812 'worst' case, we still optimize a few things away. */
2813 static void
2814 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2815 unsigned HOST_WIDE_INT high_bits,
2816 unsigned HOST_WIDE_INT low_bits)
2817 {
2818 rtx sub_temp = op0;
2819
2820 if (can_create_pseudo_p ())
2821 sub_temp = gen_reg_rtx (DImode);
2822
2823 if ((high_bits & 0xfffffc00) != 0)
2824 {
2825 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2826 if ((high_bits & ~0xfffffc00) != 0)
2827 emit_insn (gen_rtx_SET (sub_temp,
2828 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2829 else
2830 sub_temp = temp;
2831 }
2832 else
2833 {
2834 emit_insn (gen_safe_SET64 (temp, high_bits));
2835 sub_temp = temp;
2836 }
2837
2838 if (can_create_pseudo_p ())
2839 {
2840 rtx temp2 = gen_reg_rtx (DImode);
2841 rtx temp3 = gen_reg_rtx (DImode);
2842 rtx temp4 = gen_reg_rtx (DImode);
2843
2844 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2845 GEN_INT (32))));
2846
2847 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2848 if ((low_bits & ~0xfffffc00) != 0)
2849 {
2850 emit_insn (gen_rtx_SET (temp3,
2851 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2852 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2853 }
2854 else
2855 {
2856 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2857 }
2858 }
2859 else
2860 {
2861 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2862 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2863 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2864 int to_shift = 12;
2865
2866 /* We are in the middle of reload, so this is really
2867 painful. However we do still make an attempt to
2868 avoid emitting truly stupid code. */
2869 if (low1 != const0_rtx)
2870 {
2871 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2872 GEN_INT (to_shift))));
2873 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2874 sub_temp = op0;
2875 to_shift = 12;
2876 }
2877 else
2878 {
2879 to_shift += 12;
2880 }
2881 if (low2 != const0_rtx)
2882 {
2883 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2884 GEN_INT (to_shift))));
2885 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2886 sub_temp = op0;
2887 to_shift = 8;
2888 }
2889 else
2890 {
2891 to_shift += 8;
2892 }
2893 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2894 GEN_INT (to_shift))));
2895 if (low3 != const0_rtx)
2896 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2897 /* phew... */
2898 }
2899 }
2900
2901 /* Analyze a 64-bit constant for certain properties. */
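/* HBSP, LBSP and ABBASP receive, respectively, the highest set bit, the
   lowest set bit and whether every bit in between is set.  For example
   (illustrative), 0x000000ffff000000 yields 39, 24 and 1. */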
2902 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2903 unsigned HOST_WIDE_INT,
2904 int *, int *, int *);
2905
2906 static void
2907 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2908 unsigned HOST_WIDE_INT low_bits,
2909 int *hbsp, int *lbsp, int *abbasp)
2910 {
2911 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2912 int i;
2913
2914 lowest_bit_set = highest_bit_set = -1;
2915 i = 0;
2916 do
2917 {
2918 if ((lowest_bit_set == -1)
2919 && ((low_bits >> i) & 1))
2920 lowest_bit_set = i;
2921 if ((highest_bit_set == -1)
2922 && ((high_bits >> (32 - i - 1)) & 1))
2923 highest_bit_set = (64 - i - 1);
2924 }
2925 while (++i < 32
2926 && ((highest_bit_set == -1)
2927 || (lowest_bit_set == -1)));
2928 if (i == 32)
2929 {
2930 i = 0;
2931 do
2932 {
2933 if ((lowest_bit_set == -1)
2934 && ((high_bits >> i) & 1))
2935 lowest_bit_set = i + 32;
2936 if ((highest_bit_set == -1)
2937 && ((low_bits >> (32 - i - 1)) & 1))
2938 highest_bit_set = 32 - i - 1;
2939 }
2940 while (++i < 32
2941 && ((highest_bit_set == -1)
2942 || (lowest_bit_set == -1)));
2943 }
2944 /* If there are no bits set this should have gone out
2945 as one instruction! */
2946 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2947 all_bits_between_are_set = 1;
2948 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2949 {
2950 if (i < 32)
2951 {
2952 if ((low_bits & (1 << i)) != 0)
2953 continue;
2954 }
2955 else
2956 {
2957 if ((high_bits & (1 << (i - 32))) != 0)
2958 continue;
2959 }
2960 all_bits_between_are_set = 0;
2961 break;
2962 }
2963 *hbsp = highest_bit_set;
2964 *lbsp = lowest_bit_set;
2965 *abbasp = all_bits_between_are_set;
2966 }
2967
2968 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2969
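/* Return nonzero if the 64-bit constant with high word HIGH_BITS and low
   word LOW_BITS looks like it can be synthesized in at most two
   instructions by sparc_emit_set_const64. */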
2970 static int
2971 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2972 unsigned HOST_WIDE_INT low_bits)
2973 {
2974 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2975
2976 if (high_bits == 0
2977 || high_bits == 0xffffffff)
2978 return 1;
2979
2980 analyze_64bit_constant (high_bits, low_bits,
2981 &highest_bit_set, &lowest_bit_set,
2982 &all_bits_between_are_set);
2983
2984 if ((highest_bit_set == 63
2985 || lowest_bit_set == 0)
2986 && all_bits_between_are_set != 0)
2987 return 1;
2988
2989 if ((highest_bit_set - lowest_bit_set) < 21)
2990 return 1;
2991
2992 return 0;
2993 }
2994
2995 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2996 unsigned HOST_WIDE_INT,
2997 int, int);
2998
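/* Return HIGH_BITS:LOW_BITS shifted right so that its lowest set bit,
   LOWEST_BIT_SET, lands on bit SHIFT; this realigns the 'focus' window
   of set bits for a subsequent mov or sethi. */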
2999 static unsigned HOST_WIDE_INT
3000 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
3001 unsigned HOST_WIDE_INT low_bits,
3002 int lowest_bit_set, int shift)
3003 {
3004 HOST_WIDE_INT hi, lo;
3005
3006 if (lowest_bit_set < 32)
3007 {
3008 lo = (low_bits >> lowest_bit_set) << shift;
3009 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
3010 }
3011 else
3012 {
3013 lo = 0;
3014 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
3015 }
3016 gcc_assert (! (hi & lo));
3017 return (hi | lo);
3018 }
3019
3020 /* Here we are sure to be arch64 and this is an integer constant
3021 being loaded into a register. Emit the most efficient
3022 insn sequence possible. Detection of all the 1-insn cases
3023 has been done already. */
3024 static void
3025 sparc_emit_set_const64 (rtx op0, rtx op1)
3026 {
3027 unsigned HOST_WIDE_INT high_bits, low_bits;
3028 int lowest_bit_set, highest_bit_set;
3029 int all_bits_between_are_set;
3030 rtx temp = 0;
3031
3032 /* Sanity check that we know what we are working with. */
3033 gcc_assert (TARGET_ARCH64
3034 && (GET_CODE (op0) == SUBREG
3035 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
3036
3037 if (! can_create_pseudo_p ())
3038 temp = op0;
3039
3040 if (GET_CODE (op1) != CONST_INT)
3041 {
3042 sparc_emit_set_symbolic_const64 (op0, op1, temp);
3043 return;
3044 }
3045
3046 if (! temp)
3047 temp = gen_reg_rtx (DImode);
3048
3049 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3050 low_bits = (INTVAL (op1) & 0xffffffff);
3051
3052 /* low_bits bits 0 --> 31
3053 high_bits bits 32 --> 63 */
3054
3055 analyze_64bit_constant (high_bits, low_bits,
3056 &highest_bit_set, &lowest_bit_set,
3057 &all_bits_between_are_set);
3058
3059 /* First try for a 2-insn sequence. */
3060
3061 /* These situations are preferred because the optimizer can
3062 * do more things with them:
3063 * 1) mov -1, %reg
3064 * sllx %reg, shift, %reg
3065 * 2) mov -1, %reg
3066 * srlx %reg, shift, %reg
3067 * 3) mov some_small_const, %reg
3068 * sllx %reg, shift, %reg
3069 */
3070 if (((highest_bit_set == 63
3071 || lowest_bit_set == 0)
3072 && all_bits_between_are_set != 0)
3073 || ((highest_bit_set - lowest_bit_set) < 12))
3074 {
3075 HOST_WIDE_INT the_const = -1;
3076 int shift = lowest_bit_set;
3077
3078 if ((highest_bit_set != 63
3079 && lowest_bit_set != 0)
3080 || all_bits_between_are_set == 0)
3081 {
3082 the_const =
3083 create_simple_focus_bits (high_bits, low_bits,
3084 lowest_bit_set, 0);
3085 }
3086 else if (lowest_bit_set == 0)
3087 shift = -(63 - highest_bit_set);
3088
3089 gcc_assert (SPARC_SIMM13_P (the_const));
3090 gcc_assert (shift != 0);
3091
3092 emit_insn (gen_safe_SET64 (temp, the_const));
3093 if (shift > 0)
3094 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3095 GEN_INT (shift))));
3096 else if (shift < 0)
3097 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3098 GEN_INT (-shift))));
3099 return;
3100 }
3101
3102 /* Now a range of 22 or fewer bits set somewhere.
3103 * 1) sethi %hi(focus_bits), %reg
3104 * sllx %reg, shift, %reg
3105 * 2) sethi %hi(focus_bits), %reg
3106 * srlx %reg, shift, %reg
3107 */
3108 if ((highest_bit_set - lowest_bit_set) < 21)
3109 {
3110 unsigned HOST_WIDE_INT focus_bits =
3111 create_simple_focus_bits (high_bits, low_bits,
3112 lowest_bit_set, 10);
3113
3114 gcc_assert (SPARC_SETHI_P (focus_bits));
3115 gcc_assert (lowest_bit_set != 10);
3116
3117 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3118
3119 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3120 if (lowest_bit_set < 10)
3121 emit_insn (gen_rtx_SET (op0,
3122 gen_rtx_LSHIFTRT (DImode, temp,
3123 GEN_INT (10 - lowest_bit_set))));
3124 else if (lowest_bit_set > 10)
3125 emit_insn (gen_rtx_SET (op0,
3126 gen_rtx_ASHIFT (DImode, temp,
3127 GEN_INT (lowest_bit_set - 10))));
3128 return;
3129 }
3130
3131 /* 1) sethi %hi(low_bits), %reg
3132 * or %reg, %lo(low_bits), %reg
3133 * 2) sethi %hi(~low_bits), %reg
3134 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3135 */
3136 if (high_bits == 0
3137 || high_bits == 0xffffffff)
3138 {
3139 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3140 (high_bits == 0xffffffff));
3141 return;
3142 }
3143
3144 /* Now, try 3-insn sequences. */
3145
3146 /* 1) sethi %hi(high_bits), %reg
3147 * or %reg, %lo(high_bits), %reg
3148 * sllx %reg, 32, %reg
3149 */
3150 if (low_bits == 0)
3151 {
3152 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3153 return;
3154 }
3155
3156 /* We may be able to do something quick
3157 when the constant is negated, so try that. */
3158 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3159 (~low_bits) & 0xfffffc00))
3160 {
3161 /* NOTE: The trailing bits get XOR'd so we need the
3162 non-negated bits, not the negated ones. */
3163 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3164
3165 if ((((~high_bits) & 0xffffffff) == 0
3166 && ((~low_bits) & 0x80000000) == 0)
3167 || (((~high_bits) & 0xffffffff) == 0xffffffff
3168 && ((~low_bits) & 0x80000000) != 0))
3169 {
3170 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3171
3172 if ((SPARC_SETHI_P (fast_int)
3173 && (~high_bits & 0xffffffff) == 0)
3174 || SPARC_SIMM13_P (fast_int))
3175 emit_insn (gen_safe_SET64 (temp, fast_int));
3176 else
3177 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3178 }
3179 else
3180 {
3181 rtx negated_const;
3182 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3183 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3184 sparc_emit_set_const64 (temp, negated_const);
3185 }
3186
3187 /* If we are XOR'ing with -1, then we should emit a one's complement
3188 instead. This way the combiner will notice logical operations
3189 such as ANDN later on and substitute. */
3190 if (trailing_bits == 0x3ff)
3191 {
3192 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3193 }
3194 else
3195 {
3196 emit_insn (gen_rtx_SET (op0,
3197 gen_safe_XOR64 (temp,
3198 (-0x400 | trailing_bits))));
3199 }
3200 return;
3201 }
3202
3203 /* 1) sethi %hi(xxx), %reg
3204 * or %reg, %lo(xxx), %reg
3205 * sllx %reg, yyy, %reg
3206 *
3207 * ??? This is just a generalized version of the low_bits==0
3208 * thing above, FIXME...
3209 */
3210 if ((highest_bit_set - lowest_bit_set) < 32)
3211 {
3212 unsigned HOST_WIDE_INT focus_bits =
3213 create_simple_focus_bits (high_bits, low_bits,
3214 lowest_bit_set, 0);
3215
3216 /* We can't get here in this state. */
3217 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3218
3219 /* So what we know is that the set bits straddle the
3220 middle of the 64-bit word. */
3221 sparc_emit_set_const64_quick2 (op0, temp,
3222 focus_bits, 0,
3223 lowest_bit_set);
3224 return;
3225 }
3226
3227 /* 1) sethi %hi(high_bits), %reg
3228 * or %reg, %lo(high_bits), %reg
3229 * sllx %reg, 32, %reg
3230 * or %reg, low_bits, %reg
3231 */
3232 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3233 {
3234 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3235 return;
3236 }
3237
3238 /* The easiest way when all else fails, is full decomposition. */
3239 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3240 }
3241
3242 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3243
3244 static bool
3245 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3246 {
3247 *p1 = SPARC_ICC_REG;
3248 *p2 = SPARC_FCC_REG;
3249 return true;
3250 }
3251
3252 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3253
3254 static unsigned int
3255 sparc_min_arithmetic_precision (void)
3256 {
3257 return 32;
3258 }
3259
3260 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3261 return the mode to be used for the comparison. For floating-point,
3262 CCFP[E]mode is used. CCNZmode should be used when the first operand
3263 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3264 processing is needed. */
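/* For example (illustrative), comparing (plus:SI x y) against zero selects
   CCNZmode so that the condition can be read from the N and Z bits set by
   an addcc, while an ordinary SImode register comparison selects CCmode. */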
3265
3266 machine_mode
3267 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3268 {
3269 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3270 {
3271 switch (op)
3272 {
3273 case EQ:
3274 case NE:
3275 case UNORDERED:
3276 case ORDERED:
3277 case UNLT:
3278 case UNLE:
3279 case UNGT:
3280 case UNGE:
3281 case UNEQ:
3282 return CCFPmode;
3283
3284 case LT:
3285 case LE:
3286 case GT:
3287 case GE:
3288 case LTGT:
3289 return CCFPEmode;
3290
3291 default:
3292 gcc_unreachable ();
3293 }
3294 }
3295 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3296 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3297 && y == const0_rtx)
3298 {
3299 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3300 return CCXNZmode;
3301 else
3302 return CCNZmode;
3303 }
3304 else
3305 {
3306 /* This is for the cmp<mode>_sne pattern. */
3307 if (GET_CODE (x) == NOT && y == constm1_rtx)
3308 {
3309 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3310 return CCXCmode;
3311 else
3312 return CCCmode;
3313 }
3314
3315 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3316 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3317 {
3318 if (GET_CODE (y) == UNSPEC
3319 && (XINT (y, 1) == UNSPEC_ADDV
3320 || XINT (y, 1) == UNSPEC_SUBV
3321 || XINT (y, 1) == UNSPEC_NEGV))
3322 return CCVmode;
3323 else
3324 return CCCmode;
3325 }
3326
3327 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3328 return CCXmode;
3329 else
3330 return CCmode;
3331 }
3332 }
3333
3334 /* Emit the compare insn and return the CC reg for a CODE comparison
3335 with operands X and Y. */
3336
3337 static rtx
3338 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3339 {
3340 machine_mode mode;
3341 rtx cc_reg;
3342
3343 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3344 return x;
3345
3346 mode = SELECT_CC_MODE (code, x, y);
3347
3348 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3349 fcc regs (cse can't tell they're really call clobbered regs and will
3350 remove a duplicate comparison even if there is an intervening function
3351 call - it will then try to reload the cc reg via an int reg which is why
3352 we need the movcc patterns). It is possible to provide the movcc
3353 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3354 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3355 to tell cse that CCFPE mode registers (even pseudos) are call
3356 clobbered. */
3357
3358 /* ??? This is an experiment. Rather than making changes to cse which may
3359 or may not be easy/clean, we do our own cse. This is possible because
3360 we will generate hard registers. Cse knows they're call clobbered (it
3361 doesn't know the same thing about pseudos). If we guess wrong, no big
3362 deal, but if we win, great! */
3363
3364 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3365 #if 1 /* experiment */
3366 {
3367 int reg;
3368 /* We cycle through the registers to ensure they're all exercised. */
3369 static int next_fcc_reg = 0;
3370 /* Previous x,y for each fcc reg. */
3371 static rtx prev_args[4][2];
3372
3373 /* Scan prev_args for x,y. */
3374 for (reg = 0; reg < 4; reg++)
3375 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3376 break;
3377 if (reg == 4)
3378 {
3379 reg = next_fcc_reg;
3380 prev_args[reg][0] = x;
3381 prev_args[reg][1] = y;
3382 next_fcc_reg = (next_fcc_reg + 1) & 3;
3383 }
3384 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3385 }
3386 #else
3387 cc_reg = gen_reg_rtx (mode);
3388 #endif /* ! experiment */
3389 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3390 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3391 else
3392 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3393
3394 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3395 will only result in an unrecognizable insn, so there is no point in asserting. */
3396 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3397
3398 return cc_reg;
3399 }
3400
3401
3402 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3403
3404 rtx
3405 gen_compare_reg (rtx cmp)
3406 {
3407 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3408 }
3409
3410 /* This function is used for v9 only.
3411 DEST is the target of the Scc insn.
3412 CODE is the code for an Scc's comparison.
3413 X and Y are the values we compare.
3414
3415 This function is needed to turn
3416
3417 (set (reg:SI 110)
3418 (gt (reg:CCX 100 %icc)
3419 (const_int 0)))
3420 into
3421 (set (reg:SI 110)
3422 (gt:DI (reg:CCX 100 %icc)
3423 (const_int 0)))
3424
3425 I.e., the instruction recognizer needs to see the mode of the comparison to
3426 find the right instruction. We could use "gt:DI" right in the
3427 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3428
3429 static int
3430 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3431 {
3432 if (! TARGET_ARCH64
3433 && (GET_MODE (x) == DImode
3434 || GET_MODE (dest) == DImode))
3435 return 0;
3436
3437 /* Try to use the movrCC insns. */
3438 if (TARGET_ARCH64
3439 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3440 && y == const0_rtx
3441 && v9_regcmp_p (compare_code))
3442 {
3443 rtx op0 = x;
3444 rtx temp;
3445
3446 /* Special case for op0 != 0. This can be done with one instruction if
3447 dest == x. */
3448
3449 if (compare_code == NE
3450 && GET_MODE (dest) == DImode
3451 && rtx_equal_p (op0, dest))
3452 {
3453 emit_insn (gen_rtx_SET (dest,
3454 gen_rtx_IF_THEN_ELSE (DImode,
3455 gen_rtx_fmt_ee (compare_code, DImode,
3456 op0, const0_rtx),
3457 const1_rtx,
3458 dest)));
3459 return 1;
3460 }
3461
3462 if (reg_overlap_mentioned_p (dest, op0))
3463 {
3464 /* Handle the case where dest == x.
3465 We "early clobber" the result. */
3466 op0 = gen_reg_rtx (GET_MODE (x));
3467 emit_move_insn (op0, x);
3468 }
3469
3470 emit_insn (gen_rtx_SET (dest, const0_rtx));
3471 if (GET_MODE (op0) != DImode)
3472 {
3473 temp = gen_reg_rtx (DImode);
3474 convert_move (temp, op0, 0);
3475 }
3476 else
3477 temp = op0;
3478 emit_insn (gen_rtx_SET (dest,
3479 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3480 gen_rtx_fmt_ee (compare_code, DImode,
3481 temp, const0_rtx),
3482 const1_rtx,
3483 dest)));
3484 return 1;
3485 }
3486 else
3487 {
3488 x = gen_compare_reg_1 (compare_code, x, y);
3489 y = const0_rtx;
3490
3491 emit_insn (gen_rtx_SET (dest, const0_rtx));
3492 emit_insn (gen_rtx_SET (dest,
3493 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3494 gen_rtx_fmt_ee (compare_code,
3495 GET_MODE (x), x, y),
3496 const1_rtx, dest)));
3497 return 1;
3498 }
3499 }
3500
3501
3502 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3503 without jumps using the addx/subx instructions. */
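/* Roughly (illustrative), an SImode 'sne' becomes a subcc that sets the
   carry bit when the operand is nonzero followed by an addx that copies
   the carry into the result register. */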
3504
3505 bool
3506 emit_scc_insn (rtx operands[])
3507 {
3508 rtx tem, x, y;
3509 enum rtx_code code;
3510 machine_mode mode;
3511
3512 /* The quad-word fp compare library routines all return nonzero to indicate
3513 true, which is different from the equivalent libgcc routines, so we must
3514 handle them specially here. */
3515 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3516 {
3517 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3518 GET_CODE (operands[1]));
3519 operands[2] = XEXP (operands[1], 0);
3520 operands[3] = XEXP (operands[1], 1);
3521 }
3522
3523 code = GET_CODE (operands[1]);
3524 x = operands[2];
3525 y = operands[3];
3526 mode = GET_MODE (x);
3527
3528 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3529 more applications). The exception to this is "reg != 0" which can
3530 be done in one instruction on v9 (so we do it). */
3531 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3532 {
3533 if (y != const0_rtx)
3534 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3535
3536 rtx pat = gen_rtx_SET (operands[0],
3537 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3538 x, const0_rtx));
3539
3540 /* If we can use addx/subx or addxc, add a clobber for CC. */
3541 if (mode == SImode || (code == NE && TARGET_VIS3))
3542 {
3543 rtx clobber
3544 = gen_rtx_CLOBBER (VOIDmode,
3545 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3546 SPARC_ICC_REG));
3547 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3548 }
3549
3550 emit_insn (pat);
3551 return true;
3552 }
3553
3554 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3555 if (TARGET_ARCH64
3556 && mode == DImode
3557 && !((code == LTU || code == GTU) && TARGET_VIS3)
3558 && gen_v9_scc (operands[0], code, x, y))
3559 return true;
3560
3561 /* We can do LTU and GEU using the addx/subx instructions too. And
3562 for GTU/LEU, if both operands are registers, swap them and fall
3563 back to the easy case. */
3564 if (code == GTU || code == LEU)
3565 {
3566 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3567 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3568 {
3569 tem = x;
3570 x = y;
3571 y = tem;
3572 code = swap_condition (code);
3573 }
3574 }
3575
3576 if (code == LTU || code == GEU)
3577 {
3578 emit_insn (gen_rtx_SET (operands[0],
3579 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3580 gen_compare_reg_1 (code, x, y),
3581 const0_rtx)));
3582 return true;
3583 }
3584
3585 /* All the possibilities to use addx/subx-based sequences have been
3586 exhausted, so try for a 3-instruction sequence using v9 conditional
3587 moves. */
3588 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3589 return true;
3590
3591 /* Nope, do branches. */
3592 return false;
3593 }
3594
3595 /* Emit a conditional jump insn for the v9 architecture using comparison code
3596 CODE and jump target LABEL.
3597 This function exists to take advantage of the v9 brxx insns. */
3598
3599 static void
3600 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3601 {
3602 emit_jump_insn (gen_rtx_SET (pc_rtx,
3603 gen_rtx_IF_THEN_ELSE (VOIDmode,
3604 gen_rtx_fmt_ee (code, GET_MODE (op0),
3605 op0, const0_rtx),
3606 gen_rtx_LABEL_REF (VOIDmode, label),
3607 pc_rtx)));
3608 }
3609
3610 /* Emit a conditional jump insn for the UA2011 architecture using
3611 comparison code CODE and jump target LABEL. This function exists
3612 to take advantage of the UA2011 Compare and Branch insns. */
3613
3614 static void
3615 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3616 {
3617 rtx if_then_else;
3618
3619 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3620 gen_rtx_fmt_ee (code, GET_MODE (op0),
3621 op0, op1),
3622 gen_rtx_LABEL_REF (VOIDmode, label),
3623 pc_rtx);
3624
3625 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3626 }
3627
3628 void
3629 emit_conditional_branch_insn (rtx operands[])
3630 {
3631 /* The quad-word fp compare library routines all return nonzero to indicate
3632 true, which is different from the equivalent libgcc routines, so we must
3633 handle them specially here. */
3634 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3635 {
3636 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3637 GET_CODE (operands[0]));
3638 operands[1] = XEXP (operands[0], 0);
3639 operands[2] = XEXP (operands[0], 1);
3640 }
3641
3642 /* If we can tell early on that the comparison is against a constant
3643 that won't fit in the 5-bit signed immediate field of a cbcond,
3644 use one of the other v9 conditional branch sequences. */
3645 if (TARGET_CBCOND
3646 && GET_CODE (operands[1]) == REG
3647 && (GET_MODE (operands[1]) == SImode
3648 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3649 && (GET_CODE (operands[2]) != CONST_INT
3650 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3651 {
3652 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3653 return;
3654 }
3655
3656 if (TARGET_ARCH64 && operands[2] == const0_rtx
3657 && GET_CODE (operands[1]) == REG
3658 && GET_MODE (operands[1]) == DImode)
3659 {
3660 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3661 return;
3662 }
3663
3664 operands[1] = gen_compare_reg (operands[0]);
3665 operands[2] = const0_rtx;
3666 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3667 operands[1], operands[2]);
3668 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3669 operands[3]));
3670 }
3671
3672
3673 /* Generate a DFmode part of a hard TFmode register.
3674 REG is the TFmode hard register, LOW is 1 for the
3675 low 64 bits of the register and 0 otherwise.
3676 */
3677 rtx
3678 gen_df_reg (rtx reg, int low)
3679 {
3680 int regno = REGNO (reg);
3681
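/* Each FP register number designates 32 bits, so the two DFmode halves
   of a TFmode value are 2 register numbers apart; in the 64-bit integer
   registers they are adjacent.  */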
3682 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3683 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3684 return gen_rtx_REG (DFmode, regno);
3685 }
3686 \f
3687 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3688 Unlike normal calls, TFmode operands are passed by reference. It is
3689 assumed that no more than 3 operands are required. */
3690
3691 static void
3692 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3693 {
3694 rtx ret_slot = NULL, arg[3], func_sym;
3695 int i;
3696
3697 /* We only expect to be called for conversions, unary, and binary ops. */
3698 gcc_assert (nargs == 2 || nargs == 3);
3699
3700 for (i = 0; i < nargs; ++i)
3701 {
3702 rtx this_arg = operands[i];
3703 rtx this_slot;
3704
3705 /* TFmode arguments and return values are passed by reference. */
3706 if (GET_MODE (this_arg) == TFmode)
3707 {
3708 int force_stack_temp;
3709
3710 force_stack_temp = 0;
3711 if (TARGET_BUGGY_QP_LIB && i == 0)
3712 force_stack_temp = 1;
3713
3714 if (GET_CODE (this_arg) == MEM
3715 && ! force_stack_temp)
3716 {
3717 tree expr = MEM_EXPR (this_arg);
3718 if (expr)
3719 mark_addressable (expr);
3720 this_arg = XEXP (this_arg, 0);
3721 }
3722 else if (CONSTANT_P (this_arg)
3723 && ! force_stack_temp)
3724 {
3725 this_slot = force_const_mem (TFmode, this_arg);
3726 this_arg = XEXP (this_slot, 0);
3727 }
3728 else
3729 {
3730 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3731
3732 /* Operand 0 is the return value. We'll copy it out later. */
3733 if (i > 0)
3734 emit_move_insn (this_slot, this_arg);
3735 else
3736 ret_slot = this_slot;
3737
3738 this_arg = XEXP (this_slot, 0);
3739 }
3740 }
3741
3742 arg[i] = this_arg;
3743 }
3744
3745 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3746
3747 if (GET_MODE (operands[0]) == TFmode)
3748 {
3749 if (nargs == 2)
3750 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3751 arg[0], GET_MODE (arg[0]),
3752 arg[1], GET_MODE (arg[1]));
3753 else
3754 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3755 arg[0], GET_MODE (arg[0]),
3756 arg[1], GET_MODE (arg[1]),
3757 arg[2], GET_MODE (arg[2]));
3758
3759 if (ret_slot)
3760 emit_move_insn (operands[0], ret_slot);
3761 }
3762 else
3763 {
3764 rtx ret;
3765
3766 gcc_assert (nargs == 2);
3767
3768 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3769 GET_MODE (operands[0]),
3770 arg[1], GET_MODE (arg[1]));
3771
3772 if (ret != operands[0])
3773 emit_move_insn (operands[0], ret);
3774 }
3775 }
3776
3777 /* Expand soft-float TFmode calls to sparc abi routines. */
3778
3779 static void
3780 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3781 {
3782 const char *func;
3783
3784 switch (code)
3785 {
3786 case PLUS:
3787 func = "_Qp_add";
3788 break;
3789 case MINUS:
3790 func = "_Qp_sub";
3791 break;
3792 case MULT:
3793 func = "_Qp_mul";
3794 break;
3795 case DIV:
3796 func = "_Qp_div";
3797 break;
3798 default:
3799 gcc_unreachable ();
3800 }
3801
3802 emit_soft_tfmode_libcall (func, 3, operands);
3803 }
3804
3805 static void
3806 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3807 {
3808 const char *func;
3809
3810 gcc_assert (code == SQRT);
3811 func = "_Qp_sqrt";
3812
3813 emit_soft_tfmode_libcall (func, 2, operands);
3814 }
3815
3816 static void
3817 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3818 {
3819 const char *func;
3820
3821 switch (code)
3822 {
3823 case FLOAT_EXTEND:
3824 switch (GET_MODE (operands[1]))
3825 {
3826 case E_SFmode:
3827 func = "_Qp_stoq";
3828 break;
3829 case E_DFmode:
3830 func = "_Qp_dtoq";
3831 break;
3832 default:
3833 gcc_unreachable ();
3834 }
3835 break;
3836
3837 case FLOAT_TRUNCATE:
3838 switch (GET_MODE (operands[0]))
3839 {
3840 case E_SFmode:
3841 func = "_Qp_qtos";
3842 break;
3843 case E_DFmode:
3844 func = "_Qp_qtod";
3845 break;
3846 default:
3847 gcc_unreachable ();
3848 }
3849 break;
3850
3851 case FLOAT:
3852 switch (GET_MODE (operands[1]))
3853 {
3854 case E_SImode:
3855 func = "_Qp_itoq";
3856 if (TARGET_ARCH64)
3857 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3858 break;
3859 case E_DImode:
3860 func = "_Qp_xtoq";
3861 break;
3862 default:
3863 gcc_unreachable ();
3864 }
3865 break;
3866
3867 case UNSIGNED_FLOAT:
3868 switch (GET_MODE (operands[1]))
3869 {
3870 case E_SImode:
3871 func = "_Qp_uitoq";
3872 if (TARGET_ARCH64)
3873 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3874 break;
3875 case E_DImode:
3876 func = "_Qp_uxtoq";
3877 break;
3878 default:
3879 gcc_unreachable ();
3880 }
3881 break;
3882
3883 case FIX:
3884 switch (GET_MODE (operands[0]))
3885 {
3886 case E_SImode:
3887 func = "_Qp_qtoi";
3888 break;
3889 case E_DImode:
3890 func = "_Qp_qtox";
3891 break;
3892 default:
3893 gcc_unreachable ();
3894 }
3895 break;
3896
3897 case UNSIGNED_FIX:
3898 switch (GET_MODE (operands[0]))
3899 {
3900 case E_SImode:
3901 func = "_Qp_qtoui";
3902 break;
3903 case E_DImode:
3904 func = "_Qp_qtoux";
3905 break;
3906 default:
3907 gcc_unreachable ();
3908 }
3909 break;
3910
3911 default:
3912 gcc_unreachable ();
3913 }
3914
3915 emit_soft_tfmode_libcall (func, 2, operands);
3916 }
3917
3918 /* Expand a hard-float tfmode operation. All arguments must be in
3919 registers. */
3920
3921 static void
3922 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3923 {
3924 rtx op, dest;
3925
3926 if (GET_RTX_CLASS (code) == RTX_UNARY)
3927 {
3928 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3929 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3930 }
3931 else
3932 {
3933 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3934 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3935 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3936 operands[1], operands[2]);
3937 }
3938
3939 if (register_operand (operands[0], VOIDmode))
3940 dest = operands[0];
3941 else
3942 dest = gen_reg_rtx (GET_MODE (operands[0]));
3943
3944 emit_insn (gen_rtx_SET (dest, op));
3945
3946 if (dest != operands[0])
3947 emit_move_insn (operands[0], dest);
3948 }
3949
3950 void
3951 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3952 {
3953 if (TARGET_HARD_QUAD)
3954 emit_hard_tfmode_operation (code, operands);
3955 else
3956 emit_soft_tfmode_binop (code, operands);
3957 }
3958
3959 void
3960 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3961 {
3962 if (TARGET_HARD_QUAD)
3963 emit_hard_tfmode_operation (code, operands);
3964 else
3965 emit_soft_tfmode_unop (code, operands);
3966 }
3967
3968 void
3969 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3970 {
3971 if (TARGET_HARD_QUAD)
3972 emit_hard_tfmode_operation (code, operands);
3973 else
3974 emit_soft_tfmode_cvt (code, operands);
3975 }
3976 \f
3977 /* Return nonzero if a branch/jump/call instruction will be emitting a
3978 nop into its delay slot.  */
3979
3980 int
3981 empty_delay_slot (rtx_insn *insn)
3982 {
3983 rtx seq;
3984
3985 /* If no previous instruction (should not happen), return true. */
3986 if (PREV_INSN (insn) == NULL)
3987 return 1;
3988
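/* If the delay slot has been filled, INSN was packed together with the
   slot insn into a SEQUENCE, which is what NEXT_INSN (PREV_INSN (insn))
   returns here.  */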
3989 seq = NEXT_INSN (PREV_INSN (insn));
3990 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3991 return 0;
3992
3993 return 1;
3994 }
3995
3996 /* Return nonzero if we should emit a nop after a cbcond instruction.
3997 The cbcond instruction does not have a delay slot; however, there is
3998 a severe performance penalty if a control transfer appears right
3999 after a cbcond. Therefore we emit a nop when we detect this
4000 situation. */
4001
4002 int
4003 emit_cbcond_nop (rtx_insn *insn)
4004 {
4005 rtx next = next_active_insn (insn);
4006
4007 if (!next)
4008 return 1;
4009
4010 if (NONJUMP_INSN_P (next)
4011 && GET_CODE (PATTERN (next)) == SEQUENCE)
4012 next = XVECEXP (PATTERN (next), 0, 0);
4013 else if (CALL_P (next)
4014 && GET_CODE (PATTERN (next)) == PARALLEL)
4015 {
4016 rtx delay = XVECEXP (PATTERN (next), 0, 1);
4017
4018 if (GET_CODE (delay) == RETURN)
4019 {
4020 /* It's a sibling call. Do not emit the nop if we're going
4021 to emit something other than the jump itself as the first
4022 instruction of the sibcall sequence. */
4023 if (sparc_leaf_function_p || TARGET_FLAT)
4024 return 0;
4025 }
4026 }
4027
4028 if (NONJUMP_INSN_P (next))
4029 return 0;
4030
4031 return 1;
4032 }
4033
4034 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4035 instruction. RETURN_P is true if the v9 variant 'return' is to be
4036 considered in the test too.
4037
4038 TRIAL must be a SET whose destination is a REG appropriate for the
4039 'restore' instruction or, if RETURN_P is true, for the 'return'
4040 instruction. */
4041
4042 static int
4043 eligible_for_restore_insn (rtx trial, bool return_p)
4044 {
4045 rtx pat = PATTERN (trial);
4046 rtx src = SET_SRC (pat);
4047 bool src_is_freg = false;
4048 rtx src_reg;
4049
4050 /* Since we now can do moves between float and integer registers when
4051 VIS3 is enabled, we have to catch this case. We can allow such
4052 moves when doing a 'return', however.  */
4053 src_reg = src;
4054 if (GET_CODE (src_reg) == SUBREG)
4055 src_reg = SUBREG_REG (src_reg);
4056 if (GET_CODE (src_reg) == REG
4057 && SPARC_FP_REG_P (REGNO (src_reg)))
4058 src_is_freg = true;
4059
4060 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4061 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4062 && arith_operand (src, GET_MODE (src))
4063 && ! src_is_freg)
4064 {
4065 if (TARGET_ARCH64)
4066 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4067 else
4068 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4069 }
4070
4071 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4072 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4073 && arith_double_operand (src, GET_MODE (src))
4074 && ! src_is_freg)
4075 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4076
4077 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4078 else if (! TARGET_FPU && register_operand (src, SFmode))
4079 return 1;
4080
4081 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4082 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4083 return 1;
4084
4085 /* If we have the 'return' instruction, anything that does not use
4086 local or output registers and can go into a delay slot wins. */
4087 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4088 return 1;
4089
4090 /* The 'restore src1,src2,dest' pattern for SImode. */
4091 else if (GET_CODE (src) == PLUS
4092 && register_operand (XEXP (src, 0), SImode)
4093 && arith_operand (XEXP (src, 1), SImode))
4094 return 1;
4095
4096 /* The 'restore src1,src2,dest' pattern for DImode. */
4097 else if (GET_CODE (src) == PLUS
4098 && register_operand (XEXP (src, 0), DImode)
4099 && arith_double_operand (XEXP (src, 1), DImode))
4100 return 1;
4101
4102 /* The 'restore src1,%lo(src2),dest' pattern. */
4103 else if (GET_CODE (src) == LO_SUM
4104 && ! TARGET_CM_MEDMID
4105 && ((register_operand (XEXP (src, 0), SImode)
4106 && immediate_operand (XEXP (src, 1), SImode))
4107 || (TARGET_ARCH64
4108 && register_operand (XEXP (src, 0), DImode)
4109 && immediate_operand (XEXP (src, 1), DImode))))
4110 return 1;
4111
4112 /* The 'restore src,src,dest' pattern. */
4113 else if (GET_CODE (src) == ASHIFT
4114 && (register_operand (XEXP (src, 0), SImode)
4115 || register_operand (XEXP (src, 0), DImode))
4116 && XEXP (src, 1) == const1_rtx)
4117 return 1;
4118
4119 return 0;
4120 }
4121
4122 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4123
4124 int
4125 eligible_for_return_delay (rtx_insn *trial)
4126 {
4127 int regno;
4128 rtx pat;
4129
4130 /* If the function uses __builtin_eh_return, the eh_return machinery
4131 occupies the delay slot. */
4132 if (crtl->calls_eh_return)
4133 return 0;
4134
4135 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4136 return 0;
4137
4138 /* In the case of a leaf or flat function, anything can go into the slot. */
4139 if (sparc_leaf_function_p || TARGET_FLAT)
4140 return 1;
4141
4142 if (!NONJUMP_INSN_P (trial))
4143 return 0;
4144
4145 pat = PATTERN (trial);
4146 if (GET_CODE (pat) == PARALLEL)
4147 {
4148 int i;
4149
4150 if (! TARGET_V9)
4151 return 0;
4152 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4153 {
4154 rtx expr = XVECEXP (pat, 0, i);
4155 if (GET_CODE (expr) != SET)
4156 return 0;
4157 if (GET_CODE (SET_DEST (expr)) != REG)
4158 return 0;
4159 regno = REGNO (SET_DEST (expr));
4160 if (regno >= 8 && regno < 24)
4161 return 0;
4162 }
4163 return !epilogue_renumber (&pat, 1);
4164 }
4165
4166 if (GET_CODE (pat) != SET)
4167 return 0;
4168
4169 if (GET_CODE (SET_DEST (pat)) != REG)
4170 return 0;
4171
4172 regno = REGNO (SET_DEST (pat));
4173
4174 /* Otherwise, only operations which can be done in tandem with
4175 a `restore' or `return' insn can go into the delay slot. */
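/* Registers 8 to 23 are the %o and %l registers, which are not available
   in the caller's window once the register window has been popped.  */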
4176 if (regno >= 8 && regno < 24)
4177 return 0;
4178
4179 /* If this instruction sets up a floating-point register and we have a return
4180 instruction, it can probably go in. But restore will not work
4181 with FP_REGS. */
4182 if (! SPARC_INT_REG_P (regno))
4183 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4184
4185 return eligible_for_restore_insn (trial, true);
4186 }
4187
4188 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4189
4190 int
4191 eligible_for_sibcall_delay (rtx_insn *trial)
4192 {
4193 rtx pat;
4194
4195 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4196 return 0;
4197
4198 if (!NONJUMP_INSN_P (trial))
4199 return 0;
4200
4201 pat = PATTERN (trial);
4202
4203 if (sparc_leaf_function_p || TARGET_FLAT)
4204 {
4205 /* If the tail call is done using the call instruction,
4206 we have to restore %o7 in the delay slot. */
4207 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4208 return 0;
4209
4210 /* %g1 is used to build the function address.  */
4211 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4212 return 0;
4213
4214 return 1;
4215 }
4216
4217 if (GET_CODE (pat) != SET)
4218 return 0;
4219
4220 /* Otherwise, only operations which can be done in tandem with
4221 a `restore' insn can go into the delay slot. */
4222 if (GET_CODE (SET_DEST (pat)) != REG
4223 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4224 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4225 return 0;
4226
4227 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4228 in most cases. */
4229 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4230 return 0;
4231
4232 return eligible_for_restore_insn (trial, false);
4233 }
4234 \f
4235 /* Determine if it's legal to put X into the constant pool. This
4236 is not possible if X contains the address of a symbol that is
4237 not constant (TLS) or not known at final link time (PIC). */
4238
4239 static bool
4240 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4241 {
4242 switch (GET_CODE (x))
4243 {
4244 case CONST_INT:
4245 case CONST_WIDE_INT:
4246 case CONST_DOUBLE:
4247 case CONST_VECTOR:
4248 /* Accept all non-symbolic constants. */
4249 return false;
4250
4251 case LABEL_REF:
4252 /* Labels are OK iff we are non-PIC. */
4253 return flag_pic != 0;
4254
4255 case SYMBOL_REF:
4256 /* 'Naked' TLS symbol references are never OK,
4257 non-TLS symbols are OK iff we are non-PIC. */
4258 if (SYMBOL_REF_TLS_MODEL (x))
4259 return true;
4260 else
4261 return flag_pic != 0;
4262
4263 case CONST:
4264 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4265 case PLUS:
4266 case MINUS:
4267 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4268 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
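/* UNSPECs are used to wrap PIC and TLS references, which are not
   link-time constants.  */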
4269 case UNSPEC:
4270 return true;
4271 default:
4272 gcc_unreachable ();
4273 }
4274 }
4275 \f
4276 /* Global Offset Table support. */
4277 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4278 static GTY(()) rtx got_register_rtx = NULL_RTX;
4279 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4280
4281 static GTY(()) bool got_helper_needed = false;
4282
4283 /* Return the SYMBOL_REF for the Global Offset Table. */
4284
4285 static rtx
4286 sparc_got (void)
4287 {
4288 if (!got_symbol_rtx)
4289 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4290
4291 return got_symbol_rtx;
4292 }
4293
4294 /* Output the load_pcrel_sym pattern. */
4295
4296 const char *
4297 output_load_pcrel_sym (rtx *operands)
4298 {
4299 if (flag_delayed_branch)
4300 {
4301 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4302 output_asm_insn ("call\t%a2", operands);
4303 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4304 }
4305 else
4306 {
4307 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4308 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4309 output_asm_insn ("call\t%a2", operands);
4310 output_asm_insn (" nop", NULL);
4311 }
4312
4313 if (operands[2] == got_helper_rtx)
4314 got_helper_needed = true;
4315
4316 return "";
4317 }
4318
4319 #ifdef HAVE_GAS_HIDDEN
4320 # define USE_HIDDEN_LINKONCE 1
4321 #else
4322 # define USE_HIDDEN_LINKONCE 0
4323 #endif
4324
4325 /* Emit code to load the GOT register. */
4326
4327 void
4328 load_got_register (void)
4329 {
4330 rtx insn;
4331
4332 if (TARGET_VXWORKS_RTP)
4333 {
4334 if (!got_register_rtx)
4335 got_register_rtx = pic_offset_table_rtx;
4336
4337 insn = gen_vxworks_load_got ();
4338 }
4339 else
4340 {
4341 if (!got_register_rtx)
4342 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4343
4344 /* The GOT symbol is subject to a PC-relative relocation so we need a
4345 helper function to add the PC value and thus get the final value. */
4346 if (!got_helper_rtx)
4347 {
4348 char name[32];
4349
4350 /* Skip the leading '%' as that cannot be used in a symbol name. */
4351 if (USE_HIDDEN_LINKONCE)
4352 sprintf (name, "__sparc_get_pc_thunk.%s",
4353 reg_names[REGNO (got_register_rtx)] + 1);
4354 else
4355 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4356 REGNO (got_register_rtx));
4357
4358 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4359 }
4360
4361 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4362 const int orig_flag_pic = flag_pic;
4363 flag_pic = 0;
4364 insn = gen_load_pcrel_sym (Pmode,
4365 got_register_rtx,
4366 sparc_got (),
4367 got_helper_rtx,
4368 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM));
4369 flag_pic = orig_flag_pic;
4370 }
4371
4372 emit_insn (insn);
4373 }
4374
4375 /* Ensure that we are not using patterns that are not OK with PIC. */
4376
4377 int
4378 check_pic (int i)
4379 {
4380 rtx op;
4381
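/* With -fpic, symbolic operands must already have been wrapped in the
   GOT access patterns; assert that no bare symbol slipped through.  */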
4382 switch (flag_pic)
4383 {
4384 case 1:
4385 op = recog_data.operand[i];
4386 gcc_assert (GET_CODE (op) != SYMBOL_REF
4387 && (GET_CODE (op) != CONST
4388 || (GET_CODE (XEXP (op, 0)) == MINUS
4389 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4390 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4391 /* fallthrough */
4392 case 2:
4393 default:
4394 return 1;
4395 }
4396 }
4397
4398 /* Return true if X is an address which needs a temporary register when
4399 reloaded while generating PIC code. */
4400
4401 int
4402 pic_address_needs_scratch (rtx x)
4403 {
4404 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4405 if (GET_CODE (x) == CONST
4406 && GET_CODE (XEXP (x, 0)) == PLUS
4407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4408 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4409 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4410 return 1;
4411
4412 return 0;
4413 }
4414
4415 /* Determine if a given RTX is a valid constant. We already know this
4416 satisfies CONSTANT_P. */
4417
4418 static bool
4419 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4420 {
4421 switch (GET_CODE (x))
4422 {
4423 case CONST:
4424 case SYMBOL_REF:
4425 if (sparc_tls_referenced_p (x))
4426 return false;
4427 break;
4428
4429 case CONST_DOUBLE:
4430 /* Floating point constants are generally not ok.
4431 The only exceptions are 0.0 and all-ones with VIS.  */
4432 if (TARGET_VIS
4433 && SCALAR_FLOAT_MODE_P (mode)
4434 && (const_zero_operand (x, mode)
4435 || const_all_ones_operand (x, mode)))
4436 return true;
4437
4438 return false;
4439
4440 case CONST_VECTOR:
4441 /* Vector constants are generally not ok.
4442 The only exceptions are 0 and -1 with VIS.  */
4443 if (TARGET_VIS
4444 && (const_zero_operand (x, mode)
4445 || const_all_ones_operand (x, mode)))
4446 return true;
4447
4448 return false;
4449
4450 default:
4451 break;
4452 }
4453
4454 return true;
4455 }
4456
4457 /* Determine if a given RTX is a valid constant address. */
4458
4459 bool
4460 constant_address_p (rtx x)
4461 {
4462 switch (GET_CODE (x))
4463 {
4464 case LABEL_REF:
4465 case CONST_INT:
4466 case HIGH:
4467 return true;
4468
4469 case CONST:
4470 if (flag_pic && pic_address_needs_scratch (x))
4471 return false;
4472 return sparc_legitimate_constant_p (Pmode, x);
4473
4474 case SYMBOL_REF:
4475 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4476
4477 default:
4478 return false;
4479 }
4480 }
4481
4482 /* Nonzero if the constant value X is a legitimate general operand
4483 when generating PIC code. It is given that flag_pic is on and
4484 that X satisfies CONSTANT_P. */
4485
4486 bool
4487 legitimate_pic_operand_p (rtx x)
4488 {
4489 if (pic_address_needs_scratch (x))
4490 return false;
4491 if (sparc_tls_referenced_p (x))
4492 return false;
4493 return true;
4494 }
4495
4496 /* Return true if X is a representation of the PIC register. */
4497
4498 static bool
4499 sparc_pic_register_p (rtx x)
4500 {
4501 if (!REG_P (x) || !pic_offset_table_rtx)
4502 return false;
4503
4504 if (x == pic_offset_table_rtx)
4505 return true;
4506
4507 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4508 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4509 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4510 return true;
4511
4512 return false;
4513 }
4514
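/* These accept the 13-bit signed immediate offset of SPARC load/store
   instructions, adjusted so that the last byte of the access is still
   addressable.  The OLO10 variant additionally leaves room for a 10-bit
   %lo() addend to be folded into the same field.  */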
4515 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4516 (CONST_INT_P (X) \
4517 && INTVAL (X) >= -0x1000 \
4518 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4519
4520 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4521 (CONST_INT_P (X) \
4522 && INTVAL (X) >= -0x1000 \
4523 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4524
4525 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4526
4527 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4528 ordinarily. This changes a bit when generating PIC. */
4529
4530 static bool
4531 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4532 {
4533 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4534
4535 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4536 rs1 = addr;
4537 else if (GET_CODE (addr) == PLUS)
4538 {
4539 rs1 = XEXP (addr, 0);
4540 rs2 = XEXP (addr, 1);
4541
4542 /* Canonicalize.  REG comes first; if there are no regs,
4543 LO_SUM comes first. */
4544 if (!REG_P (rs1)
4545 && GET_CODE (rs1) != SUBREG
4546 && (REG_P (rs2)
4547 || GET_CODE (rs2) == SUBREG
4548 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4549 {
4550 rs1 = XEXP (addr, 1);
4551 rs2 = XEXP (addr, 0);
4552 }
4553
4554 if ((flag_pic == 1
4555 && sparc_pic_register_p (rs1)
4556 && !REG_P (rs2)
4557 && GET_CODE (rs2) != SUBREG
4558 && GET_CODE (rs2) != LO_SUM
4559 && GET_CODE (rs2) != MEM
4560 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4561 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4562 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4563 || ((REG_P (rs1)
4564 || GET_CODE (rs1) == SUBREG)
4565 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4566 {
4567 imm1 = rs2;
4568 rs2 = NULL;
4569 }
4570 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4571 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4572 {
4573 /* We prohibit REG + REG for TFmode when there are no quad move insns
4574 and we consequently need to split. We do this because REG+REG
4575 is not an offsettable address. If we get the situation in reload
4576 where source and destination of a movtf pattern are both MEMs with
4577 REG+REG address, then only one of them gets converted to an
4578 offsettable address. */
4579 if (mode == TFmode
4580 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4581 return 0;
4582
4583 /* Likewise for TImode, but in all cases. */
4584 if (mode == TImode)
4585 return 0;
4586
4587 /* We prohibit REG + REG on ARCH32 if not optimizing for
4588 DFmode/DImode because then mem_min_alignment is likely to be zero
4589 after reload and the forced split would lack a matching splitter
4590 pattern. */
4591 if (TARGET_ARCH32 && !optimize
4592 && (mode == DFmode || mode == DImode))
4593 return 0;
4594 }
4595 else if (USE_AS_OFFSETABLE_LO10
4596 && GET_CODE (rs1) == LO_SUM
4597 && TARGET_ARCH64
4598 && ! TARGET_CM_MEDMID
4599 && RTX_OK_FOR_OLO10_P (rs2, mode))
4600 {
4601 rs2 = NULL;
4602 imm1 = XEXP (rs1, 1);
4603 rs1 = XEXP (rs1, 0);
4604 if (!CONSTANT_P (imm1)
4605 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4606 return 0;
4607 }
4608 }
4609 else if (GET_CODE (addr) == LO_SUM)
4610 {
4611 rs1 = XEXP (addr, 0);
4612 imm1 = XEXP (addr, 1);
4613
4614 if (!CONSTANT_P (imm1)
4615 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4616 return 0;
4617
4618 /* We can't allow TFmode in 32-bit mode, because an offset greater
4619 than the alignment (8) may cause the LO_SUM to overflow. */
4620 if (mode == TFmode && TARGET_ARCH32)
4621 return 0;
4622
4623 /* During reload, accept the HIGH+LO_SUM construct generated by
4624 sparc_legitimize_reload_address. */
4625 if (reload_in_progress
4626 && GET_CODE (rs1) == HIGH
4627 && XEXP (rs1, 0) == imm1)
4628 return 1;
4629 }
4630 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4631 return 1;
4632 else
4633 return 0;
4634
4635 if (GET_CODE (rs1) == SUBREG)
4636 rs1 = SUBREG_REG (rs1);
4637 if (!REG_P (rs1))
4638 return 0;
4639
4640 if (rs2)
4641 {
4642 if (GET_CODE (rs2) == SUBREG)
4643 rs2 = SUBREG_REG (rs2);
4644 if (!REG_P (rs2))
4645 return 0;
4646 }
4647
4648 if (strict)
4649 {
4650 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4651 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4652 return 0;
4653 }
4654 else
4655 {
4656 if ((! SPARC_INT_REG_P (REGNO (rs1))
4657 && REGNO (rs1) != FRAME_POINTER_REGNUM
4658 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4659 || (rs2
4660 && (! SPARC_INT_REG_P (REGNO (rs2))
4661 && REGNO (rs2) != FRAME_POINTER_REGNUM
4662 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4663 return 0;
4664 }
4665 return 1;
4666 }
4667
4668 /* Return the SYMBOL_REF for the tls_get_addr function. */
4669
4670 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4671
4672 static rtx
4673 sparc_tls_get_addr (void)
4674 {
4675 if (!sparc_tls_symbol)
4676 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4677
4678 return sparc_tls_symbol;
4679 }
4680
4681 /* Return the Global Offset Table to be used in TLS mode. */
4682
4683 static rtx
4684 sparc_tls_got (void)
4685 {
4686 /* In PIC mode, this is just the PIC offset table. */
4687 if (flag_pic)
4688 {
4689 crtl->uses_pic_offset_table = 1;
4690 return pic_offset_table_rtx;
4691 }
4692
4693 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4694 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4695 if (TARGET_SUN_TLS && TARGET_ARCH32)
4696 {
4697 load_got_register ();
4698 return got_register_rtx;
4699 }
4700
4701 /* In all other cases, we load a new pseudo with the GOT symbol. */
4702 return copy_to_reg (sparc_got ());
4703 }
4704
4705 /* Return true if X contains a thread-local symbol. */
4706
4707 static bool
4708 sparc_tls_referenced_p (rtx x)
4709 {
4710 if (!TARGET_HAVE_TLS)
4711 return false;
4712
4713 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4714 x = XEXP (XEXP (x, 0), 0);
4715
4716 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4717 return true;
4718
4719 /* That's all we handle in sparc_legitimize_tls_address for now. */
4720 return false;
4721 }
4722
4723 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4724 this (thread-local) address. */
4725
4726 static rtx
4727 sparc_legitimize_tls_address (rtx addr)
4728 {
4729 rtx temp1, temp2, temp3, ret, o0, got;
4730 rtx_insn *insn;
4731
4732 gcc_assert (can_create_pseudo_p ());
4733
4734 if (GET_CODE (addr) == SYMBOL_REF)
4735 /* Although the various sethi/or sequences generate SImode values, many of
4736 them can be transformed by the linker when relaxing and, if relaxing to
4737 local-exec, will become a sethi/xor pair, which is signed and therefore
4738 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4739 values be spilled onto the stack in 64-bit mode. */
4740 switch (SYMBOL_REF_TLS_MODEL (addr))
4741 {
4742 case TLS_MODEL_GLOBAL_DYNAMIC:
4743 start_sequence ();
4744 temp1 = gen_reg_rtx (Pmode);
4745 temp2 = gen_reg_rtx (Pmode);
4746 ret = gen_reg_rtx (Pmode);
4747 o0 = gen_rtx_REG (Pmode, 8);
4748 got = sparc_tls_got ();
4749 emit_insn (gen_tgd_hi22 (Pmode, temp1, addr));
4750 emit_insn (gen_tgd_lo10 (Pmode, temp2, temp1, addr));
4751 emit_insn (gen_tgd_add (Pmode, o0, got, temp2, addr));
4752 insn = emit_call_insn (gen_tgd_call (Pmode, o0, sparc_tls_get_addr (),
4753 addr, const1_rtx));
4754 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4755 RTL_CONST_CALL_P (insn) = 1;
4756 insn = get_insns ();
4757 end_sequence ();
4758 emit_libcall_block (insn, ret, o0, addr);
4759 break;
4760
4761 case TLS_MODEL_LOCAL_DYNAMIC:
4762 start_sequence ();
4763 temp1 = gen_reg_rtx (Pmode);
4764 temp2 = gen_reg_rtx (Pmode);
4765 temp3 = gen_reg_rtx (Pmode);
4766 ret = gen_reg_rtx (Pmode);
4767 o0 = gen_rtx_REG (Pmode, 8);
4768 got = sparc_tls_got ();
4769 emit_insn (gen_tldm_hi22 (Pmode, temp1));
4770 emit_insn (gen_tldm_lo10 (Pmode, temp2, temp1));
4771 emit_insn (gen_tldm_add (Pmode, o0, got, temp2));
4772 insn = emit_call_insn (gen_tldm_call (Pmode, o0, sparc_tls_get_addr (),
4773 const1_rtx));
4774 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4775 RTL_CONST_CALL_P (insn) = 1;
4776 insn = get_insns ();
4777 end_sequence ();
4778 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4779 share the LD_BASE result with other LD model accesses. */
4780 emit_libcall_block (insn, temp3, o0,
4781 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4782 UNSPEC_TLSLD_BASE));
4783 temp1 = gen_reg_rtx (Pmode);
4784 temp2 = gen_reg_rtx (Pmode);
4785 emit_insn (gen_tldo_hix22 (Pmode, temp1, addr));
4786 emit_insn (gen_tldo_lox10 (Pmode, temp2, temp1, addr));
4787 emit_insn (gen_tldo_add (Pmode, ret, temp3, temp2, addr));
4788 break;
4789
4790 case TLS_MODEL_INITIAL_EXEC:
4791 temp1 = gen_reg_rtx (Pmode);
4792 temp2 = gen_reg_rtx (Pmode);
4793 temp3 = gen_reg_rtx (Pmode);
4794 got = sparc_tls_got ();
4795 emit_insn (gen_tie_hi22 (Pmode, temp1, addr));
4796 emit_insn (gen_tie_lo10 (Pmode, temp2, temp1, addr));
4797 if (TARGET_ARCH32)
4798 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4799 else
4800 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
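/* Register 7 is %g7, the thread pointer in the SPARC TLS ABI.  */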
4801 if (TARGET_SUN_TLS)
4802 {
4803 ret = gen_reg_rtx (Pmode);
4804 emit_insn (gen_tie_add (Pmode, ret, gen_rtx_REG (Pmode, 7),
4805 temp3, addr));
4806 }
4807 else
4808 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4809 break;
4810
4811 case TLS_MODEL_LOCAL_EXEC:
4812 temp1 = gen_reg_rtx (Pmode);
4813 temp2 = gen_reg_rtx (Pmode);
4814 emit_insn (gen_tle_hix22 (Pmode, temp1, addr));
4815 emit_insn (gen_tle_lox10 (Pmode, temp2, temp1, addr));
4816 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4817 break;
4818
4819 default:
4820 gcc_unreachable ();
4821 }
4822
4823 else if (GET_CODE (addr) == CONST)
4824 {
4825 rtx base, offset;
4826
4827 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4828
4829 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4830 offset = XEXP (XEXP (addr, 0), 1);
4831
4832 base = force_operand (base, NULL_RTX);
4833 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4834 offset = force_reg (Pmode, offset);
4835 ret = gen_rtx_PLUS (Pmode, base, offset);
4836 }
4837
4838 else
4839 gcc_unreachable (); /* for now ... */
4840
4841 return ret;
4842 }
4843
4844 /* Legitimize PIC addresses. If the address is already position-independent,
4845 we return ORIG. Newly generated position-independent addresses go into a
4846 reg. This is REG if nonzero, otherwise we allocate register(s) as
4847 necessary. */
4848
4849 static rtx
4850 sparc_legitimize_pic_address (rtx orig, rtx reg)
4851 {
4852 if (GET_CODE (orig) == SYMBOL_REF
4853 /* See the comment in sparc_expand_move. */
4854 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4855 {
4856 bool gotdata_op = false;
4857 rtx pic_ref, address;
4858 rtx_insn *insn;
4859
4860 if (!reg)
4861 {
4862 gcc_assert (can_create_pseudo_p ());
4863 reg = gen_reg_rtx (Pmode);
4864 }
4865
4866 if (flag_pic == 2)
4867 {
4868 /* If not during reload, allocate another temp reg here for loading
4869 in the address, so that these instructions can be optimized
4870 properly. */
4871 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4872
4873 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4874 won't get confused into thinking that these two instructions
4875 are loading in the true address of the symbol. If in the
4876 future a PIC rtx exists, that should be used instead. */
4877 if (TARGET_ARCH64)
4878 {
4879 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4880 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4881 }
4882 else
4883 {
4884 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4885 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4886 }
4887
4888 address = temp_reg;
4889 gotdata_op = true;
4890 }
4891 else
4892 address = orig;
4893
4894 crtl->uses_pic_offset_table = 1;
4895 if (gotdata_op)
4896 {
4897 if (TARGET_ARCH64)
4898 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4899 pic_offset_table_rtx,
4900 address, orig));
4901 else
4902 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4903 pic_offset_table_rtx,
4904 address, orig));
4905 }
4906 else
4907 {
4908 pic_ref
4909 = gen_const_mem (Pmode,
4910 gen_rtx_PLUS (Pmode,
4911 pic_offset_table_rtx, address));
4912 insn = emit_move_insn (reg, pic_ref);
4913 }
4914
4915 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4916 by loop. */
4917 set_unique_reg_note (insn, REG_EQUAL, orig);
4918 return reg;
4919 }
4920 else if (GET_CODE (orig) == CONST)
4921 {
4922 rtx base, offset;
4923
4924 if (GET_CODE (XEXP (orig, 0)) == PLUS
4925 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4926 return orig;
4927
4928 if (!reg)
4929 {
4930 gcc_assert (can_create_pseudo_p ());
4931 reg = gen_reg_rtx (Pmode);
4932 }
4933
4934 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4935 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4936 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4937 base == reg ? NULL_RTX : reg);
4938
4939 if (GET_CODE (offset) == CONST_INT)
4940 {
4941 if (SMALL_INT (offset))
4942 return plus_constant (Pmode, base, INTVAL (offset));
4943 else if (can_create_pseudo_p ())
4944 offset = force_reg (Pmode, offset);
4945 else
4946 /* If we reach here, then something is seriously wrong. */
4947 gcc_unreachable ();
4948 }
4949 return gen_rtx_PLUS (Pmode, base, offset);
4950 }
4951 else if (GET_CODE (orig) == LABEL_REF)
4952 /* ??? We ought to be checking that the register is live instead, in case
4953 it is eliminated. */
4954 crtl->uses_pic_offset_table = 1;
4955
4956 return orig;
4957 }
4958
4959 /* Try machine-dependent ways of modifying an illegitimate address X
4960 to be legitimate. If we find one, return the new, valid address.
4961
4962 OLDX is the address as it was before break_out_memory_refs was called.
4963 In some cases it is useful to look at this to decide what needs to be done.
4964
4965 MODE is the mode of the operand pointed to by X.
4966
4967 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4968
4969 static rtx
4970 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4971 machine_mode mode)
4972 {
4973 rtx orig_x = x;
4974
4975 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4976 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4977 force_operand (XEXP (x, 0), NULL_RTX));
4978 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4979 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4980 force_operand (XEXP (x, 1), NULL_RTX));
4981 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4982 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4983 XEXP (x, 1));
4984 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4985 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4986 force_operand (XEXP (x, 1), NULL_RTX));
4987
4988 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4989 return x;
4990
4991 if (sparc_tls_referenced_p (x))
4992 x = sparc_legitimize_tls_address (x);
4993 else if (flag_pic)
4994 x = sparc_legitimize_pic_address (x, NULL_RTX);
4995 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4996 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4997 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4998 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4999 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5000 copy_to_mode_reg (Pmode, XEXP (x, 0)));
5001 else if (GET_CODE (x) == SYMBOL_REF
5002 || GET_CODE (x) == CONST
5003 || GET_CODE (x) == LABEL_REF)
5004 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5005
5006 return x;
5007 }
5008
5009 /* Delegitimize an address that was legitimized by the above function. */
5010
5011 static rtx
5012 sparc_delegitimize_address (rtx x)
5013 {
5014 x = delegitimize_mem_from_attrs (x);
5015
5016 if (GET_CODE (x) == LO_SUM)
5017 x = XEXP (x, 1);
5018
5019 if (GET_CODE (x) == UNSPEC)
5020 switch (XINT (x, 1))
5021 {
5022 case UNSPEC_MOVE_PIC:
5023 case UNSPEC_TLSLE:
5024 x = XVECEXP (x, 0, 0);
5025 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5026 break;
5027 case UNSPEC_MOVE_GOTDATA:
5028 x = XVECEXP (x, 0, 2);
5029 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5030 break;
5031 default:
5032 break;
5033 }
5034
5035 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5036 if (GET_CODE (x) == MINUS
5037 && (XEXP (x, 0) == got_register_rtx
5038 || sparc_pic_register_p (XEXP (x, 0))))
5039 {
5040 rtx y = XEXP (x, 1);
5041
5042 if (GET_CODE (y) == LO_SUM)
5043 y = XEXP (y, 1);
5044
5045 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5046 {
5047 x = XVECEXP (y, 0, 0);
5048 gcc_assert (GET_CODE (x) == LABEL_REF
5049 || (GET_CODE (x) == CONST
5050 && GET_CODE (XEXP (x, 0)) == PLUS
5051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5052 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5053 }
5054 }
5055
5056 return x;
5057 }
5058
5059 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5060 replace the input X, or the original X if no replacement is called for.
5061 The output parameter *WIN is 1 if the calling macro should goto WIN,
5062 0 if it should not.
5063
5064 For SPARC, we wish to handle addresses by splitting them into
5065 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5066 This cuts the number of extra insns by one.
5067
5068 Do nothing when generating PIC code and the address is a symbolic
5069 operand or requires a scratch register. */
5070
5071 rtx
5072 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5073 int opnum, int type,
5074 int ind_levels ATTRIBUTE_UNUSED, int *win)
5075 {
5076 /* Decompose SImode constants into HIGH+LO_SUM. */
5077 if (CONSTANT_P (x)
5078 && (mode != TFmode || TARGET_ARCH64)
5079 && GET_MODE (x) == SImode
5080 && GET_CODE (x) != LO_SUM
5081 && GET_CODE (x) != HIGH
5082 && sparc_code_model <= CM_MEDLOW
5083 && !(flag_pic
5084 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5085 {
5086 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5087 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5088 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5089 opnum, (enum reload_type)type);
5090 *win = 1;
5091 return x;
5092 }
5093
5094 /* We have to recognize what we have already generated above. */
5095 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5096 {
5097 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5098 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5099 opnum, (enum reload_type)type);
5100 *win = 1;
5101 return x;
5102 }
5103
5104 *win = 0;
5105 return x;
5106 }
5107
5108 /* Return true if ADDR (a legitimate address expression)
5109 has an effect that depends on the machine mode it is used for.
5110
5111 In PIC mode,
5112
5113 (mem:HI [%l7+a])
5114
5115 is not equivalent to
5116
5117 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5118
5119 because [%l7+a+1] is interpreted as the address of (a+1). */
5120
5121
5122 static bool
5123 sparc_mode_dependent_address_p (const_rtx addr,
5124 addr_space_t as ATTRIBUTE_UNUSED)
5125 {
5126 if (GET_CODE (addr) == PLUS
5127 && sparc_pic_register_p (XEXP (addr, 0))
5128 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5129 return true;
5130
5131 return false;
5132 }
5133
5134 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5135 address of the call target. */
5136
5137 void
5138 sparc_emit_call_insn (rtx pat, rtx addr)
5139 {
5140 rtx_insn *insn;
5141
5142 insn = emit_call_insn (pat);
5143
5144 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5145 if (TARGET_VXWORKS_RTP
5146 && flag_pic
5147 && GET_CODE (addr) == SYMBOL_REF
5148 && (SYMBOL_REF_DECL (addr)
5149 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5150 : !SYMBOL_REF_LOCAL_P (addr)))
5151 {
5152 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5153 crtl->uses_pic_offset_table = 1;
5154 }
5155 }
5156 \f
5157 /* Return 1 if RTX is a MEM which is known to be aligned to at
5158 least a DESIRED byte boundary. */
5159
5160 int
5161 mem_min_alignment (rtx mem, int desired)
5162 {
5163 rtx addr, base, offset;
5164
5165 /* If it's not a MEM we can't accept it. */
5166 if (GET_CODE (mem) != MEM)
5167 return 0;
5168
5169 /* Obviously... */
5170 if (!TARGET_UNALIGNED_DOUBLES
5171 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5172 return 1;
5173
5174 /* ??? The rest of the function predates MEM_ALIGN so
5175 there is probably a bit of redundancy. */
5176 addr = XEXP (mem, 0);
5177 base = offset = NULL_RTX;
5178 if (GET_CODE (addr) == PLUS)
5179 {
5180 if (GET_CODE (XEXP (addr, 0)) == REG)
5181 {
5182 base = XEXP (addr, 0);
5183
5184 /* What we are saying here is that if the base
5185 REG is aligned properly, the compiler will make
5186 sure any REG based index upon it will be so
5187 as well. */
5188 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5189 offset = XEXP (addr, 1);
5190 else
5191 offset = const0_rtx;
5192 }
5193 }
5194 else if (GET_CODE (addr) == REG)
5195 {
5196 base = addr;
5197 offset = const0_rtx;
5198 }
5199
5200 if (base != NULL_RTX)
5201 {
5202 int regno = REGNO (base);
5203
5204 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5205 {
5206 /* Check if the compiler has recorded some information
5207 about the alignment of the base REG. If reload has
5208 completed, we already matched with proper alignments.
5209 If not running global_alloc, reload might give us
5210 an unaligned pointer to the local stack, though.  */
5211 if (((cfun != 0
5212 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5213 || (optimize && reload_completed))
5214 && (INTVAL (offset) & (desired - 1)) == 0)
5215 return 1;
5216 }
5217 else
5218 {
5219 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5220 return 1;
5221 }
5222 }
5223 else if (! TARGET_UNALIGNED_DOUBLES
5224 || CONSTANT_P (addr)
5225 || GET_CODE (addr) == LO_SUM)
5226 {
5227 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5228 is true, in which case we can only assume that an access is aligned if
5229 it is to a constant address, or the address involves a LO_SUM. */
5230 return 1;
5231 }
5232
5233 /* An obviously unaligned address. */
5234 return 0;
5235 }
5236
5237 \f
5238 /* Vectors to keep interesting information about registers where it can easily
5239 be got. We used to use the actual mode value as the bit number, but there
5240 are more than 32 modes now. Instead we use two tables: one indexed by
5241 hard register number, and one indexed by mode. */
5242
5243 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5244 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5245 mapped into one sparc_mode_class mode. */
5246
5247 enum sparc_mode_class {
5248 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5249 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5250 CC_MODE, CCFP_MODE
5251 };
5252
5253 /* Modes for single-word and smaller quantities. */
5254 #define S_MODES \
5255 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5256
5257 /* Modes for double-word and smaller quantities. */
5258 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5259
5260 /* Modes for quad-word and smaller quantities. */
5261 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5262
5263 /* Modes for 8-word and smaller quantities. */
5264 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5265
5266 /* Modes for single-float quantities. */
5267 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5268
5269 /* Modes for double-float and smaller quantities. */
5270 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5271
5272 /* Modes for quad-float and smaller quantities. */
5273 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5274
5275 /* Modes for quad-float pairs and smaller quantities. */
5276 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5277
5278 /* Modes for double-float only quantities. */
5279 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5280
5281 /* Modes for quad-float and double-float only quantities. */
5282 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5283
5284 /* Modes for quad-float pairs and double-float only quantities. */
5285 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5286
5287 /* Modes for condition codes. */
5288 #define CC_MODES (1 << (int) CC_MODE)
5289 #define CCFP_MODES (1 << (int) CCFP_MODE)
5290
5291 /* Value is 1 if register/mode pair is acceptable on sparc.
5292
5293 The funny mixture of D and T modes is because integer operations
5294 do not specially operate on tetra quantities, so non-quad-aligned
5295 registers can hold quadword quantities (except %o4 and %i4 because
5296 they cross fixed registers).
5297
5298 ??? Note that, despite the settings, non-double-aligned parameter
5299 registers can hold double-word quantities in 32-bit mode. */
5300
5301 /* This points to either the 32-bit or the 64-bit version. */
5302 static const int *hard_regno_mode_classes;
5303
5304 static const int hard_32bit_mode_classes[] = {
5305 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5306 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5307 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5308 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5309
5310 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5311 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5312 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5313 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5314
5315 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5316 and none can hold SFmode/SImode values. */
5317 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5318 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5319 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5320 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5321
5322 /* %fcc[0123] */
5323 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5324
5325 /* %icc, %sfp, %gsr */
5326 CC_MODES, 0, D_MODES
5327 };
5328
5329 static const int hard_64bit_mode_classes[] = {
5330 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5331 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5332 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5333 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5334
5335 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5336 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5337 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5338 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5339
5340 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5341 and none can hold SFmode/SImode values. */
5342 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5343 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5344 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5345 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5346
5347 /* %fcc[0123] */
5348 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5349
5350 /* %icc, %sfp, %gsr */
5351 CC_MODES, 0, D_MODES
5352 };
5353
5354 static int sparc_mode_class [NUM_MACHINE_MODES];
5355
5356 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5357
5358 static void
5359 sparc_init_modes (void)
5360 {
5361 int i;
5362
5363 for (i = 0; i < NUM_MACHINE_MODES; i++)
5364 {
5365 machine_mode m = (machine_mode) i;
5366 unsigned int size = GET_MODE_SIZE (m);
5367
5368 switch (GET_MODE_CLASS (m))
5369 {
5370 case MODE_INT:
5371 case MODE_PARTIAL_INT:
5372 case MODE_COMPLEX_INT:
5373 if (size < 4)
5374 sparc_mode_class[i] = 1 << (int) H_MODE;
5375 else if (size == 4)
5376 sparc_mode_class[i] = 1 << (int) S_MODE;
5377 else if (size == 8)
5378 sparc_mode_class[i] = 1 << (int) D_MODE;
5379 else if (size == 16)
5380 sparc_mode_class[i] = 1 << (int) T_MODE;
5381 else if (size == 32)
5382 sparc_mode_class[i] = 1 << (int) O_MODE;
5383 else
5384 sparc_mode_class[i] = 0;
5385 break;
5386 case MODE_VECTOR_INT:
5387 if (size == 4)
5388 sparc_mode_class[i] = 1 << (int) SF_MODE;
5389 else if (size == 8)
5390 sparc_mode_class[i] = 1 << (int) DF_MODE;
5391 else
5392 sparc_mode_class[i] = 0;
5393 break;
5394 case MODE_FLOAT:
5395 case MODE_COMPLEX_FLOAT:
5396 if (size == 4)
5397 sparc_mode_class[i] = 1 << (int) SF_MODE;
5398 else if (size == 8)
5399 sparc_mode_class[i] = 1 << (int) DF_MODE;
5400 else if (size == 16)
5401 sparc_mode_class[i] = 1 << (int) TF_MODE;
5402 else if (size == 32)
5403 sparc_mode_class[i] = 1 << (int) OF_MODE;
5404 else
5405 sparc_mode_class[i] = 0;
5406 break;
5407 case MODE_CC:
5408 if (m == CCFPmode || m == CCFPEmode)
5409 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5410 else
5411 sparc_mode_class[i] = 1 << (int) CC_MODE;
5412 break;
5413 default:
5414 sparc_mode_class[i] = 0;
5415 break;
5416 }
5417 }
5418
5419 if (TARGET_ARCH64)
5420 hard_regno_mode_classes = hard_64bit_mode_classes;
5421 else
5422 hard_regno_mode_classes = hard_32bit_mode_classes;
5423
5424 /* Initialize the array used by REGNO_REG_CLASS. */
5425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5426 {
5427 if (i < 16 && TARGET_V8PLUS)
5428 sparc_regno_reg_class[i] = I64_REGS;
5429 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5430 sparc_regno_reg_class[i] = GENERAL_REGS;
5431 else if (i < 64)
5432 sparc_regno_reg_class[i] = FP_REGS;
5433 else if (i < 96)
5434 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5435 else if (i < 100)
5436 sparc_regno_reg_class[i] = FPCC_REGS;
5437 else
5438 sparc_regno_reg_class[i] = NO_REGS;
5439 }
5440 }
5441 \f
5442 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5443
5444 static inline bool
5445 save_global_or_fp_reg_p (unsigned int regno,
5446 int leaf_function ATTRIBUTE_UNUSED)
5447 {
5448 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5449 }
5450
5451 /* Return whether the return address register (%i7) is needed. */
5452
5453 static inline bool
5454 return_addr_reg_needed_p (int leaf_function)
5455 {
5456 /* If it is live, for example because of __builtin_return_address (0). */
5457 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5458 return true;
5459
5460 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5461 if (!leaf_function
5462 /* Loading the GOT register clobbers %o7. */
5463 || crtl->uses_pic_offset_table
5464 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5465 return true;
5466
5467 return false;
5468 }
5469
5470 /* Return whether REGNO, a local or in register, must be saved/restored. */
5471
5472 static bool
5473 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5474 {
5475 /* General case: call-saved registers live at some point. */
5476 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5477 return true;
5478
5479 /* Frame pointer register (%fp) if needed. */
5480 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5481 return true;
5482
5483 /* Return address register (%i7) if needed. */
5484 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5485 return true;
5486
5487 /* GOT register (%l7) if needed. */
5488 if (got_register_rtx && regno == REGNO (got_register_rtx))
5489 return true;
5490
5491 /* If the function accesses prior frames, the frame pointer and the return
5492 address of the previous frame must be saved on the stack. */
5493 if (crtl->accesses_prior_frames
5494 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5495 return true;
5496
5497 return false;
5498 }
5499
5500 /* Compute the frame size required by the function. This function is called
5501 during the reload pass and also by sparc_expand_prologue. */
5502
5503 static HOST_WIDE_INT
5504 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5505 {
5506 HOST_WIDE_INT frame_size, apparent_frame_size;
5507 int args_size, n_global_fp_regs = 0;
5508 bool save_local_in_regs_p = false;
5509 unsigned int i;
5510
5511 /* If the function allocates dynamic stack space, the dynamic offset is
5512 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5513 if (leaf_function && !cfun->calls_alloca)
5514 args_size = 0;
5515 else
5516 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5517
5518 /* Calculate space needed for global registers. */
5519 if (TARGET_ARCH64)
5520 {
5521 for (i = 0; i < 8; i++)
5522 if (save_global_or_fp_reg_p (i, 0))
5523 n_global_fp_regs += 2;
5524 }
5525 else
5526 {
5527 for (i = 0; i < 8; i += 2)
5528 if (save_global_or_fp_reg_p (i, 0)
5529 || save_global_or_fp_reg_p (i + 1, 0))
5530 n_global_fp_regs += 2;
5531 }
5532
5533 /* In the flat window model, find out which local and in registers need to
5534 be saved. We don't reserve space in the current frame for them as they
5535 will be spilled into the register window save area of the caller's frame.
5536 However, as soon as we use this register window save area, we must create
5537 that of the current frame to make it the live one. */
5538 if (TARGET_FLAT)
5539 for (i = 16; i < 32; i++)
5540 if (save_local_or_in_reg_p (i, leaf_function))
5541 {
5542 save_local_in_regs_p = true;
5543 break;
5544 }
5545
5546 /* Calculate space needed for FP registers. */
5547 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5548 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5549 n_global_fp_regs += 2;
5550
5551 if (size == 0
5552 && n_global_fp_regs == 0
5553 && args_size == 0
5554 && !save_local_in_regs_p)
5555 frame_size = apparent_frame_size = 0;
5556 else
5557 {
5558 /* Start from the apparent frame size. */
5559 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5560
5561 /* We need to add the size of the outgoing argument area. */
5562 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5563
5564 /* And that of the register window save area. */
5565 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5566
5567 /* Finally, bump to the appropriate alignment. */
5568 frame_size = SPARC_STACK_ALIGN (frame_size);
5569 }
5570
5571 /* Set up values for use in prologue and epilogue. */
5572 sparc_frame_size = frame_size;
5573 sparc_apparent_frame_size = apparent_frame_size;
5574 sparc_n_global_fp_regs = n_global_fp_regs;
5575 sparc_save_local_in_regs_p = save_local_in_regs_p;
5576
5577 return frame_size;
5578 }
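
/* Editorial sketch (not part of the original file): a worked example of the
   computation above, assuming 64-bit mode where FIRST_PARM_OFFSET -- the
   register window save area -- is 16 * 8 = 128 bytes and SPARC_STACK_ALIGN
   rounds up to a multiple of 16.  EX_ROUND_UP mirrors ROUND_UP.

     #define EX_ROUND_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

     int size             = 40;  // hypothetical: 40 bytes of locals
     int n_global_fp_regs = 4;   // 4 words (16 bytes) of saved global/FP regs
     int args_size        = 48;  // outgoing argument area

     int apparent = EX_ROUND_UP (size, 8) + n_global_fp_regs * 4;  // 40 + 16 = 56
     int frame    = apparent + EX_ROUND_UP (args_size, 8) + 128;   // 56 + 48 + 128 = 232
     frame        = EX_ROUND_UP (frame, 16);                       // aligned up to 240
*/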
5579
5580 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5581
5582 int
5583 sparc_initial_elimination_offset (int to)
5584 {
5585 int offset;
5586
5587 if (to == STACK_POINTER_REGNUM)
5588 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5589 else
5590 offset = 0;
5591
5592 offset += SPARC_STACK_BIAS;
5593 return offset;
5594 }
5595
5596 /* Output any necessary .register pseudo-ops. */
5597
5598 void
5599 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5600 {
5601 int i;
5602
5603 if (TARGET_ARCH32)
5604 return;
5605
5606 /* Check if %g[2367] were used without
5607 .register being printed for them already. */
5608 for (i = 2; i < 8; i++)
5609 {
5610 if (df_regs_ever_live_p (i)
5611 && ! sparc_hard_reg_printed [i])
5612 {
5613 sparc_hard_reg_printed [i] = 1;
5614 /* %g7 is used as TLS base register, use #ignore
5615 for it instead of #scratch. */
5616 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5617 i == 7 ? "ignore" : "scratch");
5618 }
5619 if (i == 3) i = 5;
5620 }
5621 }
5622
5623 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5624
5625 #if PROBE_INTERVAL > 4096
5626 #error Cannot use indexed addressing mode for stack probing
5627 #endif
5628
5629 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5630 inclusive. These are offsets from the current stack pointer.
5631
5632 Note that we don't use the REG+REG addressing mode for the probes because
5633 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5634 so the advantage of having a single code path wins here. */
5635
5636 static void
5637 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5638 {
5639 rtx g1 = gen_rtx_REG (Pmode, 1);
5640
5641 /* See if we have a constant small number of probes to generate. If so,
5642 that's the easy case. */
5643 if (size <= PROBE_INTERVAL)
5644 {
5645 emit_move_insn (g1, GEN_INT (first));
5646 emit_insn (gen_rtx_SET (g1,
5647 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5648 emit_stack_probe (plus_constant (Pmode, g1, -size));
5649 }
5650
5651 /* The run-time loop is made up of 9 insns in the generic case while the
5652 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5653 else if (size <= 4 * PROBE_INTERVAL)
5654 {
5655 HOST_WIDE_INT i;
5656
5657 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5658 emit_insn (gen_rtx_SET (g1,
5659 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5660 emit_stack_probe (g1);
5661
5662 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5663 it exceeds SIZE. If only two probes are needed, this will not
5664 generate any code. Then probe at FIRST + SIZE. */
5665 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5666 {
5667 emit_insn (gen_rtx_SET (g1,
5668 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5669 emit_stack_probe (g1);
5670 }
5671
5672 emit_stack_probe (plus_constant (Pmode, g1,
5673 (i - PROBE_INTERVAL) - size));
5674 }
5675
5676 /* Otherwise, do the same as above, but in a loop. Note that we must be
5677 extra careful with variables wrapping around because we might be at
5678 the very top (or the very bottom) of the address space and we have
5679 to be able to handle this case properly; in particular, we use an
5680 equality test for the loop condition. */
5681 else
5682 {
5683 HOST_WIDE_INT rounded_size;
5684 rtx g4 = gen_rtx_REG (Pmode, 4);
5685
5686 emit_move_insn (g1, GEN_INT (first));
5687
5688
5689 /* Step 1: round SIZE to the previous multiple of the interval. */
5690
5691 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5692 emit_move_insn (g4, GEN_INT (rounded_size));
5693
5694
5695 /* Step 2: compute initial and final value of the loop counter. */
5696
5697 /* TEST_ADDR = SP + FIRST. */
5698 emit_insn (gen_rtx_SET (g1,
5699 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5700
5701 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5702 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5703
5704
5705 /* Step 3: the loop
5706
5707 while (TEST_ADDR != LAST_ADDR)
5708 {
5709 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5710 probe at TEST_ADDR
5711 }
5712
5713 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5714 until it is equal to ROUNDED_SIZE. */
5715
5716 emit_insn (gen_probe_stack_range (Pmode, g1, g1, g4));
5717
5718
5719 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5720 that SIZE is equal to ROUNDED_SIZE. */
5721
5722 if (size != rounded_size)
5723 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5724 }
5725
5726 /* Make sure nothing is scheduled before we are done. */
5727 emit_insn (gen_blockage ());
5728 }
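
/* Editorial sketch: the probe offsets produced by the middle case above for a
   hypothetical FIRST = 4096 and SIZE = 10000, with the default 4096-byte
   PROBE_INTERVAL.  Offsets are shown relative to the incoming %sp; the
   64-bit stack bias is handled separately when the probe stores are emitted.

     long off1 = -(4096 + 1 * 4096);  // -8192  : initial probe
     long off2 = -(4096 + 2 * 4096);  // -12288 : single loop iteration
     long off3 = -(4096 + 10000);     // -14096 : final probe at FIRST + SIZE
*/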
5729
5730 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5731 absolute addresses. */
5732
5733 const char *
5734 output_probe_stack_range (rtx reg1, rtx reg2)
5735 {
5736 static int labelno = 0;
5737 char loop_lab[32];
5738 rtx xops[2];
5739
5740 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5741
5742 /* Loop. */
5743 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5744
5745 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5746 xops[0] = reg1;
5747 xops[1] = GEN_INT (-PROBE_INTERVAL);
5748 output_asm_insn ("add\t%0, %1, %0", xops);
5749
5750 /* Test if TEST_ADDR == LAST_ADDR. */
5751 xops[1] = reg2;
5752 output_asm_insn ("cmp\t%0, %1", xops);
5753
5754 /* Probe at TEST_ADDR and branch. */
5755 if (TARGET_ARCH64)
5756 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5757 else
5758 fputs ("\tbne\t", asm_out_file);
5759 assemble_name_raw (asm_out_file, loop_lab);
5760 fputc ('\n', asm_out_file);
5761 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5762 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5763
5764 return "";
5765 }
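
/* Editorial sketch: with the default 4096-byte PROBE_INTERVAL and %g1/%g4 as
   the two address registers (as set up by sparc_emit_probe_stack_range), the
   loop emitted above reads roughly as follows in 64-bit mode, where 2047 is
   SPARC_STACK_BIAS and the label name is schematic:

     .LPSRL0:
             add     %g1, -4096, %g1
             cmp     %g1, %g4
             bne,pt  %xcc, .LPSRL0
              st     %g0, [%g1+2047]
*/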
5766
5767 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5768 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5769 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5770 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5771 the action to be performed if it returns false. Return the new offset. */
5772
5773 typedef bool (*sorr_pred_t) (unsigned int, int);
5774 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5775
5776 static int
5777 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5778 int offset, int leaf_function, sorr_pred_t save_p,
5779 sorr_act_t action_true, sorr_act_t action_false)
5780 {
5781 unsigned int i;
5782 rtx mem;
5783 rtx_insn *insn;
5784
5785 if (TARGET_ARCH64 && high <= 32)
5786 {
5787 int fp_offset = -1;
5788
5789 for (i = low; i < high; i++)
5790 {
5791 if (save_p (i, leaf_function))
5792 {
5793 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5794 base, offset));
5795 if (action_true == SORR_SAVE)
5796 {
5797 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5798 RTX_FRAME_RELATED_P (insn) = 1;
5799 }
5800 else /* action_true == SORR_RESTORE */
5801 {
5802 /* The frame pointer must be restored last since its old
5803 value may be used as base address for the frame. This
5804 is problematic in 64-bit mode only because of the lack
5805 of double-word load instruction. */
5806 if (i == HARD_FRAME_POINTER_REGNUM)
5807 fp_offset = offset;
5808 else
5809 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5810 }
5811 offset += 8;
5812 }
5813 else if (action_false == SORR_ADVANCE)
5814 offset += 8;
5815 }
5816
5817 if (fp_offset >= 0)
5818 {
5819 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5820 emit_move_insn (hard_frame_pointer_rtx, mem);
5821 }
5822 }
5823 else
5824 {
5825 for (i = low; i < high; i += 2)
5826 {
5827 bool reg0 = save_p (i, leaf_function);
5828 bool reg1 = save_p (i + 1, leaf_function);
5829 machine_mode mode;
5830 int regno;
5831
5832 if (reg0 && reg1)
5833 {
5834 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5835 regno = i;
5836 }
5837 else if (reg0)
5838 {
5839 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5840 regno = i;
5841 }
5842 else if (reg1)
5843 {
5844 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5845 regno = i + 1;
5846 offset += 4;
5847 }
5848 else
5849 {
5850 if (action_false == SORR_ADVANCE)
5851 offset += 8;
5852 continue;
5853 }
5854
5855 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5856 if (action_true == SORR_SAVE)
5857 {
5858 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5859 RTX_FRAME_RELATED_P (insn) = 1;
5860 if (mode == DImode)
5861 {
5862 rtx set1, set2;
5863 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5864 offset));
5865 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5866 RTX_FRAME_RELATED_P (set1) = 1;
5867 mem
5868 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5869 offset + 4));
5870 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5871 RTX_FRAME_RELATED_P (set2) = 1;
5872 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5873 gen_rtx_PARALLEL (VOIDmode,
5874 gen_rtvec (2, set1, set2)));
5875 }
5876 }
5877 else /* action_true == SORR_RESTORE */
5878 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5879
5880 /* Bump and round down to double word
5881 in case we already bumped by 4. */
5882 offset = ROUND_DOWN (offset + 8, 8);
5883 }
5884 }
5885
5886 return offset;
5887 }
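
/* Editorial summary of the pairing logic above for the 32-bit/FP path, for a
   hypothetical even/odd register pair (I, I+1):

     both live      -> one DImode/DFmode access at OFFSET, regno = I
     only I live    -> SImode/SFmode access at OFFSET,     regno = I
     only I+1 live  -> SImode/SFmode access at OFFSET + 4, regno = I + 1
     neither live   -> only advance OFFSET if ACTION_FALSE == SORR_ADVANCE
*/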
5888
5889 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5890
5891 static rtx
5892 emit_adjust_base_to_offset (rtx base, int offset)
5893 {
5894 /* ??? This might be optimized a little as %g1 might already have a
5895 value close enough that a single add insn will do. */
5896 /* ??? Although, all of this is probably only a temporary fix because
5897 if %g1 can hold a function result, then sparc_expand_epilogue will
5898 lose (the result will be clobbered). */
5899 rtx new_base = gen_rtx_REG (Pmode, 1);
5900 emit_move_insn (new_base, GEN_INT (offset));
5901 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5902 return new_base;
5903 }
5904
5905 /* Emit code to save/restore call-saved global and FP registers. */
5906
5907 static void
5908 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5909 {
5910 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5911 {
5912 base = emit_adjust_base_to_offset (base, offset);
5913 offset = 0;
5914 }
5915
5916 offset
5917 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5918 save_global_or_fp_reg_p, action, SORR_NONE);
5919 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5920 save_global_or_fp_reg_p, action, SORR_NONE);
5921 }
5922
5923 /* Emit code to save/restore call-saved local and in registers. */
5924
5925 static void
5926 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5927 {
5928 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5929 {
5930 base = emit_adjust_base_to_offset (base, offset);
5931 offset = 0;
5932 }
5933
5934 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5935 save_local_or_in_reg_p, action, SORR_ADVANCE);
5936 }
5937
5938 /* Emit a window_save insn. */
5939
5940 static rtx_insn *
5941 emit_window_save (rtx increment)
5942 {
5943 rtx_insn *insn = emit_insn (gen_window_save (increment));
5944 RTX_FRAME_RELATED_P (insn) = 1;
5945
5946 /* The incoming return address (%o7) is saved in %i7. */
5947 add_reg_note (insn, REG_CFA_REGISTER,
5948 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5949 gen_rtx_REG (Pmode,
5950 INCOMING_RETURN_ADDR_REGNUM)));
5951
5952 /* The window save event. */
5953 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5954
5955 /* The CFA is %fp, the hard frame pointer. */
5956 add_reg_note (insn, REG_CFA_DEF_CFA,
5957 plus_constant (Pmode, hard_frame_pointer_rtx,
5958 INCOMING_FRAME_SP_OFFSET));
5959
5960 return insn;
5961 }
5962
5963 /* Generate an increment for the stack pointer. */
5964
5965 static rtx
5966 gen_stack_pointer_inc (rtx increment)
5967 {
5968 return gen_rtx_SET (stack_pointer_rtx,
5969 gen_rtx_PLUS (Pmode,
5970 stack_pointer_rtx,
5971 increment));
5972 }
5973
5974 /* Expand the function prologue. The prologue is responsible for reserving
5975 storage for the frame, saving the call-saved registers and loading the
5976 GOT register if needed. */
5977
5978 void
5979 sparc_expand_prologue (void)
5980 {
5981 HOST_WIDE_INT size;
5982 rtx_insn *insn;
5983
5984 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5985 on the final value of the flag means deferring the prologue/epilogue
5986 expansion until just before the second scheduling pass, which is too
5987 late to emit multiple epilogues or return insns.
5988
5989 Of course we are making the assumption that the value of the flag
5990 will not change between now and its final value. Of the three parts
5991 of the formula, only the last one can reasonably vary. Let's take a
5992 closer look, after assuming that the first two ones are set to true
5993 (otherwise the last value is effectively silenced).
5994
5995 If only_leaf_regs_used returns false, the global predicate will also
5996 be false so the actual frame size calculated below will be positive.
5997 As a consequence, the save_register_window insn will be emitted in
5998 the instruction stream; now this insn explicitly references %fp
5999 which is not a leaf register so only_leaf_regs_used will always
6000 return false subsequently.
6001
6002 If only_leaf_regs_used returns true, we hope that the subsequent
6003 optimization passes won't cause non-leaf registers to pop up. For
6004 example, the regrename pass has special provisions to not rename to
6005 non-leaf registers in a leaf function. */
6006 sparc_leaf_function_p
6007 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6008
6009 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6010
6011 if (flag_stack_usage_info)
6012 current_function_static_stack_size = size;
6013
6014 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6015 || flag_stack_clash_protection)
6016 {
6017 if (crtl->is_leaf && !cfun->calls_alloca)
6018 {
6019 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6020 sparc_emit_probe_stack_range (get_stack_check_protect (),
6021 size - get_stack_check_protect ());
6022 }
6023 else if (size > 0)
6024 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6025 }
6026
6027 if (size == 0)
6028 ; /* do nothing. */
6029 else if (sparc_leaf_function_p)
6030 {
6031 rtx size_int_rtx = GEN_INT (-size);
6032
6033 if (size <= 4096)
6034 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6035 else if (size <= 8192)
6036 {
6037 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6038 RTX_FRAME_RELATED_P (insn) = 1;
6039
6040 /* %sp is still the CFA register. */
6041 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6042 }
6043 else
6044 {
6045 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6046 emit_move_insn (size_rtx, size_int_rtx);
6047 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6048 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6049 gen_stack_pointer_inc (size_int_rtx));
6050 }
6051
6052 RTX_FRAME_RELATED_P (insn) = 1;
6053 }
6054 else
6055 {
6056 rtx size_int_rtx = GEN_INT (-size);
6057
6058 if (size <= 4096)
6059 emit_window_save (size_int_rtx);
6060 else if (size <= 8192)
6061 {
6062 emit_window_save (GEN_INT (-4096));
6063
6064 /* %sp is not the CFA register anymore. */
6065 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6066
6067 /* Make sure no %fp-based store is issued until after the frame is
6068 established. The offset between the frame pointer and the stack
6069 pointer is calculated relative to the value of the stack pointer
6070 at the end of the function prologue, and moving instructions that
6071 access the stack via the frame pointer between the instructions
6072 that decrement the stack pointer could result in accessing the
6073 register window save area, which is volatile. */
6074 emit_insn (gen_frame_blockage ());
6075 }
6076 else
6077 {
6078 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6079 emit_move_insn (size_rtx, size_int_rtx);
6080 emit_window_save (size_rtx);
6081 }
6082 }
6083
6084 if (sparc_leaf_function_p)
6085 {
6086 sparc_frame_base_reg = stack_pointer_rtx;
6087 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6088 }
6089 else
6090 {
6091 sparc_frame_base_reg = hard_frame_pointer_rtx;
6092 sparc_frame_base_offset = SPARC_STACK_BIAS;
6093 }
6094
6095 if (sparc_n_global_fp_regs > 0)
6096 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6097 sparc_frame_base_offset
6098 - sparc_apparent_frame_size,
6099 SORR_SAVE);
6100
6101 /* Advertise that the data calculated just above are now valid. */
6102 sparc_prologue_data_valid_p = true;
6103 }
6104
6105 /* Expand the function prologue for the flat register window model. The
6106 prologue is responsible for reserving storage for the frame, saving the
6107 call-saved registers and loading the GOT register if needed. */
6108
6109 void
6110 sparc_flat_expand_prologue (void)
6111 {
6112 HOST_WIDE_INT size;
6113 rtx_insn *insn;
6114
6115 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6116
6117 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6118
6119 if (flag_stack_usage_info)
6120 current_function_static_stack_size = size;
6121
6122 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6123 || flag_stack_clash_protection)
6124 {
6125 if (crtl->is_leaf && !cfun->calls_alloca)
6126 {
6127 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6128 sparc_emit_probe_stack_range (get_stack_check_protect (),
6129 size - get_stack_check_protect ());
6130 }
6131 else if (size > 0)
6132 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6133 }
6134
6135 if (sparc_save_local_in_regs_p)
6136 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6137 SORR_SAVE);
6138
6139 if (size == 0)
6140 ; /* do nothing. */
6141 else
6142 {
6143 rtx size_int_rtx, size_rtx;
6144
6145 size_rtx = size_int_rtx = GEN_INT (-size);
6146
6147 /* We establish the frame (i.e. decrement the stack pointer) first, even
6148 if we use a frame pointer, because we cannot clobber any call-saved
6149 registers, including the frame pointer, if we haven't created a new
6150 register save area, for the sake of compatibility with the ABI. */
6151 if (size <= 4096)
6152 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6153 else if (size <= 8192 && !frame_pointer_needed)
6154 {
6155 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6156 RTX_FRAME_RELATED_P (insn) = 1;
6157 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6158 }
6159 else
6160 {
6161 size_rtx = gen_rtx_REG (Pmode, 1);
6162 emit_move_insn (size_rtx, size_int_rtx);
6163 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6164 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6165 gen_stack_pointer_inc (size_int_rtx));
6166 }
6167 RTX_FRAME_RELATED_P (insn) = 1;
6168
6169 /* Ensure nothing is scheduled until after the frame is established. */
6170 emit_insn (gen_blockage ());
6171
6172 if (frame_pointer_needed)
6173 {
6174 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6175 gen_rtx_MINUS (Pmode,
6176 stack_pointer_rtx,
6177 size_rtx)));
6178 RTX_FRAME_RELATED_P (insn) = 1;
6179
6180 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6181 gen_rtx_SET (hard_frame_pointer_rtx,
6182 plus_constant (Pmode, stack_pointer_rtx,
6183 size)));
6184 }
6185
6186 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6187 {
6188 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6189 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6190
6191 insn = emit_move_insn (i7, o7);
6192 RTX_FRAME_RELATED_P (insn) = 1;
6193
6194 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6195
6196 /* Prevent this instruction from ever being considered dead,
6197 even if this function has no epilogue. */
6198 emit_use (i7);
6199 }
6200 }
6201
6202 if (frame_pointer_needed)
6203 {
6204 sparc_frame_base_reg = hard_frame_pointer_rtx;
6205 sparc_frame_base_offset = SPARC_STACK_BIAS;
6206 }
6207 else
6208 {
6209 sparc_frame_base_reg = stack_pointer_rtx;
6210 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6211 }
6212
6213 if (sparc_n_global_fp_regs > 0)
6214 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6215 sparc_frame_base_offset
6216 - sparc_apparent_frame_size,
6217 SORR_SAVE);
6218
6219 /* Advertise that the data calculated just above are now valid. */
6220 sparc_prologue_data_valid_p = true;
6221 }
6222
6223 /* This function generates the assembly code for function entry, which boils
6224 down to emitting the necessary .register directives. */
6225
6226 static void
6227 sparc_asm_function_prologue (FILE *file)
6228 {
6229 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6230 if (!TARGET_FLAT)
6231 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6232
6233 sparc_output_scratch_registers (file);
6234 }
6235
6236 /* Expand the function epilogue, either normal or part of a sibcall.
6237 We emit all the instructions except the return or the call. */
6238
6239 void
6240 sparc_expand_epilogue (bool for_eh)
6241 {
6242 HOST_WIDE_INT size = sparc_frame_size;
6243
6244 if (cfun->calls_alloca)
6245 emit_insn (gen_frame_blockage ());
6246
6247 if (sparc_n_global_fp_regs > 0)
6248 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6249 sparc_frame_base_offset
6250 - sparc_apparent_frame_size,
6251 SORR_RESTORE);
6252
6253 if (size == 0 || for_eh)
6254 ; /* do nothing. */
6255 else if (sparc_leaf_function_p)
6256 {
6257 if (size <= 4096)
6258 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6259 else if (size <= 8192)
6260 {
6261 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6262 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6263 }
6264 else
6265 {
6266 rtx reg = gen_rtx_REG (Pmode, 1);
6267 emit_move_insn (reg, GEN_INT (size));
6268 emit_insn (gen_stack_pointer_inc (reg));
6269 }
6270 }
6271 }
6272
6273 /* Expand the function epilogue for the flat register window model, either normal
6274 or part of a sibcall. We emit all the instructions except the return or the call. */
6275
6276 void
6277 sparc_flat_expand_epilogue (bool for_eh)
6278 {
6279 HOST_WIDE_INT size = sparc_frame_size;
6280
6281 if (sparc_n_global_fp_regs > 0)
6282 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6283 sparc_frame_base_offset
6284 - sparc_apparent_frame_size,
6285 SORR_RESTORE);
6286
6287 /* If we have a frame pointer, we'll need both to restore it before the
6288 frame is destroyed and to use its current value in destroying the frame.
6289 Since we don't have an atomic way to do that in the flat window model,
6290 we save the current value into a temporary register (%g1). */
6291 if (frame_pointer_needed && !for_eh)
6292 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6293
6294 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6295 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6296 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6297
6298 if (sparc_save_local_in_regs_p)
6299 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6300 sparc_frame_base_offset,
6301 SORR_RESTORE);
6302
6303 if (size == 0 || for_eh)
6304 ; /* do nothing. */
6305 else if (frame_pointer_needed)
6306 {
6307 /* Make sure the frame is destroyed after everything else is done. */
6308 emit_insn (gen_blockage ());
6309
6310 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6311 }
6312 else
6313 {
6314 /* Likewise. */
6315 emit_insn (gen_blockage ());
6316
6317 if (size <= 4096)
6318 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6319 else if (size <= 8192)
6320 {
6321 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6322 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6323 }
6324 else
6325 {
6326 rtx reg = gen_rtx_REG (Pmode, 1);
6327 emit_move_insn (reg, GEN_INT (size));
6328 emit_insn (gen_stack_pointer_inc (reg));
6329 }
6330 }
6331 }
6332
6333 /* Return true if it is appropriate to emit `return' instructions in the
6334 body of a function. */
6335
6336 bool
6337 sparc_can_use_return_insn_p (void)
6338 {
6339 return sparc_prologue_data_valid_p
6340 && sparc_n_global_fp_regs == 0
6341 && (TARGET_FLAT
6342 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6343 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
6344 }
6345
6346 /* This function generates the assembly code for function exit. */
6347
6348 static void
6349 sparc_asm_function_epilogue (FILE *file)
6350 {
6351 /* If the last two instructions of a function are "call foo; dslot;"
6352 the return address might point to the first instruction in the next
6353 function and we have to output a dummy nop for the sake of sane
6354 backtraces in such cases. This is pointless for sibling calls since
6355 the return address is explicitly adjusted. */
6356
6357 rtx_insn *insn = get_last_insn ();
6358
6359 rtx last_real_insn = prev_real_insn (insn);
6360 if (last_real_insn
6361 && NONJUMP_INSN_P (last_real_insn)
6362 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6363 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6364
6365 if (last_real_insn
6366 && CALL_P (last_real_insn)
6367 && !SIBLING_CALL_P (last_real_insn))
6368 fputs("\tnop\n", file);
6369
6370 sparc_output_deferred_case_vectors ();
6371 }
6372
6373 /* Output a 'restore' instruction. */
6374
6375 static void
6376 output_restore (rtx pat)
6377 {
6378 rtx operands[3];
6379
6380 if (! pat)
6381 {
6382 fputs ("\t restore\n", asm_out_file);
6383 return;
6384 }
6385
6386 gcc_assert (GET_CODE (pat) == SET);
6387
6388 operands[0] = SET_DEST (pat);
6389 pat = SET_SRC (pat);
6390
6391 switch (GET_CODE (pat))
6392 {
6393 case PLUS:
6394 operands[1] = XEXP (pat, 0);
6395 operands[2] = XEXP (pat, 1);
6396 output_asm_insn (" restore %r1, %2, %Y0", operands);
6397 break;
6398 case LO_SUM:
6399 operands[1] = XEXP (pat, 0);
6400 operands[2] = XEXP (pat, 1);
6401 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6402 break;
6403 case ASHIFT:
6404 operands[1] = XEXP (pat, 0);
6405 gcc_assert (XEXP (pat, 1) == const1_rtx);
6406 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6407 break;
6408 default:
6409 operands[1] = pat;
6410 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6411 break;
6412 }
6413 }
6414
6415 /* Output a return. */
6416
6417 const char *
6418 output_return (rtx_insn *insn)
6419 {
6420 if (crtl->calls_eh_return)
6421 {
6422 /* If the function uses __builtin_eh_return, the eh_return
6423 machinery occupies the delay slot. */
6424 gcc_assert (!final_sequence);
6425
6426 if (flag_delayed_branch)
6427 {
6428 if (!TARGET_FLAT && TARGET_V9)
6429 fputs ("\treturn\t%i7+8\n", asm_out_file);
6430 else
6431 {
6432 if (!TARGET_FLAT)
6433 fputs ("\trestore\n", asm_out_file);
6434
6435 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6436 }
6437
6438 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6439 }
6440 else
6441 {
6442 if (!TARGET_FLAT)
6443 fputs ("\trestore\n", asm_out_file);
6444
6445 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6446 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6447 }
6448 }
6449 else if (sparc_leaf_function_p || TARGET_FLAT)
6450 {
6451 /* This is a leaf or flat function so we don't have to bother restoring
6452 the register window, which frees us from dealing with the convoluted
6453 semantics of restore/return. We simply output the jump to the
6454 return address and the insn in the delay slot (if any). */
6455
6456 return "jmp\t%%o7+%)%#";
6457 }
6458 else
6459 {
6460 /* This is a regular function so we have to restore the register window.
6461 We may have a pending insn for the delay slot, which will be either
6462 combined with the 'restore' instruction or put in the delay slot of
6463 the 'return' instruction. */
6464
6465 if (final_sequence)
6466 {
6467 rtx_insn *delay;
6468 rtx pat;
6469
6470 delay = NEXT_INSN (insn);
6471 gcc_assert (delay);
6472
6473 pat = PATTERN (delay);
6474
6475 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6476 {
6477 epilogue_renumber (&pat, 0);
6478 return "return\t%%i7+%)%#";
6479 }
6480 else
6481 {
6482 output_asm_insn ("jmp\t%%i7+%)", NULL);
6483
6484 /* We're going to output the insn in the delay slot manually.
6485 Make sure to output its source location first. */
6486 PATTERN (delay) = gen_blockage ();
6487 INSN_CODE (delay) = -1;
6488 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6489 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6490
6491 output_restore (pat);
6492 }
6493 }
6494 else
6495 {
6496 /* The delay slot is empty. */
6497 if (TARGET_V9)
6498 return "return\t%%i7+%)\n\t nop";
6499 else if (flag_delayed_branch)
6500 return "jmp\t%%i7+%)\n\t restore";
6501 else
6502 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6503 }
6504 }
6505
6506 return "";
6507 }
6508
6509 /* Output a sibling call. */
6510
6511 const char *
6512 output_sibcall (rtx_insn *insn, rtx call_operand)
6513 {
6514 rtx operands[1];
6515
6516 gcc_assert (flag_delayed_branch);
6517
6518 operands[0] = call_operand;
6519
6520 if (sparc_leaf_function_p || TARGET_FLAT)
6521 {
6522 /* This is a leaf or flat function so we don't have to bother restoring
6523 the register window. We simply output the jump to the function and
6524 the insn in the delay slot (if any). */
6525
6526 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6527
6528 if (final_sequence)
6529 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6530 operands);
6531 else
6532 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6533 it into branch if possible. */
6534 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6535 operands);
6536 }
6537 else
6538 {
6539 /* This is a regular function so we have to restore the register window.
6540 We may have a pending insn for the delay slot, which will be combined
6541 with the 'restore' instruction. */
6542
6543 output_asm_insn ("call\t%a0, 0", operands);
6544
6545 if (final_sequence)
6546 {
6547 rtx_insn *delay;
6548 rtx pat;
6549
6550 delay = NEXT_INSN (insn);
6551 gcc_assert (delay);
6552
6553 pat = PATTERN (delay);
6554
6555 /* We're going to output the insn in the delay slot manually.
6556 Make sure to output its source location first. */
6557 PATTERN (delay) = gen_blockage ();
6558 INSN_CODE (delay) = -1;
6559 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6560 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6561
6562 output_restore (pat);
6563 }
6564 else
6565 output_restore (NULL_RTX);
6566 }
6567
6568 return "";
6569 }
6570 \f
6571 /* Functions for handling argument passing.
6572
6573 For 32-bit, the first 6 args are normally in registers and the rest are
6574 pushed. Any arg that starts within the first 6 words is at least
6575 partially passed in a register unless its data type forbids.
6576
6577 For 64-bit, the argument registers are laid out as an array of 16 elements
6578 and arguments are added sequentially. The first 6 int args and up to the
6579 first 16 fp args (depending on size) are passed in regs.
6580
6581 Slot Stack Integral Float Float in structure Double Long Double
6582 ---- ----- -------- ----- ------------------ ------ -----------
6583 15 [SP+248] %f31 %f30,%f31 %d30
6584 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6585 13 [SP+232] %f27 %f26,%f27 %d26
6586 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6587 11 [SP+216] %f23 %f22,%f23 %d22
6588 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6589 9 [SP+200] %f19 %f18,%f19 %d18
6590 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6591 7 [SP+184] %f15 %f14,%f15 %d14
6592 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6593 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6594 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6595 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6596 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6597 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6598 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6599
6600 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6601
6602 Integral arguments are always passed as 64-bit quantities appropriately
6603 extended.
6604
6605 Passing of floating point values is handled as follows.
6606 If a prototype is in scope:
6607 If the value is in a named argument (i.e. not a stdarg function or a
6608 value not part of the `...') then the value is passed in the appropriate
6609 fp reg.
6610 If the value is part of the `...' and is passed in one of the first 6
6611 slots then the value is passed in the appropriate int reg.
6612 If the value is part of the `...' and is not passed in one of the first 6
6613 slots then the value is passed in memory.
6614 If a prototype is not in scope:
6615 If the value is one of the first 6 arguments the value is passed in the
6616 appropriate integer reg and the appropriate fp reg.
6617 If the value is not one of the first 6 arguments the value is passed in
6618 the appropriate fp reg and in memory.
6619
6620
6621 Summary of the calling conventions implemented by GCC on the SPARC:
6622
6623 32-bit ABI:
6624 size argument return value
6625
6626 small integer <4 int. reg. int. reg.
6627 word 4 int. reg. int. reg.
6628 double word 8 int. reg. int. reg.
6629
6630 _Complex small integer <8 int. reg. int. reg.
6631 _Complex word 8 int. reg. int. reg.
6632 _Complex double word 16 memory int. reg.
6633
6634 vector integer <=8 int. reg. FP reg.
6635 vector integer >8 memory memory
6636
6637 float 4 int. reg. FP reg.
6638 double 8 int. reg. FP reg.
6639 long double 16 memory memory
6640
6641 _Complex float 8 memory FP reg.
6642 _Complex double 16 memory FP reg.
6643 _Complex long double 32 memory FP reg.
6644
6645 vector float any memory memory
6646
6647 aggregate any memory memory
6648
6649
6650
6651 64-bit ABI:
6652 size argument return value
6653
6654 small integer <8 int. reg. int. reg.
6655 word 8 int. reg. int. reg.
6656 double word 16 int. reg. int. reg.
6657
6658 _Complex small integer <16 int. reg. int. reg.
6659 _Complex word 16 int. reg. int. reg.
6660 _Complex double word 32 memory int. reg.
6661
6662 vector integer <=16 FP reg. FP reg.
6663 vector integer 16<s<=32 memory FP reg.
6664 vector integer >32 memory memory
6665
6666 float 4 FP reg. FP reg.
6667 double 8 FP reg. FP reg.
6668 long double 16 FP reg. FP reg.
6669
6670 _Complex float 8 FP reg. FP reg.
6671 _Complex double 16 FP reg. FP reg.
6672 _Complex long double 32 memory FP reg.
6673
6674 vector float <=16 FP reg. FP reg.
6675 vector float 16<s<=32 memory FP reg.
6676 vector float >32 memory memory
6677
6678 aggregate <=16 reg. reg.
6679 aggregate 16<s<=32 memory reg.
6680 aggregate >32 memory memory
6681
6682
6683
6684 Note #1: complex floating-point types follow the extended SPARC ABIs as
6685 implemented by the Sun compiler.
6686
6687 Note #2: integer vector types follow the scalar floating-point types
6688 conventions to match what is implemented by the Sun VIS SDK.
6689
6690 Note #3: floating-point vector types follow the aggregate types
6691 conventions. */
6692
6693
6694 /* Maximum number of int regs for args. */
6695 #define SPARC_INT_ARG_MAX 6
6696 /* Maximum number of fp regs for args. */
6697 #define SPARC_FP_ARG_MAX 16
6698 /* Number of words (partially) occupied for a given size in units. */
6699 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
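
/* Editorial note: a few worked values for the macros above, assuming the
   64-bit ABI where UNITS_PER_WORD is 8:

     CEIL_NWORDS (1)   // -> 1 slot (e.g. a lone char argument)
     CEIL_NWORDS (8)   // -> 1 slot
     CEIL_NWORDS (12)  // -> 2 slots (a 12-byte structure)
     CEIL_NWORDS (16)  // -> 2 slots (largest aggregate passed entirely in regs)
*/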
6700
6701 /* Handle the INIT_CUMULATIVE_ARGS macro.
6702 Initialize a variable CUM of type CUMULATIVE_ARGS
6703 for a call to a function whose data type is FNTYPE.
6704 For a library call, FNTYPE is 0. */
6705
6706 void
6707 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6708 {
6709 cum->words = 0;
6710 cum->prototype_p = fntype && prototype_p (fntype);
6711 cum->libcall_p = !fntype;
6712 }
6713
6714 /* Handle promotion of pointer and integer arguments. */
6715
6716 static machine_mode
6717 sparc_promote_function_mode (const_tree type, machine_mode mode,
6718 int *punsignedp, const_tree, int)
6719 {
6720 if (type && POINTER_TYPE_P (type))
6721 {
6722 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6723 return Pmode;
6724 }
6725
6726 /* Integral arguments are passed as full words, as per the ABI. */
6727 if (GET_MODE_CLASS (mode) == MODE_INT
6728 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6729 return word_mode;
6730
6731 return mode;
6732 }
6733
6734 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6735
6736 static bool
6737 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6738 {
6739 return TARGET_ARCH64 ? true : false;
6740 }
6741
6742 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6743 Specify whether to pass the argument by reference. */
6744
6745 static bool
6746 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6747 {
6748 tree type = arg.type;
6749 machine_mode mode = arg.mode;
6750 if (TARGET_ARCH32)
6751 /* Original SPARC 32-bit ABI says that structures and unions,
6752 and quad-precision floats are passed by reference.
6753 All other base types are passed in registers.
6754
6755 Extended ABI (as implemented by the Sun compiler) says that all
6756 complex floats are passed by reference. Pass complex integers
6757 in registers up to 8 bytes. More generally, enforce the 2-word
6758 cap for passing arguments in registers.
6759
6760 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6761 vectors are passed like floats of the same size, that is in
6762 registers up to 8 bytes. Pass all vector floats by reference
6763 like structure and unions. */
6764 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6765 || mode == SCmode
6766 /* Catch CDImode, TFmode, DCmode and TCmode. */
6767 || GET_MODE_SIZE (mode) > 8
6768 || (type
6769 && VECTOR_TYPE_P (type)
6770 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6771 else
6772 /* Original SPARC 64-bit ABI says that structures and unions
6773 smaller than 16 bytes are passed in registers, as well as
6774 all other base types.
6775
6776 Extended ABI (as implemented by the Sun compiler) says that
6777 complex floats are passed in registers up to 16 bytes. Pass
6778 all complex integers in registers up to 16 bytes. More generally,
6779 enforce the 2-word cap for passing arguments in registers.
6780
6781 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6782 vectors are passed like floats of the same size, that is in
6783 registers (up to 16 bytes). Pass all vector floats like structure
6784 and unions. */
6785 return ((type
6786 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6787 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6788 /* Catch CTImode and TCmode. */
6789 || GET_MODE_SIZE (mode) > 16);
6790 }
6791
6792 /* Traverse the record TYPE recursively and call FUNC on its fields.
6793 NAMED is true if this is for a named parameter. DATA is passed
6794 to FUNC for each field. OFFSET is the starting position and
6795 PACKED is true if we are inside a packed record. */
6796
6797 template <typename T, void Func (const_tree, int, bool, T*)>
6798 static void
6799 traverse_record_type (const_tree type, bool named, T *data,
6800 int offset = 0, bool packed = false)
6801 {
6802 /* The ABI obviously doesn't specify how packed structures are passed.
6803 These are passed in integer regs if possible, otherwise memory. */
6804 if (!packed)
6805 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6806 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6807 {
6808 packed = true;
6809 break;
6810 }
6811
6812 /* Walk the real fields, but skip those with no size or a zero size.
6813 ??? Fields with variable offset are handled as having zero offset. */
6814 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6815 if (TREE_CODE (field) == FIELD_DECL)
6816 {
6817 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6818 continue;
6819
6820 int bitpos = offset;
6821 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6822 bitpos += int_bit_position (field);
6823
6824 tree field_type = TREE_TYPE (field);
6825 if (TREE_CODE (field_type) == RECORD_TYPE)
6826 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6827 packed);
6828 else
6829 {
6830 const bool fp_type
6831 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6832 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6833 data);
6834 }
6835 }
6836 }
6837
6838 /* Handle recursive register classifying for structure layout. */
6839
6840 typedef struct
6841 {
6842 bool fp_regs; /* true if field eligible to FP registers. */
6843 bool fp_regs_in_first_word; /* true if such field in first word. */
6844 } classify_data_t;
6845
6846 /* A subroutine of function_arg_slotno. Classify the field. */
6847
6848 inline void
6849 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6850 {
6851 if (fp)
6852 {
6853 data->fp_regs = true;
6854 if (bitpos < BITS_PER_WORD)
6855 data->fp_regs_in_first_word = true;
6856 }
6857 }
6858
6859 /* Compute the slot number to pass an argument in.
6860 Return the slot number or -1 if passing on the stack.
6861
6862 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6863 the preceding args and about the function being called.
6864 MODE is the argument's machine mode.
6865 TYPE is the data type of the argument (as a tree).
6866 This is null for libcalls where that information may
6867 not be available.
6868 NAMED is nonzero if this argument is a named parameter
6869 (otherwise it is an extra parameter matching an ellipsis).
6870 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6871 *PREGNO records the register number to use if scalar type.
6872 *PPADDING records the amount of padding needed in words. */
6873
6874 static int
6875 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6876 const_tree type, bool named, bool incoming,
6877 int *pregno, int *ppadding)
6878 {
6879 const int regbase
6880 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6881 int slotno = cum->words, regno;
6882 enum mode_class mclass = GET_MODE_CLASS (mode);
6883
6884 /* Silence warnings in the callers. */
6885 *pregno = -1;
6886 *ppadding = -1;
6887
6888 if (type && TREE_ADDRESSABLE (type))
6889 return -1;
6890
6891 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6892 if (TARGET_ARCH64
6893 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6894 && (slotno & 1) != 0)
6895 {
6896 slotno++;
6897 *ppadding = 1;
6898 }
6899 else
6900 *ppadding = 0;
6901
6902 /* Vector types deserve special treatment because they are polymorphic wrt
6903 their mode, depending upon whether VIS instructions are enabled. */
6904 if (type && VECTOR_TYPE_P (type))
6905 {
6906 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6907 {
6908 /* The SPARC port defines no floating-point vector modes. */
6909 gcc_assert (mode == BLKmode);
6910 }
6911 else
6912 {
6913 /* Integer vector types should either have a vector
6914 mode or an integral mode, because we are guaranteed
6915 by pass_by_reference that their size is not greater
6916 than 16 bytes and TImode is 16-byte wide. */
6917 gcc_assert (mode != BLKmode);
6918
6919 /* Integer vectors are handled like floats as per
6920 the Sun VIS SDK. */
6921 mclass = MODE_FLOAT;
6922 }
6923 }
6924
6925 switch (mclass)
6926 {
6927 case MODE_FLOAT:
6928 case MODE_COMPLEX_FLOAT:
6929 case MODE_VECTOR_INT:
6930 if (TARGET_ARCH64 && TARGET_FPU && named)
6931 {
6932 /* If all arg slots are filled, then must pass on stack. */
6933 if (slotno >= SPARC_FP_ARG_MAX)
6934 return -1;
6935
6936 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6937 /* Arguments filling only one single FP register are
6938 right-justified in the outer double FP register. */
6939 if (GET_MODE_SIZE (mode) <= 4)
6940 regno++;
6941 break;
6942 }
6943 /* fallthrough */
6944
6945 case MODE_INT:
6946 case MODE_COMPLEX_INT:
6947 /* If all arg slots are filled, then must pass on stack. */
6948 if (slotno >= SPARC_INT_ARG_MAX)
6949 return -1;
6950
6951 regno = regbase + slotno;
6952 break;
6953
6954 case MODE_RANDOM:
6955 /* MODE is VOIDmode when generating the actual call. */
6956 if (mode == VOIDmode)
6957 return -1;
6958
6959 if (TARGET_64BIT && TARGET_FPU && named
6960 && type
6961 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6962 {
6963 /* If all arg slots are filled, then must pass on stack. */
6964 if (slotno >= SPARC_FP_ARG_MAX)
6965 return -1;
6966
6967 if (TREE_CODE (type) == RECORD_TYPE)
6968 {
6969 classify_data_t data = { false, false };
6970 traverse_record_type<classify_data_t, classify_registers>
6971 (type, named, &data);
6972
6973 if (data.fp_regs)
6974 {
6975 /* If all FP slots are filled except for the last one and
6976 there is no FP field in the first word, then must pass
6977 on stack. */
6978 if (slotno >= SPARC_FP_ARG_MAX - 1
6979 && !data.fp_regs_in_first_word)
6980 return -1;
6981 }
6982 else
6983 {
6984 /* If all int slots are filled, then must pass on stack. */
6985 if (slotno >= SPARC_INT_ARG_MAX)
6986 return -1;
6987 }
6988
6989 /* PREGNO isn't set since both int and FP regs can be used. */
6990 return slotno;
6991 }
6992
6993 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6994 }
6995 else
6996 {
6997 /* If all arg slots are filled, then must pass on stack. */
6998 if (slotno >= SPARC_INT_ARG_MAX)
6999 return -1;
7000
7001 regno = regbase + slotno;
7002 }
7003 break;
7004
7005 default :
7006 gcc_unreachable ();
7007 }
7008
7009 *pregno = regno;
7010 return slotno;
7011 }
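
/* Editorial sketch of the FP slot mapping above, assuming SPARC_FP_ARG_FIRST
   is hard register 32 (%f0); this matches the slot table earlier in the file:

     slotno 0, double:  regno = 32 + 0 * 2     = 32  -> %d0 (%f0,%f1)
     slotno 0, float:   regno = 32 + 0 * 2 + 1 = 33  -> %f1 (right-justified)
     slotno 5, float:   regno = 32 + 5 * 2 + 1 = 43  -> %f11
     slotno 6, double:  regno = 32 + 6 * 2     = 44  -> %d12 (%f12,%f13)
*/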
7012
7013 /* Handle recursive register counting/assigning for structure layout. */
7014
7015 typedef struct
7016 {
7017 int slotno; /* slot number of the argument. */
7018 int regbase; /* regno of the base register. */
7019 int intoffset; /* offset of the first pending integer field. */
7020 int nregs; /* number of words passed in registers. */
7021 bool stack; /* true if part of the argument is on the stack. */
7022 rtx ret; /* return expression being built. */
7023 } assign_data_t;
7024
7025 /* A subroutine of function_arg_record_value. Compute the number of integer
7026 registers to be assigned between PARMS->intoffset and BITPOS. Return
7027 true if at least one integer register is assigned or false otherwise. */
7028
7029 static bool
7030 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7031 {
7032 if (data->intoffset < 0)
7033 return false;
7034
7035 const int intoffset = data->intoffset;
7036 data->intoffset = -1;
7037
7038 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7039 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7040 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7041 int nregs = (endbit - startbit) / BITS_PER_WORD;
7042
7043 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7044 {
7045 nregs = SPARC_INT_ARG_MAX - this_slotno;
7046
7047 /* We need to pass this field (partly) on the stack. */
7048 data->stack = 1;
7049 }
7050
7051 if (nregs <= 0)
7052 return false;
7053
7054 *pnregs = nregs;
7055 return true;
7056 }
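
/* Editorial sketch: the word counting above for a hypothetical record whose
   pending integer run starts at bit 8 and whose next FP field (BITPOS) sits
   at bit 160, with 64-bit words:

     startbit = ROUND_DOWN (8, 64)  =   0
     endbit   = ROUND_UP (160, 64)  = 192
     nregs    = (192 - 0) / 64      =   3 integer registers
*/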
7057
7058 /* A subroutine of function_arg_record_value. Compute the number and the mode
7059 of the FP registers to be assigned for FIELD. Return true if at least one
7060 FP register is assigned or false otherwise. */
7061
7062 static bool
7063 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7064 int *pnregs, machine_mode *pmode)
7065 {
7066 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7067 machine_mode mode = DECL_MODE (field);
7068 int nregs, nslots;
7069
7070 /* Slots are counted as words while regs are counted as having the size of
7071 the (inner) mode. */
7072 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7073 {
7074 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7075 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7076 }
7077 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7078 {
7079 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7080 nregs = 2;
7081 }
7082 else
7083 nregs = 1;
7084
7085 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7086
7087 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7088 {
7089 nslots = SPARC_FP_ARG_MAX - this_slotno;
7090 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7091
7092 /* We need to pass this field (partly) on the stack. */
7093 data->stack = 1;
7094
7095 if (nregs <= 0)
7096 return false;
7097 }
7098
7099 *pnregs = nregs;
7100 *pmode = mode;
7101 return true;
7102 }
7103
7104 /* A subroutine of function_arg_record_value. Count the number of registers
7105 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7106
7107 inline void
7108 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7109 {
7110 if (fp)
7111 {
7112 int nregs;
7113 machine_mode mode;
7114
7115 if (compute_int_layout (bitpos, data, &nregs))
7116 data->nregs += nregs;
7117
7118 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7119 data->nregs += nregs;
7120 }
7121 else
7122 {
7123 if (data->intoffset < 0)
7124 data->intoffset = bitpos;
7125 }
7126 }
7127
7128 /* A subroutine of function_arg_record_value. Assign the bits of the
7129 structure between PARMS->intoffset and BITPOS to integer registers. */
7130
7131 static void
7132 assign_int_registers (int bitpos, assign_data_t *data)
7133 {
7134 int intoffset = data->intoffset;
7135 machine_mode mode;
7136 int nregs;
7137
7138 if (!compute_int_layout (bitpos, data, &nregs))
7139 return;
7140
7141 /* If this is the trailing part of a word, only load that much into
7142 the register. Otherwise load the whole register. Note that in
7143 the latter case we may pick up unwanted bits. It's not a problem
7144 at the moment, but we may wish to revisit this. */
7145 if (intoffset % BITS_PER_WORD != 0)
7146 mode = smallest_int_mode_for_size (BITS_PER_WORD
7147 - intoffset % BITS_PER_WORD);
7148 else
7149 mode = word_mode;
7150
7151 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7152 unsigned int regno = data->regbase + this_slotno;
7153 intoffset /= BITS_PER_UNIT;
7154
7155 do
7156 {
7157 rtx reg = gen_rtx_REG (mode, regno);
7158 XVECEXP (data->ret, 0, data->stack + data->nregs)
7159 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7160 data->nregs += 1;
7161 mode = word_mode;
7162 regno += 1;
7163 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7164 }
7165 while (--nregs > 0);
7166 }
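
/* Editorial note: the expression (intoffset | (UNITS_PER_WORD - 1)) + 1 in
   the loop above advances INTOFFSET to the next word boundary; for example,
   with 8-byte words:

     ( 5 | 7) + 1 = 8
     ( 8 | 7) + 1 = 16
     (13 | 7) + 1 = 16
*/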
7167
7168 /* A subroutine of function_arg_record_value. Assign FIELD at position
7169 BITPOS to FP registers. */
7170
7171 static void
7172 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7173 {
7174 int nregs;
7175 machine_mode mode;
7176
7177 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7178 return;
7179
7180 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7181 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7182 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7183 regno++;
7184 int pos = bitpos / BITS_PER_UNIT;
7185
7186 do
7187 {
7188 rtx reg = gen_rtx_REG (mode, regno);
7189 XVECEXP (data->ret, 0, data->stack + data->nregs)
7190 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7191 data->nregs += 1;
7192 regno += GET_MODE_SIZE (mode) / 4;
7193 pos += GET_MODE_SIZE (mode);
7194 }
7195 while (--nregs > 0);
7196 }
7197
7198 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7199 the structure between PARMS->intoffset and BITPOS to registers. */
7200
7201 inline void
7202 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7203 {
7204 if (fp)
7205 {
7206 assign_int_registers (bitpos, data);
7207
7208 assign_fp_registers (field, bitpos, data);
7209 }
7210 else
7211 {
7212 if (data->intoffset < 0)
7213 data->intoffset = bitpos;
7214 }
7215 }
7216
7217 /* Used by function_arg and function_value to implement the complex
7218 conventions of the 64-bit ABI for passing and returning structures.
7219 Return an expression valid as a return value for the FUNCTION_ARG
7220 and TARGET_FUNCTION_VALUE.
7221
7222 TYPE is the data type of the argument (as a tree).
7223 This is null for libcalls where that information may
7224 not be available.
7225 MODE is the argument's machine mode.
7226 SLOTNO is the index number of the argument's slot in the parameter array.
7227 NAMED is true if this argument is a named parameter
7228 (otherwise it is an extra parameter matching an ellipsis).
7229 REGBASE is the regno of the base register for the parameter array. */
7230
7231 static rtx
7232 function_arg_record_value (const_tree type, machine_mode mode,
7233 int slotno, bool named, int regbase)
7234 {
7235 const int size = int_size_in_bytes (type);
7236 assign_data_t data;
7237 int nregs;
7238
7239 data.slotno = slotno;
7240 data.regbase = regbase;
7241
7242 /* Count how many registers we need. */
7243 data.nregs = 0;
7244 data.intoffset = 0;
7245 data.stack = false;
7246 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7247
7248 /* Take into account pending integer fields. */
7249 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7250 data.nregs += nregs;
7251
7252 /* Allocate the vector and handle some annoying special cases. */
7253 nregs = data.nregs;
7254
7255 if (nregs == 0)
7256 {
7257 /* ??? Empty structure has no value? Duh? */
7258 if (size <= 0)
7259 {
7260 /* Though there's nothing really to store, return a word register
7261 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7262 leads to breakage due to the fact that there are zero bytes to
7263 load. */
7264 return gen_rtx_REG (mode, regbase);
7265 }
7266
7267 /* ??? C++ has structures with no fields, and yet a size. Give up
7268 for now and pass everything back in integer registers. */
7269 nregs = CEIL_NWORDS (size);
7270 if (nregs + slotno > SPARC_INT_ARG_MAX)
7271 nregs = SPARC_INT_ARG_MAX - slotno;
7272 }
7273
7274 gcc_assert (nregs > 0);
7275
7276 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7277
7278 /* If at least one field must be passed on the stack, generate
7279 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7280 also be passed on the stack. We can't do much better because the
7281 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7282 of structures for which the fields passed exclusively in registers
7283 are not at the beginning of the structure. */
7284 if (data.stack)
7285 XVECEXP (data.ret, 0, 0)
7286 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7287
7288 /* Assign the registers. */
7289 data.nregs = 0;
7290 data.intoffset = 0;
7291 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7292
7293 /* Assign pending integer fields. */
7294 assign_int_registers (size * BITS_PER_UNIT, &data);
7295
7296 gcc_assert (data.nregs == nregs);
7297
7298 return data.ret;
7299 }
7300
7301 /* Used by function_arg and function_value to implement the conventions
7302 of the 64-bit ABI for passing and returning unions.
7303 Return an expression valid as a return value for the TARGET_FUNCTION_ARG
7304 and TARGET_FUNCTION_VALUE hooks.
7305
7306 SIZE is the size in bytes of the union.
7307 MODE is the argument's machine mode.
7308 SLOTNO is the index number of the argument's slot in the parameter array.
7309 REGNO is the hard register the union will be passed in. */
7310
7311 static rtx
7312 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7313 {
7314 unsigned int nwords;
7315
7316 /* See comment in function_arg_record_value for empty structures. */
7317 if (size <= 0)
7318 return gen_rtx_REG (mode, regno);
7319
7320 if (slotno == SPARC_INT_ARG_MAX - 1)
7321 nwords = 1;
7322 else
7323 nwords = CEIL_NWORDS (size);
7324
7325 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7326
7327 /* Unions are passed left-justified. */
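/* As an illustrative sketch (hypothetical values, not from the sources):
a 12-byte union passed as an outgoing argument in slot 0 yields two
word_mode entries, roughly
(parallel [(expr_list (reg:DI %o0) (const_int 0))
(expr_list (reg:DI %o1) (const_int 8))])
i.e. bytes 0-7 in %o0 and bytes 8-11 left-justified in %o1. */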
7328 for (unsigned int i = 0; i < nwords; i++)
7329 XVECEXP (regs, 0, i)
7330 = gen_rtx_EXPR_LIST (VOIDmode,
7331 gen_rtx_REG (word_mode, regno + i),
7332 GEN_INT (UNITS_PER_WORD * i));
7333
7334 return regs;
7335 }
7336
7337 /* Used by function_arg and function_value to implement the conventions
7338 of the 64-bit ABI for passing and returning BLKmode vectors.
7339 Return an expression valid as a return value for the TARGET_FUNCTION_ARG
7340 and TARGET_FUNCTION_VALUE hooks.
7341
7342 SIZE is the size in bytes of the vector.
7343 SLOTNO is the index number of the argument's slot in the parameter array.
7344 NAMED is true if this argument is a named parameter
7345 (otherwise it is an extra parameter matching an ellipsis).
7346 REGNO is the hard register the vector will be passed in. */
7347
7348 static rtx
7349 function_arg_vector_value (int size, int slotno, bool named, int regno)
7350 {
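/* Named vector arguments are passed in FP registers, where each 8-byte
word spans two single-precision regnos, so the register number advances
by two per word; unnamed ones go to integer registers and advance by
one per word. Hence the multiplier below. */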
7351 const int mult = (named ? 2 : 1);
7352 unsigned int nwords;
7353
7354 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7355 nwords = 1;
7356 else
7357 nwords = CEIL_NWORDS (size);
7358
7359 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7360
7361 if (size < UNITS_PER_WORD)
7362 XVECEXP (regs, 0, 0)
7363 = gen_rtx_EXPR_LIST (VOIDmode,
7364 gen_rtx_REG (SImode, regno),
7365 const0_rtx);
7366 else
7367 for (unsigned int i = 0; i < nwords; i++)
7368 XVECEXP (regs, 0, i)
7369 = gen_rtx_EXPR_LIST (VOIDmode,
7370 gen_rtx_REG (word_mode, regno + i * mult),
7371 GEN_INT (i * UNITS_PER_WORD));
7372
7373 return regs;
7374 }
7375
7376 /* Determine where to put an argument to a function.
7377 Value is zero to push the argument on the stack,
7378 or a hard register in which to store the argument.
7379
7380 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7381 the preceding args and about the function being called.
7382 ARG is a description of the argument.
7383 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7384 TARGET_FUNCTION_INCOMING_ARG. */
7385
7386 static rtx
7387 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7388 bool incoming)
7389 {
7390 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7391 const int regbase
7392 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7393 int slotno, regno, padding;
7394 tree type = arg.type;
7395 machine_mode mode = arg.mode;
7396 enum mode_class mclass = GET_MODE_CLASS (mode);
7397 bool named = arg.named;
7398
7399 slotno
7400 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7401 if (slotno == -1)
7402 return 0;
7403
7404 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7405 if (type && VECTOR_INTEGER_TYPE_P (type))
7406 mclass = MODE_FLOAT;
7407
7408 if (TARGET_ARCH32)
7409 return gen_rtx_REG (mode, regno);
7410
7411 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7412 and are promoted to registers if possible. */
7413 if (type && TREE_CODE (type) == RECORD_TYPE)
7414 {
7415 const int size = int_size_in_bytes (type);
7416 gcc_assert (size <= 16);
7417
7418 return function_arg_record_value (type, mode, slotno, named, regbase);
7419 }
7420
7421 /* Unions up to 16 bytes in size are passed in integer registers. */
7422 else if (type && TREE_CODE (type) == UNION_TYPE)
7423 {
7424 const int size = int_size_in_bytes (type);
7425 gcc_assert (size <= 16);
7426
7427 return function_arg_union_value (size, mode, slotno, regno);
7428 }
7429
7430 /* Floating-point vectors up to 16 bytes are passed in registers. */
7431 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7432 {
7433 const int size = int_size_in_bytes (type);
7434 gcc_assert (size <= 16);
7435
7436 return function_arg_vector_value (size, slotno, named, regno);
7437 }
7438
7439 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7440 but also have the slot allocated for them.
7441 If no prototype is in scope, fp values in register slots get passed
7442 in two places, either fp regs and int regs or fp regs and memory. */
7443 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7444 && SPARC_FP_REG_P (regno))
7445 {
7446 rtx reg = gen_rtx_REG (mode, regno);
7447 if (cum->prototype_p || cum->libcall_p)
7448 return reg;
7449 else
7450 {
7451 rtx v0, v1;
7452
7453 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7454 {
7455 int intreg;
7456
7457 /* On incoming, we don't need to know that the value
7458 is passed in %f0 and %i0, and it confuses other parts
7459 causing needless spillage even on the simplest cases. */
7460 if (incoming)
7461 return reg;
7462
7463 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7464 + (regno - SPARC_FP_ARG_FIRST) / 2);
7465
7466 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7467 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7468 const0_rtx);
7469 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7470 }
7471 else
7472 {
7473 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7474 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7475 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7476 }
7477 }
7478 }
7479
7480 /* All other aggregate types are passed in an integer register in a mode
7481 corresponding to the size of the type. */
7482 else if (type && AGGREGATE_TYPE_P (type))
7483 {
7484 const int size = int_size_in_bytes (type);
7485 gcc_assert (size <= 16);
7486
7487 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7488 }
7489
7490 return gen_rtx_REG (mode, regno);
7491 }
7492
7493 /* Handle the TARGET_FUNCTION_ARG target hook. */
7494
7495 static rtx
7496 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7497 {
7498 return sparc_function_arg_1 (cum, arg, false);
7499 }
7500
7501 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7502
7503 static rtx
7504 sparc_function_incoming_arg (cumulative_args_t cum,
7505 const function_arg_info &arg)
7506 {
7507 return sparc_function_arg_1 (cum, arg, true);
7508 }
7509
7510 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7511
7512 static unsigned int
7513 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7514 {
7515 return ((TARGET_ARCH64
7516 && (GET_MODE_ALIGNMENT (mode) == 128
7517 || (type && TYPE_ALIGN (type) == 128)))
7518 ? 128
7519 : PARM_BOUNDARY);
7520 }
7521
7522 /* For an arg passed partly in registers and partly in memory,
7523 this is the number of bytes of registers used.
7524 For args passed entirely in registers or entirely in memory, zero.
7525
7526 Any arg that starts in the first 6 regs but won't entirely fit in them
7527 needs partial registers on v8. On v9, structures with integer
7528 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7529 values that begin in the last fp reg [where "last fp reg" varies with the
7530 mode] will be split between that reg and memory. */
7531
7532 static int
7533 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7534 {
7535 int slotno, regno, padding;
7536
7537 /* We pass false for incoming here; it doesn't matter. */
7538 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7539 arg.named, false, &regno, &padding);
7540
7541 if (slotno == -1)
7542 return 0;
7543
7544 if (TARGET_ARCH32)
7545 {
7546 /* We are guaranteed by pass_by_reference that the size of the
7547 argument is not greater than 8 bytes, so we only need to return
7548 one word if the argument is partially passed in registers. */
7549 const int size = GET_MODE_SIZE (arg.mode);
7550
7551 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7552 return UNITS_PER_WORD;
7553 }
7554 else
7555 {
7556 /* We are guaranteed by pass_by_reference that the size of the
7557 argument is not greater than 16 bytes, so we only need to return
7558 one word if the argument is partially passed in registers. */
7559 if (arg.aggregate_type_p ())
7560 {
7561 const int size = int_size_in_bytes (arg.type);
7562
7563 if (size > UNITS_PER_WORD
7564 && (slotno == SPARC_INT_ARG_MAX - 1
7565 || slotno == SPARC_FP_ARG_MAX - 1))
7566 return UNITS_PER_WORD;
7567 }
7568 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7569 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7570 || (arg.type && VECTOR_TYPE_P (arg.type)))
7571 && !(TARGET_FPU && arg.named)))
7572 {
7573 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7574 ? int_size_in_bytes (arg.type)
7575 : GET_MODE_SIZE (arg.mode);
7576
7577 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7578 return UNITS_PER_WORD;
7579 }
7580 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7581 || (arg.type && VECTOR_TYPE_P (arg.type)))
7582 {
7583 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7584 ? int_size_in_bytes (arg.type)
7585 : GET_MODE_SIZE (arg.mode);
7586
7587 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7588 return UNITS_PER_WORD;
7589 }
7590 }
7591
7592 return 0;
7593 }
7594
7595 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7596 Update the data in CUM to advance over argument ARG. */
7597
7598 static void
7599 sparc_function_arg_advance (cumulative_args_t cum_v,
7600 const function_arg_info &arg)
7601 {
7602 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7603 tree type = arg.type;
7604 machine_mode mode = arg.mode;
7605 int regno, padding;
7606
7607 /* We pass false for incoming here; it doesn't matter. */
7608 function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7609
7610 /* If argument requires leading padding, add it. */
7611 cum->words += padding;
7612
7613 if (TARGET_ARCH32)
7614 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7615 else
7616 {
7617 /* For types that can have BLKmode, get the size from the type. */
7618 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7619 {
7620 const int size = int_size_in_bytes (type);
7621
7622 /* See comment in function_arg_record_value for empty structures. */
7623 if (size <= 0)
7624 cum->words++;
7625 else
7626 cum->words += CEIL_NWORDS (size);
7627 }
7628 else
7629 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7630 }
7631 }
7632
7633 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7634 are always stored left-justified in their argument slot. */
7635
7636 static pad_direction
7637 sparc_function_arg_padding (machine_mode mode, const_tree type)
7638 {
7639 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7640 return PAD_UPWARD;
7641
7642 /* Fall back to the default. */
7643 return default_function_arg_padding (mode, type);
7644 }
7645
7646 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7647 Specify whether to return the return value in memory. */
7648
7649 static bool
7650 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7651 {
7652 if (TARGET_ARCH32)
7653 /* Original SPARC 32-bit ABI says that structures and unions, and
7654 quad-precision floats are returned in memory. But note that the
7655 first part is implemented through -fpcc-struct-return being the
7656 default, so here we only implement -freg-struct-return instead.
7657 All other base types are returned in registers.
7658
7659 Extended ABI (as implemented by the Sun compiler) says that
7660 all complex floats are returned in registers (8 FP registers
7661 at most for '_Complex long double'). Return all complex integers
7662 in registers (4 at most for '_Complex long long').
7663
7664 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7665 integers are returned like floats of the same size, that is in
7666 registers up to 8 bytes and in memory otherwise. Return all
7667 vector floats in memory like structure and unions; note that
7668 they always have BLKmode like the latter. */
7669 return (TYPE_MODE (type) == BLKmode
7670 || TYPE_MODE (type) == TFmode
7671 || (TREE_CODE (type) == VECTOR_TYPE
7672 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7673 else
7674 /* Original SPARC 64-bit ABI says that structures and unions
7675 smaller than 32 bytes are returned in registers, as well as
7676 all other base types.
7677
7678 Extended ABI (as implemented by the Sun compiler) says that all
7679 complex floats are returned in registers (8 FP registers at most
7680 for '_Complex long double'). Return all complex integers in
7681 registers (4 at most for '_Complex TItype').
7682
7683 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7684 integers are returned like floats of the same size, that is in
7685 registers. Return all vector floats like structure and unions;
7686 note that they always have BLKmode like the latter. */
7687 return (TYPE_MODE (type) == BLKmode
7688 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7689 }
7690
7691 /* Handle the TARGET_STRUCT_VALUE target hook.
7692 Return where to find the structure return value address. */
7693
7694 static rtx
7695 sparc_struct_value_rtx (tree fndecl, int incoming)
7696 {
7697 if (TARGET_ARCH64)
7698 return NULL_RTX;
7699 else
7700 {
7701 rtx mem;
7702
7703 if (incoming)
7704 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7705 STRUCT_VALUE_OFFSET));
7706 else
7707 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7708 STRUCT_VALUE_OFFSET));
7709
7710 /* Only follow the SPARC ABI for fixed-size structure returns.
7711 Variable size structure returns are handled per the normal
7712 procedures in GCC. This is enabled by -mstd-struct-return. */
7713 if (incoming == 2
7714 && sparc_std_struct_return
7715 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7716 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7717 {
7718 /* We must check and adjust the return address, as it is optional
7719 as to whether the return object is really provided. */
7720 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7721 rtx scratch = gen_reg_rtx (SImode);
7722 rtx_code_label *endlab = gen_label_rtx ();
7723
7724 /* Calculate the return object size. */
7725 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7726 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7727 /* Construct a temporary return value. */
7728 rtx temp_val
7729 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7730
7731 /* Implement SPARC 32-bit psABI callee return struct checking:
7732
7733 Fetch the instruction where we will return to and see if
7734 it's an unimp instruction (the most significant 10 bits
7735 will be zero). */
7736 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7737 plus_constant (Pmode,
7738 ret_reg, 8)));
7739 /* Assume the size is valid and pre-adjust. */
7740 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7741 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7742 0, endlab);
7743 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7744 /* Write the address of the memory pointed to by temp_val into
7745 the memory pointed to by mem. */
7746 emit_move_insn (mem, XEXP (temp_val, 0));
7747 emit_label (endlab);
7748 }
7749
7750 return mem;
7751 }
7752 }
7753
7754 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7755 For v9, function return values are subject to the same rules as arguments,
7756 except that up to 32 bytes may be returned in registers. */
7757
7758 static rtx
7759 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7760 {
7761 /* Beware that the two values are swapped here with respect to function_arg. */
7762 const int regbase
7763 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7764 enum mode_class mclass = GET_MODE_CLASS (mode);
7765 int regno;
7766
7767 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7768 Note that integer vectors larger than 16 bytes have BLKmode so
7769 they need to be handled like floating-point vectors below. */
7770 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7771 mclass = MODE_FLOAT;
7772
7773 if (TARGET_ARCH64 && type)
7774 {
7775 /* Structures up to 32 bytes in size are returned in registers. */
7776 if (TREE_CODE (type) == RECORD_TYPE)
7777 {
7778 const int size = int_size_in_bytes (type);
7779 gcc_assert (size <= 32);
7780
7781 return function_arg_record_value (type, mode, 0, true, regbase);
7782 }
7783
7784 /* Unions up to 32 bytes in size are returned in integer registers. */
7785 else if (TREE_CODE (type) == UNION_TYPE)
7786 {
7787 const int size = int_size_in_bytes (type);
7788 gcc_assert (size <= 32);
7789
7790 return function_arg_union_value (size, mode, 0, regbase);
7791 }
7792
7793 /* Vectors up to 32 bytes are returned in FP registers. */
7794 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7795 {
7796 const int size = int_size_in_bytes (type);
7797 gcc_assert (size <= 32);
7798
7799 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7800 }
7801
7802 /* Objects that require it are returned in FP registers. */
7803 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7804 ;
7805
7806 /* All other aggregate types are returned in an integer register in a
7807 mode corresponding to the size of the type. */
7808 else if (AGGREGATE_TYPE_P (type))
7809 {
7810 /* All other aggregate types are passed in an integer register
7811 in a mode corresponding to the size of the type. */
7812 const int size = int_size_in_bytes (type);
7813 gcc_assert (size <= 32);
7814
7815 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7816
7817 /* ??? We probably should have made the same ABI change in
7818 3.4.0 as the one we made for unions. The latter was
7819 required by the SCD though, while the former is not
7820 specified, so we favored compatibility and efficiency.
7821
7822 Now we're stuck for aggregates larger than 16 bytes,
7823 because OImode vanished in the meantime. Let's not
7824 try to be unduly clever, and simply follow the ABI
7825 for unions in that case. */
7826 if (mode == BLKmode)
7827 return function_arg_union_value (size, mode, 0, regbase);
7828 else
7829 mclass = MODE_INT;
7830 }
7831
7832 /* We should only have pointer and integer types at this point. This
7833 must match sparc_promote_function_mode. */
7834 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7835 mode = word_mode;
7836 }
7837
7838 /* We should only have pointer and integer types at this point, except with
7839 -freg-struct-return. This must match sparc_promote_function_mode. */
7840 else if (TARGET_ARCH32
7841 && !(type && AGGREGATE_TYPE_P (type))
7842 && mclass == MODE_INT
7843 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7844 mode = word_mode;
7845
7846 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7847 regno = SPARC_FP_ARG_FIRST;
7848 else
7849 regno = regbase;
7850
7851 return gen_rtx_REG (mode, regno);
7852 }
7853
7854 /* Handle TARGET_FUNCTION_VALUE.
7855 On the SPARC, the value is found in the first "output" register, but the
7856 called function leaves it in the first "input" register. */
7857
7858 static rtx
7859 sparc_function_value (const_tree valtype,
7860 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7861 bool outgoing)
7862 {
7863 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7864 }
7865
7866 /* Handle TARGET_LIBCALL_VALUE. */
7867
7868 static rtx
7869 sparc_libcall_value (machine_mode mode,
7870 const_rtx fun ATTRIBUTE_UNUSED)
7871 {
7872 return sparc_function_value_1 (NULL_TREE, mode, false);
7873 }
7874
7875 /* Handle TARGET_FUNCTION_VALUE_REGNO_P.
7876 On the SPARC, the first "output" reg is used for integer values, and the
7877 first floating point register is used for floating point values. */
7878
7879 static bool
7880 sparc_function_value_regno_p (const unsigned int regno)
7881 {
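/* In this port's register numbering, %o0 is regno 8 and %f0 is regno 32. */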
7882 return (regno == 8 || (TARGET_FPU && regno == 32));
7883 }
7884
7885 /* Do what is necessary for `va_start'. We look at the current function
7886 to determine if stdarg or varargs is used and return the address of
7887 the first unnamed parameter. */
7888
7889 static rtx
7890 sparc_builtin_saveregs (void)
7891 {
7892 int first_reg = crtl->args.info.words;
7893 rtx address;
7894 int regno;
7895
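/* Spill the remaining incoming integer argument registers to their stack
argument slots, then return the address of the slot that holds the
first unnamed argument. */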
7896 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7897 emit_move_insn (gen_rtx_MEM (word_mode,
7898 gen_rtx_PLUS (Pmode,
7899 frame_pointer_rtx,
7900 GEN_INT (FIRST_PARM_OFFSET (0)
7901 + (UNITS_PER_WORD
7902 * regno)))),
7903 gen_rtx_REG (word_mode,
7904 SPARC_INCOMING_INT_ARG_FIRST + regno));
7905
7906 address = gen_rtx_PLUS (Pmode,
7907 frame_pointer_rtx,
7908 GEN_INT (FIRST_PARM_OFFSET (0)
7909 + UNITS_PER_WORD * first_reg));
7910
7911 return address;
7912 }
7913
7914 /* Implement `va_start' for stdarg. */
7915
7916 static void
7917 sparc_va_start (tree valist, rtx nextarg)
7918 {
7919 nextarg = expand_builtin_saveregs ();
7920 std_expand_builtin_va_start (valist, nextarg);
7921 }
7922
7923 /* Implement `va_arg' for stdarg. */
7924
7925 static tree
7926 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7927 gimple_seq *post_p)
7928 {
7929 HOST_WIDE_INT size, rsize, align;
7930 tree addr, incr;
7931 bool indirect;
7932 tree ptrtype = build_pointer_type (type);
7933
7934 if (pass_va_arg_by_reference (type))
7935 {
7936 indirect = true;
7937 size = rsize = UNITS_PER_WORD;
7938 align = 0;
7939 }
7940 else
7941 {
7942 indirect = false;
7943 size = int_size_in_bytes (type);
7944 rsize = ROUND_UP (size, UNITS_PER_WORD);
7945 align = 0;
7946
7947 if (TARGET_ARCH64)
7948 {
7949 /* For SPARC64, objects requiring 16-byte alignment get it. */
7950 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7951 align = 2 * UNITS_PER_WORD;
7952
7953 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7954 are left-justified in their slots. */
7955 if (AGGREGATE_TYPE_P (type))
7956 {
7957 if (size == 0)
7958 size = rsize = UNITS_PER_WORD;
7959 else
7960 size = rsize;
7961 }
7962 }
7963 }
7964
7965 incr = valist;
7966 if (align)
7967 {
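/* Round the argument pointer up to the required alignment:
incr = (incr + align - 1) & -align. */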
7968 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7969 incr = fold_convert (sizetype, incr);
7970 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7971 size_int (-align));
7972 incr = fold_convert (ptr_type_node, incr);
7973 }
7974
7975 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7976 addr = incr;
7977
7978 if (BYTES_BIG_ENDIAN && size < rsize)
7979 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7980
7981 if (indirect)
7982 {
7983 addr = fold_convert (build_pointer_type (ptrtype), addr);
7984 addr = build_va_arg_indirect_ref (addr);
7985 }
7986
7987 /* If the address isn't aligned properly for the type, we need a temporary.
7988 FIXME: This is inefficient, usually we can do this in registers. */
7989 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7990 {
7991 tree tmp = create_tmp_var (type, "va_arg_tmp");
7992 tree dest_addr = build_fold_addr_expr (tmp);
7993 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7994 3, dest_addr, addr, size_int (rsize));
7995 TREE_ADDRESSABLE (tmp) = 1;
7996 gimplify_and_add (copy, pre_p);
7997 addr = dest_addr;
7998 }
7999
8000 else
8001 addr = fold_convert (ptrtype, addr);
8002
8003 incr = fold_build_pointer_plus_hwi (incr, rsize);
8004 gimplify_assign (valist, incr, post_p);
8005
8006 return build_va_arg_indirect_ref (addr);
8007 }
8008 \f
8009 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8010 Specify whether the vector mode is supported by the hardware. */
8011
8012 static bool
8013 sparc_vector_mode_supported_p (machine_mode mode)
8014 {
8015 return TARGET_VIS && VECTOR_MODE_P (mode);
8016 }
8017 \f
8018 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8019
8020 static machine_mode
8021 sparc_preferred_simd_mode (scalar_mode mode)
8022 {
8023 if (TARGET_VIS)
8024 switch (mode)
8025 {
8026 case E_SImode:
8027 return V2SImode;
8028 case E_HImode:
8029 return V4HImode;
8030 case E_QImode:
8031 return V8QImode;
8032
8033 default:;
8034 }
8035
8036 return word_mode;
8037 }
8038 \f
8039 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8040
8041 static bool
8042 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8043 {
8044 /* Do not fold unconditional jumps that have been created for crossing
8045 partition boundaries. */
8046 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8047 return false;
8048
8049 return true;
8050 }
8051
8052 /* Return the string to output an unconditional branch to LABEL, which is
8053 the operand number of the label.
8054
8055 DEST is the destination insn (i.e. the label), INSN is the source. */
8056
8057 const char *
8058 output_ubranch (rtx dest, rtx_insn *insn)
8059 {
8060 static char string[64];
8061 bool v9_form = false;
8062 int delta;
8063 char *p;
8064
8065 /* Even if we are trying to use cbcond for this, evaluate
8066 whether we can use V9 branches as our backup plan. */
8067 delta = 5000000;
8068 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8069 delta = (INSN_ADDRESSES (INSN_UID (dest))
8070 - INSN_ADDRESSES (INSN_UID (insn)));
8071
8072 /* Leave some instructions for "slop". */
8073 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8074 v9_form = true;
8075
8076 if (TARGET_CBCOND)
8077 {
8078 bool emit_nop = emit_cbcond_nop (insn);
8079 bool far = false;
8080 const char *rval;
8081
8082 if (delta < -500 || delta > 500)
8083 far = true;
8084
8085 if (far)
8086 {
8087 if (v9_form)
8088 rval = "ba,a,pt\t%%xcc, %l0";
8089 else
8090 rval = "b,a\t%l0";
8091 }
8092 else
8093 {
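/* cwbe %g0, %g0, <label> compares %g0 against itself and therefore always
branches; it is the cbcond form of an unconditional branch, usable when
the target lies within the cbcond range. */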
8094 if (emit_nop)
8095 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8096 else
8097 rval = "cwbe\t%%g0, %%g0, %l0";
8098 }
8099 return rval;
8100 }
8101
8102 if (v9_form)
8103 strcpy (string, "ba%*,pt\t%%xcc, ");
8104 else
8105 strcpy (string, "b%*\t");
8106
8107 p = strchr (string, '\0');
8108 *p++ = '%';
8109 *p++ = 'l';
8110 *p++ = '0';
8111 *p++ = '%';
8112 *p++ = '(';
8113 *p = '\0';
8114
8115 return string;
8116 }
8117
8118 /* Return the string to output a conditional branch to LABEL, which is
8119 the operand number of the label. OP is the conditional expression.
8120 XEXP (OP, 0) is assumed to be a condition code register (integer or
8121 floating point) and its mode specifies what kind of comparison we made.
8122
8123 DEST is the destination insn (i.e. the label), INSN is the source.
8124
8125 REVERSED is nonzero if we should reverse the sense of the comparison.
8126
8127 ANNUL is nonzero if we should generate an annulling branch. */
8128
8129 const char *
8130 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8131 rtx_insn *insn)
8132 {
8133 static char string[64];
8134 enum rtx_code code = GET_CODE (op);
8135 rtx cc_reg = XEXP (op, 0);
8136 machine_mode mode = GET_MODE (cc_reg);
8137 const char *labelno, *branch;
8138 int spaces = 8, far;
8139 char *p;
8140
8141 /* v9 branches are limited to +-1MB. If it is too far away,
8142 change
8143
8144 bne,pt %xcc, .LC30
8145
8146 to
8147
8148 be,pn %xcc, .+12
8149 nop
8150 ba .LC30
8151
8152 and
8153
8154 fbne,a,pn %fcc2, .LC29
8155
8156 to
8157
8158 fbe,pt %fcc2, .+16
8159 nop
8160 ba .LC29 */
8161
8162 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8163 if (reversed ^ far)
8164 {
8165 /* Reversal of FP compares takes care -- an ordered compare
8166 becomes an unordered compare and vice versa. */
8167 if (mode == CCFPmode || mode == CCFPEmode)
8168 code = reverse_condition_maybe_unordered (code);
8169 else
8170 code = reverse_condition (code);
8171 }
8172
8173 /* Start by writing the branch condition. */
8174 if (mode == CCFPmode || mode == CCFPEmode)
8175 {
8176 switch (code)
8177 {
8178 case NE:
8179 branch = "fbne";
8180 break;
8181 case EQ:
8182 branch = "fbe";
8183 break;
8184 case GE:
8185 branch = "fbge";
8186 break;
8187 case GT:
8188 branch = "fbg";
8189 break;
8190 case LE:
8191 branch = "fble";
8192 break;
8193 case LT:
8194 branch = "fbl";
8195 break;
8196 case UNORDERED:
8197 branch = "fbu";
8198 break;
8199 case ORDERED:
8200 branch = "fbo";
8201 break;
8202 case UNGT:
8203 branch = "fbug";
8204 break;
8205 case UNLT:
8206 branch = "fbul";
8207 break;
8208 case UNEQ:
8209 branch = "fbue";
8210 break;
8211 case UNGE:
8212 branch = "fbuge";
8213 break;
8214 case UNLE:
8215 branch = "fbule";
8216 break;
8217 case LTGT:
8218 branch = "fblg";
8219 break;
8220 default:
8221 gcc_unreachable ();
8222 }
8223
8224 /* ??? !v9: FP branches cannot be preceded by another floating point
8225 insn. Because there is currently no concept of pre-delay slots,
8226 we can fix this only by always emitting a nop before a floating
8227 point branch. */
8228
8229 string[0] = '\0';
8230 if (! TARGET_V9)
8231 strcpy (string, "nop\n\t");
8232 strcat (string, branch);
8233 }
8234 else
8235 {
8236 switch (code)
8237 {
8238 case NE:
8239 if (mode == CCVmode || mode == CCXVmode)
8240 branch = "bvs";
8241 else
8242 branch = "bne";
8243 break;
8244 case EQ:
8245 if (mode == CCVmode || mode == CCXVmode)
8246 branch = "bvc";
8247 else
8248 branch = "be";
8249 break;
8250 case GE:
8251 if (mode == CCNZmode || mode == CCXNZmode)
8252 branch = "bpos";
8253 else
8254 branch = "bge";
8255 break;
8256 case GT:
8257 branch = "bg";
8258 break;
8259 case LE:
8260 branch = "ble";
8261 break;
8262 case LT:
8263 if (mode == CCNZmode || mode == CCXNZmode)
8264 branch = "bneg";
8265 else
8266 branch = "bl";
8267 break;
8268 case GEU:
8269 branch = "bgeu";
8270 break;
8271 case GTU:
8272 branch = "bgu";
8273 break;
8274 case LEU:
8275 branch = "bleu";
8276 break;
8277 case LTU:
8278 branch = "blu";
8279 break;
8280 default:
8281 gcc_unreachable ();
8282 }
8283 strcpy (string, branch);
8284 }
8285 spaces -= strlen (branch);
8286 p = strchr (string, '\0');
8287
8288 /* Now add the annulling, the label, and a possible noop. */
8289 if (annul && ! far)
8290 {
8291 strcpy (p, ",a");
8292 p += 2;
8293 spaces -= 2;
8294 }
8295
8296 if (TARGET_V9)
8297 {
8298 rtx note;
8299 int v8 = 0;
8300
8301 if (! far && insn && INSN_ADDRESSES_SET_P ())
8302 {
8303 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8304 - INSN_ADDRESSES (INSN_UID (insn)));
8305 /* Leave some instructions for "slop". */
8306 if (delta < -260000 || delta >= 260000)
8307 v8 = 1;
8308 }
8309
8310 switch (mode)
8311 {
8312 case E_CCmode:
8313 case E_CCNZmode:
8314 case E_CCCmode:
8315 case E_CCVmode:
8316 labelno = "%%icc, ";
8317 if (v8)
8318 labelno = "";
8319 break;
8320 case E_CCXmode:
8321 case E_CCXNZmode:
8322 case E_CCXCmode:
8323 case E_CCXVmode:
8324 labelno = "%%xcc, ";
8325 gcc_assert (!v8);
8326 break;
8327 case E_CCFPmode:
8328 case E_CCFPEmode:
8329 {
8330 static char v9_fcc_labelno[] = "%%fccX, ";
8331 /* Set the char indicating the number of the fcc reg to use. */
8332 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8333 labelno = v9_fcc_labelno;
8334 if (v8)
8335 {
8336 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8337 labelno = "";
8338 }
8339 }
8340 break;
8341 default:
8342 gcc_unreachable ();
8343 }
8344
8345 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8346 {
8347 strcpy (p,
8348 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8349 >= profile_probability::even ()) ^ far)
8350 ? ",pt" : ",pn");
8351 p += 3;
8352 spaces -= 3;
8353 }
8354 }
8355 else
8356 labelno = "";
8357
8358 if (spaces > 0)
8359 *p++ = '\t';
8360 else
8361 *p++ = ' ';
8362 strcpy (p, labelno);
8363 p = strchr (p, '\0');
8364 if (far)
8365 {
8366 strcpy (p, ".+12\n\t nop\n\tb\t");
8367 /* Skip the next insn if requested or
8368 if we know that it will be a nop. */
8369 if (annul || ! final_sequence)
8370 p[3] = '6';
8371 p += 14;
8372 }
8373 *p++ = '%';
8374 *p++ = 'l';
8375 *p++ = label + '0';
8376 *p++ = '%';
8377 *p++ = '#';
8378 *p = '\0';
8379
8380 return string;
8381 }
8382
8383 /* Emit a library call comparison between floating point X and Y.
8384 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8385 Return the new operator to be used in the comparison sequence.
8386
8387 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8388 values as arguments instead of the TFmode registers themselves;
8389 that's why we cannot call emit_float_lib_cmp. */
8390
8391 rtx
8392 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8393 {
8394 const char *qpfunc;
8395 rtx slot0, slot1, result, tem, tem2, libfunc;
8396 machine_mode mode;
8397 enum rtx_code new_comparison;
8398
8399 switch (comparison)
8400 {
8401 case EQ:
8402 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8403 break;
8404
8405 case NE:
8406 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8407 break;
8408
8409 case GT:
8410 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8411 break;
8412
8413 case GE:
8414 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8415 break;
8416
8417 case LT:
8418 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8419 break;
8420
8421 case LE:
8422 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8423 break;
8424
8425 case ORDERED:
8426 case UNORDERED:
8427 case UNGT:
8428 case UNLT:
8429 case UNEQ:
8430 case UNGE:
8431 case UNLE:
8432 case LTGT:
8433 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8434 break;
8435
8436 default:
8437 gcc_unreachable ();
8438 }
8439
8440 if (TARGET_ARCH64)
8441 {
8442 if (MEM_P (x))
8443 {
8444 tree expr = MEM_EXPR (x);
8445 if (expr)
8446 mark_addressable (expr);
8447 slot0 = x;
8448 }
8449 else
8450 {
8451 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8452 emit_move_insn (slot0, x);
8453 }
8454
8455 if (MEM_P (y))
8456 {
8457 tree expr = MEM_EXPR (y);
8458 if (expr)
8459 mark_addressable (expr);
8460 slot1 = y;
8461 }
8462 else
8463 {
8464 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8465 emit_move_insn (slot1, y);
8466 }
8467
8468 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8469 emit_library_call (libfunc, LCT_NORMAL,
8470 DImode,
8471 XEXP (slot0, 0), Pmode,
8472 XEXP (slot1, 0), Pmode);
8473 mode = DImode;
8474 }
8475 else
8476 {
8477 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8478 emit_library_call (libfunc, LCT_NORMAL,
8479 SImode,
8480 x, TFmode, y, TFmode);
8481 mode = SImode;
8482 }
8483
8484
8485 /* Immediately move the result of the libcall into a pseudo
8486 register so reload doesn't clobber the value if it needs
8487 the return register for a spill reg. */
8488 result = gen_reg_rtx (mode);
8489 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8490
8491 switch (comparison)
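/* Decode the libcall result. The cases below assume the usual encoding of
_Q_cmp/_Qp_cmp: 0 = equal, 1 = less, 2 = greater, 3 = unordered; e.g.
UNORDERED tests result == 3 and UNLT tests the low bit (less or
unordered). */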
8492 {
8493 default:
8494 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8495 case ORDERED:
8496 case UNORDERED:
8497 new_comparison = (comparison == UNORDERED ? EQ : NE);
8498 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8499 case UNGT:
8500 case UNGE:
8501 new_comparison = (comparison == UNGT ? GT : NE);
8502 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8503 case UNLE:
8504 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8505 case UNLT:
8506 tem = gen_reg_rtx (mode);
8507 if (TARGET_ARCH32)
8508 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8509 else
8510 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8511 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8512 case UNEQ:
8513 case LTGT:
8514 tem = gen_reg_rtx (mode);
8515 if (TARGET_ARCH32)
8516 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8517 else
8518 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8519 tem2 = gen_reg_rtx (mode);
8520 if (TARGET_ARCH32)
8521 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8522 else
8523 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8524 new_comparison = (comparison == UNEQ ? EQ : NE);
8525 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8526 }
8527
8528 gcc_unreachable ();
8529 }
8530
8531 /* Generate an unsigned DImode to FP conversion. This is the same code
8532 optabs would emit if we didn't have TFmode patterns. */
8533
8534 void
8535 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8536 {
8537 rtx i0, i1, f0, in, out;
8538
8539 out = operands[0];
8540 in = force_reg (DImode, operands[1]);
8541 rtx_code_label *neglab = gen_label_rtx ();
8542 rtx_code_label *donelab = gen_label_rtx ();
8543 i0 = gen_reg_rtx (DImode);
8544 i1 = gen_reg_rtx (DImode);
8545 f0 = gen_reg_rtx (mode);
8546
8547 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8548
8549 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8550 emit_jump_insn (gen_jump (donelab));
8551 emit_barrier ();
8552
8553 emit_label (neglab);
8554
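/* The value has its top bit set, so it is out of range for a signed
conversion. Halve it while folding the low bit back in (round to odd),
convert, and double the result; the sticky bit keeps the final rounding
correct. */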
8555 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8556 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8557 emit_insn (gen_iordi3 (i0, i0, i1));
8558 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8559 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8560
8561 emit_label (donelab);
8562 }
8563
8564 /* Generate an FP to unsigned DImode conversion. This is the same code
8565 optabs would emit if we didn't have TFmode patterns. */
8566
8567 void
8568 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8569 {
8570 rtx i0, i1, f0, in, out, limit;
8571
8572 out = operands[0];
8573 in = force_reg (mode, operands[1]);
8574 rtx_code_label *neglab = gen_label_rtx ();
8575 rtx_code_label *donelab = gen_label_rtx ();
8576 i0 = gen_reg_rtx (DImode);
8577 i1 = gen_reg_rtx (DImode);
8578 limit = gen_reg_rtx (mode);
8579 f0 = gen_reg_rtx (mode);
8580
8581 emit_move_insn (limit,
8582 const_double_from_real_value (
8583 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8584 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8585
8586 emit_insn (gen_rtx_SET (out,
8587 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8588 emit_jump_insn (gen_jump (donelab));
8589 emit_barrier ();
8590
8591 emit_label (neglab);
8592
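/* The value is at least 2^63, outside the signed DImode range. Subtract
2^63, convert the difference, then add 2^63 back by setting the top bit
of the integer result with an XOR. */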
8593 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8594 emit_insn (gen_rtx_SET (i0,
8595 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8596 emit_insn (gen_movdi (i1, const1_rtx));
8597 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8598 emit_insn (gen_xordi3 (out, i0, i1));
8599
8600 emit_label (donelab);
8601 }
8602
8603 /* Return the string to output a compare and branch instruction to DEST.
8604 DEST is the destination insn (i.e. the label), INSN is the source,
8605 and OP is the conditional expression. */
8606
8607 const char *
8608 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8609 {
8610 machine_mode mode = GET_MODE (XEXP (op, 0));
8611 enum rtx_code code = GET_CODE (op);
8612 const char *cond_str, *tmpl;
8613 int far, emit_nop, len;
8614 static char string[64];
8615 char size_char;
8616
8617 /* Compare and Branch is limited to +-2KB. If it is too far away,
8618 change
8619
8620 cxbne X, Y, .LC30
8621
8622 to
8623
8624 cxbe X, Y, .+16
8625 nop
8626 ba,pt xcc, .LC30
8627 nop */
8628
8629 len = get_attr_length (insn);
8630
8631 far = len == 4;
8632 emit_nop = len == 2;
8633
8634 if (far)
8635 code = reverse_condition (code);
8636
8637 size_char = ((mode == SImode) ? 'w' : 'x');
8638
8639 switch (code)
8640 {
8641 case NE:
8642 cond_str = "ne";
8643 break;
8644
8645 case EQ:
8646 cond_str = "e";
8647 break;
8648
8649 case GE:
8650 cond_str = "ge";
8651 break;
8652
8653 case GT:
8654 cond_str = "g";
8655 break;
8656
8657 case LE:
8658 cond_str = "le";
8659 break;
8660
8661 case LT:
8662 cond_str = "l";
8663 break;
8664
8665 case GEU:
8666 cond_str = "cc";
8667 break;
8668
8669 case GTU:
8670 cond_str = "gu";
8671 break;
8672
8673 case LEU:
8674 cond_str = "leu";
8675 break;
8676
8677 case LTU:
8678 cond_str = "cs";
8679 break;
8680
8681 default:
8682 gcc_unreachable ();
8683 }
8684
8685 if (far)
8686 {
8687 int veryfar = 1, delta;
8688
8689 if (INSN_ADDRESSES_SET_P ())
8690 {
8691 delta = (INSN_ADDRESSES (INSN_UID (dest))
8692 - INSN_ADDRESSES (INSN_UID (insn)));
8693 /* Leave some instructions for "slop". */
8694 if (delta >= -260000 && delta < 260000)
8695 veryfar = 0;
8696 }
8697
8698 if (veryfar)
8699 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8700 else
8701 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8702 }
8703 else
8704 {
8705 if (emit_nop)
8706 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8707 else
8708 tmpl = "c%cb%s\t%%1, %%2, %%3";
8709 }
8710
8711 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8712
8713 return string;
8714 }
8715
8716 /* Return the string to output a conditional branch to LABEL, testing
8717 register REG. LABEL is the operand number of the label; REG is the
8718 operand number of the reg. OP is the conditional expression. The mode
8719 of REG says what kind of comparison we made.
8720
8721 DEST is the destination insn (i.e. the label), INSN is the source.
8722
8723 REVERSED is nonzero if we should reverse the sense of the comparison.
8724
8725 ANNUL is nonzero if we should generate an annulling branch. */
8726
8727 const char *
8728 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8729 int annul, rtx_insn *insn)
8730 {
8731 static char string[64];
8732 enum rtx_code code = GET_CODE (op);
8733 machine_mode mode = GET_MODE (XEXP (op, 0));
8734 rtx note;
8735 int far;
8736 char *p;
8737
8738 /* Branch-on-register instructions are limited to +-128KB. If it is too far away,
8739 change
8740
8741 brnz,pt %g1, .LC30
8742
8743 to
8744
8745 brz,pn %g1, .+12
8746 nop
8747 ba,pt %xcc, .LC30
8748
8749 and
8750
8751 brgez,a,pn %o1, .LC29
8752
8753 to
8754
8755 brlz,pt %o1, .+16
8756 nop
8757 ba,pt %xcc, .LC29 */
8758
8759 far = get_attr_length (insn) >= 3;
8760
8761 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8762 if (reversed ^ far)
8763 code = reverse_condition (code);
8764
8765 /* Only 64-bit versions of these instructions exist. */
8766 gcc_assert (mode == DImode);
8767
8768 /* Start by writing the branch condition. */
8769
8770 switch (code)
8771 {
8772 case NE:
8773 strcpy (string, "brnz");
8774 break;
8775
8776 case EQ:
8777 strcpy (string, "brz");
8778 break;
8779
8780 case GE:
8781 strcpy (string, "brgez");
8782 break;
8783
8784 case LT:
8785 strcpy (string, "brlz");
8786 break;
8787
8788 case LE:
8789 strcpy (string, "brlez");
8790 break;
8791
8792 case GT:
8793 strcpy (string, "brgz");
8794 break;
8795
8796 default:
8797 gcc_unreachable ();
8798 }
8799
8800 p = strchr (string, '\0');
8801
8802 /* Now add the annulling, reg, label, and nop. */
8803 if (annul && ! far)
8804 {
8805 strcpy (p, ",a");
8806 p += 2;
8807 }
8808
8809 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8810 {
8811 strcpy (p,
8812 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8813 >= profile_probability::even ()) ^ far)
8814 ? ",pt" : ",pn");
8815 p += 3;
8816 }
8817
8818 *p = p < string + 8 ? '\t' : ' ';
8819 p++;
8820 *p++ = '%';
8821 *p++ = '0' + reg;
8822 *p++ = ',';
8823 *p++ = ' ';
8824 if (far)
8825 {
8826 int veryfar = 1, delta;
8827
8828 if (INSN_ADDRESSES_SET_P ())
8829 {
8830 delta = (INSN_ADDRESSES (INSN_UID (dest))
8831 - INSN_ADDRESSES (INSN_UID (insn)));
8832 /* Leave some instructions for "slop". */
8833 if (delta >= -260000 && delta < 260000)
8834 veryfar = 0;
8835 }
8836
8837 strcpy (p, ".+12\n\t nop\n\t");
8838 /* Skip the next insn if requested or
8839 if we know that it will be a nop. */
8840 if (annul || ! final_sequence)
8841 p[3] = '6';
8842 p += 12;
8843 if (veryfar)
8844 {
8845 strcpy (p, "b\t");
8846 p += 2;
8847 }
8848 else
8849 {
8850 strcpy (p, "ba,pt\t%%xcc, ");
8851 p += 13;
8852 }
8853 }
8854 *p++ = '%';
8855 *p++ = 'l';
8856 *p++ = '0' + label;
8857 *p++ = '%';
8858 *p++ = '#';
8859 *p = '\0';
8860
8861 return string;
8862 }
8863
8864 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8865 Such instructions cannot be used in the delay slot of a return insn on v9.
8866 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8867 */
8868
8869 static int
8870 epilogue_renumber (rtx *where, int test)
8871 {
8872 const char *fmt;
8873 int i;
8874 enum rtx_code code;
8875
8876 if (*where == 0)
8877 return 0;
8878
8879 code = GET_CODE (*where);
8880
8881 switch (code)
8882 {
8883 case REG:
8884 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8885 return 1;
8886 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8887 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8888 /* fallthrough */
8889 case SCRATCH:
8890 case PC:
8891 case CONST_INT:
8892 case CONST_WIDE_INT:
8893 case CONST_DOUBLE:
8894 return 0;
8895
8896 /* Do not replace the frame pointer with the stack pointer because
8897 it can cause the delayed instruction to load below the stack.
8898 This occurs when instructions like:
8899
8900 (set (reg/i:SI 24 %i0)
8901 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8902 (const_int -20 [0xffffffec])) 0))
8903
8904 are in the return delayed slot. */
8905 case PLUS:
8906 if (GET_CODE (XEXP (*where, 0)) == REG
8907 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8908 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8909 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8910 return 1;
8911 break;
8912
8913 case MEM:
8914 if (SPARC_STACK_BIAS
8915 && GET_CODE (XEXP (*where, 0)) == REG
8916 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8917 return 1;
8918 break;
8919
8920 default:
8921 break;
8922 }
8923
8924 fmt = GET_RTX_FORMAT (code);
8925
8926 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8927 {
8928 if (fmt[i] == 'E')
8929 {
8930 int j;
8931 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8932 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8933 return 1;
8934 }
8935 else if (fmt[i] == 'e'
8936 && epilogue_renumber (&(XEXP (*where, i)), test))
8937 return 1;
8938 }
8939 return 0;
8940 }
8941 \f
8942 /* Leaf functions and non-leaf functions have different needs. */
8943
8944 static const int reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8945
8946 static const int reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8947
8948 static const int *const reg_alloc_orders[] =
8949 {
8950 reg_leaf_alloc_order,
8951 reg_nonleaf_alloc_order
8952 };
8953
8954 void
8955 sparc_order_regs_for_local_alloc (void)
8956 {
8957 static int last_order_nonleaf = 1;
8958
8959 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8960 {
8961 last_order_nonleaf = !last_order_nonleaf;
8962 memcpy ((char *) reg_alloc_order,
8963 (const char *) reg_alloc_orders[last_order_nonleaf],
8964 FIRST_PSEUDO_REGISTER * sizeof (int));
8965 }
8966 }
8967
8968 int
8969 sparc_leaf_reg_remap (int regno)
8970 {
8971 gcc_checking_assert (regno >= 0);
8972
8973 /* Do not remap in flat mode. */
8974 if (TARGET_FLAT)
8975 return regno;
8976
8977 /* Do not remap global, stack pointer or floating-point registers. */
8978 if (regno < 8 || regno == STACK_POINTER_REGNUM || regno > SPARC_LAST_INT_REG)
8979 return regno;
8980
8981 /* Neither out nor local nor frame pointer registers must appear. */
8982 if ((regno >= 8 && regno <= 23) || regno == HARD_FRAME_POINTER_REGNUM)
8983 return -1;
8984
8985 /* Remap in to out registers. */
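/* For example, %i0 (regno 24) is remapped to %o0 (regno 8). */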
8986 return regno - 16;
8987 }
8988
8989 /* Return 1 if REG and MEM are legitimate enough to allow the various
8990 MEM<-->REG splits to be run. */
8991
8992 int
8993 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8994 {
8995 /* Punt if we are here by mistake. */
8996 gcc_assert (reload_completed);
8997
8998 /* We must have an offsettable memory reference. */
8999 if (!offsettable_memref_p (mem))
9000 return 0;
9001
9002 /* If we have legitimate args for ldd/std, we do not want
9003 the split to happen. */
9004 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9005 return 0;
9006
9007 /* Success. */
9008 return 1;
9009 }
9010
9011 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9012
9013 void
9014 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9015 {
9016 rtx high_part = gen_highpart (mode, dest);
9017 rtx low_part = gen_lowpart (mode, dest);
9018 rtx word0 = adjust_address (src, mode, 0);
9019 rtx word1 = adjust_address (src, mode, 4);
9020
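/* If the register that receives the high part also appears in the address
of the second word, load that word first so the address is still valid
when the high part is overwritten. */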
9021 if (reg_overlap_mentioned_p (high_part, word1))
9022 {
9023 emit_move_insn_1 (low_part, word1);
9024 emit_move_insn_1 (high_part, word0);
9025 }
9026 else
9027 {
9028 emit_move_insn_1 (high_part, word0);
9029 emit_move_insn_1 (low_part, word1);
9030 }
9031 }
9032
9033 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9034
9035 void
9036 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9037 {
9038 rtx word0 = adjust_address (dest, mode, 0);
9039 rtx word1 = adjust_address (dest, mode, 4);
9040 rtx high_part = gen_highpart (mode, src);
9041 rtx low_part = gen_lowpart (mode, src);
9042
9043 emit_move_insn_1 (word0, high_part);
9044 emit_move_insn_1 (word1, low_part);
9045 }
9046
9047 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9048
9049 int
9050 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9051 {
9052 /* Punt if we are here by mistake. */
9053 gcc_assert (reload_completed);
9054
9055 if (GET_CODE (reg1) == SUBREG)
9056 reg1 = SUBREG_REG (reg1);
9057 if (GET_CODE (reg1) != REG)
9058 return 0;
9059 const int regno1 = REGNO (reg1);
9060
9061 if (GET_CODE (reg2) == SUBREG)
9062 reg2 = SUBREG_REG (reg2);
9063 if (GET_CODE (reg2) != REG)
9064 return 0;
9065 const int regno2 = REGNO (reg2);
9066
9067 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9068 return 1;
9069
9070 if (TARGET_VIS3)
9071 {
9072 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9073 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9074 return 1;
9075 }
9076
9077 return 0;
9078 }
9079
9080 /* Split a REG <--> REG move into a pair of moves in MODE. */
9081
9082 void
9083 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9084 {
9085 rtx dest1 = gen_highpart (mode, dest);
9086 rtx dest2 = gen_lowpart (mode, dest);
9087 rtx src1 = gen_highpart (mode, src);
9088 rtx src2 = gen_lowpart (mode, src);
9089
9090 /* Now emit using the real source and destination we found, swapping
9091 the order if we detect overlap. */
9092 if (reg_overlap_mentioned_p (dest1, src2))
9093 {
9094 emit_move_insn_1 (dest2, src2);
9095 emit_move_insn_1 (dest1, src1);
9096 }
9097 else
9098 {
9099 emit_move_insn_1 (dest1, src1);
9100 emit_move_insn_1 (dest2, src2);
9101 }
9102 }
9103
9104 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9105 This makes them candidates for using ldd and std insns.
9106
9107 Note reg1 and reg2 *must* be hard registers. */
9108
9109 int
9110 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9111 {
9112 /* We might have been passed a SUBREG. */
9113 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9114 return 0;
9115
9116 if (REGNO (reg1) % 2 != 0)
9117 return 0;
9118
9119 /* Integer ldd is deprecated in SPARC V9. */
9120 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9121 return 0;
9122
9123 return (REGNO (reg1) == REGNO (reg2) - 1);
9124 }
9125
9126 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9127 an ldd or std insn.
9128
9129 This can only happen when addr1 and addr2, the addresses in mem1
9130 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9131 addr1 must also be aligned on a 64-bit boundary.
9132
9133 Also, if dependent_reg_rtx is not null, it must not be used to
9134 compute the address for mem1, i.e. we cannot optimize a sequence
9135 like:
9136 ld [%o0], %o0
9137 ld [%o0 + 4], %o1
9138 to
9139 ldd [%o0], %o0
9140 nor:
9141 ld [%g3 + 4], %g3
9142 ld [%g3], %g2
9143 to
9144 ldd [%g3], %g2
9145
9146 But, note that the transformation from:
9147 ld [%g2 + 4], %g3
9148 ld [%g2], %g2
9149 to
9150 ldd [%g2], %g2
9151 is perfectly fine. Thus, the peephole2 patterns always pass us
9152 the destination register of the first load, never the second one.
9153
9154 For stores we don't have a similar problem, so dependent_reg_rtx is
9155 NULL_RTX. */
9156
9157 int
9158 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9159 {
9160 rtx addr1, addr2;
9161 unsigned int reg1;
9162 HOST_WIDE_INT offset1;
9163
9164 /* The mems cannot be volatile. */
9165 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9166 return 0;
9167
9168 /* MEM1 should be aligned on a 64-bit boundary. */
9169 if (MEM_ALIGN (mem1) < 64)
9170 return 0;
9171
9172 addr1 = XEXP (mem1, 0);
9173 addr2 = XEXP (mem2, 0);
9174
9175 /* Extract a register number and offset (if used) from the first addr. */
9176 if (GET_CODE (addr1) == PLUS)
9177 {
9178 /* If not a REG, return zero. */
9179 if (GET_CODE (XEXP (addr1, 0)) != REG)
9180 return 0;
9181 else
9182 {
9183 reg1 = REGNO (XEXP (addr1, 0));
9184 /* The offset must be constant! */
9185 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9186 return 0;
9187 offset1 = INTVAL (XEXP (addr1, 1));
9188 }
9189 }
9190 else if (GET_CODE (addr1) != REG)
9191 return 0;
9192 else
9193 {
9194 reg1 = REGNO (addr1);
9195 /* This was a simple (mem (reg)) expression. Offset is 0. */
9196 offset1 = 0;
9197 }
9198
9199 /* Make sure the second address has the form (plus (reg) (const_int)). */
9200 if (GET_CODE (addr2) != PLUS)
9201 return 0;
9202
9203 if (GET_CODE (XEXP (addr2, 0)) != REG
9204 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9205 return 0;
9206
9207 if (reg1 != REGNO (XEXP (addr2, 0)))
9208 return 0;
9209
9210 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9211 return 0;
9212
9213 /* The first offset must be evenly divisible by 8 to ensure the
9214 address is 64-bit aligned. */
9215 if (offset1 % 8 != 0)
9216 return 0;
9217
9218 /* The offset for the second addr must be 4 more than the first addr. */
9219 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9220 return 0;
9221
9222 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9223 instructions. */
9224 return 1;
9225 }
9226
9227 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9228
9229 rtx
9230 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9231 {
9232 rtx x = widen_memory_access (mem1, mode, 0);
9233 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9234 return x;
9235 }
9236
9237 /* Return 1 if reg is a pseudo, or is the first register in
9238 a hard register pair. This makes it suitable for use in
9239 ldd and std insns. */
9240
9241 int
9242 register_ok_for_ldd (rtx reg)
9243 {
9244 /* We might have been passed a SUBREG. */
9245 if (!REG_P (reg))
9246 return 0;
9247
9248 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9249 return (REGNO (reg) % 2 == 0);
9250
9251 return 1;
9252 }
9253
9254 /* Return 1 if OP, a MEM, has an address which is known to be
9255 aligned to an 8-byte boundary. */
9256
9257 int
9258 memory_ok_for_ldd (rtx op)
9259 {
9260 if (!mem_min_alignment (op, 8))
9261 return 0;
9262
9263 /* We need to perform the job of a memory constraint. */
9264 if ((reload_in_progress || reload_completed)
9265 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9266 return 0;
9267
9268 if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
9269 return 0;
9270
9271 return 1;
9272 }
9273 \f
9274 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9275
9276 static bool
9277 sparc_print_operand_punct_valid_p (unsigned char code)
9278 {
9279 if (code == '#'
9280 || code == '*'
9281 || code == '('
9282 || code == ')'
9283 || code == '_'
9284 || code == '&')
9285 return true;
9286
9287 return false;
9288 }
9289
9290 /* Implement TARGET_PRINT_OPERAND.
9291 Print operand X (an rtx) in assembler syntax to file FILE.
9292 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9293 For `%' followed by punctuation, CODE is the punctuation and X is null. */
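/* A few illustrative expansions (informal, for exposition only):

     %r1  with operand 1 = (const_int 0)        prints  %g0
     %r1  with operand 1 = (reg:SI %o1)         prints  %o1
     %m1  with operand 1 = (mem:SI (reg %fp))   prints the address only
     %0   with operand 0 = (reg:SI %o0)         prints  %o0

   See the individual cases below for the exact rules.  */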
9294
9295 static void
9296 sparc_print_operand (FILE *file, rtx x, int code)
9297 {
9298 const char *s;
9299
9300 switch (code)
9301 {
9302 case '#':
9303 /* Output an insn in a delay slot. */
9304 if (final_sequence)
9305 sparc_indent_opcode = 1;
9306 else
9307 fputs ("\n\t nop", file);
9308 return;
9309 case '*':
9310 /* Output an annul flag if there's nothing for the delay slot and we
9311 are optimizing. This is always used with '(' below.
9312 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9313 this is a dbx bug. So, we only do this when optimizing.
9314 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9315 Always emit a nop in case the next instruction is a branch. */
9316 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9317 fputs (",a", file);
9318 return;
9319 case '(':
9320 /* Output a 'nop' if there's nothing for the delay slot and we are
9321 not optimizing. This is always used with '*' above. */
9322 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9323 fputs ("\n\t nop", file);
9324 else if (final_sequence)
9325 sparc_indent_opcode = 1;
9326 return;
9327 case ')':
9328 /* Output the right displacement from the saved PC on function return.
9329 The caller may have placed an "unimp" insn immediately after the call
9330 so we have to account for it. This insn is used in the 32-bit ABI
9331 when calling a function that returns a non zero-sized structure. The
9332 64-bit ABI doesn't have it. Be careful to have this test be the same
9333 as that for the call. The exception is when sparc_std_struct_return
9334 is enabled, the psABI is followed exactly and the adjustment is made
9335 by the code in sparc_struct_value_rtx. The call emitted is the same
9336 when sparc_std_struct_return is enabled. */
9337 if (!TARGET_ARCH64
9338 && cfun->returns_struct
9339 && !sparc_std_struct_return
9340 && DECL_SIZE (DECL_RESULT (current_function_decl))
9341 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9342 == INTEGER_CST
9343 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9344 fputs ("12", file);
9345 else
9346 fputc ('8', file);
9347 return;
9348 case '_':
9349 /* Output the Embedded Medium/Anywhere code model base register. */
9350 fputs (EMBMEDANY_BASE_REG, file);
9351 return;
9352 case '&':
9353 /* Print some local dynamic TLS name. */
9354 if (const char *name = get_some_local_dynamic_name ())
9355 assemble_name (file, name);
9356 else
9357 output_operand_lossage ("'%%&' used without any "
9358 "local dynamic TLS references");
9359 return;
9360
9361 case 'Y':
9362 /* Adjust the operand to take into account a RESTORE operation. */
9363 if (GET_CODE (x) == CONST_INT)
9364 break;
9365 else if (GET_CODE (x) != REG)
9366 output_operand_lossage ("invalid %%Y operand");
9367 else if (REGNO (x) < 8)
9368 fputs (reg_names[REGNO (x)], file);
9369 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9370 fputs (reg_names[REGNO (x)-16], file);
9371 else
9372 output_operand_lossage ("invalid %%Y operand");
9373 return;
9374 case 'L':
9375 /* Print out the low order register name of a register pair. */
9376 if (WORDS_BIG_ENDIAN)
9377 fputs (reg_names[REGNO (x)+1], file);
9378 else
9379 fputs (reg_names[REGNO (x)], file);
9380 return;
9381 case 'H':
9382 /* Print out the high order register name of a register pair. */
9383 if (WORDS_BIG_ENDIAN)
9384 fputs (reg_names[REGNO (x)], file);
9385 else
9386 fputs (reg_names[REGNO (x)+1], file);
9387 return;
9388 case 'R':
9389 /* Print out the second register name of a register pair or quad.
9390 I.e., R (%o0) => %o1. */
9391 fputs (reg_names[REGNO (x)+1], file);
9392 return;
9393 case 'S':
9394 /* Print out the third register name of a register quad.
9395 I.e., S (%o0) => %o2. */
9396 fputs (reg_names[REGNO (x)+2], file);
9397 return;
9398 case 'T':
9399 /* Print out the fourth register name of a register quad.
9400 I.e., T (%o0) => %o3. */
9401 fputs (reg_names[REGNO (x)+3], file);
9402 return;
9403 case 'x':
9404 /* Print a condition code register. */
9405 if (REGNO (x) == SPARC_ICC_REG)
9406 {
9407 switch (GET_MODE (x))
9408 {
9409 case E_CCmode:
9410 case E_CCNZmode:
9411 case E_CCCmode:
9412 case E_CCVmode:
9413 s = "%icc";
9414 break;
9415 case E_CCXmode:
9416 case E_CCXNZmode:
9417 case E_CCXCmode:
9418 case E_CCXVmode:
9419 s = "%xcc";
9420 break;
9421 default:
9422 gcc_unreachable ();
9423 }
9424 fputs (s, file);
9425 }
9426 else
9427 /* %fccN register */
9428 fputs (reg_names[REGNO (x)], file);
9429 return;
9430 case 'm':
9431 /* Print the operand's address only. */
9432 output_address (GET_MODE (x), XEXP (x, 0));
9433 return;
9434 case 'r':
9435 /* In this case we need a register. Use %g0 if the
9436 operand is const0_rtx. */
9437 if (x == const0_rtx
9438 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9439 {
9440 fputs ("%g0", file);
9441 return;
9442 }
9443 else
9444 break;
9445
9446 case 'A':
9447 switch (GET_CODE (x))
9448 {
9449 case IOR:
9450 s = "or";
9451 break;
9452 case AND:
9453 s = "and";
9454 break;
9455 case XOR:
9456 s = "xor";
9457 break;
9458 default:
9459 output_operand_lossage ("invalid %%A operand");
9460 s = "";
9461 break;
9462 }
9463 fputs (s, file);
9464 return;
9465
9466 case 'B':
9467 switch (GET_CODE (x))
9468 {
9469 case IOR:
9470 s = "orn";
9471 break;
9472 case AND:
9473 s = "andn";
9474 break;
9475 case XOR:
9476 s = "xnor";
9477 break;
9478 default:
9479 output_operand_lossage ("invalid %%B operand");
9480 s = "";
9481 break;
9482 }
9483 fputs (s, file);
9484 return;
9485
9486 /* This is used by the conditional move instructions. */
9487 case 'C':
9488 {
9489 machine_mode mode = GET_MODE (XEXP (x, 0));
9490 switch (GET_CODE (x))
9491 {
9492 case NE:
9493 if (mode == CCVmode || mode == CCXVmode)
9494 s = "vs";
9495 else
9496 s = "ne";
9497 break;
9498 case EQ:
9499 if (mode == CCVmode || mode == CCXVmode)
9500 s = "vc";
9501 else
9502 s = "e";
9503 break;
9504 case GE:
9505 if (mode == CCNZmode || mode == CCXNZmode)
9506 s = "pos";
9507 else
9508 s = "ge";
9509 break;
9510 case GT:
9511 s = "g";
9512 break;
9513 case LE:
9514 s = "le";
9515 break;
9516 case LT:
9517 if (mode == CCNZmode || mode == CCXNZmode)
9518 s = "neg";
9519 else
9520 s = "l";
9521 break;
9522 case GEU:
9523 s = "geu";
9524 break;
9525 case GTU:
9526 s = "gu";
9527 break;
9528 case LEU:
9529 s = "leu";
9530 break;
9531 case LTU:
9532 s = "lu";
9533 break;
9534 case LTGT:
9535 s = "lg";
9536 break;
9537 case UNORDERED:
9538 s = "u";
9539 break;
9540 case ORDERED:
9541 s = "o";
9542 break;
9543 case UNLT:
9544 s = "ul";
9545 break;
9546 case UNLE:
9547 s = "ule";
9548 break;
9549 case UNGT:
9550 s = "ug";
9551 break;
9552 case UNGE:
9553 s = "uge"
9554 ; break;
9555 case UNEQ:
9556 s = "ue";
9557 break;
9558 default:
9559 output_operand_lossage ("invalid %%C operand");
9560 s = "";
9561 break;
9562 }
9563 fputs (s, file);
9564 return;
9565 }
9566
9567 /* These are used by the movr instruction pattern. */
9568 case 'D':
9569 {
9570 switch (GET_CODE (x))
9571 {
9572 case NE:
9573 s = "ne";
9574 break;
9575 case EQ:
9576 s = "e";
9577 break;
9578 case GE:
9579 s = "gez";
9580 break;
9581 case LT:
9582 s = "lz";
9583 break;
9584 case LE:
9585 s = "lez";
9586 break;
9587 case GT:
9588 s = "gz";
9589 break;
9590 default:
9591 output_operand_lossage ("invalid %%D operand");
9592 s = "";
9593 break;
9594 }
9595 fputs (s, file);
9596 return;
9597 }
9598
9599 case 'b':
9600 {
9601 /* Print a sign-extended character. */
9602 int i = trunc_int_for_mode (INTVAL (x), QImode);
9603 fprintf (file, "%d", i);
9604 return;
9605 }
9606
9607 case 'f':
9608 /* Operand must be a MEM; write its address. */
9609 if (GET_CODE (x) != MEM)
9610 output_operand_lossage ("invalid %%f operand");
9611 output_address (GET_MODE (x), XEXP (x, 0));
9612 return;
9613
9614 case 's':
9615 {
9616 /* Print a sign-extended 32-bit value. */
9617 HOST_WIDE_INT i;
9618 if (GET_CODE (x) == CONST_INT)
9619 i = INTVAL (x);
9620 else
9621 {
9622 output_operand_lossage ("invalid %%s operand");
9623 return;
9624 }
9625 i = trunc_int_for_mode (i, SImode);
9626 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9627 return;
9628 }
9629
9630 case 0:
9631 /* Do nothing special. */
9632 break;
9633
9634 default:
9635 /* Undocumented flag. */
9636 output_operand_lossage ("invalid operand output code");
9637 }
9638
9639 if (GET_CODE (x) == REG)
9640 fputs (reg_names[REGNO (x)], file);
9641 else if (GET_CODE (x) == MEM)
9642 {
9643 fputc ('[', file);
9644 /* Poor Sun assembler doesn't understand absolute addressing. */
9645 if (CONSTANT_P (XEXP (x, 0)))
9646 fputs ("%g0+", file);
9647 output_address (GET_MODE (x), XEXP (x, 0));
9648 fputc (']', file);
9649 }
9650 else if (GET_CODE (x) == HIGH)
9651 {
9652 fputs ("%hi(", file);
9653 output_addr_const (file, XEXP (x, 0));
9654 fputc (')', file);
9655 }
9656 else if (GET_CODE (x) == LO_SUM)
9657 {
9658 sparc_print_operand (file, XEXP (x, 0), 0);
9659 if (TARGET_CM_MEDMID)
9660 fputs ("+%l44(", file);
9661 else
9662 fputs ("+%lo(", file);
9663 output_addr_const (file, XEXP (x, 1));
9664 fputc (')', file);
9665 }
9666 else if (GET_CODE (x) == CONST_DOUBLE)
9667 output_operand_lossage ("floating-point constant not a valid immediate operand");
9668 else
9669 output_addr_const (file, x);
9670 }
9671
9672 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9673
9674 static void
9675 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9676 {
9677 rtx base, index = 0;
9678 int offset = 0;
9679 rtx addr = x;
9680
9681 if (REG_P (addr))
9682 fputs (reg_names[REGNO (addr)], file);
9683 else if (GET_CODE (addr) == PLUS)
9684 {
9685 if (CONST_INT_P (XEXP (addr, 0)))
9686 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9687 else if (CONST_INT_P (XEXP (addr, 1)))
9688 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9689 else
9690 base = XEXP (addr, 0), index = XEXP (addr, 1);
9691 if (GET_CODE (base) == LO_SUM)
9692 {
9693 gcc_assert (USE_AS_OFFSETABLE_LO10
9694 && TARGET_ARCH64
9695 && ! TARGET_CM_MEDMID);
9696 output_operand (XEXP (base, 0), 0);
9697 fputs ("+%lo(", file);
9698 output_address (VOIDmode, XEXP (base, 1));
9699 fprintf (file, ")+%d", offset);
9700 }
9701 else
9702 {
9703 fputs (reg_names[REGNO (base)], file);
9704 if (index == 0)
9705 fprintf (file, "%+d", offset);
9706 else if (REG_P (index))
9707 fprintf (file, "+%s", reg_names[REGNO (index)]);
9708 else if (GET_CODE (index) == SYMBOL_REF
9709 || GET_CODE (index) == LABEL_REF
9710 || GET_CODE (index) == CONST)
9711 fputc ('+', file), output_addr_const (file, index);
9712 else gcc_unreachable ();
9713 }
9714 }
9715 else if (GET_CODE (addr) == MINUS
9716 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9717 {
9718 output_addr_const (file, XEXP (addr, 0));
9719 fputs ("-(", file);
9720 output_addr_const (file, XEXP (addr, 1));
9721 fputs ("-.)", file);
9722 }
9723 else if (GET_CODE (addr) == LO_SUM)
9724 {
9725 output_operand (XEXP (addr, 0), 0);
9726 if (TARGET_CM_MEDMID)
9727 fputs ("+%l44(", file);
9728 else
9729 fputs ("+%lo(", file);
9730 output_address (VOIDmode, XEXP (addr, 1));
9731 fputc (')', file);
9732 }
9733 else if (flag_pic
9734 && GET_CODE (addr) == CONST
9735 && GET_CODE (XEXP (addr, 0)) == MINUS
9736 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9737 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9738 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9739 {
9740 addr = XEXP (addr, 0);
9741 output_addr_const (file, XEXP (addr, 0));
9742 /* Group the args of the second CONST in parentheses. */
9743 fputs ("-(", file);
9744 /* Skip past the second CONST--it does nothing for us. */
9745 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9746 /* Close the parenthesis. */
9747 fputc (')', file);
9748 }
9749 else
9750 {
9751 output_addr_const (file, addr);
9752 }
9753 }
9754 \f
9755 /* Target hook for assembling integer objects. The sparc version has
9756 special handling for aligned DI-mode objects. */
9757
9758 static bool
9759 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9760 {
9761 /* ??? We only output .xword's for symbols and only then in environments
9762 where the assembler can handle them. */
9763 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9764 {
9765 if (TARGET_V9)
9766 {
9767 assemble_integer_with_op ("\t.xword\t", x);
9768 return true;
9769 }
9770 else
9771 {
9772 assemble_aligned_integer (4, const0_rtx);
9773 assemble_aligned_integer (4, x);
9774 return true;
9775 }
9776 }
9777 return default_assemble_integer (x, size, aligned_p);
9778 }
9779 \f
9780 /* Return the value of a code used in the .proc pseudo-op that says
9781 what kind of result this function returns. For non-C types, we pick
9782 the closest C type. */
9783
9784 #ifndef SHORT_TYPE_SIZE
9785 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9786 #endif
9787
9788 #ifndef INT_TYPE_SIZE
9789 #define INT_TYPE_SIZE BITS_PER_WORD
9790 #endif
9791
9792 #ifndef LONG_TYPE_SIZE
9793 #define LONG_TYPE_SIZE BITS_PER_WORD
9794 #endif
9795
9796 #ifndef LONG_LONG_TYPE_SIZE
9797 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9798 #endif
9799
9800 #ifndef FLOAT_TYPE_SIZE
9801 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9802 #endif
9803
9804 #ifndef DOUBLE_TYPE_SIZE
9805 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9806 #endif
9807
9808 #ifndef LONG_DOUBLE_TYPE_SIZE
9809 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9810 #endif
9811
9812 unsigned long
9813 sparc_type_code (tree type)
9814 {
9815 unsigned long qualifiers = 0;
9816 unsigned shift;
9817
9818 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9819 setting more, since some assemblers will give an error for this. Also,
9820 we must be careful to avoid shifts of 32 bits or more to avoid getting
9821 unpredictable results. */
9822
9823 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9824 {
9825 switch (TREE_CODE (type))
9826 {
9827 case ERROR_MARK:
9828 return qualifiers;
9829
9830 case ARRAY_TYPE:
9831 qualifiers |= (3 << shift);
9832 break;
9833
9834 case FUNCTION_TYPE:
9835 case METHOD_TYPE:
9836 qualifiers |= (2 << shift);
9837 break;
9838
9839 case POINTER_TYPE:
9840 case REFERENCE_TYPE:
9841 case OFFSET_TYPE:
9842 qualifiers |= (1 << shift);
9843 break;
9844
9845 case RECORD_TYPE:
9846 return (qualifiers | 8);
9847
9848 case UNION_TYPE:
9849 case QUAL_UNION_TYPE:
9850 return (qualifiers | 9);
9851
9852 case ENUMERAL_TYPE:
9853 return (qualifiers | 10);
9854
9855 case VOID_TYPE:
9856 return (qualifiers | 16);
9857
9858 case INTEGER_TYPE:
9859 /* If this is a range type, consider it to be the underlying
9860 type. */
9861 if (TREE_TYPE (type) != 0)
9862 break;
9863
9864 /* Carefully distinguish all the standard types of C,
9865 without messing up if the language is not C. We do this by
9866 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9867 look at both the names and the above fields, but that's redundant.
9868 Any type whose size is between two C types will be considered
9869 to be the wider of the two types. Also, we do not have a
9870 special code to use for "long long", so anything wider than
9871 long is treated the same. Note that we can't distinguish
9872 between "int" and "long" in this code if they are the same
9873 size, but that's fine, since neither can the assembler. */
9874
9875 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9876 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9877
9878 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9879 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9880
9881 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9882 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9883
9884 else
9885 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9886
9887 case REAL_TYPE:
9888 /* If this is a range type, consider it to be the underlying
9889 type. */
9890 if (TREE_TYPE (type) != 0)
9891 break;
9892
9893 /* Carefully distinguish all the standard types of C,
9894 without messing up if the language is not C. */
9895
9896 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9897 return (qualifiers | 6);
9898
9899 else
9900 return (qualifiers | 7);
9901
9902 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9903 /* ??? We need to distinguish between double and float complex types,
9904 but I don't know how yet because I can't reach this code from
9905 existing front-ends. */
9906 return (qualifiers | 7); /* Who knows? */
9907
9908 case VECTOR_TYPE:
9909 case BOOLEAN_TYPE: /* Boolean truth value type. */
9910 case LANG_TYPE:
9911 case NULLPTR_TYPE:
9912 return qualifiers;
9913
9914 default:
9915 gcc_unreachable (); /* Not a type! */
9916 }
9917 }
9918
9919 return qualifiers;
9920 }
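/* Worked example (illustrative only, assuming a 32-bit int): for the type
   "int **" the loop above visits POINTER_TYPE twice and then INTEGER_TYPE,
   giving

     qualifiers = (1 << 6) | (1 << 8) | 4 = 0x144

   i.e. two pointer levels in the 2-bit fields starting at bit 6, and the
   basic code 4 (signed int) in the low bits.  */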
9921 \f
9922 /* Nested function support. */
9923
9924 /* Emit RTL insns to initialize the variable parts of a trampoline.
9925 FNADDR is an RTX for the address of the function's pure code.
9926 CXT is an RTX for the static chain value for the function.
9927
9928 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9929 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9930 (to store insns). This is a bit excessive. Perhaps a different
9931 mechanism would be better here.
9932
9933 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9934
9935 static void
9936 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9937 {
9938 /* SPARC 32-bit trampoline:
9939
9940 sethi %hi(fn), %g1
9941 sethi %hi(static), %g2
9942 jmp %g1+%lo(fn)
9943 or %g2, %lo(static), %g2
9944
9945 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9946 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9947 */
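/* Illustrative numeric example (not from the original sources): with fn at
   address 0x12345678, the first word stored below is

     0x03000000 | (0x12345678 >> 10) = 0x03048d15

   which is the encoding of "sethi %hi(0x12345678), %g1"; the low 10 bits
   of the address are merged into the "jmp %g1+%lo(fn)" word instead.  */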
9948
9949 emit_move_insn
9950 (adjust_address (m_tramp, SImode, 0),
9951 expand_binop (SImode, ior_optab,
9952 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9953 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9954 NULL_RTX, 1, OPTAB_DIRECT));
9955
9956 emit_move_insn
9957 (adjust_address (m_tramp, SImode, 4),
9958 expand_binop (SImode, ior_optab,
9959 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9960 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9961 NULL_RTX, 1, OPTAB_DIRECT));
9962
9963 emit_move_insn
9964 (adjust_address (m_tramp, SImode, 8),
9965 expand_binop (SImode, ior_optab,
9966 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9967 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9968 NULL_RTX, 1, OPTAB_DIRECT));
9969
9970 emit_move_insn
9971 (adjust_address (m_tramp, SImode, 12),
9972 expand_binop (SImode, ior_optab,
9973 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9974 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9975 NULL_RTX, 1, OPTAB_DIRECT));
9976
9977 emit_insn
9978 (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 0))));
9979
9980 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9981 aligned on a 16 byte boundary so one flush clears it all. */
9982 if (sparc_cpu != PROCESSOR_ULTRASPARC
9983 && sparc_cpu != PROCESSOR_ULTRASPARC3
9984 && sparc_cpu != PROCESSOR_NIAGARA
9985 && sparc_cpu != PROCESSOR_NIAGARA2
9986 && sparc_cpu != PROCESSOR_NIAGARA3
9987 && sparc_cpu != PROCESSOR_NIAGARA4
9988 && sparc_cpu != PROCESSOR_NIAGARA7
9989 && sparc_cpu != PROCESSOR_M8)
9990 emit_insn
9991 (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 8))));
9992
9993 /* Call __enable_execute_stack after writing onto the stack to make sure
9994 the stack address is accessible. */
9995 #ifdef HAVE_ENABLE_EXECUTE_STACK
9996 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9997 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9998 #endif
9999
10000 }
10001
10002 /* The 64-bit version is simpler because it makes more sense to load the
10003 values as "immediate" data out of the trampoline. It's also easier since
10004 we can read the PC without clobbering a register. */
10005
10006 static void
10007 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10008 {
10009 /* SPARC 64-bit trampoline:
10010
10011 rd %pc, %g1
10012 ldx [%g1+24], %g5
10013 jmp %g5
10014 ldx [%g1+16], %g5
10015 +16 bytes data
10016 */
10017
10018 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10019 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10020 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10021 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10022 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10023 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10024 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10025 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10026 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10027 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10028 emit_insn
10029 (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 0))));
10030
10031 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
10032 aligned on a 16 byte boundary so one flush clears it all. */
10033 if (sparc_cpu != PROCESSOR_ULTRASPARC
10034 && sparc_cpu != PROCESSOR_ULTRASPARC3
10035 && sparc_cpu != PROCESSOR_NIAGARA
10036 && sparc_cpu != PROCESSOR_NIAGARA2
10037 && sparc_cpu != PROCESSOR_NIAGARA3
10038 && sparc_cpu != PROCESSOR_NIAGARA4
10039 && sparc_cpu != PROCESSOR_NIAGARA7
10040 && sparc_cpu != PROCESSOR_M8)
10041 emit_insn
10042 (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 8))));
10043
10044 /* Call __enable_execute_stack after writing onto the stack to make sure
10045 the stack address is accessible. */
10046 #ifdef HAVE_ENABLE_EXECUTE_STACK
10047 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10048 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10049 #endif
10050 }
10051
10052 /* Worker for TARGET_TRAMPOLINE_INIT. */
10053
10054 static void
10055 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10056 {
10057 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10058 cxt = force_reg (Pmode, cxt);
10059 if (TARGET_ARCH64)
10060 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10061 else
10062 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10063 }
10064 \f
10065 /* Adjust the cost of a scheduling dependency. Return the new cost of
10066 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10067
10068 static int
10069 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10070 int cost)
10071 {
10072 enum attr_type insn_type;
10073
10074 if (recog_memoized (insn) < 0)
10075 return cost;
10076
10077 insn_type = get_attr_type (insn);
10078
10079 if (dep_type == 0)
10080 {
10081 /* Data dependency; DEP_INSN writes a register that INSN reads some
10082 cycles later. */
10083
10084 /* If a load, then the dependence must be on the memory address;
10085 add an extra "cycle". Note that the cost could be two cycles
10086 if the reg was written late in an instruction group; we cannot tell
10087 here. */
10088 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10089 return cost + 3;
10090
10091 /* Get the delay only if the address of the store is the dependence. */
10092 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10093 {
10094 rtx pat = PATTERN (insn);
10095 rtx dep_pat = PATTERN (dep_insn);
10096
10097 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10098 return cost; /* This should not happen! */
10099
10100 /* The dependency between the two instructions was on the data that
10101 is being stored. Assume that this implies that the address of the
10102 store is not dependent. */
10103 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10104 return cost;
10105
10106 return cost + 3; /* An approximation. */
10107 }
10108
10109 /* A shift instruction cannot receive its data from an instruction
10110 in the same cycle; add a one cycle penalty. */
10111 if (insn_type == TYPE_SHIFT)
10112 return cost + 3; /* Split before cascade into shift. */
10113 }
10114 else
10115 {
10116 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10117 INSN writes some cycles later. */
10118
10119 /* These are only significant for the fpu unit; writing a fp reg before
10120 the fpu has finished with it stalls the processor. */
10121
10122 /* Reusing an integer register causes no problems. */
10123 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10124 return 0;
10125 }
10126
10127 return cost;
10128 }
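/* Illustrative reading of the above (not from the sources): on SuperSPARC a
   sequence such as

     add  %o1, %o2, %o0
     ld   [%o0], %o3        ! true dependence on the address register

   has the load's dependence cost bumped by 3, encouraging the scheduler to
   place independent instructions between the add and the load.  */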
10129
10130 static int
10131 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10132 int cost)
10133 {
10134 enum attr_type insn_type, dep_type;
10135 rtx pat = PATTERN (insn);
10136 rtx dep_pat = PATTERN (dep_insn);
10137
10138 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10139 return cost;
10140
10141 insn_type = get_attr_type (insn);
10142 dep_type = get_attr_type (dep_insn);
10143
10144 switch (dtype)
10145 {
10146 case 0:
10147 /* Data dependency; DEP_INSN writes a register that INSN reads some
10148 cycles later. */
10149
10150 switch (insn_type)
10151 {
10152 case TYPE_STORE:
10153 case TYPE_FPSTORE:
10154 /* Get the delay iff the address of the store is the dependence. */
10155 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10156 return cost;
10157
10158 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10159 return cost;
10160 return cost + 3;
10161
10162 case TYPE_LOAD:
10163 case TYPE_SLOAD:
10164 case TYPE_FPLOAD:
10165 /* If a load, then the dependence must be on the memory address. If
10166 the addresses aren't equal, then it might be a false dependency. */
10167 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10168 {
10169 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10170 || GET_CODE (SET_DEST (dep_pat)) != MEM
10171 || GET_CODE (SET_SRC (pat)) != MEM
10172 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10173 XEXP (SET_SRC (pat), 0)))
10174 return cost + 2;
10175
10176 return cost + 8;
10177 }
10178 break;
10179
10180 case TYPE_BRANCH:
10181 /* Compare to branch latency is 0. There is no benefit from
10182 separating compare and branch. */
10183 if (dep_type == TYPE_COMPARE)
10184 return 0;
10185 /* Floating point compare to branch latency is less than
10186 compare to conditional move. */
10187 if (dep_type == TYPE_FPCMP)
10188 return cost - 1;
10189 break;
10190 default:
10191 break;
10192 }
10193 break;
10194
10195 case REG_DEP_ANTI:
10196 /* Anti-dependencies only penalize the fpu unit. */
10197 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10198 return 0;
10199 break;
10200
10201 default:
10202 break;
10203 }
10204
10205 return cost;
10206 }
10207
10208 static int
10209 leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10210 int cost)
10211 {
10212 enum attr_type insn_type, dep_type;
10213 rtx pat = PATTERN (insn);
10214 rtx dep_pat = PATTERN (dep_insn);
10215
10216 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10217 return cost;
10218
10219 insn_type = get_attr_type (insn);
10220 dep_type = get_attr_type (dep_insn);
10221
10222 switch (dtype)
10223 {
10224 case REG_DEP_TRUE:
10225 /* Data dependency; DEP_INSN writes a register that INSN reads some
10226 cycles later. */
10227
10228 switch (insn_type)
10229 {
10230 case TYPE_STORE:
10231 /* Try to schedule three instructions between the store and
10232 the ALU instruction that generated the data. */
10233 if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
10234 {
10235 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10236 break;
10237
10238 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10239 return 4;
10240 }
10241 break;
10242 default:
10243 break;
10244 }
10245 break;
10246 case REG_DEP_ANTI:
10247 /* Penalize anti-dependencies for FPU instructions. */
10248 if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
10249 return 4;
10250 break;
10251 default:
10252 break;
10253 }
10254
10255 return cost;
10256 }
10257
10258 static int
10259 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10260 unsigned int)
10261 {
10262 switch (sparc_cpu)
10263 {
10264 case PROCESSOR_LEON5:
10265 cost = leon5_adjust_cost (insn, dep_type, dep, cost);
10266 break;
10267 case PROCESSOR_SUPERSPARC:
10268 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10269 break;
10270 case PROCESSOR_HYPERSPARC:
10271 case PROCESSOR_SPARCLITE86X:
10272 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10273 break;
10274 default:
10275 break;
10276 }
10277 return cost;
10278 }
10279
10280 static void
10281 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10282 int sched_verbose ATTRIBUTE_UNUSED,
10283 int max_ready ATTRIBUTE_UNUSED)
10284 {}
10285
10286 static int
10287 sparc_use_sched_lookahead (void)
10288 {
10289 switch (sparc_cpu)
10290 {
10291 case PROCESSOR_ULTRASPARC:
10292 case PROCESSOR_ULTRASPARC3:
10293 return 4;
10294 case PROCESSOR_SUPERSPARC:
10295 case PROCESSOR_HYPERSPARC:
10296 case PROCESSOR_SPARCLITE86X:
10297 return 3;
10298 case PROCESSOR_NIAGARA4:
10299 case PROCESSOR_NIAGARA7:
10300 case PROCESSOR_M8:
10301 return 2;
10302 case PROCESSOR_NIAGARA:
10303 case PROCESSOR_NIAGARA2:
10304 case PROCESSOR_NIAGARA3:
10305 default:
10306 return 0;
10307 }
10308 }
10309
10310 static int
10311 sparc_issue_rate (void)
10312 {
10313 switch (sparc_cpu)
10314 {
10315 case PROCESSOR_ULTRASPARC:
10316 case PROCESSOR_ULTRASPARC3:
10317 case PROCESSOR_M8:
10318 return 4;
10319 case PROCESSOR_SUPERSPARC:
10320 return 3;
10321 case PROCESSOR_HYPERSPARC:
10322 case PROCESSOR_SPARCLITE86X:
10323 case PROCESSOR_V9:
10324 /* Assume V9 processors are capable of at least dual-issue. */
10325 case PROCESSOR_NIAGARA4:
10326 case PROCESSOR_NIAGARA7:
10327 return 2;
10328 case PROCESSOR_NIAGARA:
10329 case PROCESSOR_NIAGARA2:
10330 case PROCESSOR_NIAGARA3:
10331 default:
10332 return 1;
10333 }
10334 }
10335
10336 int
10337 sparc_branch_cost (bool speed_p, bool predictable_p)
10338 {
10339 if (!speed_p)
10340 return 2;
10341
10342 /* For pre-V9 processors we use a single value (usually 3) to take into
10343 account the potential annulling of the delay slot (which ends up being
10344 a bubble in the pipeline slot) plus a cycle to take into consideration
10345 the instruction cache effects.
10346
10347 On V9 and later processors, which have branch prediction facilities,
10348 we take into account whether the branch is (easily) predictable. */
10349 const int cost = sparc_costs->branch_cost;
10350
10351 switch (sparc_cpu)
10352 {
10353 case PROCESSOR_V9:
10354 case PROCESSOR_ULTRASPARC:
10355 case PROCESSOR_ULTRASPARC3:
10356 case PROCESSOR_NIAGARA:
10357 case PROCESSOR_NIAGARA2:
10358 case PROCESSOR_NIAGARA3:
10359 case PROCESSOR_NIAGARA4:
10360 case PROCESSOR_NIAGARA7:
10361 case PROCESSOR_M8:
10362 return cost + (predictable_p ? 0 : 2);
10363
10364 default:
10365 return cost;
10366 }
10367 }
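/* Worked example (hypothetical numbers): if sparc_costs->branch_cost were 3,
   a branch compiled for UltraSPARC would be costed at 3 when predictable and
   3 + 2 = 5 when not, while a branch in code not optimized for speed simply
   costs 2 because of the early !speed_p return above.  */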
10368
10369 static int
10370 set_extends (rtx_insn *insn)
10371 {
10372 rtx pat = PATTERN (insn);
10373
10374 switch (GET_CODE (SET_SRC (pat)))
10375 {
10376 /* Load and some shift instructions zero extend. */
10377 case MEM:
10378 case ZERO_EXTEND:
10379 /* sethi clears the high bits */
10380 case HIGH:
10381 /* LO_SUM is used with sethi. sethi cleared the high
10382 bits and the values used with lo_sum are positive */
10383 case LO_SUM:
10384 /* Store flag stores 0 or 1 */
10385 case LT: case LTU:
10386 case GT: case GTU:
10387 case LE: case LEU:
10388 case GE: case GEU:
10389 case EQ:
10390 case NE:
10391 return 1;
10392 case AND:
10393 {
10394 rtx op0 = XEXP (SET_SRC (pat), 0);
10395 rtx op1 = XEXP (SET_SRC (pat), 1);
10396 if (GET_CODE (op1) == CONST_INT)
10397 return INTVAL (op1) >= 0;
10398 if (GET_CODE (op0) != REG)
10399 return 0;
10400 if (sparc_check_64 (op0, insn) == 1)
10401 return 1;
10402 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10403 }
10404 case IOR:
10405 case XOR:
10406 {
10407 rtx op0 = XEXP (SET_SRC (pat), 0);
10408 rtx op1 = XEXP (SET_SRC (pat), 1);
10409 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10410 return 0;
10411 if (GET_CODE (op1) == CONST_INT)
10412 return INTVAL (op1) >= 0;
10413 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10414 }
10415 case LSHIFTRT:
10416 return GET_MODE (SET_SRC (pat)) == SImode;
10417 /* Positive integers leave the high bits zero. */
10418 case CONST_INT:
10419 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10420 case ASHIFTRT:
10421 case SIGN_EXTEND:
10422 return - (GET_MODE (SET_SRC (pat)) == SImode);
10423 case REG:
10424 return sparc_check_64 (SET_SRC (pat), insn);
10425 default:
10426 return 0;
10427 }
10428 }
10429
10430 /* We _ought_ to have only one kind per function, but... */
10431 static GTY(()) rtx sparc_addr_diff_list;
10432 static GTY(()) rtx sparc_addr_list;
10433
10434 void
10435 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10436 {
10437 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10438 if (diff)
10439 sparc_addr_diff_list
10440 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10441 else
10442 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10443 }
10444
10445 static void
10446 sparc_output_addr_vec (rtx vec)
10447 {
10448 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10449 int idx, vlen = XVECLEN (body, 0);
10450
10451 #ifdef ASM_OUTPUT_ADDR_VEC_START
10452 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10453 #endif
10454
10455 #ifdef ASM_OUTPUT_CASE_LABEL
10456 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10457 NEXT_INSN (lab));
10458 #else
10459 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10460 #endif
10461
10462 for (idx = 0; idx < vlen; idx++)
10463 {
10464 ASM_OUTPUT_ADDR_VEC_ELT
10465 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10466 }
10467
10468 #ifdef ASM_OUTPUT_ADDR_VEC_END
10469 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10470 #endif
10471 }
10472
10473 static void
10474 sparc_output_addr_diff_vec (rtx vec)
10475 {
10476 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10477 rtx base = XEXP (XEXP (body, 0), 0);
10478 int idx, vlen = XVECLEN (body, 1);
10479
10480 #ifdef ASM_OUTPUT_ADDR_VEC_START
10481 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10482 #endif
10483
10484 #ifdef ASM_OUTPUT_CASE_LABEL
10485 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10486 NEXT_INSN (lab));
10487 #else
10488 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10489 #endif
10490
10491 for (idx = 0; idx < vlen; idx++)
10492 {
10493 ASM_OUTPUT_ADDR_DIFF_ELT
10494 (asm_out_file,
10495 body,
10496 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10497 CODE_LABEL_NUMBER (base));
10498 }
10499
10500 #ifdef ASM_OUTPUT_ADDR_VEC_END
10501 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10502 #endif
10503 }
10504
10505 static void
10506 sparc_output_deferred_case_vectors (void)
10507 {
10508 rtx t;
10509 int align;
10510
10511 if (sparc_addr_list == NULL_RTX
10512 && sparc_addr_diff_list == NULL_RTX)
10513 return;
10514
10515 /* Align to cache line in the function's code section. */
10516 switch_to_section (current_function_section ());
10517
10518 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10519 if (align > 0)
10520 ASM_OUTPUT_ALIGN (asm_out_file, align);
10521
10522 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10523 sparc_output_addr_vec (XEXP (t, 0));
10524 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10525 sparc_output_addr_diff_vec (XEXP (t, 0));
10526
10527 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10528 }
10529
10530 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10531 unknown. Return 1 if the high bits are zero, -1 if the register is
10532 sign extended. */
10533 int
10534 sparc_check_64 (rtx x, rtx_insn *insn)
10535 {
10536 /* If a register is set only once it is safe to ignore insns this
10537 code does not know how to handle. The loop will either recognize
10538 the single set and return the correct value or fail to recognize
10539 it and return 0. */
10540 int set_once = 0;
10541 rtx y = x;
10542
10543 gcc_assert (GET_CODE (x) == REG);
10544
10545 if (GET_MODE (x) == DImode)
10546 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10547
10548 if (flag_expensive_optimizations
10549 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10550 set_once = 1;
10551
10552 if (insn == 0)
10553 {
10554 if (set_once)
10555 insn = get_last_insn_anywhere ();
10556 else
10557 return 0;
10558 }
10559
10560 while ((insn = PREV_INSN (insn)))
10561 {
10562 switch (GET_CODE (insn))
10563 {
10564 case JUMP_INSN:
10565 case NOTE:
10566 break;
10567 case CODE_LABEL:
10568 case CALL_INSN:
10569 default:
10570 if (! set_once)
10571 return 0;
10572 break;
10573 case INSN:
10574 {
10575 rtx pat = PATTERN (insn);
10576 if (GET_CODE (pat) != SET)
10577 return 0;
10578 if (rtx_equal_p (x, SET_DEST (pat)))
10579 return set_extends (insn);
10580 if (y && rtx_equal_p (y, SET_DEST (pat)))
10581 return set_extends (insn);
10582 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10583 return 0;
10584 }
10585 }
10586 }
10587 return 0;
10588 }
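/* Illustrative case (not from the sources): if the only definition of %o0
   reaching INSN is

     (set (reg:SI %o0) (lshiftrt:SI (reg:SI %o1) (const_int 3)))

   then set_extends reports that the value is zero-extended and
   sparc_check_64 returns 1; a SIGN_EXTEND definition would make it
   return -1 instead.  */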
10589
10590 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10591 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10592
10593 const char *
10594 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10595 {
10596 static char asm_code[60];
10597
10598 /* The scratch register is only required when the destination
10599 register is not a 64-bit global or out register. */
10600 if (which_alternative != 2)
10601 operands[3] = operands[0];
10602
10603 /* We can only shift by constants <= 63. */
10604 if (GET_CODE (operands[2]) == CONST_INT)
10605 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10606
10607 if (GET_CODE (operands[1]) == CONST_INT)
10608 {
10609 output_asm_insn ("mov\t%1, %3", operands);
10610 }
10611 else
10612 {
10613 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10614 if (sparc_check_64 (operands[1], insn) <= 0)
10615 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10616 output_asm_insn ("or\t%L1, %3, %3", operands);
10617 }
10618
10619 strcpy (asm_code, opcode);
10620
10621 if (which_alternative != 2)
10622 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10623 else
10624 return
10625 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10626 }
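/* Illustrative output (informal): for the first alternative, where operand 3
   aliases operand 0, a register source whose upper half is not known to be
   zero, and OPCODE "sllx", the code above emits something like

     sllx  %H1, 32, %3
     srl   %L1, 0, %L1
     or    %L1, %3, %3
     sllx  %0, %2, %L0
     srlx  %L0, 32, %H0

   i.e. the 64-bit value is first assembled in the destination register,
   shifted, and then split back into its two 32-bit halves.  */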
10627 \f
10628 /* Output rtl to increment the profiler label LABELNO
10629 for profiling a function entry. */
10630
10631 void
10632 sparc_profile_hook (int labelno)
10633 {
10634 char buf[32];
10635 rtx lab, fun;
10636
10637 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10638 if (NO_PROFILE_COUNTERS)
10639 {
10640 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10641 }
10642 else
10643 {
10644 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10645 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10646 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10647 }
10648 }
10649 \f
10650 #ifdef TARGET_SOLARIS
10651 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10652
10653 static void
10654 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10655 tree decl ATTRIBUTE_UNUSED)
10656 {
10657 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10658 {
10659 solaris_elf_asm_comdat_section (name, flags, decl);
10660 return;
10661 }
10662
10663 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10664
10665 if (!(flags & SECTION_DEBUG))
10666 fputs (",#alloc", asm_out_file);
10667 #if HAVE_GAS_SECTION_EXCLUDE
10668 if (flags & SECTION_EXCLUDE)
10669 fputs (",#exclude", asm_out_file);
10670 #endif
10671 if (flags & SECTION_WRITE)
10672 fputs (",#write", asm_out_file);
10673 if (flags & SECTION_TLS)
10674 fputs (",#tls", asm_out_file);
10675 if (flags & SECTION_CODE)
10676 fputs (",#execinstr", asm_out_file);
10677
10678 if (flags & SECTION_NOTYPE)
10679 ;
10680 else if (flags & SECTION_BSS)
10681 fputs (",#nobits", asm_out_file);
10682 else
10683 fputs (",#progbits", asm_out_file);
10684
10685 fputc ('\n', asm_out_file);
10686 }
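/* Illustrative output (for exposition only): for an ordinary writable data
   section the function above emits a directive along the lines of

     .section ".my_section",#alloc,#write,#progbits

   with #tls, #execinstr or #nobits substituted in as the section flags
   dictate.  */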
10687 #endif /* TARGET_SOLARIS */
10688
10689 /* We do not allow indirect calls to be optimized into sibling calls.
10690
10691 We cannot use sibling calls when delayed branches are disabled
10692 because they will likely require the call delay slot to be filled.
10693
10694 Also, on SPARC 32-bit we cannot emit a sibling call when the
10695 current function returns a structure. This is because the "unimp
10696 after call" convention would cause the callee to return to the
10697 wrong place. The generic code already disallows cases where the
10698 function being called returns a structure.
10699
10700 It may seem strange how this last case could occur. Usually there
10701 is code after the call which jumps to epilogue code which dumps the
10702 return value into the struct return area. That ought to invalidate
10703 the sibling call right? Well, in the C++ case we can end up passing
10704 the pointer to the struct return area to a constructor (which returns
10705 void) and then nothing else happens. Such a sibling call would look
10706 valid without the added check here.
10707
10708 VxWorks PIC PLT entries require the global pointer to be initialized
10709 on entry. We therefore can't emit sibling calls to them. */
10710 static bool
10711 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10712 {
10713 return (decl
10714 && flag_delayed_branch
10715 && (TARGET_ARCH64 || ! cfun->returns_struct)
10716 && !(TARGET_VXWORKS_RTP
10717 && flag_pic
10718 && !targetm.binds_local_p (decl)));
10719 }
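/* Illustrative consequence (not from the sources): an indirect call can never
   become a sibling call here because DECL is null, e.g.

     void f (void (*fp) (void)) { fp (); }

   keeps a full call to fp even though it is in tail position, matching the
   first paragraph of the comment above.  */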
10720 \f
10721 /* libfunc renaming. */
10722
10723 static void
10724 sparc_init_libfuncs (void)
10725 {
10726 if (TARGET_ARCH32)
10727 {
10728 /* Use the subroutines that Sun's library provides for integer
10729 multiply and divide. The `*' prevents an underscore from
10730 being prepended by the compiler. .umul is a little faster
10731 than .mul. */
10732 set_optab_libfunc (smul_optab, SImode, "*.umul");
10733 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10734 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10735 set_optab_libfunc (smod_optab, SImode, "*.rem");
10736 set_optab_libfunc (umod_optab, SImode, "*.urem");
10737
10738 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10739 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10740 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10741 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10742 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10743 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10744
10745 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10746 is because with soft-float, the SFmode and DFmode sqrt
10747 instructions will be absent, and the compiler will notice and
10748 try to use the TFmode sqrt instruction for calls to the
10749 builtin function sqrt, but this fails. */
10750 if (TARGET_FPU)
10751 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10752
10753 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10754 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10755 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10756 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10757 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10758 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10759
10760 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10761 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10762 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10763 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10764
10765 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10766 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10767 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10768 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10769
10770 if (DITF_CONVERSION_LIBFUNCS)
10771 {
10772 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10773 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10774 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10775 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10776 }
10777
10778 if (SUN_CONVERSION_LIBFUNCS)
10779 {
10780 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10781 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10782 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10783 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10784 }
10785 }
10786 if (TARGET_ARCH64)
10787 {
10788 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10789 do not exist in the library. Make sure the compiler does not
10790 emit calls to them by accident. (It should always use the
10791 hardware instructions.) */
10792 set_optab_libfunc (smul_optab, SImode, 0);
10793 set_optab_libfunc (sdiv_optab, SImode, 0);
10794 set_optab_libfunc (udiv_optab, SImode, 0);
10795 set_optab_libfunc (smod_optab, SImode, 0);
10796 set_optab_libfunc (umod_optab, SImode, 0);
10797
10798 if (SUN_INTEGER_MULTIPLY_64)
10799 {
10800 set_optab_libfunc (smul_optab, DImode, "__mul64");
10801 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10802 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10803 set_optab_libfunc (smod_optab, DImode, "__rem64");
10804 set_optab_libfunc (umod_optab, DImode, "__urem64");
10805 }
10806
10807 if (SUN_CONVERSION_LIBFUNCS)
10808 {
10809 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10810 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10811 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10812 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10813 }
10814 }
10815 }
10816 \f
10817 /* SPARC builtins. */
10818 enum sparc_builtins
10819 {
10820 /* FPU builtins. */
10821 SPARC_BUILTIN_LDFSR,
10822 SPARC_BUILTIN_STFSR,
10823
10824 /* VIS 1.0 builtins. */
10825 SPARC_BUILTIN_FPACK16,
10826 SPARC_BUILTIN_FPACK32,
10827 SPARC_BUILTIN_FPACKFIX,
10828 SPARC_BUILTIN_FEXPAND,
10829 SPARC_BUILTIN_FPMERGE,
10830 SPARC_BUILTIN_FMUL8X16,
10831 SPARC_BUILTIN_FMUL8X16AU,
10832 SPARC_BUILTIN_FMUL8X16AL,
10833 SPARC_BUILTIN_FMUL8SUX16,
10834 SPARC_BUILTIN_FMUL8ULX16,
10835 SPARC_BUILTIN_FMULD8SUX16,
10836 SPARC_BUILTIN_FMULD8ULX16,
10837 SPARC_BUILTIN_FALIGNDATAV4HI,
10838 SPARC_BUILTIN_FALIGNDATAV8QI,
10839 SPARC_BUILTIN_FALIGNDATAV2SI,
10840 SPARC_BUILTIN_FALIGNDATADI,
10841 SPARC_BUILTIN_WRGSR,
10842 SPARC_BUILTIN_RDGSR,
10843 SPARC_BUILTIN_ALIGNADDR,
10844 SPARC_BUILTIN_ALIGNADDRL,
10845 SPARC_BUILTIN_PDIST,
10846 SPARC_BUILTIN_EDGE8,
10847 SPARC_BUILTIN_EDGE8L,
10848 SPARC_BUILTIN_EDGE16,
10849 SPARC_BUILTIN_EDGE16L,
10850 SPARC_BUILTIN_EDGE32,
10851 SPARC_BUILTIN_EDGE32L,
10852 SPARC_BUILTIN_FCMPLE16,
10853 SPARC_BUILTIN_FCMPLE32,
10854 SPARC_BUILTIN_FCMPNE16,
10855 SPARC_BUILTIN_FCMPNE32,
10856 SPARC_BUILTIN_FCMPGT16,
10857 SPARC_BUILTIN_FCMPGT32,
10858 SPARC_BUILTIN_FCMPEQ16,
10859 SPARC_BUILTIN_FCMPEQ32,
10860 SPARC_BUILTIN_FPADD16,
10861 SPARC_BUILTIN_FPADD16S,
10862 SPARC_BUILTIN_FPADD32,
10863 SPARC_BUILTIN_FPADD32S,
10864 SPARC_BUILTIN_FPSUB16,
10865 SPARC_BUILTIN_FPSUB16S,
10866 SPARC_BUILTIN_FPSUB32,
10867 SPARC_BUILTIN_FPSUB32S,
10868 SPARC_BUILTIN_ARRAY8,
10869 SPARC_BUILTIN_ARRAY16,
10870 SPARC_BUILTIN_ARRAY32,
10871
10872 /* VIS 2.0 builtins. */
10873 SPARC_BUILTIN_EDGE8N,
10874 SPARC_BUILTIN_EDGE8LN,
10875 SPARC_BUILTIN_EDGE16N,
10876 SPARC_BUILTIN_EDGE16LN,
10877 SPARC_BUILTIN_EDGE32N,
10878 SPARC_BUILTIN_EDGE32LN,
10879 SPARC_BUILTIN_BMASK,
10880 SPARC_BUILTIN_BSHUFFLEV4HI,
10881 SPARC_BUILTIN_BSHUFFLEV8QI,
10882 SPARC_BUILTIN_BSHUFFLEV2SI,
10883 SPARC_BUILTIN_BSHUFFLEDI,
10884
10885 /* VIS 3.0 builtins. */
10886 SPARC_BUILTIN_CMASK8,
10887 SPARC_BUILTIN_CMASK16,
10888 SPARC_BUILTIN_CMASK32,
10889 SPARC_BUILTIN_FCHKSM16,
10890 SPARC_BUILTIN_FSLL16,
10891 SPARC_BUILTIN_FSLAS16,
10892 SPARC_BUILTIN_FSRL16,
10893 SPARC_BUILTIN_FSRA16,
10894 SPARC_BUILTIN_FSLL32,
10895 SPARC_BUILTIN_FSLAS32,
10896 SPARC_BUILTIN_FSRL32,
10897 SPARC_BUILTIN_FSRA32,
10898 SPARC_BUILTIN_PDISTN,
10899 SPARC_BUILTIN_FMEAN16,
10900 SPARC_BUILTIN_FPADD64,
10901 SPARC_BUILTIN_FPSUB64,
10902 SPARC_BUILTIN_FPADDS16,
10903 SPARC_BUILTIN_FPADDS16S,
10904 SPARC_BUILTIN_FPSUBS16,
10905 SPARC_BUILTIN_FPSUBS16S,
10906 SPARC_BUILTIN_FPADDS32,
10907 SPARC_BUILTIN_FPADDS32S,
10908 SPARC_BUILTIN_FPSUBS32,
10909 SPARC_BUILTIN_FPSUBS32S,
10910 SPARC_BUILTIN_FUCMPLE8,
10911 SPARC_BUILTIN_FUCMPNE8,
10912 SPARC_BUILTIN_FUCMPGT8,
10913 SPARC_BUILTIN_FUCMPEQ8,
10914 SPARC_BUILTIN_FHADDS,
10915 SPARC_BUILTIN_FHADDD,
10916 SPARC_BUILTIN_FHSUBS,
10917 SPARC_BUILTIN_FHSUBD,
10918 SPARC_BUILTIN_FNHADDS,
10919 SPARC_BUILTIN_FNHADDD,
10920 SPARC_BUILTIN_UMULXHI,
10921 SPARC_BUILTIN_XMULX,
10922 SPARC_BUILTIN_XMULXHI,
10923
10924 /* VIS 4.0 builtins. */
10925 SPARC_BUILTIN_FPADD8,
10926 SPARC_BUILTIN_FPADDS8,
10927 SPARC_BUILTIN_FPADDUS8,
10928 SPARC_BUILTIN_FPADDUS16,
10929 SPARC_BUILTIN_FPCMPLE8,
10930 SPARC_BUILTIN_FPCMPGT8,
10931 SPARC_BUILTIN_FPCMPULE16,
10932 SPARC_BUILTIN_FPCMPUGT16,
10933 SPARC_BUILTIN_FPCMPULE32,
10934 SPARC_BUILTIN_FPCMPUGT32,
10935 SPARC_BUILTIN_FPMAX8,
10936 SPARC_BUILTIN_FPMAX16,
10937 SPARC_BUILTIN_FPMAX32,
10938 SPARC_BUILTIN_FPMAXU8,
10939 SPARC_BUILTIN_FPMAXU16,
10940 SPARC_BUILTIN_FPMAXU32,
10941 SPARC_BUILTIN_FPMIN8,
10942 SPARC_BUILTIN_FPMIN16,
10943 SPARC_BUILTIN_FPMIN32,
10944 SPARC_BUILTIN_FPMINU8,
10945 SPARC_BUILTIN_FPMINU16,
10946 SPARC_BUILTIN_FPMINU32,
10947 SPARC_BUILTIN_FPSUB8,
10948 SPARC_BUILTIN_FPSUBS8,
10949 SPARC_BUILTIN_FPSUBUS8,
10950 SPARC_BUILTIN_FPSUBUS16,
10951
10952 /* VIS 4.0B builtins. */
10953
10954 /* Note that all the DICTUNPACK* entries should be kept
10955 contiguous. */
10956 SPARC_BUILTIN_FIRST_DICTUNPACK,
10957 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10958 SPARC_BUILTIN_DICTUNPACK16,
10959 SPARC_BUILTIN_DICTUNPACK32,
10960 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10961
10962 /* Note that all the FPCMP*SHL entries should be kept
10963 contiguous. */
10964 SPARC_BUILTIN_FIRST_FPCMPSHL,
10965 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10966 SPARC_BUILTIN_FPCMPGT8SHL,
10967 SPARC_BUILTIN_FPCMPEQ8SHL,
10968 SPARC_BUILTIN_FPCMPNE8SHL,
10969 SPARC_BUILTIN_FPCMPLE16SHL,
10970 SPARC_BUILTIN_FPCMPGT16SHL,
10971 SPARC_BUILTIN_FPCMPEQ16SHL,
10972 SPARC_BUILTIN_FPCMPNE16SHL,
10973 SPARC_BUILTIN_FPCMPLE32SHL,
10974 SPARC_BUILTIN_FPCMPGT32SHL,
10975 SPARC_BUILTIN_FPCMPEQ32SHL,
10976 SPARC_BUILTIN_FPCMPNE32SHL,
10977 SPARC_BUILTIN_FPCMPULE8SHL,
10978 SPARC_BUILTIN_FPCMPUGT8SHL,
10979 SPARC_BUILTIN_FPCMPULE16SHL,
10980 SPARC_BUILTIN_FPCMPUGT16SHL,
10981 SPARC_BUILTIN_FPCMPULE32SHL,
10982 SPARC_BUILTIN_FPCMPUGT32SHL,
10983 SPARC_BUILTIN_FPCMPDE8SHL,
10984 SPARC_BUILTIN_FPCMPDE16SHL,
10985 SPARC_BUILTIN_FPCMPDE32SHL,
10986 SPARC_BUILTIN_FPCMPUR8SHL,
10987 SPARC_BUILTIN_FPCMPUR16SHL,
10988 SPARC_BUILTIN_FPCMPUR32SHL,
10989 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10990
10991 SPARC_BUILTIN_MAX
10992 };
10993
10994 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10995 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10996
10997 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10998 The instruction should require a constant operand of some sort. The
10999 function prints an error if OPVAL is not valid. */
11000
11001 static int
11002 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
11003 {
11004 if (GET_CODE (opval) != CONST_INT)
11005 {
11006 error ("%qs expects a constant argument", insn_data[icode].name);
11007 return false;
11008 }
11009
11010 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
11011 {
11012 error ("constant argument out of range for %qs", insn_data[icode].name);
11013 return false;
11014 }
11015 return true;
11016 }
11017
11018 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
11019 function decl or NULL_TREE if the builtin was not added. */
11020
11021 static tree
11022 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
11023 tree type)
11024 {
11025 tree t
11026 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
11027
11028 if (t)
11029 {
11030 sparc_builtins[code] = t;
11031 sparc_builtins_icode[code] = icode;
11032 }
11033
11034 return t;
11035 }
11036
11037 /* Likewise, but also marks the function as "const". */
11038
11039 static tree
11040 def_builtin_const (const char *name, enum insn_code icode,
11041 enum sparc_builtins code, tree type)
11042 {
11043 tree t = def_builtin (name, icode, code, type);
11044
11045 if (t)
11046 TREE_READONLY (t) = 1;
11047
11048 return t;
11049 }
11050
11051 /* Implement the TARGET_INIT_BUILTINS target hook.
11052 Create builtin functions for special SPARC instructions. */
11053
11054 static void
11055 sparc_init_builtins (void)
11056 {
11057 if (TARGET_FPU)
11058 sparc_fpu_init_builtins ();
11059
11060 if (TARGET_VIS)
11061 sparc_vis_init_builtins ();
11062 }
11063
11064 /* Create builtin functions for FPU instructions. */
11065
11066 static void
11067 sparc_fpu_init_builtins (void)
11068 {
11069 tree ftype
11070 = build_function_type_list (void_type_node,
11071 build_pointer_type (unsigned_type_node), 0);
11072 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11073 SPARC_BUILTIN_LDFSR, ftype);
11074 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11075 SPARC_BUILTIN_STFSR, ftype);
11076 }
11077
11078 /* Create builtin functions for VIS instructions. */
11079
11080 static void
11081 sparc_vis_init_builtins (void)
11082 {
11083 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11084 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11085 tree v4hi = build_vector_type (intHI_type_node, 4);
11086 tree v2hi = build_vector_type (intHI_type_node, 2);
11087 tree v2si = build_vector_type (intSI_type_node, 2);
11088 tree v1si = build_vector_type (intSI_type_node, 1);
11089
11090 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11091 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11092 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11093 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11094 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11095 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11096 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11097 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11098 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11099 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11100 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11101 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11102 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11103 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11104 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11105 v8qi, v8qi,
11106 intDI_type_node, 0);
11107 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11108 v8qi, v8qi, 0);
11109 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11110 v8qi, v8qi, 0);
11111 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11112 intSI_type_node, 0);
11113 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11114 intSI_type_node, 0);
11115 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11116 intDI_type_node, 0);
11117 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11118 intDI_type_node,
11119 intDI_type_node, 0);
11120 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11121 intSI_type_node,
11122 intSI_type_node, 0);
11123 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11124 ptr_type_node,
11125 intSI_type_node, 0);
11126 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11127 ptr_type_node,
11128 intDI_type_node, 0);
11129 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11130 ptr_type_node,
11131 ptr_type_node, 0);
11132 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11133 ptr_type_node,
11134 ptr_type_node, 0);
11135 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11136 v4hi, v4hi, 0);
11137 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11138 v2si, v2si, 0);
11139 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11140 v4hi, v4hi, 0);
11141 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11142 v2si, v2si, 0);
11143 tree void_ftype_di = build_function_type_list (void_type_node,
11144 intDI_type_node, 0);
11145 tree di_ftype_void = build_function_type_list (intDI_type_node,
11146 void_type_node, 0);
11147 tree void_ftype_si = build_function_type_list (void_type_node,
11148 intSI_type_node, 0);
11149 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11150 float_type_node,
11151 float_type_node, 0);
11152 tree df_ftype_df_df = build_function_type_list (double_type_node,
11153 double_type_node,
11154 double_type_node, 0);
11155
11156 /* Packing and expanding vectors. */
11157 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11158 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11159 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11160 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11161 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11162 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11163 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11164 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11165 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11166 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11167
11168 /* Multiplications. */
11169 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11170 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11171 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11172 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11173 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11174 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11175 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11176 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11177 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11178 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11179 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11180 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11181 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11182 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11183
11184 /* Data aligning. */
11185 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11186 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11187 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11188 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11189 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11190 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11191 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11192 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11193
11194 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11195 SPARC_BUILTIN_WRGSR, void_ftype_di);
11196 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11197 SPARC_BUILTIN_RDGSR, di_ftype_void);
11198
11199 if (TARGET_ARCH64)
11200 {
11201 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11202 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11203 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11204 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11205 }
11206 else
11207 {
11208 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11209 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11210 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11211 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11212 }
11213
11214 /* Pixel distance. */
11215 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11216 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11217
11218 /* Edge handling. */
11219 if (TARGET_ARCH64)
11220 {
11221 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11222 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11223 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11224 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11225 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11226 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11227 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11228 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11229 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11230 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11231 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11232 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11233 }
11234 else
11235 {
11236 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11237 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11238 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11239 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11240 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11241 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11242 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11243 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11244 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11245 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11246 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11247 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11248 }
11249
11250 /* Pixel compare. */
11251 if (TARGET_ARCH64)
11252 {
11253 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11254 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11255 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11256 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11257 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11258 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11259 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11260 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11261 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11262 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11263 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11264 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11265 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11266 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11267 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11268 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11269 }
11270 else
11271 {
11272 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11273 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11274 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11275 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11276 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11277 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11278 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11279 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11280 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11281 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11282 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11283 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11284 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11285 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11286 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11287 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11288 }
11289
11290 /* Addition and subtraction. */
11291 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11292 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11293 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11294 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11295 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11296 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11297 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11298 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11299 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11300 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11301 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11302 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11303 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11304 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11305 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11306 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11307
11308 /* Three-dimensional array addressing. */
11309 if (TARGET_ARCH64)
11310 {
11311 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11312 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11313 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11314 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11315 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11316 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11317 }
11318 else
11319 {
11320 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11321 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11322 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11323 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11324 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11325 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11326 }
11327
11328 if (TARGET_VIS2)
11329 {
11330 /* Edge handling. */
11331 if (TARGET_ARCH64)
11332 {
11333 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11334 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11335 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11336 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11337 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11338 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11339 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11340 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11341 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11342 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11343 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11344 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11345 }
11346 else
11347 {
11348 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11349 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11350 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11351 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11352 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11353 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11354 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11355 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11356 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11357 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11358 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11359 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11360 }
11361
11362 /* Byte mask and shuffle. */
11363 if (TARGET_ARCH64)
11364 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11365 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11366 else
11367 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11368 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11369 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11370 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11371 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11372 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11373 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11374 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11375 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11376 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11377 }
11378
11379 if (TARGET_VIS3)
11380 {
11381 if (TARGET_ARCH64)
11382 {
11383 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11384 SPARC_BUILTIN_CMASK8, void_ftype_di);
11385 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11386 SPARC_BUILTIN_CMASK16, void_ftype_di);
11387 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11388 SPARC_BUILTIN_CMASK32, void_ftype_di);
11389 }
11390 else
11391 {
11392 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11393 SPARC_BUILTIN_CMASK8, void_ftype_si);
11394 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11395 SPARC_BUILTIN_CMASK16, void_ftype_si);
11396 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11397 SPARC_BUILTIN_CMASK32, void_ftype_si);
11398 }
11399
11400 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11401 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11402
11403 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11404 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11405 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11406 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11407 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11408 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11409 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11410 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11411 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11412 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11413 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11414 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11415 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11416 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11417 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11418 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11419
11420 if (TARGET_ARCH64)
11421 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11422 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11423 else
11424 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11425 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11426
11427 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11428 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11429 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11430 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11431 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11432 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11433
11434 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11435 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11436 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11437 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11438 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11439 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11440 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11441 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11442 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11443 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11444 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11445 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11446 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11447 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11448 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11449 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11450
11451 if (TARGET_ARCH64)
11452 {
11453 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11454 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11455 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11456 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11457 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11458 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11459 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11460 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11461 }
11462 else
11463 {
11464 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11465 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11466 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11467 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11468 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11469 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11470 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11471 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11472 }
11473
11474 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11475 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11476 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11477 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11478 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11479 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11480 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11481 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11482 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11483 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11484 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11485 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11486
11487 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11488 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11489 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11490 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11491 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11492 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11493 }
11494
11495 if (TARGET_VIS4)
11496 {
11497 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11498 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11499 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11500 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11501 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11502 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11503 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11504 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11505
11506
11507 if (TARGET_ARCH64)
11508 {
11509 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11510 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11511 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11512 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11513 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11514 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11515 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11516 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11517 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11518 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11519 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11520 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11521 }
11522 else
11523 {
11524 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11525 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11526 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11527 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11528 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11529 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11530 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11531 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11532 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11533 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11534 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11535 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11536 }
11537
11538 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11539 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11540 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11541 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11542 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11543 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11544 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11545 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11546 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11547 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11548 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11549 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11550 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11551 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11552 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11553 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11554 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11555 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11556 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11557 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11558 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11559 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11560 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11561 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11562 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11563 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11564 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11565 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11566 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11567 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11568 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11569 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11570 }
11571
11572 if (TARGET_VIS4B)
11573 {
11574 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11575 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11576 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11577 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11578 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11579 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11580
11581 if (TARGET_ARCH64)
11582 {
11583 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11584 v8qi, v8qi,
11585 intSI_type_node, 0);
11586 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11587 v4hi, v4hi,
11588 intSI_type_node, 0);
11589 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11590 v2si, v2si,
11591 intSI_type_node, 0);
11592
11593 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11594 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11595 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11596 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11597 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11598 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11599 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11600 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11601
11602 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11603 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11604 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11605 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11606 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11607 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11608 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11609 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11610
11611 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11612 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11613 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11614 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11615 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11616 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11617 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11618 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11619
11620
11621 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11622 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11623 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11624 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11625
11626 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11627 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11628 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11629 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11630
11631 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11632 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11633 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11634 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11635
11636 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11637 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11638 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11639 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11640 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11641 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11642
11643 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11644 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11645 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11646 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11647 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11648 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11649
11650 }
11651 else
11652 {
11653 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11654 v8qi, v8qi,
11655 intSI_type_node, 0);
11656 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11657 v4hi, v4hi,
11658 intSI_type_node, 0);
11659 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11660 v2si, v2si,
11661 intSI_type_node, 0);
11662
11663 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11664 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11665 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11666 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11667 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11668 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11669 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11670 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11671
11672 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11673 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11674 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11675 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11676 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11677 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11678 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11679 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11680
11681 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11682 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11683 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11684 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11685 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11686 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11687 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11688 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11689
11690
11691 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11692 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11693 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11694 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11695
11696 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11697 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11698 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11699 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11700
11701 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11702 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11703 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11704 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11705
11706 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11707 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11708 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11709 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11710 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11711 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11712
11713 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11714 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11715 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11716 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11717 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11718 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11719 }
11720 }
11721 }
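/* A minimal usage sketch, for illustration only (kept under #if 0):
   with -mvis3 the builtins registered above are callable directly from
   C, using the GCC vector extensions that correspond to the tree vector
   types built at the top of this function (v4hi, v8qi, ...).  The two
   builtins shown use the signatures registered above: fpadds16 is
   v4hi x v4hi -> v4hi and pdist is v8qi x v8qi x DI -> DI.  */
#if 0
typedef short vis_v4hi __attribute__ ((vector_size (8)));
typedef unsigned char vis_v8qi __attribute__ ((vector_size (8)));

static vis_v4hi
saturating_add (vis_v4hi a, vis_v4hi b)
{
  return __builtin_vis_fpadds16 (a, b);	/* partitioned saturating add */
}

static long long
sum_abs_diff (vis_v8qi a, vis_v8qi b, long long acc)
{
  return __builtin_vis_pdist (a, b, acc);	/* accumulate |a[i] - b[i]| */
}
#endif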
11722
11723 /* Implement TARGET_BUILTIN_DECL hook. */
11724
11725 static tree
11726 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11727 {
11728 if (code >= SPARC_BUILTIN_MAX)
11729 return error_mark_node;
11730
11731 return sparc_builtins[code];
11732 }
11733
11734 /* Implement TARGET_EXPAND_BUILTIN hook. */
11735
11736 static rtx
11737 sparc_expand_builtin (tree exp, rtx target,
11738 rtx subtarget ATTRIBUTE_UNUSED,
11739 machine_mode tmode ATTRIBUTE_UNUSED,
11740 int ignore ATTRIBUTE_UNUSED)
11741 {
11742 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11743 enum sparc_builtins code
11744 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11745 enum insn_code icode = sparc_builtins_icode[code];
11746 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11747 call_expr_arg_iterator iter;
11748 int arg_count = 0;
11749 rtx pat, op[4];
11750 tree arg;
11751
11752 if (nonvoid)
11753 {
11754 machine_mode tmode = insn_data[icode].operand[0].mode;
11755 if (!target
11756 || GET_MODE (target) != tmode
11757 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11758 op[0] = gen_reg_rtx (tmode);
11759 else
11760 op[0] = target;
11761 }
11762 else
11763 op[0] = NULL_RTX;
11764
11765 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11766 {
11767 const struct insn_operand_data *insn_op;
11768 int idx;
11769
11770 if (arg == error_mark_node)
11771 return NULL_RTX;
11772
11773 arg_count++;
11774 idx = arg_count - !nonvoid;
11775 insn_op = &insn_data[icode].operand[idx];
11776 op[arg_count] = expand_normal (arg);
11777
11778 /* Some of the builtins require constant arguments. We check
11779 for this here. */
11780 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11781 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11782 && arg_count == 3)
11783 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11784 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11785 && arg_count == 2))
11786 {
11787 if (!check_constant_argument (icode, idx, op[arg_count]))
11788 return const0_rtx;
11789 }
11790
11791 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11792 {
11793 if (!address_operand (op[arg_count], SImode))
11794 {
11795 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11796 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11797 }
11798 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11799 }
11800
11801 else if (insn_op->mode == V1DImode
11802 && GET_MODE (op[arg_count]) == DImode)
11803 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11804
11805 else if (insn_op->mode == V1SImode
11806 && GET_MODE (op[arg_count]) == SImode)
11807 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11808
11809 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11810 insn_op->mode))
11811 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11812 }
11813
11814 switch (arg_count)
11815 {
11816 case 0:
11817 pat = GEN_FCN (icode) (op[0]);
11818 break;
11819 case 1:
11820 if (nonvoid)
11821 pat = GEN_FCN (icode) (op[0], op[1]);
11822 else
11823 pat = GEN_FCN (icode) (op[1]);
11824 break;
11825 case 2:
11826 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11827 break;
11828 case 3:
11829 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11830 break;
11831 default:
11832 gcc_unreachable ();
11833 }
11834
11835 if (!pat)
11836 return NULL_RTX;
11837
11838 emit_insn (pat);
11839
11840 return (nonvoid ? op[0] : const0_rtx);
11841 }
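/* Usage sketch, for illustration only (kept under #if 0): the dictunpack
   and fpcmp*shl builtins special-cased above require a literal constant
   for their final operand; if check_constant_argument rejects it, the
   expansion above simply yields const0_rtx.  The selector value 7 below
   is an arbitrary example.  */
#if 0
typedef unsigned char vis_v8qi __attribute__ ((vector_size (8)));

static vis_v8qi
unpack_dict (double dict)
{
  /* Requires -mvis4b; the second operand must be a compile-time constant.  */
  return __builtin_vis_dictunpack8 (dict, 7);
}
#endif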
11842
11843 /* Return the upper 16 bits of the 8x16 multiplication. */
11844
11845 static int
11846 sparc_vis_mul8x16 (int e8, int e16)
11847 {
11848 return (e8 * e16 + 128) / 256;
11849 }
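/* For instance, e8 = 3 and e16 = 150 give (3 * 150 + 128) / 256
   = 578 / 256 = 2: the exact product 450 is scaled by 1/256 and rounded
   to nearest (450 / 256 is about 1.76).  */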
11850
11851 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11852 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11853
11854 static void
11855 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11856 tree inner_type, tree cst0, tree cst1)
11857 {
11858 unsigned i, num = VECTOR_CST_NELTS (cst0);
11859 int scale;
11860
11861 switch (fncode)
11862 {
11863 case SPARC_BUILTIN_FMUL8X16:
11864 for (i = 0; i < num; ++i)
11865 {
11866 int val
11867 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11868 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11869 n_elts->quick_push (build_int_cst (inner_type, val));
11870 }
11871 break;
11872
11873 case SPARC_BUILTIN_FMUL8X16AU:
11874 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11875
11876 for (i = 0; i < num; ++i)
11877 {
11878 int val
11879 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11880 scale);
11881 n_elts->quick_push (build_int_cst (inner_type, val));
11882 }
11883 break;
11884
11885 case SPARC_BUILTIN_FMUL8X16AL:
11886 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11887
11888 for (i = 0; i < num; ++i)
11889 {
11890 int val
11891 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11892 scale);
11893 n_elts->quick_push (build_int_cst (inner_type, val));
11894 }
11895 break;
11896
11897 default:
11898 gcc_unreachable ();
11899 }
11900 }
11901
11902 /* Implement TARGET_FOLD_BUILTIN hook.
11903
11904 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11905 result of the function call is ignored. NULL_TREE is returned if the
11906 function could not be folded. */
11907
11908 static tree
11909 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11910 tree *args, bool ignore)
11911 {
11912 enum sparc_builtins code
11913 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11914 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11915 tree arg0, arg1, arg2;
11916
11917 if (ignore)
11918 switch (code)
11919 {
11920 case SPARC_BUILTIN_LDFSR:
11921 case SPARC_BUILTIN_STFSR:
11922 case SPARC_BUILTIN_ALIGNADDR:
11923 case SPARC_BUILTIN_WRGSR:
11924 case SPARC_BUILTIN_BMASK:
11925 case SPARC_BUILTIN_CMASK8:
11926 case SPARC_BUILTIN_CMASK16:
11927 case SPARC_BUILTIN_CMASK32:
11928 break;
11929
11930 default:
11931 return build_zero_cst (rtype);
11932 }
11933
11934 switch (code)
11935 {
11936 case SPARC_BUILTIN_FEXPAND:
11937 arg0 = args[0];
11938 STRIP_NOPS (arg0);
11939
11940 if (TREE_CODE (arg0) == VECTOR_CST)
11941 {
11942 tree inner_type = TREE_TYPE (rtype);
11943 unsigned i;
11944
11945 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11946 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11947 {
11948 unsigned HOST_WIDE_INT val
11949 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11950 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11951 }
11952 return n_elts.build ();
11953 }
11954 break;
11955
11956 case SPARC_BUILTIN_FMUL8X16:
11957 case SPARC_BUILTIN_FMUL8X16AU:
11958 case SPARC_BUILTIN_FMUL8X16AL:
11959 arg0 = args[0];
11960 arg1 = args[1];
11961 STRIP_NOPS (arg0);
11962 STRIP_NOPS (arg1);
11963
11964 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11965 {
11966 tree inner_type = TREE_TYPE (rtype);
11967 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11968 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11969 return n_elts.build ();
11970 }
11971 break;
11972
11973 case SPARC_BUILTIN_FPMERGE:
11974 arg0 = args[0];
11975 arg1 = args[1];
11976 STRIP_NOPS (arg0);
11977 STRIP_NOPS (arg1);
11978
11979 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11980 {
11981 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11982 unsigned i;
11983 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11984 {
11985 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11986 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11987 }
11988
11989 return n_elts.build ();
11990 }
11991 break;
11992
11993 case SPARC_BUILTIN_PDIST:
11994 case SPARC_BUILTIN_PDISTN:
11995 arg0 = args[0];
11996 arg1 = args[1];
11997 STRIP_NOPS (arg0);
11998 STRIP_NOPS (arg1);
11999 if (code == SPARC_BUILTIN_PDIST)
12000 {
12001 arg2 = args[2];
12002 STRIP_NOPS (arg2);
12003 }
12004 else
12005 arg2 = integer_zero_node;
12006
12007 if (TREE_CODE (arg0) == VECTOR_CST
12008 && TREE_CODE (arg1) == VECTOR_CST
12009 && TREE_CODE (arg2) == INTEGER_CST)
12010 {
12011 bool overflow = false;
12012 widest_int result = wi::to_widest (arg2);
12013 widest_int tmp;
12014 unsigned i;
12015
12016 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12017 {
12018 tree e0 = VECTOR_CST_ELT (arg0, i);
12019 tree e1 = VECTOR_CST_ELT (arg1, i);
12020
12021 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
12022
12023 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
12024 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
12025 if (wi::neg_p (tmp))
12026 tmp = wi::neg (tmp, &neg2_ovf);
12027 else
12028 neg2_ovf = wi::OVF_NONE;
12029 result = wi::add (result, tmp, SIGNED, &add2_ovf);
12030 overflow |= ((neg1_ovf != wi::OVF_NONE)
12031 | (neg2_ovf != wi::OVF_NONE)
12032 | (add1_ovf != wi::OVF_NONE)
12033 | (add2_ovf != wi::OVF_NONE));
12034 }
12035
12036 gcc_assert (!overflow);
12037
12038 return wide_int_to_tree (rtype, result);
12039 }
12040
12041 default:
12042 break;
12043 }
12044
12045 return NULL_TREE;
12046 }
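/* Worked example of the PDIST folding above: for the constant vectors
   { 1, 2, 3, 4, 5, 6, 7, 8 } and { 8, 7, 6, 5, 4, 3, 2, 1 } with an
   accumulator of 10, the absolute byte differences are
   7 + 5 + 3 + 1 + 1 + 3 + 5 + 7 = 32, so the call folds to 42.  */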
12047 \f
12048 /* ??? This duplicates information provided to the compiler by the
12049 ??? scheduler description. Some day, teach genautomata to output
12050 ??? the latencies and then CSE will just use that. */
12051
12052 static bool
12053 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12054 int opno ATTRIBUTE_UNUSED,
12055 int *total, bool speed ATTRIBUTE_UNUSED)
12056 {
12057 int code = GET_CODE (x);
12058 bool float_mode_p = FLOAT_MODE_P (mode);
12059
12060 switch (code)
12061 {
12062 case CONST_INT:
12063 if (SMALL_INT (x))
12064 *total = 0;
12065 else
12066 *total = 2;
12067 return true;
12068
12069 case CONST_WIDE_INT:
12070 *total = 0;
12071 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12072 *total += 2;
12073 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12074 *total += 2;
12075 return true;
12076
12077 case HIGH:
12078 *total = 2;
12079 return true;
12080
12081 case CONST:
12082 case LABEL_REF:
12083 case SYMBOL_REF:
12084 *total = 4;
12085 return true;
12086
12087 case CONST_DOUBLE:
12088 *total = 8;
12089 return true;
12090
12091 case MEM:
12092 /* If outer-code was a sign or zero extension, a cost
12093 of COSTS_N_INSNS (1) was already added in. This is
12094 why we are subtracting it back out. */
12095 if (outer_code == ZERO_EXTEND)
12096 {
12097 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12098 }
12099 else if (outer_code == SIGN_EXTEND)
12100 {
12101 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12102 }
12103 else if (float_mode_p)
12104 {
12105 *total = sparc_costs->float_load;
12106 }
12107 else
12108 {
12109 *total = sparc_costs->int_load;
12110 }
12111
12112 return true;
12113
12114 case PLUS:
12115 case MINUS:
12116 if (float_mode_p)
12117 *total = sparc_costs->float_plusminus;
12118 else
12119 *total = COSTS_N_INSNS (1);
12120 return false;
12121
12122 case FMA:
12123 {
12124 rtx sub;
12125
12126 gcc_assert (float_mode_p);
12127 *total = sparc_costs->float_mul;
12128
12129 sub = XEXP (x, 0);
12130 if (GET_CODE (sub) == NEG)
12131 sub = XEXP (sub, 0);
12132 *total += rtx_cost (sub, mode, FMA, 0, speed);
12133
12134 sub = XEXP (x, 2);
12135 if (GET_CODE (sub) == NEG)
12136 sub = XEXP (sub, 0);
12137 *total += rtx_cost (sub, mode, FMA, 2, speed);
12138 return true;
12139 }
12140
12141 case MULT:
12142 if (float_mode_p)
12143 *total = sparc_costs->float_mul;
12144 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12145 *total = COSTS_N_INSNS (25);
12146 else
12147 {
12148 int bit_cost;
12149
12150 bit_cost = 0;
12151 if (sparc_costs->int_mul_bit_factor)
12152 {
12153 int nbits;
12154
12155 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12156 {
12157 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12158 for (nbits = 0; value != 0; value &= value - 1)
12159 nbits++;
12160 }
12161 else
12162 nbits = 7;
12163
12164 if (nbits < 3)
12165 nbits = 3;
12166 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12167 bit_cost = COSTS_N_INSNS (bit_cost);
12168 }
12169
12170 if (mode == DImode || !TARGET_HARD_MUL)
12171 *total = sparc_costs->int_mulX + bit_cost;
12172 else
12173 *total = sparc_costs->int_mul + bit_cost;
12174 }
12175 return false;
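/* For example, a multiplication by the constant 0xff has nbits = 8 set
   bits; with an int_mul_bit_factor of 2 (purely illustrative) the code
   above adds COSTS_N_INSNS ((8 - 3) / 2) = COSTS_N_INSNS (2) on top of
   the base int_mul or int_mulX cost.  */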
12176
12177 case ASHIFT:
12178 case ASHIFTRT:
12179 case LSHIFTRT:
12180 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12181 return false;
12182
12183 case DIV:
12184 case UDIV:
12185 case MOD:
12186 case UMOD:
12187 if (float_mode_p)
12188 {
12189 if (mode == DFmode)
12190 *total = sparc_costs->float_div_df;
12191 else
12192 *total = sparc_costs->float_div_sf;
12193 }
12194 else
12195 {
12196 if (mode == DImode)
12197 *total = sparc_costs->int_divX;
12198 else
12199 *total = sparc_costs->int_div;
12200 }
12201 return false;
12202
12203 case NEG:
12204 if (! float_mode_p)
12205 {
12206 *total = COSTS_N_INSNS (1);
12207 return false;
12208 }
12209 /* FALLTHRU */
12210
12211 case ABS:
12212 case FLOAT:
12213 case UNSIGNED_FLOAT:
12214 case FIX:
12215 case UNSIGNED_FIX:
12216 case FLOAT_EXTEND:
12217 case FLOAT_TRUNCATE:
12218 *total = sparc_costs->float_move;
12219 return false;
12220
12221 case SQRT:
12222 if (mode == DFmode)
12223 *total = sparc_costs->float_sqrt_df;
12224 else
12225 *total = sparc_costs->float_sqrt_sf;
12226 return false;
12227
12228 case COMPARE:
12229 if (float_mode_p)
12230 *total = sparc_costs->float_cmp;
12231 else
12232 *total = COSTS_N_INSNS (1);
12233 return false;
12234
12235 case IF_THEN_ELSE:
12236 if (float_mode_p)
12237 *total = sparc_costs->float_cmove;
12238 else
12239 *total = sparc_costs->int_cmove;
12240 return false;
12241
12242 case IOR:
12243 /* Handle the NAND vector patterns. */
12244 if (sparc_vector_mode_supported_p (mode)
12245 && GET_CODE (XEXP (x, 0)) == NOT
12246 && GET_CODE (XEXP (x, 1)) == NOT)
12247 {
12248 *total = COSTS_N_INSNS (1);
12249 return true;
12250 }
12251 else
12252 return false;
12253
12254 default:
12255 return false;
12256 }
12257 }
12258
12259 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12260
12261 static inline bool
12262 general_or_i64_p (reg_class_t rclass)
12263 {
12264 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12265 }
12266
12267 /* Implement TARGET_REGISTER_MOVE_COST. */
12268
12269 static int
12270 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12271 reg_class_t from, reg_class_t to)
12272 {
12273 bool need_memory = false;
12274
12275 /* This helps postreload CSE to eliminate redundant comparisons. */
12276 if (from == NO_REGS || to == NO_REGS)
12277 return 100;
12278
12279 if (from == FPCC_REGS || to == FPCC_REGS)
12280 need_memory = true;
12281 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12282 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12283 {
12284 if (TARGET_VIS3)
12285 {
12286 int size = GET_MODE_SIZE (mode);
12287 if (size == 8 || size == 4)
12288 {
12289 if (! TARGET_ARCH32 || size == 4)
12290 return 4;
12291 else
12292 return 6;
12293 }
12294 }
12295 need_memory = true;
12296 }
12297
12298 if (need_memory)
12299 {
12300 if (sparc_cpu == PROCESSOR_ULTRASPARC
12301 || sparc_cpu == PROCESSOR_ULTRASPARC3
12302 || sparc_cpu == PROCESSOR_NIAGARA
12303 || sparc_cpu == PROCESSOR_NIAGARA2
12304 || sparc_cpu == PROCESSOR_NIAGARA3
12305 || sparc_cpu == PROCESSOR_NIAGARA4
12306 || sparc_cpu == PROCESSOR_NIAGARA7
12307 || sparc_cpu == PROCESSOR_M8)
12308 return 12;
12309
12310 return 6;
12311 }
12312
12313 return 2;
12314 }
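/* For example, with VIS3 a 32-bit value moves directly between the
   integer and FP register files at cost 4; a 64-bit value also costs 4
   on a 64-bit target but 6 on a 32-bit one.  Without VIS3 such moves go
   through memory and cost 12 on the UltraSPARC/Niagara-class CPUs
   listed above, 6 otherwise.  */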
12315
12316 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12317 This is achieved by means of a manual dynamic stack space allocation in
12318 the current frame. We make the assumption that SEQ doesn't contain any
12319 function calls, with the possible exception of calls to the GOT helper. */
12320
12321 static void
12322 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12323 {
12324 /* We must preserve the lowest 16 words for the register save area. */
12325 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12326 /* We really need only 2 words of fresh stack space. */
12327 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12328
12329 rtx slot
12330 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12331 SPARC_STACK_BIAS + offset));
12332
12333 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12334 emit_insn (gen_rtx_SET (slot, reg));
12335 if (reg2)
12336 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12337 reg2));
12338 emit_insn (seq);
12339 if (reg2)
12340 emit_insn (gen_rtx_SET (reg2,
12341 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12342 emit_insn (gen_rtx_SET (reg, slot));
12343 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12344 }
12345
12346 /* Output the assembler code for a thunk function. THUNK_DECL is the
12347 declaration for the thunk function itself, FUNCTION is the decl for
12348 the target function. DELTA is an immediate constant offset to be
12349 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12350 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12351
12352 static void
12353 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12354 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12355 tree function)
12356 {
12357 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12358 rtx this_rtx, funexp;
12359 rtx_insn *insn;
12360 unsigned int int_arg_first;
12361
12362 reload_completed = 1;
12363 epilogue_completed = 1;
12364
12365 emit_note (NOTE_INSN_PROLOGUE_END);
12366
12367 if (TARGET_FLAT)
12368 {
12369 sparc_leaf_function_p = 1;
12370
12371 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12372 }
12373 else if (flag_delayed_branch)
12374 {
12375 /* We will emit a regular sibcall below, so we need to instruct
12376 output_sibcall that we are in a leaf function. */
12377 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12378
12379 /* This will cause final.c to invoke leaf_renumber_regs so we
12380 must behave as if we were in a not-yet-leafified function. */
12381 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12382 }
12383 else
12384 {
12385 /* We will emit the sibcall manually below, so we will need to
12386 manually spill non-leaf registers. */
12387 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12388
12389 /* We really are in a leaf function. */
12390 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12391 }
12392
12393 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12394 returns a structure, the structure return pointer is there instead. */
12395 if (TARGET_ARCH64
12396 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12397 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12398 else
12399 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12400
12401 /* Add DELTA. When possible use a plain add, otherwise load it into
12402 a register first. */
12403 if (delta)
12404 {
12405 rtx delta_rtx = GEN_INT (delta);
12406
12407 if (! SPARC_SIMM13_P (delta))
12408 {
12409 rtx scratch = gen_rtx_REG (Pmode, 1);
12410 emit_move_insn (scratch, delta_rtx);
12411 delta_rtx = scratch;
12412 }
12413
12414 /* THIS_RTX += DELTA. */
12415 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12416 }
12417
12418 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12419 if (vcall_offset)
12420 {
12421 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12422 rtx scratch = gen_rtx_REG (Pmode, 1);
12423
12424 gcc_assert (vcall_offset < 0);
12425
12426 /* SCRATCH = *THIS_RTX. */
12427 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12428
12429 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12430 may not have any available scratch register at this point. */
12431 if (SPARC_SIMM13_P (vcall_offset))
12432 ;
12433 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12434 else if (! fixed_regs[5]
12435 /* The below sequence is made up of at least 2 insns,
12436 while the default method may need only one. */
12437 && vcall_offset < -8192)
12438 {
12439 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12440 emit_move_insn (scratch2, vcall_offset_rtx);
12441 vcall_offset_rtx = scratch2;
12442 }
12443 else
12444 {
12445 rtx increment = GEN_INT (-4096);
12446
12447 /* VCALL_OFFSET is a negative number whose typical range can be
12448 estimated as -32768..0 in 32-bit mode. In almost all cases
12449 it is therefore cheaper to emit multiple add insns than
12450 spilling and loading the constant into a register (at least
12451 6 insns). */
12452 while (! SPARC_SIMM13_P (vcall_offset))
12453 {
12454 emit_insn (gen_add2_insn (scratch, increment));
12455 vcall_offset += 4096;
12456 }
12457 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12458 }
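/* For instance, a VCALL_OFFSET of -6000 is not a valid SIMM13, so the
   loop above emits a single add of -4096 and the remaining -1904 is
   folded into the memory reference below.  */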
12459
12460 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12461 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12462 gen_rtx_PLUS (Pmode,
12463 scratch,
12464 vcall_offset_rtx)));
12465
12466 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12467 emit_insn (gen_add2_insn (this_rtx, scratch));
12468 }
12469
12470 /* Generate a tail call to the target function. */
12471 if (! TREE_USED (function))
12472 {
12473 assemble_external (function);
12474 TREE_USED (function) = 1;
12475 }
12476 funexp = XEXP (DECL_RTL (function), 0);
12477
12478 if (flag_delayed_branch)
12479 {
12480 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12481 insn = emit_call_insn (gen_sibcall (funexp));
12482 SIBLING_CALL_P (insn) = 1;
12483 }
12484 else
12485 {
12486 /* The hoops we have to jump through in order to generate a sibcall
12487 without using delay slots... */
12488 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12489
12490 if (flag_pic)
12491 {
12492 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12493 start_sequence ();
12494 load_got_register (); /* clobbers %o7 */
12495 if (!TARGET_VXWORKS_RTP)
12496 pic_offset_table_rtx = got_register_rtx;
12497 scratch = sparc_legitimize_pic_address (funexp, scratch);
12498 seq = get_insns ();
12499 end_sequence ();
12500 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12501 }
12502 else if (TARGET_ARCH32)
12503 {
12504 emit_insn (gen_rtx_SET (scratch,
12505 gen_rtx_HIGH (SImode, funexp)));
12506 emit_insn (gen_rtx_SET (scratch,
12507 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12508 }
12509 else /* TARGET_ARCH64 */
12510 {
12511 switch (sparc_code_model)
12512 {
12513 case CM_MEDLOW:
12514 case CM_MEDMID:
12515 /* The destination can serve as a temporary. */
12516 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12517 break;
12518
12519 case CM_MEDANY:
12520 case CM_EMBMEDANY:
12521 /* The destination cannot serve as a temporary. */
12522 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12523 start_sequence ();
12524 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12525 seq = get_insns ();
12526 end_sequence ();
12527 emit_and_preserve (seq, spill_reg, 0);
12528 break;
12529
12530 default:
12531 gcc_unreachable ();
12532 }
12533 }
12534
12535 emit_jump_insn (gen_indirect_jump (scratch));
12536 }
12537
12538 emit_barrier ();
12539
12540 /* Run just enough of rest_of_compilation to get the insns emitted.
12541 There's not really enough bulk here to make other passes such as
12542 instruction scheduling worth while. */
12543 insn = get_insns ();
12544 shorten_branches (insn);
12545 assemble_start_function (thunk_fndecl, fnname);
12546 final_start_function (insn, file, 1);
12547 final (insn, file, 1);
12548 final_end_function ();
12549 assemble_end_function (thunk_fndecl, fnname);
12550
12551 reload_completed = 0;
12552 epilogue_completed = 0;
12553 }
12554
12555 /* Return true if sparc_output_mi_thunk would be able to output the
12556 assembler code for the thunk function specified by the arguments
12557 it is passed, and false otherwise. */
12558 static bool
12559 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12560 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12561 HOST_WIDE_INT vcall_offset,
12562 const_tree function ATTRIBUTE_UNUSED)
12563 {
12564 /* Bound the loop used in the default method above. */
12565 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12566 }
12567
12568 /* How to allocate a 'struct machine_function'. */
12569
12570 static struct machine_function *
12571 sparc_init_machine_status (void)
12572 {
12573 return ggc_cleared_alloc<machine_function> ();
12574 }
12575 \f
12576 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12577
12578 static unsigned HOST_WIDE_INT
12579 sparc_asan_shadow_offset (void)
12580 {
12581 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12582 }
12583 \f
12584 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12585 We need to emit DTP-relative relocations. */
12586
12587 static void
12588 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12589 {
12590 switch (size)
12591 {
12592 case 4:
12593 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12594 break;
12595 case 8:
12596 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12597 break;
12598 default:
12599 gcc_unreachable ();
12600 }
12601 output_addr_const (file, x);
12602 fputs (")", file);
12603 }
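/* For example, a 4-byte entry referring to symbol X comes out as
   ".word %r_tls_dtpoff32(X)" and an 8-byte entry as
   ".xword %r_tls_dtpoff64(X)".  */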
12604
12605 /* Do whatever processing is required at the end of a file. */
12606
12607 static void
12608 sparc_file_end (void)
12609 {
12610 /* If we need to emit the special GOT helper function, do so now. */
12611 if (got_helper_needed)
12612 {
12613 const char *name = XSTR (got_helper_rtx, 0);
12614 #ifdef DWARF2_UNWIND_INFO
12615 bool do_cfi;
12616 #endif
12617
12618 if (USE_HIDDEN_LINKONCE)
12619 {
12620 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12621 get_identifier (name),
12622 build_function_type_list (void_type_node,
12623 NULL_TREE));
12624 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12625 NULL_TREE, void_type_node);
12626 TREE_PUBLIC (decl) = 1;
12627 TREE_STATIC (decl) = 1;
12628 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12629 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12630 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12631 resolve_unique_section (decl, 0, flag_function_sections);
12632 allocate_struct_function (decl, true);
12633 cfun->is_thunk = 1;
12634 current_function_decl = decl;
12635 init_varasm_status ();
12636 assemble_start_function (decl, name);
12637 }
12638 else
12639 {
12640 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12641 switch_to_section (text_section);
12642 if (align > 0)
12643 ASM_OUTPUT_ALIGN (asm_out_file, align);
12644 ASM_OUTPUT_LABEL (asm_out_file, name);
12645 }
12646
12647 #ifdef DWARF2_UNWIND_INFO
12648 do_cfi = dwarf2out_do_cfi_asm ();
12649 if (do_cfi)
12650 output_asm_insn (".cfi_startproc", NULL);
12651 #endif
12652 if (flag_delayed_branch)
12653 {
12654 output_asm_insn ("jmp\t%%o7+8", NULL);
12655 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12656 }
12657 else
12658 {
12659 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12660 output_asm_insn ("jmp\t%%o7+8", NULL);
12661 output_asm_insn (" nop", NULL);
12662 }
12663 #ifdef DWARF2_UNWIND_INFO
12664 if (do_cfi)
12665 output_asm_insn (".cfi_endproc", NULL);
12666 #endif
12667 }
12668
12669 if (NEED_INDICATE_EXEC_STACK)
12670 file_end_indicate_exec_stack ();
12671
12672 #ifdef TARGET_SOLARIS
12673 solaris_file_end ();
12674 #endif
12675 }
12676
12677 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12678 /* Implement TARGET_MANGLE_TYPE. */
12679
12680 static const char *
12681 sparc_mangle_type (const_tree type)
12682 {
12683 if (TARGET_ARCH32
12684 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12685 && TARGET_LONG_DOUBLE_128)
12686 return "g";
12687
12688 /* For all other types, use normal C++ mangling. */
12689 return NULL;
12690 }
12691 #endif
12692
12693 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12694 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12695 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12696
12697 void
12698 sparc_emit_membar_for_model (enum memmodel model,
12699 int load_store, int before_after)
12700 {
12701 /* Bits for the MEMBAR mmask field. */
12702 const int LoadLoad = 1;
12703 const int StoreLoad = 2;
12704 const int LoadStore = 4;
12705 const int StoreStore = 8;
12706
12707 int mm = 0, implied = 0;
12708
12709 switch (sparc_memory_model)
12710 {
12711 case SMM_SC:
12712 /* Sequential Consistency. All memory transactions are immediately
12713 visible in sequential execution order. No barriers needed. */
12714 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12715 break;
12716
12717 case SMM_TSO:
12718 /* Total Store Ordering: all memory transactions with store semantics
12719 are followed by an implied StoreStore. */
12720 implied |= StoreStore;
12721
12722 /* If we're not looking for a raw barrier (before+after), then atomic
12723 operations get the benefit of being both load and store. */
12724 if (load_store == 3 && before_after == 1)
12725 implied |= StoreLoad;
12726 /* FALLTHRU */
12727
12728 case SMM_PSO:
12729 /* Partial Store Ordering: all memory transactions with load semantics
12730 are followed by an implied LoadLoad | LoadStore. */
12731 implied |= LoadLoad | LoadStore;
12732
12733 /* If we're not looking for a raw barrier (before+after), then atomic
12734 operations get the benefit of being both load and store. */
12735 if (load_store == 3 && before_after == 2)
12736 implied |= StoreLoad | StoreStore;
12737 /* FALLTHRU */
12738
12739 case SMM_RMO:
12740 /* Relaxed Memory Ordering: no implicit bits. */
12741 break;
12742
12743 default:
12744 gcc_unreachable ();
12745 }
12746
12747 if (before_after & 1)
12748 {
12749 if (is_mm_release (model) || is_mm_acq_rel (model)
12750 || is_mm_seq_cst (model))
12751 {
12752 if (load_store & 1)
12753 mm |= LoadLoad | StoreLoad;
12754 if (load_store & 2)
12755 mm |= LoadStore | StoreStore;
12756 }
12757 }
12758 if (before_after & 2)
12759 {
12760 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12761 || is_mm_seq_cst (model))
12762 {
12763 if (load_store & 1)
12764 mm |= LoadLoad | LoadStore;
12765 if (load_store & 2)
12766 mm |= StoreLoad | StoreStore;
12767 }
12768 }
12769
12770 /* Remove the bits implied by the system memory model. */
12771 mm &= ~implied;
12772
12773 /* For raw barriers (before+after), always emit a barrier.
12774 This will become a compile-time barrier if needed. */
12775 if (mm || before_after == 3)
12776 emit_insn (gen_membar (GEN_INT (mm)));
12777 }
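/* Worked example: sparc_expand_compare_and_swap below brackets the CAS with
   sparc_emit_membar_for_model (model, 3, 1) and (model, 3, 2).  For a SEQ_CST
   compare-and-swap, each call asks for all four orderings, but under TSO (the
   usual default) the code above finds them all implied and emits nothing,
   whereas under RMO nothing is implied and a membar with mmask 15, i.e.
   #LoadLoad | #StoreLoad | #LoadStore | #StoreStore, is emitted on both
   sides of the CAS.  */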
12778
12779 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12780 compare and swap on the word containing the byte or half-word. */
12781
12782 static void
12783 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12784 rtx oldval, rtx newval)
12785 {
12786 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12787 rtx addr = gen_reg_rtx (Pmode);
12788 rtx off = gen_reg_rtx (SImode);
12789 rtx oldv = gen_reg_rtx (SImode);
12790 rtx newv = gen_reg_rtx (SImode);
12791 rtx oldvalue = gen_reg_rtx (SImode);
12792 rtx newvalue = gen_reg_rtx (SImode);
12793 rtx res = gen_reg_rtx (SImode);
12794 rtx resv = gen_reg_rtx (SImode);
12795 rtx memsi, val, mask, cc;
12796
12797 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12798
12799 if (Pmode != SImode)
12800 addr1 = gen_lowpart (SImode, addr1);
12801 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12802
12803 memsi = gen_rtx_MEM (SImode, addr);
12804 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12805 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12806
12807 val = copy_to_reg (memsi);
12808
12809 emit_insn (gen_rtx_SET (off,
12810 gen_rtx_XOR (SImode, off,
12811 GEN_INT (GET_MODE (mem) == QImode
12812 ? 3 : 2))));
12813
12814 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12815
12816 if (GET_MODE (mem) == QImode)
12817 mask = force_reg (SImode, GEN_INT (0xff));
12818 else
12819 mask = force_reg (SImode, GEN_INT (0xffff));
12820
12821 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12822
12823 emit_insn (gen_rtx_SET (val,
12824 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12825 val)));
12826
12827 oldval = gen_lowpart (SImode, oldval);
12828 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12829
12830 newval = gen_lowpart_common (SImode, newval);
12831 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12832
12833 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12834
12835 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12836
12837 rtx_code_label *end_label = gen_label_rtx ();
12838 rtx_code_label *loop_label = gen_label_rtx ();
12839 emit_label (loop_label);
12840
12841 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12842
12843 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12844
12845 emit_move_insn (bool_result, const1_rtx);
12846
12847 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12848
12849 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12850
12851 emit_insn (gen_rtx_SET (resv,
12852 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12853 res)));
12854
12855 emit_move_insn (bool_result, const0_rtx);
12856
12857 cc = gen_compare_reg_1 (NE, resv, val);
12858 emit_insn (gen_rtx_SET (val, resv));
12859
12860 /* Use cbranchcc4 to separate the compare and branch! */
12861 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12862 cc, const0_rtx, loop_label));
12863
12864 emit_label (end_label);
12865
12866 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12867
12868 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12869
12870 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12871 }
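/* Rough C-level sketch of the sequence generated above for the QImode case
   (names are illustrative; the real expansion works on RTL and performs the
   32-bit CAS via the atomic_compare_and_swapsi_1 pattern):

     word = addr & -4;                          aligned containing word
     shift = ((addr & 3) ^ 3) * 8;              big-endian byte lane
     mask = 0xff << shift;
     bg = *word & ~mask;                        the untouched lanes
     for (;;)
       {
	 old_word = bg | ((oldval << shift) & mask);
	 new_word = bg | ((newval << shift) & mask);
	 res = cas32 (word, old_word, new_word);
	 if (res == old_word)
	   { success = true; break; }           CAS succeeded
	 if ((res & ~mask) == bg)
	   { success = false; break; }          our byte really differed
	 bg = res & ~mask;                      other lanes changed: retry
       }
     result = (res & mask) >> shift;  */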
12872
12873 /* Expand code to perform a compare-and-swap. */
12874
12875 void
12876 sparc_expand_compare_and_swap (rtx operands[])
12877 {
12878 rtx bval, retval, mem, oldval, newval;
12879 machine_mode mode;
12880 enum memmodel model;
12881
12882 bval = operands[0];
12883 retval = operands[1];
12884 mem = operands[2];
12885 oldval = operands[3];
12886 newval = operands[4];
12887 model = (enum memmodel) INTVAL (operands[6]);
12888 mode = GET_MODE (mem);
12889
12890 sparc_emit_membar_for_model (model, 3, 1);
12891
12892 if (reg_overlap_mentioned_p (retval, oldval))
12893 oldval = copy_to_reg (oldval);
12894
12895 if (mode == QImode || mode == HImode)
12896 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12897 else
12898 {
12899 rtx (*gen) (rtx, rtx, rtx, rtx);
12900 rtx x;
12901
12902 if (mode == SImode)
12903 gen = gen_atomic_compare_and_swapsi_1;
12904 else
12905 gen = gen_atomic_compare_and_swapdi_1;
12906 emit_insn (gen (retval, mem, oldval, newval));
12907
12908 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12909 if (x != bval)
12910 convert_move (bval, x, 1);
12911 }
12912
12913 sparc_emit_membar_for_model (model, 3, 2);
12914 }
12915
12916 void
12917 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12918 {
12919 rtx t_1, t_2, t_3;
12920
12921 sel = gen_lowpart (DImode, sel);
12922 switch (vmode)
12923 {
12924 case E_V2SImode:
12925 /* inp = xxxxxxxAxxxxxxxB */
12926 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12927 NULL_RTX, 1, OPTAB_DIRECT);
12928 /* t_1 = ....xxxxxxxAxxx. */
12929 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12930 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12931 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12932 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12933 /* sel = .......B */
12934 /* t_1 = ...A.... */
12935 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12936 /* sel = ...A...B */
12937 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12938 /* sel = AAAABBBB * 4 */
12939 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12940 /* sel = { A*4, A*4+1, A*4+2, ... } */
12941 break;
12942
12943 case E_V4HImode:
12944 /* inp = xxxAxxxBxxxCxxxD */
12945 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12946 NULL_RTX, 1, OPTAB_DIRECT);
12947 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12948 NULL_RTX, 1, OPTAB_DIRECT);
12949 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12950 NULL_RTX, 1, OPTAB_DIRECT);
12951 /* t_1 = ..xxxAxxxBxxxCxx */
12952 /* t_2 = ....xxxAxxxBxxxC */
12953 /* t_3 = ......xxxAxxxBxx */
12954 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12955 GEN_INT (0x07),
12956 NULL_RTX, 1, OPTAB_DIRECT);
12957 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12958 GEN_INT (0x0700),
12959 NULL_RTX, 1, OPTAB_DIRECT);
12960 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12961 GEN_INT (0x070000),
12962 NULL_RTX, 1, OPTAB_DIRECT);
12963 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12964 GEN_INT (0x07000000),
12965 NULL_RTX, 1, OPTAB_DIRECT);
12966 /* sel = .......D */
12967 /* t_1 = .....C.. */
12968 /* t_2 = ...B.... */
12969 /* t_3 = .A...... */
12970 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12971 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12972 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12973 /* sel = .A.B.C.D */
12974 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12975 /* sel = AABBCCDD * 2 */
12976 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12977 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12978 break;
12979
12980 case E_V8QImode:
12981 /* input = xAxBxCxDxExFxGxH */
12982 sel = expand_simple_binop (DImode, AND, sel,
12983 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12984 | 0x0f0f0f0f),
12985 NULL_RTX, 1, OPTAB_DIRECT);
12986 /* sel = .A.B.C.D.E.F.G.H */
12987 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12988 NULL_RTX, 1, OPTAB_DIRECT);
12989 /* t_1 = ..A.B.C.D.E.F.G. */
12990 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12991 NULL_RTX, 1, OPTAB_DIRECT);
12992 /* sel = .AABBCCDDEEFFGGH */
12993 sel = expand_simple_binop (DImode, AND, sel,
12994 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12995 | 0xff00ff),
12996 NULL_RTX, 1, OPTAB_DIRECT);
12997 /* sel = ..AB..CD..EF..GH */
12998 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12999 NULL_RTX, 1, OPTAB_DIRECT);
13000 /* t_1 = ....AB..CD..EF.. */
13001 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13002 NULL_RTX, 1, OPTAB_DIRECT);
13003 /* sel = ..ABABCDCDEFEFGH */
13004 sel = expand_simple_binop (DImode, AND, sel,
13005 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
13006 NULL_RTX, 1, OPTAB_DIRECT);
13007 /* sel = ....ABCD....EFGH */
13008 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
13009 NULL_RTX, 1, OPTAB_DIRECT);
13010 /* t_1 = ........ABCD.... */
13011 sel = gen_lowpart (SImode, sel);
13012 t_1 = gen_lowpart (SImode, t_1);
13013 break;
13014
13015 default:
13016 gcc_unreachable ();
13017 }
13018
13019 /* Always perform the final addition/merge within the bmask insn. */
13020 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
13021 }
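/* Worked example for V8QImode: if SEL holds the byte indices
   { 1, 0, 3, 2, 5, 4, 7, 6 }, the shifts and masks above compact the eight
   index nibbles into two 32-bit halves which the closing bmask insn adds
   together, leaving 0x10325476 in the GSR mask field; each nibble then
   selects the source byte for the corresponding destination byte of a
   subsequent bshuffle.  */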
13022
13023 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
13024
13025 static bool
13026 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
13027 rtx op1, const vec_perm_indices &sel)
13028 {
13029 if (!TARGET_VIS2)
13030 return false;
13031
13032 /* All permutes are supported. */
13033 if (!target)
13034 return true;
13035
13036 /* Force target-independent code to convert constant permutations on other
13037 modes down to V8QI. Rely on this to avoid the complexity of the byte
13038 order of the permutation. */
13039 if (vmode != V8QImode)
13040 return false;
13041
13042 rtx nop0 = force_reg (vmode, op0);
13043 if (op0 == op1)
13044 op1 = nop0;
13045 op0 = nop0;
13046 op1 = force_reg (vmode, op1);
13047
13048 unsigned int i, mask;
13049 for (i = mask = 0; i < 8; ++i)
13050 mask |= (sel[i] & 0xf) << (28 - i*4);
13051 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
13052
13053 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
13054 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
13055 return true;
13056 }
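/* Worked example: an even-byte interleave of two V8QI vectors uses
   SEL = { 0, 8, 1, 9, 2, 10, 3, 11 }, which the loop above packs into the
   nibble mask 0x08192a3b; selector values 0-7 pick bytes of OP0 and values
   8-15 pick bytes of OP1 in the bshuffle that follows.  */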
13057
13058 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
13059
13060 static bool
13061 sparc_frame_pointer_required (void)
13062 {
13063 /* If the stack pointer is dynamically modified in the function, it cannot
13064 serve as the frame pointer. */
13065 if (cfun->calls_alloca)
13066 return true;
13067
13068 /* If the function receives nonlocal gotos, it needs to save the frame
13069 pointer in the nonlocal_goto_save_area object. */
13070 if (cfun->has_nonlocal_label)
13071 return true;
13072
13073 /* In flat mode, that's it. */
13074 if (TARGET_FLAT)
13075 return false;
13076
13077 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13078 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13079 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13080 }
13081
13082 /* The way this is structured, we can't eliminate SFP in favor of SP
13083 if the frame pointer is required: we want to use the SFP->HFP elimination
13084 in that case. But the test in update_eliminables doesn't know we are
13085 assuming below that we only do the former elimination. */
13086
13087 static bool
13088 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13089 {
13090 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13091 }
13092
13093 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13094 they won't be allocated. */
13095
13096 static void
13097 sparc_conditional_register_usage (void)
13098 {
13099 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13100 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13101 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
13102 then honor it. */
13103 if (TARGET_ARCH32 && fixed_regs[5])
13104 fixed_regs[5] = 1;
13105 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13106 fixed_regs[5] = 0;
13107 if (! TARGET_V9)
13108 {
13109 int regno;
13110 for (regno = SPARC_FIRST_V9_FP_REG;
13111 regno <= SPARC_LAST_V9_FP_REG;
13112 regno++)
13113 fixed_regs[regno] = 1;
13114 /* %fcc0 is used by v8 and v9. */
13115 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13116 regno <= SPARC_LAST_V9_FCC_REG;
13117 regno++)
13118 fixed_regs[regno] = 1;
13119 }
13120 if (! TARGET_FPU)
13121 {
13122 int regno;
13123 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13124 fixed_regs[regno] = 1;
13125 }
13126 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
13127 then honor it. Likewise with g3 and g4. */
13128 if (fixed_regs[2] == 2)
13129 fixed_regs[2] = ! TARGET_APP_REGS;
13130 if (fixed_regs[3] == 2)
13131 fixed_regs[3] = ! TARGET_APP_REGS;
13132 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13133 fixed_regs[4] = ! TARGET_APP_REGS;
13134 else if (TARGET_CM_EMBMEDANY)
13135 fixed_regs[4] = 1;
13136 else if (fixed_regs[4] == 2)
13137 fixed_regs[4] = 0;
13138
13139 /* Disable leaf function optimization in flat mode. */
13140 if (TARGET_FLAT)
13141 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13142
13143 if (TARGET_VIS)
13144 global_regs[SPARC_GSR_REG] = 1;
13145 }
13146
13147 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13148
13149 static bool
13150 sparc_use_pseudo_pic_reg (void)
13151 {
13152 return !TARGET_VXWORKS_RTP && flag_pic;
13153 }
13154
13155 /* Implement TARGET_INIT_PIC_REG. */
13156
13157 static void
13158 sparc_init_pic_reg (void)
13159 {
13160 edge entry_edge;
13161 rtx_insn *seq;
13162
13163 /* In PIC mode, we need to always initialize the PIC register if optimization
13164 is enabled, because we are called from IRA and LRA may later force things
13165 to the constant pool for optimization purposes. */
13166 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13167 return;
13168
13169 start_sequence ();
13170 load_got_register ();
13171 if (!TARGET_VXWORKS_RTP)
13172 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13173 seq = get_insns ();
13174 end_sequence ();
13175
13176 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13177 insert_insn_on_edge (seq, entry_edge);
13178 commit_one_edge_insertion (entry_edge);
13179 }
13180
13181 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13182
13183 - We can't load constants into FP registers.
13184 - We can't load FP constants into integer registers when soft-float,
13185 because there is no soft-float pattern with a r/F constraint.
13186 - We can't load FP constants into integer registers for TFmode unless
13187 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13188 - Try to reload integer constants (symbolic or otherwise) back into
13189 registers directly, rather than having them dumped to memory. */
13190
13191 static reg_class_t
13192 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13193 {
13194 machine_mode mode = GET_MODE (x);
13195 if (CONSTANT_P (x))
13196 {
13197 if (FP_REG_CLASS_P (rclass)
13198 || rclass == GENERAL_OR_FP_REGS
13199 || rclass == GENERAL_OR_EXTRA_FP_REGS
13200 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13201 || (mode == TFmode && ! const_zero_operand (x, mode)))
13202 return NO_REGS;
13203
13204 if (GET_MODE_CLASS (mode) == MODE_INT)
13205 return GENERAL_REGS;
13206
13207 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13208 {
13209 if (! FP_REG_CLASS_P (rclass)
13210 || !(const_zero_operand (x, mode)
13211 || const_all_ones_operand (x, mode)))
13212 return NO_REGS;
13213 }
13214 }
13215
13216 if (TARGET_VIS3
13217 && ! TARGET_ARCH64
13218 && (rclass == EXTRA_FP_REGS
13219 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13220 {
13221 int regno = true_regnum (x);
13222
13223 if (SPARC_INT_REG_P (regno))
13224 return (rclass == EXTRA_FP_REGS
13225 ? FP_REGS : GENERAL_OR_FP_REGS);
13226 }
13227
13228 return rclass;
13229 }
13230
13231 /* Return true if we use LRA instead of reload pass. */
13232
13233 static bool
13234 sparc_lra_p (void)
13235 {
13236 return TARGET_LRA;
13237 }
13238
13239 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13240 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13241
13242 const char *
13243 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13244 {
13245 char mulstr[32];
13246
13247 gcc_assert (! TARGET_ARCH64);
13248
13249 if (sparc_check_64 (operands[1], insn) <= 0)
13250 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13251 if (which_alternative == 1)
13252 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13253 if (GET_CODE (operands[2]) == CONST_INT)
13254 {
13255 if (which_alternative == 1)
13256 {
13257 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13258 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13259 output_asm_insn (mulstr, operands);
13260 return "srlx\t%L0, 32, %H0";
13261 }
13262 else
13263 {
13264 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13265 output_asm_insn ("or\t%L1, %3, %3", operands);
13266 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13267 output_asm_insn (mulstr, operands);
13268 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13269 return "mov\t%3, %L0";
13270 }
13271 }
13272 else if (rtx_equal_p (operands[1], operands[2]))
13273 {
13274 if (which_alternative == 1)
13275 {
13276 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13277 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13278 output_asm_insn (mulstr, operands);
13279 return "srlx\t%L0, 32, %H0";
13280 }
13281 else
13282 {
13283 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13284 output_asm_insn ("or\t%L1, %3, %3", operands);
13285 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13286 output_asm_insn (mulstr, operands);
13287 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13288 return "mov\t%3, %L0";
13289 }
13290 }
13291 if (sparc_check_64 (operands[2], insn) <= 0)
13292 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13293 if (which_alternative == 1)
13294 {
13295 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13296 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13297 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13298 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13299 output_asm_insn (mulstr, operands);
13300 return "srlx\t%L0, 32, %H0";
13301 }
13302 else
13303 {
13304 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13305 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13306 output_asm_insn ("or\t%L1, %3, %3", operands);
13307 output_asm_insn ("or\t%L2, %4, %4", operands);
13308 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13309 output_asm_insn (mulstr, operands);
13310 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13311 return "mov\t%3, %L0";
13312 }
13313 }
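/* Illustration for the general register case (which_alternative != 1,
   OPERANDS[2] a register distinct from OPERANDS[1]); %H/%L denote the high
   and low words of a register pair and %3/%4 the scratch operands of the
   pattern.  With OPCODE "mulx" the emitted sequence is along these lines
   (the leading srl insns appear only when sparc_check_64 cannot prove the
   low words are already zero-extended):

	srl	%L1, 0, %L1
	srl	%L2, 0, %L2
	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	mulx	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0

   i.e. both 64-bit inputs are reassembled in scratch registers, multiplied,
   and the product is split back into the output register pair.  */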
13314
13315 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13316 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13317 and INNER_MODE are the modes describing TARGET. */
13318
13319 static void
13320 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13321 machine_mode inner_mode)
13322 {
13323 rtx t1, final_insn, sel;
13324 int bmask;
13325
13326 t1 = gen_reg_rtx (mode);
13327
13328 elt = convert_modes (SImode, inner_mode, elt, true);
13329 emit_move_insn (gen_lowpart(SImode, t1), elt);
13330
13331 switch (mode)
13332 {
13333 case E_V2SImode:
13334 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13335 bmask = 0x45674567;
13336 break;
13337 case E_V4HImode:
13338 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13339 bmask = 0x67676767;
13340 break;
13341 case E_V8QImode:
13342 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13343 bmask = 0x77777777;
13344 break;
13345 default:
13346 gcc_unreachable ();
13347 }
13348
13349 sel = force_reg (SImode, GEN_INT (bmask));
13350 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13351 emit_insn (final_insn);
13352 }
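/* The bmask constants above encode where the element lands after the SImode
   move into T1: for V4HImode, for instance, ELT occupies bytes 6-7 of T1, so
   the mask 0x67676767 makes the bshuffle copy that byte pair into all four
   halfwords of TARGET; likewise 0x45674567 replicates bytes 4-7 for V2SImode
   and 0x77777777 replicates byte 7 for V8QImode.  */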
13353
13354 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13355 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13356
13357 static void
13358 vector_init_fpmerge (rtx target, rtx elt)
13359 {
13360 rtx t1, t2, t2_low, t3, t3_low;
13361
13362 t1 = gen_reg_rtx (V4QImode);
13363 elt = convert_modes (SImode, QImode, elt, true);
13364 emit_move_insn (gen_lowpart (SImode, t1), elt);
13365
13366 t2 = gen_reg_rtx (V8QImode);
13367 t2_low = gen_lowpart (V4QImode, t2);
13368 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13369
13370 t3 = gen_reg_rtx (V8QImode);
13371 t3_low = gen_lowpart (V4QImode, t3);
13372 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13373
13374 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13375 }
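/* fpmerge interleaves the bytes of its two 4-byte operands, so each insn
   above doubles the number of copies of ELT: T1 = { 0, 0, 0, E } yields
   T2 = { 0, 0, 0, 0, 0, 0, E, E }, merging the low half of T2 with itself
   yields T3 = { 0, 0, 0, 0, E, E, E, E }, and merging the low half of T3
   with itself fills all eight bytes of TARGET with E.  */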
13376
13377 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13378 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13379
13380 static void
13381 vector_init_faligndata (rtx target, rtx elt)
13382 {
13383 rtx t1 = gen_reg_rtx (V4HImode);
13384 int i;
13385
13386 elt = convert_modes (SImode, HImode, elt, true);
13387 emit_move_insn (gen_lowpart (SImode, t1), elt);
13388
13389 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13390 force_reg (SImode, GEN_INT (6)),
13391 const0_rtx));
13392
13393 for (i = 0; i < 4; i++)
13394 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13395 }
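/* The alignaddr insn above sets the GSR alignment offset to 6, which is
   where ELT sits within T1, so each faligndata extracts 8 bytes starting at
   offset 6 from the 16-byte concatenation T1:TARGET, shifting another copy
   of ELT in from the left; after four iterations TARGET holds ELT in all
   four halfword positions.  */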
13396
13397 /* Emit code to initialize the fields of vector TARGET to the values given in VALS. */
13398
13399 void
13400 sparc_expand_vector_init (rtx target, rtx vals)
13401 {
13402 const machine_mode mode = GET_MODE (target);
13403 const machine_mode inner_mode = GET_MODE_INNER (mode);
13404 const int n_elts = GET_MODE_NUNITS (mode);
13405 int i, n_var = 0;
13406 bool all_same = true;
13407 rtx mem;
13408
13409 for (i = 0; i < n_elts; i++)
13410 {
13411 rtx x = XVECEXP (vals, 0, i);
13412 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13413 n_var++;
13414
13415 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13416 all_same = false;
13417 }
13418
13419 if (n_var == 0)
13420 {
13421 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13422 return;
13423 }
13424
13425 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13426 {
13427 if (GET_MODE_SIZE (inner_mode) == 4)
13428 {
13429 emit_move_insn (gen_lowpart (SImode, target),
13430 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13431 return;
13432 }
13433 else if (GET_MODE_SIZE (inner_mode) == 8)
13434 {
13435 emit_move_insn (gen_lowpart (DImode, target),
13436 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13437 return;
13438 }
13439 }
13440 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13441 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13442 {
13443 emit_move_insn (gen_highpart (word_mode, target),
13444 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13445 emit_move_insn (gen_lowpart (word_mode, target),
13446 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13447 return;
13448 }
13449
13450 if (all_same && GET_MODE_SIZE (mode) == 8)
13451 {
13452 if (TARGET_VIS2)
13453 {
13454 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13455 return;
13456 }
13457 if (mode == V8QImode)
13458 {
13459 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13460 return;
13461 }
13462 if (mode == V4HImode)
13463 {
13464 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13465 return;
13466 }
13467 }
13468
13469 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13470 for (i = 0; i < n_elts; i++)
13471 emit_move_insn (adjust_address_nv (mem, inner_mode,
13472 i * GET_MODE_SIZE (inner_mode)),
13473 XVECEXP (vals, 0, i));
13474 emit_move_insn (target, mem);
13475 }
13476
13477 /* Implement TARGET_SECONDARY_RELOAD. */
13478
13479 static reg_class_t
13480 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13481 machine_mode mode, secondary_reload_info *sri)
13482 {
13483 enum reg_class rclass = (enum reg_class) rclass_i;
13484
13485 sri->icode = CODE_FOR_nothing;
13486 sri->extra_cost = 0;
13487
13488 /* We need a temporary when loading/storing a HImode/QImode value
13489 between memory and the FPU registers. This can happen when combine puts
13490 a paradoxical subreg in a float/fix conversion insn. */
13491 if (FP_REG_CLASS_P (rclass)
13492 && (mode == HImode || mode == QImode)
13493 && (GET_CODE (x) == MEM
13494 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13495 && true_regnum (x) == -1)))
13496 return GENERAL_REGS;
13497
13498 /* On 32-bit we need a temporary when loading/storing a DFmode value
13499 between unaligned memory and the upper FPU registers. */
13500 if (TARGET_ARCH32
13501 && rclass == EXTRA_FP_REGS
13502 && mode == DFmode
13503 && GET_CODE (x) == MEM
13504 && ! mem_min_alignment (x, 8))
13505 return FP_REGS;
13506
13507 if (((TARGET_CM_MEDANY
13508 && symbolic_operand (x, mode))
13509 || (TARGET_CM_EMBMEDANY
13510 && text_segment_operand (x, mode)))
13511 && ! flag_pic)
13512 {
13513 if (in_p)
13514 sri->icode = direct_optab_handler (reload_in_optab, mode);
13515 else
13516 sri->icode = direct_optab_handler (reload_out_optab, mode);
13517 return NO_REGS;
13518 }
13519
13520 if (TARGET_VIS3 && TARGET_ARCH32)
13521 {
13522 int regno = true_regnum (x);
13523
13524 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13525 to move 8-byte values in 4-byte pieces. This only works via
13526 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13527 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13528 an FP_REGS intermediate move. */
13529 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13530 || ((general_or_i64_p (rclass)
13531 || rclass == GENERAL_OR_FP_REGS)
13532 && SPARC_FP_REG_P (regno)))
13533 {
13534 sri->extra_cost = 2;
13535 return FP_REGS;
13536 }
13537 }
13538
13539 return NO_REGS;
13540 }
13541
13542 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13543
13544 On SPARC, without VIS3 it is not possible to move data directly
13545 between GENERAL_REGS and FP_REGS. */
13546
13547 static bool
13548 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13549 reg_class_t class2)
13550 {
13551 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13552 && (! TARGET_VIS3
13553 || GET_MODE_SIZE (mode) > 8
13554 || GET_MODE_SIZE (mode) < 4));
13555 }
13556
13557 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13558
13559 get_secondary_mem widens its argument to BITS_PER_WORD, which does not work
13560 on v9 because the movsi and movsf patterns don't handle r/f moves.
13561 For v8 we copy the default definition. */
13562
13563 static machine_mode
13564 sparc_secondary_memory_needed_mode (machine_mode mode)
13565 {
13566 if (TARGET_ARCH64)
13567 {
13568 if (GET_MODE_BITSIZE (mode) < 32)
13569 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13570 return mode;
13571 }
13572 else
13573 {
13574 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13575 return mode_for_size (BITS_PER_WORD,
13576 GET_MODE_CLASS (mode), 0).require ();
13577 return mode;
13578 }
13579 }
13580
13581 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13582 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13583
13584 bool
13585 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13586 {
13587 enum rtx_code rc = GET_CODE (operands[1]);
13588 machine_mode cmp_mode;
13589 rtx cc_reg, dst, cmp;
13590
13591 cmp = operands[1];
13592 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13593 return false;
13594
13595 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13596 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13597
13598 cmp_mode = GET_MODE (XEXP (cmp, 0));
13599 rc = GET_CODE (cmp);
13600
13601 dst = operands[0];
13602 if (! rtx_equal_p (operands[2], dst)
13603 && ! rtx_equal_p (operands[3], dst))
13604 {
13605 if (reg_overlap_mentioned_p (dst, cmp))
13606 dst = gen_reg_rtx (mode);
13607
13608 emit_move_insn (dst, operands[3]);
13609 }
13610 else if (operands[2] == dst)
13611 {
13612 operands[2] = operands[3];
13613
13614 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13615 rc = reverse_condition_maybe_unordered (rc);
13616 else
13617 rc = reverse_condition (rc);
13618 }
13619
13620 if (XEXP (cmp, 1) == const0_rtx
13621 && GET_CODE (XEXP (cmp, 0)) == REG
13622 && cmp_mode == DImode
13623 && v9_regcmp_p (rc))
13624 cc_reg = XEXP (cmp, 0);
13625 else
13626 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13627
13628 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13629
13630 emit_insn (gen_rtx_SET (dst,
13631 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13632
13633 if (dst != operands[0])
13634 emit_move_insn (operands[0], dst);
13635
13636 return true;
13637 }
13638
13639 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13640 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13641 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13642 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13643 code to be used for the condition mask. */
13644
13645 void
13646 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13647 {
13648 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13649 enum rtx_code code = GET_CODE (operands[3]);
13650
13651 mask = gen_reg_rtx (Pmode);
13652 cop0 = operands[4];
13653 cop1 = operands[5];
13654 if (code == LT || code == GE)
13655 {
13656 rtx t;
13657
13658 code = swap_condition (code);
13659 t = cop0; cop0 = cop1; cop1 = t;
13660 }
13661
13662 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13663
13664 fcmp = gen_rtx_UNSPEC (Pmode,
13665 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13666 fcode);
13667
13668 cmask = gen_rtx_UNSPEC (DImode,
13669 gen_rtvec (2, mask, gsr),
13670 ccode);
13671
13672 bshuf = gen_rtx_UNSPEC (mode,
13673 gen_rtvec (3, operands[1], operands[2], gsr),
13674 UNSPEC_BSHUFFLE);
13675
13676 emit_insn (gen_rtx_SET (mask, fcmp));
13677 emit_insn (gen_rtx_SET (gsr, cmask));
13678
13679 emit_insn (gen_rtx_SET (operands[0], bshuf));
13680 }
13681
13682 /* On the SPARC, any mode which naturally allocates into the single float
13683 registers should return 4 here. */
13684
13685 unsigned int
13686 sparc_regmode_natural_size (machine_mode mode)
13687 {
13688 const enum mode_class cl = GET_MODE_CLASS (mode);
13689
13690 if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4)
13691 return 4;
13692
13693 return UNITS_PER_WORD;
13694 }
13695
13696 /* Implement TARGET_HARD_REGNO_NREGS.
13697
13698 On SPARC, ordinary registers hold 32 bits worth; this means both
13699 integer and floating point registers. On v9, integer regs hold 64
13700 bits worth; floating point regs hold 32 bits worth (this includes the
13701 new fp regs as even the odd ones are included in the hard register
13702 count). */
13703
13704 static unsigned int
13705 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13706 {
13707 if (regno == SPARC_GSR_REG)
13708 return 1;
13709 if (TARGET_ARCH64)
13710 {
13711 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13712 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13713 return CEIL (GET_MODE_SIZE (mode), 4);
13714 }
13715 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13716 }
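/* For example, on TARGET_ARCH64 a DFmode value occupies a single integer
   register (UNITS_PER_WORD is 8) but two consecutive floating-point
   registers, whereas on TARGET_ARCH32 it occupies two registers in either
   class.  */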
13717
13718 /* Implement TARGET_HARD_REGNO_MODE_OK.
13719
13720 ??? Because of the funny way we pass parameters we should allow certain
13721 ??? types of float/complex values to be in integer registers during
13722 ??? RTL generation. This only matters on arch32. */
13723
13724 static bool
13725 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13726 {
13727 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13728 }
13729
13730 /* Implement TARGET_MODES_TIEABLE_P.
13731
13732 For V9 we have to deal with the fact that only the lower 32 floating
13733 point registers are 32-bit addressable. */
13734
13735 static bool
13736 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13737 {
13738 enum mode_class mclass1, mclass2;
13739 unsigned short size1, size2;
13740
13741 if (mode1 == mode2)
13742 return true;
13743
13744 mclass1 = GET_MODE_CLASS (mode1);
13745 mclass2 = GET_MODE_CLASS (mode2);
13746 if (mclass1 != mclass2)
13747 return false;
13748
13749 if (! TARGET_V9)
13750 return true;
13751
13752 /* Classes are the same and we are V9 so we have to deal with upper
13753 vs. lower floating point registers. If one of the modes is a
13754 4-byte mode, and the other is not, we have to mark them as not
13755 tieable because only the lower 32 floating point registers are
13756 addressable 32 bits at a time.
13757
13758 We can't just test explicitly for SFmode, otherwise we won't
13759 cover the vector mode cases properly. */
13760
13761 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13762 return true;
13763
13764 size1 = GET_MODE_SIZE (mode1);
13765 size2 = GET_MODE_SIZE (mode2);
13766 if ((size1 > 4 && size2 == 4)
13767 || (size2 > 4 && size1 == 4))
13768 return false;
13769
13770 return true;
13771 }
13772
13773 /* Implement TARGET_CSTORE_MODE. */
13774
13775 static scalar_int_mode
13776 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13777 {
13778 return (TARGET_ARCH64 ? DImode : SImode);
13779 }
13780
13781 /* Return the compound expression made of T1 and T2. */
13782
13783 static inline tree
13784 compound_expr (tree t1, tree t2)
13785 {
13786 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13787 }
13788
13789 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13790
13791 static void
13792 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13793 {
13794 if (!TARGET_FPU)
13795 return;
13796
13797 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13798 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13799
13800 /* We generate the equivalent of feholdexcept (&fenv_var):
13801
13802 unsigned int fenv_var;
13803 __builtin_store_fsr (&fenv_var);
13804
13805 unsigned int tmp1_var;
13806 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13807
13808 __builtin_load_fsr (&tmp1_var); */
13809
13810 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13811 TREE_ADDRESSABLE (fenv_var) = 1;
13812 tree fenv_addr = build_fold_addr_expr (fenv_var);
13813 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13814 tree hold_stfsr
13815 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13816 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13817
13818 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13819 TREE_ADDRESSABLE (tmp1_var) = 1;
13820 tree masked_fenv_var
13821 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13822 build_int_cst (unsigned_type_node,
13823 ~(accrued_exception_mask | trap_enable_mask)));
13824 tree hold_mask
13825 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13826 NULL_TREE, NULL_TREE);
13827
13828 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13829 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13830 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13831
13832 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13833
13834 /* We reload the value of tmp1_var to clear the exceptions:
13835
13836 __builtin_load_fsr (&tmp1_var); */
13837
13838 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13839
13840 /* We generate the equivalent of feupdateenv (&fenv_var):
13841
13842 unsigned int tmp2_var;
13843 __builtin_store_fsr (&tmp2_var);
13844
13845 __builtin_load_fsr (&fenv_var);
13846
13847 if (SPARC_LOW_FE_EXCEPT_VALUES)
13848 tmp2_var >>= 5;
13849 __atomic_feraiseexcept ((int) tmp2_var); */
13850
13851 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13852 TREE_ADDRESSABLE (tmp2_var) = 1;
13853 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13854 tree update_stfsr
13855 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13856 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13857
13858 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13859
13860 tree atomic_feraiseexcept
13861 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13862 tree update_call
13863 = build_call_expr (atomic_feraiseexcept, 1,
13864 fold_convert (integer_type_node, tmp2_var));
13865
13866 if (SPARC_LOW_FE_EXCEPT_VALUES)
13867 {
13868 tree shifted_tmp2_var
13869 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13870 build_int_cst (unsigned_type_node, 5));
13871 tree update_shift
13872 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13873 update_call = compound_expr (update_shift, update_call);
13874 }
13875
13876 *update
13877 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13878 }
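/* These three expressions are used when expanding a C11 atomic compound
   assignment on a floating-point operand: *HOLD runs before the
   compare-and-exchange loop, *CLEAR on each failed iteration, and *UPDATE
   once the store finally succeeds, so only the FP exceptions raised by the
   successful iteration are merged back into the environment.  */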
13879
13880 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13881
13882 SImode loads to floating-point registers are not zero-extended.
13883 The definition for LOAD_EXTEND_OP specifies that integer loads
13884 narrower than BITS_PER_WORD will be zero-extended. As a result,
13885 we inhibit changes from SImode unless they are to a mode that is
13886 identical in size.
13887
13888 Likewise for SFmode, since word-mode paradoxical subregs are
13889 problematic on big-endian architectures. */
13890
13891 static bool
13892 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13893 reg_class_t rclass)
13894 {
13895 if (TARGET_ARCH64
13896 && GET_MODE_SIZE (from) == 4
13897 && GET_MODE_SIZE (to) != 4)
13898 return !reg_classes_intersect_p (rclass, FP_REGS);
13899 return true;
13900 }
13901
13902 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13903
13904 static HOST_WIDE_INT
13905 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13906 {
13907 if (TREE_CODE (exp) == STRING_CST)
13908 return MAX (align, FASTEST_ALIGNMENT);
13909 return align;
13910 }
13911
13912 /* Implement TARGET_ZERO_CALL_USED_REGS.
13913
13914 Generate a sequence of instructions that zero registers specified by
13915 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
13916 zeroed. */
13917
13918 static HARD_REG_SET
13919 sparc_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
13920 {
13921 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13922 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
13923 {
13924 /* Do not touch the CC registers or the FP registers if no VIS. */
13925 if (regno >= SPARC_FCC_REG
13926 || (regno >= SPARC_FIRST_FP_REG && !TARGET_VIS))
13927 CLEAR_HARD_REG_BIT (need_zeroed_hardregs, regno);
13928
13929 /* Do not access the odd upper FP registers individually. */
13930 else if (regno >= SPARC_FIRST_V9_FP_REG && (regno & 1))
13931 ;
13932
13933 /* Use the most natural mode for the registers, which is not given by
13934 regno_reg_rtx/reg_raw_mode for the FP registers on the SPARC. */
13935 else
13936 {
13937 machine_mode mode;
13938 rtx reg;
13939
13940 if (regno < SPARC_FIRST_FP_REG)
13941 {
13942 reg = regno_reg_rtx[regno];
13943 mode = GET_MODE (reg);
13944 }
13945 else
13946 {
13947 mode = regno < SPARC_FIRST_V9_FP_REG ? SFmode : DFmode;
13948 reg = gen_raw_REG (mode, regno);
13949 }
13950
13951 emit_move_insn (reg, CONST0_RTX (mode));
13952 }
13953 }
13954
13955 return need_zeroed_hardregs;
13956 }
13957
13958 #include "gt-sparc.h"