1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2021 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63 #include "opts.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127 A value of zero indicates that the multiply cost is fixed, not
128 variable (see the illustrative sketch after this struct). */
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
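/* Illustrative sketch, not part of the original sources: one way the variable
   integer multiply cost described in the struct above could be evaluated for
   a constant multiplier RS1.  It assumes GCC's floor_log2 helper and the
   fields defined above; the function name is purely hypothetical.  */
static inline int
example_int_mul_cost (const struct processor_costs *costs, HOST_WIDE_INT rs1)
{
  /* A negative multiplier is measured by its highest clear bit, a
     non-negative one by its highest set bit.  */
  int highest_bit = rs1 < 0 ? floor_log2 (~rs1) : floor_log2 (rs1);
  if (highest_bit < 3)
    highest_bit = 3;

  /* A zero bit factor means the multiply cost is fixed.  */
  if (costs->int_mul_bit_factor == 0)
    return costs->int_mul;

  return costs->int_mul + (highest_bit - 3) / costs->int_mul_bit_factor;
}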
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs sparclet_costs = {
274 COSTS_N_INSNS (3), /* int load */
275 COSTS_N_INSNS (3), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (1), /* fdivs */
284 COSTS_N_INSNS (1), /* fdivd */
285 COSTS_N_INSNS (1), /* fsqrts */
286 COSTS_N_INSNS (1), /* fsqrtd */
287 COSTS_N_INSNS (5), /* imul */
288 COSTS_N_INSNS (5), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (5), /* idiv */
291 COSTS_N_INSNS (5), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs ultrasparc_costs = {
299 COSTS_N_INSNS (2), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (2), /* int zeroed load */
302 COSTS_N_INSNS (2), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (4), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (2), /* fmov, fmovr */
307 COSTS_N_INSNS (4), /* fmul */
308 COSTS_N_INSNS (13), /* fdivs */
309 COSTS_N_INSNS (23), /* fdivd */
310 COSTS_N_INSNS (13), /* fsqrts */
311 COSTS_N_INSNS (23), /* fsqrtd */
312 COSTS_N_INSNS (4), /* imul */
313 COSTS_N_INSNS (4), /* imulX */
314 2, /* imul bit factor */
315 COSTS_N_INSNS (37), /* idiv */
316 COSTS_N_INSNS (68), /* idivX */
317 COSTS_N_INSNS (2), /* movcc/movr */
318 2, /* shift penalty */
319 2 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc3_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (3), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (5), /* fcmp */
331 COSTS_N_INSNS (3), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (17), /* fdivs */
334 COSTS_N_INSNS (20), /* fdivd */
335 COSTS_N_INSNS (20), /* fsqrts */
336 COSTS_N_INSNS (29), /* fsqrtd */
337 COSTS_N_INSNS (6), /* imul */
338 COSTS_N_INSNS (6), /* imulX */
339 0, /* imul bit factor */
340 COSTS_N_INSNS (40), /* idiv */
341 COSTS_N_INSNS (71), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 0, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs niagara_costs = {
349 COSTS_N_INSNS (3), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (9), /* float load */
353 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (8), /* fadd, fsub */
355 COSTS_N_INSNS (26), /* fcmp */
356 COSTS_N_INSNS (8), /* fmov, fmovr */
357 COSTS_N_INSNS (29), /* fmul */
358 COSTS_N_INSNS (54), /* fdivs */
359 COSTS_N_INSNS (83), /* fdivd */
360 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
361 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
362 COSTS_N_INSNS (11), /* imul */
363 COSTS_N_INSNS (11), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (72), /* idiv */
366 COSTS_N_INSNS (72), /* idivX */
367 COSTS_N_INSNS (1), /* movcc/movr */
368 0, /* shift penalty */
369 4 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara2_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (3), /* float load */
378 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (6), /* fadd, fsub */
380 COSTS_N_INSNS (6), /* fcmp */
381 COSTS_N_INSNS (6), /* fmov, fmovr */
382 COSTS_N_INSNS (6), /* fmul */
383 COSTS_N_INSNS (19), /* fdivs */
384 COSTS_N_INSNS (33), /* fdivd */
385 COSTS_N_INSNS (19), /* fsqrts */
386 COSTS_N_INSNS (33), /* fsqrtd */
387 COSTS_N_INSNS (5), /* imul */
388 COSTS_N_INSNS (5), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
391 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 5 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara3_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (9), /* fadd, fsub */
405 COSTS_N_INSNS (9), /* fcmp */
406 COSTS_N_INSNS (9), /* fmov, fmovr */
407 COSTS_N_INSNS (9), /* fmul */
408 COSTS_N_INSNS (23), /* fdivs */
409 COSTS_N_INSNS (37), /* fdivd */
410 COSTS_N_INSNS (23), /* fsqrts */
411 COSTS_N_INSNS (37), /* fsqrtd */
412 COSTS_N_INSNS (9), /* imul */
413 COSTS_N_INSNS (9), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
416 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara4_costs = {
424 COSTS_N_INSNS (5), /* int load */
425 COSTS_N_INSNS (5), /* int signed load */
426 COSTS_N_INSNS (5), /* int zeroed load */
427 COSTS_N_INSNS (5), /* float load */
428 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (11), /* fadd, fsub */
430 COSTS_N_INSNS (11), /* fcmp */
431 COSTS_N_INSNS (11), /* fmov, fmovr */
432 COSTS_N_INSNS (11), /* fmul */
433 COSTS_N_INSNS (24), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (24), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (12), /* imul */
438 COSTS_N_INSNS (12), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
441 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 2 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara7_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 1 /* branch cost */
470 };
471
472 static const
473 struct processor_costs m8_costs = {
474 COSTS_N_INSNS (3), /* int load */
475 COSTS_N_INSNS (3), /* int signed load */
476 COSTS_N_INSNS (3), /* int zeroed load */
477 COSTS_N_INSNS (3), /* float load */
478 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (9), /* fadd, fsub */
480 COSTS_N_INSNS (9), /* fcmp */
481 COSTS_N_INSNS (9), /* fmov, fmovr */
482 COSTS_N_INSNS (9), /* fmul */
483 COSTS_N_INSNS (26), /* fdivs */
484 COSTS_N_INSNS (30), /* fdivd */
485 COSTS_N_INSNS (33), /* fsqrts */
486 COSTS_N_INSNS (41), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (10), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (57), /* udiv/sdiv */
491 COSTS_N_INSNS (30), /* udivx/sdivx */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const struct processor_costs *sparc_costs = &cypress_costs;
498
499 #ifdef HAVE_AS_RELAX_OPTION
500 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
501 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
502 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
503 somebody branches between the sethi and jmp. */
504 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
505 #else
506 #define LEAF_SIBCALL_SLOT_RESERVED_P \
507 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
508 #endif
509
510 /* Vector to say how input registers are mapped to output registers.
511 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
512 eliminate it. You must use -fomit-frame-pointer to get that. */
513 char leaf_reg_remap[] =
514 { 0, 1, 2, 3, 4, 5, 6, 7,
515 -1, -1, -1, -1, -1, -1, 14, -1,
516 -1, -1, -1, -1, -1, -1, -1, -1,
517 8, 9, 10, 11, 12, 13, -1, 15,
518
519 32, 33, 34, 35, 36, 37, 38, 39,
520 40, 41, 42, 43, 44, 45, 46, 47,
521 48, 49, 50, 51, 52, 53, 54, 55,
522 56, 57, 58, 59, 60, 61, 62, 63,
523 64, 65, 66, 67, 68, 69, 70, 71,
524 72, 73, 74, 75, 76, 77, 78, 79,
525 80, 81, 82, 83, 84, 85, 86, 87,
526 88, 89, 90, 91, 92, 93, 94, 95,
527 96, 97, 98, 99, 100, 101, 102};
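/* Editorial note, not in the original sources: with this remapping a leaf
   function reads its first incoming argument directly from %o0 (hard reg 8)
   instead of %i0 (hard reg 24), and the return address in %i7 (reg 31) is
   used as %o7 (reg 15).  Entries of -1 mark registers with no leaf remapping,
   matching the zeros in sparc_leaf_regs below.  */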
528
529 /* Vector, indexed by hard register number, which contains 1
530 for a register that is allowable in a candidate for leaf
531 function treatment. */
532 char sparc_leaf_regs[] =
533 { 1, 1, 1, 1, 1, 1, 1, 1,
534 0, 0, 0, 0, 0, 0, 1, 0,
535 0, 0, 0, 0, 0, 0, 0, 0,
536 1, 1, 1, 1, 1, 1, 0, 1,
537 1, 1, 1, 1, 1, 1, 1, 1,
538 1, 1, 1, 1, 1, 1, 1, 1,
539 1, 1, 1, 1, 1, 1, 1, 1,
540 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1};
546
547 struct GTY(()) machine_function
548 {
549 /* Size of the frame of the function. */
550 HOST_WIDE_INT frame_size;
551
552 /* Size of the frame of the function minus the register window save area
553 and the outgoing argument area. */
554 HOST_WIDE_INT apparent_frame_size;
555
556 /* Register we pretend the frame pointer is allocated to. Normally, this
557 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
558 record "offset" separately as it may be too big for (reg + disp). */
559 rtx frame_base_reg;
560 HOST_WIDE_INT frame_base_offset;
561
562 /* Number of global or FP registers to be saved (as 4-byte quantities). */
563 int n_global_fp_regs;
564
565 /* True if the current function is leaf and uses only leaf regs,
566 so that the SPARC leaf function optimization can be applied.
567 Private version of crtl->uses_only_leaf_regs, see
568 sparc_expand_prologue for the rationale. */
569 int leaf_function_p;
570
571 /* True if the prologue saves local or in registers. */
572 bool save_local_in_regs_p;
573
574 /* True if the data calculated by sparc_expand_prologue are valid. */
575 bool prologue_data_valid_p;
576 };
577
578 #define sparc_frame_size cfun->machine->frame_size
579 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
580 #define sparc_frame_base_reg cfun->machine->frame_base_reg
581 #define sparc_frame_base_offset cfun->machine->frame_base_offset
582 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
583 #define sparc_leaf_function_p cfun->machine->leaf_function_p
584 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
585 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
586
587 /* 1 if the next opcode is to be specially indented. */
588 int sparc_indent_opcode = 0;
589
590 static void sparc_option_override (void);
591 static void sparc_init_modes (void);
592 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
593 const_tree, bool, bool, int *, int *);
594
595 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
596 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
597
598 static void sparc_emit_set_const32 (rtx, rtx);
599 static void sparc_emit_set_const64 (rtx, rtx);
600 static void sparc_output_addr_vec (rtx);
601 static void sparc_output_addr_diff_vec (rtx);
602 static void sparc_output_deferred_case_vectors (void);
603 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
604 static bool sparc_legitimate_constant_p (machine_mode, rtx);
605 static rtx sparc_builtin_saveregs (void);
606 static int epilogue_renumber (rtx *, int);
607 static bool sparc_assemble_integer (rtx, unsigned int, int);
608 static int set_extends (rtx_insn *);
609 static void sparc_asm_function_prologue (FILE *);
610 static void sparc_asm_function_epilogue (FILE *);
611 #ifdef TARGET_SOLARIS
612 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
613 tree) ATTRIBUTE_UNUSED;
614 #endif
615 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
616 static int sparc_issue_rate (void);
617 static void sparc_sched_init (FILE *, int, int);
618 static int sparc_use_sched_lookahead (void);
619
620 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
621 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
622 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
623 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
624 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
625
626 static bool sparc_function_ok_for_sibcall (tree, tree);
627 static void sparc_init_libfuncs (void);
628 static void sparc_init_builtins (void);
629 static void sparc_fpu_init_builtins (void);
630 static void sparc_vis_init_builtins (void);
631 static tree sparc_builtin_decl (unsigned, bool);
632 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
633 static tree sparc_fold_builtin (tree, int, tree *, bool);
634 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
635 HOST_WIDE_INT, tree);
636 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
637 HOST_WIDE_INT, const_tree);
638 static struct machine_function * sparc_init_machine_status (void);
639 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
640 static rtx sparc_tls_get_addr (void);
641 static rtx sparc_tls_got (void);
642 static int sparc_register_move_cost (machine_mode,
643 reg_class_t, reg_class_t);
644 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
645 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
646 int *, const_tree, int);
647 static bool sparc_strict_argument_naming (cumulative_args_t);
648 static void sparc_va_start (tree, rtx);
649 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
650 static bool sparc_vector_mode_supported_p (machine_mode);
651 static bool sparc_tls_referenced_p (rtx);
652 static rtx sparc_legitimize_tls_address (rtx);
653 static rtx sparc_legitimize_pic_address (rtx, rtx);
654 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
655 static rtx sparc_delegitimize_address (rtx);
656 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
657 static bool sparc_pass_by_reference (cumulative_args_t,
658 const function_arg_info &);
659 static void sparc_function_arg_advance (cumulative_args_t,
660 const function_arg_info &);
661 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
662 static rtx sparc_function_incoming_arg (cumulative_args_t,
663 const function_arg_info &);
664 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
665 static unsigned int sparc_function_arg_boundary (machine_mode,
666 const_tree);
667 static int sparc_arg_partial_bytes (cumulative_args_t,
668 const function_arg_info &);
669 static bool sparc_return_in_memory (const_tree, const_tree);
670 static rtx sparc_struct_value_rtx (tree, int);
671 static rtx sparc_function_value (const_tree, const_tree, bool);
672 static rtx sparc_libcall_value (machine_mode, const_rtx);
673 static bool sparc_function_value_regno_p (const unsigned int);
674 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
675 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
676 static void sparc_file_end (void);
677 static bool sparc_frame_pointer_required (void);
678 static bool sparc_can_eliminate (const int, const int);
679 static void sparc_conditional_register_usage (void);
680 static bool sparc_use_pseudo_pic_reg (void);
681 static void sparc_init_pic_reg (void);
682 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
683 static const char *sparc_mangle_type (const_tree);
684 #endif
685 static void sparc_trampoline_init (rtx, tree, rtx);
686 static machine_mode sparc_preferred_simd_mode (scalar_mode);
687 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
688 static bool sparc_lra_p (void);
689 static bool sparc_print_operand_punct_valid_p (unsigned char);
690 static void sparc_print_operand (FILE *, rtx, int);
691 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
692 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
693 machine_mode,
694 secondary_reload_info *);
695 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
696 reg_class_t);
697 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
698 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
699 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
700 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
701 static unsigned int sparc_min_arithmetic_precision (void);
702 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
703 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
704 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
705 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
706 reg_class_t);
707 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
708 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
709 const vec_perm_indices &);
710 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
711 static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
712 \f
713 #ifdef SUBTARGET_ATTRIBUTE_TABLE
714 /* Table of valid machine attributes. */
715 static const struct attribute_spec sparc_attribute_table[] =
716 {
717 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
718 do_diagnostic, handler, exclude } */
719 SUBTARGET_ATTRIBUTE_TABLE,
720 { NULL, 0, 0, false, false, false, false, NULL, NULL }
721 };
722 #endif
723 \f
724 char sparc_hard_reg_printed[8];
725
726 /* Initialize the GCC target structure. */
727
728 /* The default is to use .half rather than .short for aligned HI objects. */
729 #undef TARGET_ASM_ALIGNED_HI_OP
730 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
731
732 #undef TARGET_ASM_UNALIGNED_HI_OP
733 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
734 #undef TARGET_ASM_UNALIGNED_SI_OP
735 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
736 #undef TARGET_ASM_UNALIGNED_DI_OP
737 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
738
739 /* The target hook has to handle DI-mode values. */
740 #undef TARGET_ASM_INTEGER
741 #define TARGET_ASM_INTEGER sparc_assemble_integer
742
743 #undef TARGET_ASM_FUNCTION_PROLOGUE
744 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
745 #undef TARGET_ASM_FUNCTION_EPILOGUE
746 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
747
748 #undef TARGET_SCHED_ADJUST_COST
749 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
750 #undef TARGET_SCHED_ISSUE_RATE
751 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
752 #undef TARGET_SCHED_INIT
753 #define TARGET_SCHED_INIT sparc_sched_init
754 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
755 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
756
757 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
758 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
759
760 #undef TARGET_INIT_LIBFUNCS
761 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
762
763 #undef TARGET_LEGITIMIZE_ADDRESS
764 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
765 #undef TARGET_DELEGITIMIZE_ADDRESS
766 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
767 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
768 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
769
770 #undef TARGET_INIT_BUILTINS
771 #define TARGET_INIT_BUILTINS sparc_init_builtins
772 #undef TARGET_BUILTIN_DECL
773 #define TARGET_BUILTIN_DECL sparc_builtin_decl
774 #undef TARGET_EXPAND_BUILTIN
775 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
776 #undef TARGET_FOLD_BUILTIN
777 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
778
779 #if TARGET_TLS
780 #undef TARGET_HAVE_TLS
781 #define TARGET_HAVE_TLS true
782 #endif
783
784 #undef TARGET_CANNOT_FORCE_CONST_MEM
785 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
786
787 #undef TARGET_ASM_OUTPUT_MI_THUNK
788 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
789 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
790 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
791
792 #undef TARGET_RTX_COSTS
793 #define TARGET_RTX_COSTS sparc_rtx_costs
794 #undef TARGET_ADDRESS_COST
795 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
796 #undef TARGET_REGISTER_MOVE_COST
797 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
798
799 #undef TARGET_PROMOTE_FUNCTION_MODE
800 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
801 #undef TARGET_STRICT_ARGUMENT_NAMING
802 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
803
804 #undef TARGET_MUST_PASS_IN_STACK
805 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
806 #undef TARGET_PASS_BY_REFERENCE
807 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
808 #undef TARGET_ARG_PARTIAL_BYTES
809 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
810 #undef TARGET_FUNCTION_ARG_ADVANCE
811 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
812 #undef TARGET_FUNCTION_ARG
813 #define TARGET_FUNCTION_ARG sparc_function_arg
814 #undef TARGET_FUNCTION_INCOMING_ARG
815 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
816 #undef TARGET_FUNCTION_ARG_PADDING
817 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
818 #undef TARGET_FUNCTION_ARG_BOUNDARY
819 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
820
821 #undef TARGET_RETURN_IN_MEMORY
822 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
823 #undef TARGET_STRUCT_VALUE_RTX
824 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
825 #undef TARGET_FUNCTION_VALUE
826 #define TARGET_FUNCTION_VALUE sparc_function_value
827 #undef TARGET_LIBCALL_VALUE
828 #define TARGET_LIBCALL_VALUE sparc_libcall_value
829 #undef TARGET_FUNCTION_VALUE_REGNO_P
830 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
831
832 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
833 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
834
835 #undef TARGET_ASAN_SHADOW_OFFSET
836 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
837
838 #undef TARGET_EXPAND_BUILTIN_VA_START
839 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
840 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
841 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
842
843 #undef TARGET_VECTOR_MODE_SUPPORTED_P
844 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
845
846 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
847 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
848
849 #ifdef SUBTARGET_INSERT_ATTRIBUTES
850 #undef TARGET_INSERT_ATTRIBUTES
851 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
852 #endif
853
854 #ifdef SUBTARGET_ATTRIBUTE_TABLE
855 #undef TARGET_ATTRIBUTE_TABLE
856 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
857 #endif
858
859 #undef TARGET_OPTION_OVERRIDE
860 #define TARGET_OPTION_OVERRIDE sparc_option_override
861
862 #ifdef TARGET_THREAD_SSP_OFFSET
863 #undef TARGET_STACK_PROTECT_GUARD
864 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
865 #endif
866
867 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
868 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
869 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
870 #endif
871
872 #undef TARGET_ASM_FILE_END
873 #define TARGET_ASM_FILE_END sparc_file_end
874
875 #undef TARGET_FRAME_POINTER_REQUIRED
876 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
877
878 #undef TARGET_CAN_ELIMINATE
879 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
880
881 #undef TARGET_PREFERRED_RELOAD_CLASS
882 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
883
884 #undef TARGET_SECONDARY_RELOAD
885 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
886 #undef TARGET_SECONDARY_MEMORY_NEEDED
887 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
888 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
889 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
890
891 #undef TARGET_CONDITIONAL_REGISTER_USAGE
892 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
893
894 #undef TARGET_INIT_PIC_REG
895 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
896
897 #undef TARGET_USE_PSEUDO_PIC_REG
898 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
899
900 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
901 #undef TARGET_MANGLE_TYPE
902 #define TARGET_MANGLE_TYPE sparc_mangle_type
903 #endif
904
905 #undef TARGET_LRA_P
906 #define TARGET_LRA_P sparc_lra_p
907
908 #undef TARGET_LEGITIMATE_ADDRESS_P
909 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
910
911 #undef TARGET_LEGITIMATE_CONSTANT_P
912 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
913
914 #undef TARGET_TRAMPOLINE_INIT
915 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
916
917 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
918 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
919 #undef TARGET_PRINT_OPERAND
920 #define TARGET_PRINT_OPERAND sparc_print_operand
921 #undef TARGET_PRINT_OPERAND_ADDRESS
922 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
923
924 /* The value stored by LDSTUB. */
925 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
926 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
927
928 #undef TARGET_CSTORE_MODE
929 #define TARGET_CSTORE_MODE sparc_cstore_mode
930
931 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
932 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
933
934 #undef TARGET_FIXED_CONDITION_CODE_REGS
935 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
936
937 #undef TARGET_MIN_ARITHMETIC_PRECISION
938 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
939
940 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
941 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
942
943 #undef TARGET_HARD_REGNO_NREGS
944 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
945 #undef TARGET_HARD_REGNO_MODE_OK
946 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
947
948 #undef TARGET_MODES_TIEABLE_P
949 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
950
951 #undef TARGET_CAN_CHANGE_MODE_CLASS
952 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
953
954 #undef TARGET_CONSTANT_ALIGNMENT
955 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
956
957 #undef TARGET_VECTORIZE_VEC_PERM_CONST
958 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
959
960 #undef TARGET_CAN_FOLLOW_JUMP
961 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
962
963 #undef TARGET_ZERO_CALL_USED_REGS
964 #define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs
965
966 struct gcc_target targetm = TARGET_INITIALIZER;
967
968 /* Return the memory reference contained in X if any, zero otherwise. */
969
970 static rtx
971 mem_ref (rtx x)
972 {
973 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
974 x = XEXP (x, 0);
975
976 if (MEM_P (x))
977 return x;
978
979 return NULL_RTX;
980 }
981
982 /* True if any of INSN's source register(s) is REG. */
983
984 static bool
985 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
986 {
987 extract_insn (insn);
988 return ((REG_P (recog_data.operand[1])
989 && REGNO (recog_data.operand[1]) == reg)
990 || (recog_data.n_operands == 3
991 && REG_P (recog_data.operand[2])
992 && REGNO (recog_data.operand[2]) == reg));
993 }
994
995 /* True if INSN is a floating-point division or square-root. */
996
997 static bool
998 div_sqrt_insn_p (rtx_insn *insn)
999 {
1000 if (GET_CODE (PATTERN (insn)) != SET)
1001 return false;
1002
1003 switch (get_attr_type (insn))
1004 {
1005 case TYPE_FPDIVS:
1006 case TYPE_FPSQRTS:
1007 case TYPE_FPDIVD:
1008 case TYPE_FPSQRTD:
1009 return true;
1010 default:
1011 return false;
1012 }
1013 }
1014
1015 /* True if INSN is a floating-point instruction. */
1016
1017 static bool
1018 fpop_insn_p (rtx_insn *insn)
1019 {
1020 if (GET_CODE (PATTERN (insn)) != SET)
1021 return false;
1022
1023 switch (get_attr_type (insn))
1024 {
1025 case TYPE_FPMOVE:
1026 case TYPE_FPCMOVE:
1027 case TYPE_FP:
1028 case TYPE_FPCMP:
1029 case TYPE_FPMUL:
1030 case TYPE_FPDIVS:
1031 case TYPE_FPSQRTS:
1032 case TYPE_FPDIVD:
1033 case TYPE_FPSQRTD:
1034 return true;
1035 default:
1036 return false;
1037 }
1038 }
1039
1040 /* True if INSN is an atomic instruction. */
1041
1042 static bool
1043 atomic_insn_for_leon3_p (rtx_insn *insn)
1044 {
1045 switch (INSN_CODE (insn))
1046 {
1047 case CODE_FOR_swapsi:
1048 case CODE_FOR_ldstub:
1049 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1050 return true;
1051 default:
1052 return false;
1053 }
1054 }
1055
1056 /* We use a machine specific pass to enable workarounds for errata.
1057
1058 We need to have the (essentially) final form of the insn stream in order
1059 to properly detect the various hazards. Therefore, this machine specific
1060 pass runs as late as possible. */
1061
1062 /* True if INSN is an md pattern or asm statement. */
1063 #define USEFUL_INSN_P(INSN) \
1064 (NONDEBUG_INSN_P (INSN) \
1065 && GET_CODE (PATTERN (INSN)) != USE \
1066 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1067
1068 static unsigned int
1069 sparc_do_work_around_errata (void)
1070 {
1071 rtx_insn *insn, *next;
1072
1073 /* Force all instructions to be split into their final form. */
1074 split_all_insns_noflow ();
1075
1076 /* Now look for specific patterns in the insn stream. */
1077 for (insn = get_insns (); insn; insn = next)
1078 {
1079 bool insert_nop = false;
1080 rtx set;
1081 rtx_insn *jump;
1082 rtx_sequence *seq;
1083
1084 /* Look into the instruction in a delay slot. */
1085 if (NONJUMP_INSN_P (insn)
1086 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1087 {
1088 jump = seq->insn (0);
1089 insn = seq->insn (1);
1090 }
1091 else if (JUMP_P (insn))
1092 jump = insn;
1093 else
1094 jump = NULL;
1095
1096 /* Place a NOP at the branch target of an integer branch if it is a
1097 floating-point operation or a floating-point branch. */
1098 if (sparc_fix_gr712rc
1099 && jump
1100 && jump_to_label_p (jump)
1101 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1102 {
1103 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1104 if (target
1105 && (fpop_insn_p (target)
1106 || (JUMP_P (target)
1107 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1108 emit_insn_before (gen_nop (), target);
1109 }
1110
1111 /* Insert a NOP between a load instruction and an atomic instruction. Also
1112 insert a NOP at the branch target if there is a load in the delay slot
1113 and an atomic instruction at the branch target. */
1114 if (sparc_fix_ut700
1115 && NONJUMP_INSN_P (insn)
1116 && (set = single_set (insn)) != NULL_RTX
1117 && mem_ref (SET_SRC (set))
1118 && REG_P (SET_DEST (set)))
1119 {
1120 if (jump && jump_to_label_p (jump))
1121 {
1122 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1123 if (target && atomic_insn_for_leon3_p (target))
1124 emit_insn_before (gen_nop (), target);
1125 }
1126
1127 next = next_active_insn (insn);
1128 if (!next)
1129 break;
1130
1131 if (atomic_insn_for_leon3_p (next))
1132 insert_nop = true;
1133 }
1134
1135 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1136 ends with another fdiv or fsqrt instruction with no dependencies on
1137 the former, along with an appropriate pattern in between. */
1138 if (sparc_fix_lost_divsqrt
1139 && NONJUMP_INSN_P (insn)
1140 && div_sqrt_insn_p (insn))
1141 {
1142 int i;
1143 int fp_found = 0;
1144 rtx_insn *after;
1145
1146 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1147
1148 next = next_active_insn (insn);
1149 if (!next)
1150 break;
1151
1152 for (after = next, i = 0; i < 4; i++)
1153 {
1154 /* Count floating-point operations. */
1155 if (i != 3 && fpop_insn_p (after))
1156 {
1157 /* If the insn uses the destination register of
1158 the div/sqrt, then it cannot be problematic. */
1159 if (insn_uses_reg_p (after, dest_reg))
1160 break;
1161 fp_found++;
1162 }
1163
1164 /* Count floating-point loads. */
1165 if (i != 3
1166 && (set = single_set (after)) != NULL_RTX
1167 && REG_P (SET_DEST (set))
1168 && REGNO (SET_DEST (set)) > 31)
1169 {
1170 /* If the insn uses the destination register of
1171 the div/sqrt, then it cannot be problematic. */
1172 if (REGNO (SET_DEST (set)) == dest_reg)
1173 break;
1174 fp_found++;
1175 }
1176
1177 /* Check if this is a problematic sequence. */
1178 if (i > 1
1179 && fp_found >= 2
1180 && div_sqrt_insn_p (after))
1181 {
1182 /* If this is the short version of the problematic
1183 sequence we add two NOPs in a row to also prevent
1184 the long version. */
1185 if (i == 2)
1186 emit_insn_before (gen_nop (), next);
1187 insert_nop = true;
1188 break;
1189 }
1190
1191 /* No need to scan past a second div/sqrt. */
1192 if (div_sqrt_insn_p (after))
1193 break;
1194
1195 /* Insert NOP before branch. */
1196 if (i < 3
1197 && (!NONJUMP_INSN_P (after)
1198 || GET_CODE (PATTERN (after)) == SEQUENCE))
1199 {
1200 insert_nop = true;
1201 break;
1202 }
1203
1204 after = next_active_insn (after);
1205 if (!after)
1206 break;
1207 }
1208 }
1209
1210 /* Look for either of these two sequences:
1211
1212 Sequence A:
1213 1. store of word size or less (e.g. st / stb / sth / stf)
1214 2. any single instruction that is not a load or store
1215 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1216
1217 Sequence B:
1218 1. store of double word size (e.g. std / stdf)
1219 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
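/* Editorial sketch, not part of the original sources: a concrete instance of
   Sequence A would be

     stb  %g1, [%o0]      ! 1. byte store
     add  %g2, %g3, %g4   ! 2. neither a load nor a store
     st   %g5, [%o1]      ! 3. second store

   for which the code below sets insert_nop, so that a NOP is emitted right
   after the first store.  */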
1220 if (sparc_fix_b2bst
1221 && NONJUMP_INSN_P (insn)
1222 && (set = single_set (insn)) != NULL_RTX
1223 && MEM_P (SET_DEST (set)))
1224 {
1225 /* Sequence B begins with a double-word store. */
1226 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1227 rtx_insn *after;
1228 int i;
1229
1230 next = next_active_insn (insn);
1231 if (!next)
1232 break;
1233
1234 for (after = next, i = 0; i < 2; i++)
1235 {
1236 /* Skip empty assembly statements. */
1237 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1238 || (USEFUL_INSN_P (after)
1239 && (asm_noperands (PATTERN (after))>=0)
1240 && !strcmp (decode_asm_operands (PATTERN (after),
1241 NULL, NULL, NULL,
1242 NULL, NULL), "")))
1243 after = next_active_insn (after);
1244 if (!after)
1245 break;
1246
1247 /* If the insn is a branch, then it cannot be problematic. */
1248 if (!NONJUMP_INSN_P (after)
1249 || GET_CODE (PATTERN (after)) == SEQUENCE)
1250 break;
1251
1252 /* Sequence B is only two instructions long. */
1253 if (seq_b)
1254 {
1255 /* Add NOP if followed by a store. */
1256 if ((set = single_set (after)) != NULL_RTX
1257 && MEM_P (SET_DEST (set)))
1258 insert_nop = true;
1259
1260 /* Otherwise it is ok. */
1261 break;
1262 }
1263
1264 /* If the second instruction is a load or a store,
1265 then the sequence cannot be problematic. */
1266 if (i == 0)
1267 {
1268 if ((set = single_set (after)) != NULL_RTX
1269 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1270 break;
1271
1272 after = next_active_insn (after);
1273 if (!after)
1274 break;
1275 }
1276
1277 /* Add NOP if third instruction is a store. */
1278 if (i == 1
1279 && (set = single_set (after)) != NULL_RTX
1280 && MEM_P (SET_DEST (set)))
1281 insert_nop = true;
1282 }
1283 }
1284
1285 /* Look for a single-word load into an odd-numbered FP register. */
1286 else if (sparc_fix_at697f
1287 && NONJUMP_INSN_P (insn)
1288 && (set = single_set (insn)) != NULL_RTX
1289 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1290 && mem_ref (SET_SRC (set))
1291 && REG_P (SET_DEST (set))
1292 && REGNO (SET_DEST (set)) > 31
1293 && REGNO (SET_DEST (set)) % 2 != 0)
1294 {
1295 /* The wrong dependency is on the enclosing double register. */
1296 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1297 unsigned int src1, src2, dest;
1298 int code;
1299
1300 next = next_active_insn (insn);
1301 if (!next)
1302 break;
1303 /* If the insn is a branch, then it cannot be problematic. */
1304 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1305 continue;
1306
1307 extract_insn (next);
1308 code = INSN_CODE (next);
1309
1310 switch (code)
1311 {
1312 case CODE_FOR_adddf3:
1313 case CODE_FOR_subdf3:
1314 case CODE_FOR_muldf3:
1315 case CODE_FOR_divdf3:
1316 dest = REGNO (recog_data.operand[0]);
1317 src1 = REGNO (recog_data.operand[1]);
1318 src2 = REGNO (recog_data.operand[2]);
1319 if (src1 != src2)
1320 {
1321 /* Case [1-4]:
1322 ld [address], %fx+1
1323 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1324 if ((src1 == x || src2 == x)
1325 && (dest == src1 || dest == src2))
1326 insert_nop = true;
1327 }
1328 else
1329 {
1330 /* Case 5:
1331 ld [address], %fx+1
1332 FPOPd %fx, %fx, %fx */
1333 if (src1 == x
1334 && dest == src1
1335 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1336 insert_nop = true;
1337 }
1338 break;
1339
1340 case CODE_FOR_sqrtdf2:
1341 dest = REGNO (recog_data.operand[0]);
1342 src1 = REGNO (recog_data.operand[1]);
1343 /* Case 6:
1344 ld [address], %fx+1
1345 fsqrtd %fx, %fx */
1346 if (src1 == x && dest == src1)
1347 insert_nop = true;
1348 break;
1349
1350 default:
1351 break;
1352 }
1353 }
1354
1355 /* Look for a single-word load into an integer register. */
1356 else if (sparc_fix_ut699
1357 && NONJUMP_INSN_P (insn)
1358 && (set = single_set (insn)) != NULL_RTX
1359 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1360 && (mem_ref (SET_SRC (set)) != NULL_RTX
1361 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1362 && REG_P (SET_DEST (set))
1363 && REGNO (SET_DEST (set)) < 32)
1364 {
1365 /* There is no problem if the second memory access has a data
1366 dependency on the first single-cycle load. */
1367 rtx x = SET_DEST (set);
1368
1369 next = next_active_insn (insn);
1370 if (!next)
1371 break;
1372 /* If the insn is a branch, then it cannot be problematic. */
1373 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1374 continue;
1375
1376 /* Look for a second memory access to/from an integer register. */
1377 if ((set = single_set (next)) != NULL_RTX)
1378 {
1379 rtx src = SET_SRC (set);
1380 rtx dest = SET_DEST (set);
1381 rtx mem;
1382
1383 /* LDD is affected. */
1384 if ((mem = mem_ref (src)) != NULL_RTX
1385 && REG_P (dest)
1386 && REGNO (dest) < 32
1387 && !reg_mentioned_p (x, XEXP (mem, 0)))
1388 insert_nop = true;
1389
1390 /* STD is *not* affected. */
1391 else if (MEM_P (dest)
1392 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1393 && (src == CONST0_RTX (GET_MODE (dest))
1394 || (REG_P (src)
1395 && REGNO (src) < 32
1396 && REGNO (src) != REGNO (x)))
1397 && !reg_mentioned_p (x, XEXP (dest, 0)))
1398 insert_nop = true;
1399
1400 /* GOT accesses use LD. */
1401 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1402 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1403 insert_nop = true;
1404 }
1405 }
1406
1407 /* Look for a single-word load/operation into an FP register. */
1408 else if (sparc_fix_ut699
1409 && NONJUMP_INSN_P (insn)
1410 && (set = single_set (insn)) != NULL_RTX
1411 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1412 && REG_P (SET_DEST (set))
1413 && REGNO (SET_DEST (set)) > 31)
1414 {
1415 /* Number of instructions in the problematic window. */
1416 const int n_insns = 4;
1417 /* The problematic combination is with the sibling FP register. */
1418 const unsigned int x = REGNO (SET_DEST (set));
1419 const unsigned int y = x ^ 1;
1420 rtx_insn *after;
1421 int i;
1422
1423 next = next_active_insn (insn);
1424 if (!next)
1425 break;
1426 /* If the insn is a branch, then it cannot be problematic. */
1427 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1428 continue;
1429
1430 /* Look for a second load/operation into the sibling FP register. */
1431 if (!((set = single_set (next)) != NULL_RTX
1432 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1433 && REG_P (SET_DEST (set))
1434 && REGNO (SET_DEST (set)) == y))
1435 continue;
1436
1437 /* Look for a (possible) store from the FP register in the next N
1438 instructions, but bail out if it is again modified or if there
1439 is a store from the sibling FP register before this store. */
1440 for (after = next, i = 0; i < n_insns; i++)
1441 {
1442 bool branch_p;
1443
1444 after = next_active_insn (after);
1445 if (!after)
1446 break;
1447
1448 /* This is a branch with an empty delay slot. */
1449 if (!NONJUMP_INSN_P (after))
1450 {
1451 if (++i == n_insns)
1452 break;
1453 branch_p = true;
1454 after = NULL;
1455 }
1456 /* This is a branch with a filled delay slot. */
1457 else if (rtx_sequence *seq =
1458 dyn_cast <rtx_sequence *> (PATTERN (after)))
1459 {
1460 if (++i == n_insns)
1461 break;
1462 branch_p = true;
1463 after = seq->insn (1);
1464 }
1465 /* This is a regular instruction. */
1466 else
1467 branch_p = false;
1468
1469 if (after && (set = single_set (after)) != NULL_RTX)
1470 {
1471 const rtx src = SET_SRC (set);
1472 const rtx dest = SET_DEST (set);
1473 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1474
1475 /* If the FP register is again modified before the store,
1476 then the store isn't affected. */
1477 if (REG_P (dest)
1478 && (REGNO (dest) == x
1479 || (REGNO (dest) == y && size == 8)))
1480 break;
1481
1482 if (MEM_P (dest) && REG_P (src))
1483 {
1484 /* If there is a store from the sibling FP register
1485 before the store, then the store is not affected. */
1486 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1487 break;
1488
1489 /* Otherwise, the store is affected. */
1490 if (REGNO (src) == x && size == 4)
1491 {
1492 insert_nop = true;
1493 break;
1494 }
1495 }
1496 }
1497
1498 /* If we have a branch in the first M instructions, then we
1499 cannot see the (M+2)th instruction so we play safe. */
1500 if (branch_p && i <= (n_insns - 2))
1501 {
1502 insert_nop = true;
1503 break;
1504 }
1505 }
1506 }
1507
1508 else
1509 next = NEXT_INSN (insn);
1510
1511 if (insert_nop)
1512 emit_insn_before (gen_nop (), next);
1513 }
1514
1515 return 0;
1516 }
1517
1518 namespace {
1519
1520 const pass_data pass_data_work_around_errata =
1521 {
1522 RTL_PASS, /* type */
1523 "errata", /* name */
1524 OPTGROUP_NONE, /* optinfo_flags */
1525 TV_MACH_DEP, /* tv_id */
1526 0, /* properties_required */
1527 0, /* properties_provided */
1528 0, /* properties_destroyed */
1529 0, /* todo_flags_start */
1530 0, /* todo_flags_finish */
1531 };
1532
1533 class pass_work_around_errata : public rtl_opt_pass
1534 {
1535 public:
1536 pass_work_around_errata(gcc::context *ctxt)
1537 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1538 {}
1539
1540 /* opt_pass methods: */
1541 virtual bool gate (function *)
1542 {
1543 return sparc_fix_at697f
1544 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1545 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1546 }
1547
1548 virtual unsigned int execute (function *)
1549 {
1550 return sparc_do_work_around_errata ();
1551 }
1552
1553 }; // class pass_work_around_errata
1554
1555 } // anon namespace
1556
1557 rtl_opt_pass *
1558 make_pass_work_around_errata (gcc::context *ctxt)
1559 {
1560 return new pass_work_around_errata (ctxt);
1561 }
1562
1563 /* Helpers for TARGET_DEBUG_OPTIONS. */
1564 static void
1565 dump_target_flag_bits (const int flags)
1566 {
1567 if (flags & MASK_64BIT)
1568 fprintf (stderr, "64BIT ");
1569 if (flags & MASK_APP_REGS)
1570 fprintf (stderr, "APP_REGS ");
1571 if (flags & MASK_FASTER_STRUCTS)
1572 fprintf (stderr, "FASTER_STRUCTS ");
1573 if (flags & MASK_FLAT)
1574 fprintf (stderr, "FLAT ");
1575 if (flags & MASK_FMAF)
1576 fprintf (stderr, "FMAF ");
1577 if (flags & MASK_FSMULD)
1578 fprintf (stderr, "FSMULD ");
1579 if (flags & MASK_FPU)
1580 fprintf (stderr, "FPU ");
1581 if (flags & MASK_HARD_QUAD)
1582 fprintf (stderr, "HARD_QUAD ");
1583 if (flags & MASK_POPC)
1584 fprintf (stderr, "POPC ");
1585 if (flags & MASK_PTR64)
1586 fprintf (stderr, "PTR64 ");
1587 if (flags & MASK_STACK_BIAS)
1588 fprintf (stderr, "STACK_BIAS ");
1589 if (flags & MASK_UNALIGNED_DOUBLES)
1590 fprintf (stderr, "UNALIGNED_DOUBLES ");
1591 if (flags & MASK_V8PLUS)
1592 fprintf (stderr, "V8PLUS ");
1593 if (flags & MASK_VIS)
1594 fprintf (stderr, "VIS ");
1595 if (flags & MASK_VIS2)
1596 fprintf (stderr, "VIS2 ");
1597 if (flags & MASK_VIS3)
1598 fprintf (stderr, "VIS3 ");
1599 if (flags & MASK_VIS4)
1600 fprintf (stderr, "VIS4 ");
1601 if (flags & MASK_VIS4B)
1602 fprintf (stderr, "VIS4B ");
1603 if (flags & MASK_CBCOND)
1604 fprintf (stderr, "CBCOND ");
1605 if (flags & MASK_DEPRECATED_V8_INSNS)
1606 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1607 if (flags & MASK_SPARCLET)
1608 fprintf (stderr, "SPARCLET ");
1609 if (flags & MASK_SPARCLITE)
1610 fprintf (stderr, "SPARCLITE ");
1611 if (flags & MASK_V8)
1612 fprintf (stderr, "V8 ");
1613 if (flags & MASK_V9)
1614 fprintf (stderr, "V9 ");
1615 }
1616
1617 static void
1618 dump_target_flags (const char *prefix, const int flags)
1619 {
1620 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1621 dump_target_flag_bits (flags);
1622 fprintf(stderr, "]\n");
1623 }
1624
1625 /* Validate and override various options, and do some machine dependent
1626 initialization. */
1627
1628 static void
1629 sparc_option_override (void)
1630 {
1631 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1632 static struct cpu_default {
1633 const int cpu;
1634 const enum sparc_processor_type processor;
1635 } const cpu_default[] = {
1636 /* There must be one entry here for each TARGET_CPU value. */
1637 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1638 { TARGET_CPU_v8, PROCESSOR_V8 },
1639 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1640 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1641 { TARGET_CPU_leon, PROCESSOR_LEON },
1642 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1643 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1644 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1645 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1646 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1647 { TARGET_CPU_v9, PROCESSOR_V9 },
1648 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1649 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1650 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1651 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1652 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1653 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1654 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1655 { TARGET_CPU_m8, PROCESSOR_M8 },
1656 { -1, PROCESSOR_V7 }
1657 };
1658 const struct cpu_default *def;
1659 /* Table of values for -m{cpu,tune}=. This must match the order of
1660 the enum processor_type in sparc-opts.h. */
1661 static struct cpu_table {
1662 const char *const name;
1663 const int disable;
1664 const int enable;
1665 } const cpu_table[] = {
1666 { "v7", MASK_ISA, 0 },
1667 { "cypress", MASK_ISA, 0 },
1668 { "v8", MASK_ISA, MASK_V8 },
1669 /* TI TMS390Z55 supersparc */
1670 { "supersparc", MASK_ISA, MASK_V8 },
1671 { "hypersparc", MASK_ISA, MASK_V8 },
1672 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1673 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1674 { "leon3v7", MASK_ISA, MASK_LEON3 },
1675 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1676 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1677 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1678 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1679 { "f934", MASK_ISA, MASK_SPARCLITE },
1680 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1681 { "sparclet", MASK_ISA, MASK_SPARCLET },
1682 /* TEMIC sparclet */
1683 { "tsc701", MASK_ISA, MASK_SPARCLET },
1684 { "v9", MASK_ISA, MASK_V9 },
1685 /* UltraSPARC I, II, IIi */
1686 { "ultrasparc", MASK_ISA,
1687 /* Although insns using %y are deprecated, it is a clear win. */
1688 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1689 /* UltraSPARC III */
1690 /* ??? Check if %y issue still holds true. */
1691 { "ultrasparc3", MASK_ISA,
1692 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1693 /* UltraSPARC T1 */
1694 { "niagara", MASK_ISA,
1695 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1696 /* UltraSPARC T2 */
1697 { "niagara2", MASK_ISA,
1698 MASK_V9|MASK_POPC|MASK_VIS2 },
1699 /* UltraSPARC T3 */
1700 { "niagara3", MASK_ISA,
1701 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1702 /* UltraSPARC T4 */
1703 { "niagara4", MASK_ISA,
1704 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1705 /* UltraSPARC M7 */
1706 { "niagara7", MASK_ISA,
1707 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1708 /* UltraSPARC M8 */
1709 { "m8", MASK_ISA,
1710 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1711 };
1712 const struct cpu_table *cpu;
1713 unsigned int i;
1714
1715 if (sparc_debug_string != NULL)
1716 {
1717 const char *q;
1718 char *p;
1719
1720 p = ASTRDUP (sparc_debug_string);
1721 while ((q = strtok (p, ",")) != NULL)
1722 {
1723 bool invert;
1724 int mask;
1725
1726 p = NULL;
1727 if (*q == '!')
1728 {
1729 invert = true;
1730 q++;
1731 }
1732 else
1733 invert = false;
1734
1735 if (! strcmp (q, "all"))
1736 mask = MASK_DEBUG_ALL;
1737 else if (! strcmp (q, "options"))
1738 mask = MASK_DEBUG_OPTIONS;
1739 else
1740 error ("unknown %<-mdebug-%s%> switch", q);
1741
1742 if (invert)
1743 sparc_debug &= ~mask;
1744 else
1745 sparc_debug |= mask;
1746 }
1747 }
1748
1749 /* Enable the FsMULd instruction by default if not explicitly specified by
1750 the user. It may be later disabled by the CPU (explicitly or not). */
1751 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1752 target_flags |= MASK_FSMULD;
1753
1754 if (TARGET_DEBUG_OPTIONS)
1755 {
1756 dump_target_flags ("Initial target_flags", target_flags);
1757 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1758 }
1759
1760 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1761 SUBTARGET_OVERRIDE_OPTIONS;
1762 #endif
1763
1764 #ifndef SPARC_BI_ARCH
1765 /* Check for unsupported architecture size. */
1766 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1767 error ("%s is not supported by this configuration",
1768 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1769 #endif
1770
1771 /* We force all 64-bit archs to use a 128-bit long double. */
1772 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1773 {
1774 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1775 target_flags |= MASK_LONG_DOUBLE_128;
1776 }
1777
1778 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1779 for (i = 8; i < 16; i++)
1780 if (!call_used_regs [i])
1781 {
1782 error ("%<-fcall-saved-REG%> is not supported for out registers");
1783 call_used_regs [i] = 1;
1784 }
1785
1786 /* Set the default CPU if no -mcpu option was specified. */
1787 if (!global_options_set.x_sparc_cpu_and_features)
1788 {
1789 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1790 if (def->cpu == TARGET_CPU_DEFAULT)
1791 break;
1792 gcc_assert (def->cpu != -1);
1793 sparc_cpu_and_features = def->processor;
1794 }
1795
1796 /* Set the default tuning CPU if no -mtune option was specified. */
1797 if (!global_options_set.x_sparc_cpu)
1798 sparc_cpu = sparc_cpu_and_features;
1799
1800 cpu = &cpu_table[(int) sparc_cpu_and_features];
1801
1802 if (TARGET_DEBUG_OPTIONS)
1803 {
1804 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1805 dump_target_flags ("cpu->disable", cpu->disable);
1806 dump_target_flags ("cpu->enable", cpu->enable);
1807 }
1808
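/* Apply the selected CPU's ISA masks: clear every feature bit the CPU
   entry disables, then set the features it enables, except for features
   the configured assembler cannot handle and features the user set
   explicitly on the command line (those keep their explicit value).
   For example, -mcpu=niagara4 clears MASK_ISA and then enables
   V9|POPC|VIS3|FMAF|CBCOND, unless e.g. -mno-vis3 was also given.  */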
1809 target_flags &= ~cpu->disable;
1810 target_flags |= (cpu->enable
1811 #ifndef HAVE_AS_FMAF_HPC_VIS3
1812 & ~(MASK_FMAF | MASK_VIS3)
1813 #endif
1814 #ifndef HAVE_AS_SPARC4
1815 & ~MASK_CBCOND
1816 #endif
1817 #ifndef HAVE_AS_SPARC5_VIS4
1818 & ~(MASK_VIS4 | MASK_SUBXC)
1819 #endif
1820 #ifndef HAVE_AS_SPARC6
1821 & ~(MASK_VIS4B)
1822 #endif
1823 #ifndef HAVE_AS_LEON
1824 & ~(MASK_LEON | MASK_LEON3)
1825 #endif
1826 & ~(target_flags_explicit & MASK_FEATURES)
1827 );
1828
1829 /* FsMULd is a V8 instruction. */
1830 if (!TARGET_V8 && !TARGET_V9)
1831 target_flags &= ~MASK_FSMULD;
1832
1833 /* -mvis2 implies -mvis. */
1834 if (TARGET_VIS2)
1835 target_flags |= MASK_VIS;
1836
1837 /* -mvis3 implies -mvis2 and -mvis. */
1838 if (TARGET_VIS3)
1839 target_flags |= MASK_VIS2 | MASK_VIS;
1840
1841 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1842 if (TARGET_VIS4)
1843 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1844
1845 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1846 if (TARGET_VIS4B)
1847 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1848
1849 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1850 FPU is disabled. */
1851 if (!TARGET_FPU)
1852 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1853 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1854
1855 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1856 are available; -m64 also implies v9. */
1857 if (TARGET_VIS || TARGET_ARCH64)
1858 {
1859 target_flags |= MASK_V9;
1860 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1861 }
1862
1863 /* -mvis also implies -mv8plus on 32-bit. */
1864 if (TARGET_VIS && !TARGET_ARCH64)
1865 target_flags |= MASK_V8PLUS;
1866
1867 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1868 if (TARGET_V9 && TARGET_ARCH32)
1869 target_flags |= MASK_DEPRECATED_V8_INSNS;
1870
1871 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1872 if (!TARGET_V9 || TARGET_ARCH64)
1873 target_flags &= ~MASK_V8PLUS;
1874
1875 /* Don't use stack biasing in 32-bit mode. */
1876 if (TARGET_ARCH32)
1877 target_flags &= ~MASK_STACK_BIAS;
1878
1879 /* Use LRA instead of reload, unless otherwise instructed. */
1880 if (!(target_flags_explicit & MASK_LRA))
1881 target_flags |= MASK_LRA;
1882
1883 /* Enable applicable errata workarounds for LEON3FT. */
1884 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1885 {
1886 sparc_fix_b2bst = 1;
1887 sparc_fix_lost_divsqrt = 1;
1888 }
1889
1890 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1891 if (sparc_fix_ut699)
1892 target_flags &= ~MASK_FSMULD;
1893
1894 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1895 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1896 target_flags |= MASK_LONG_DOUBLE_128;
1897 #endif
1898
1899 if (TARGET_DEBUG_OPTIONS)
1900 dump_target_flags ("Final target_flags", target_flags);
1901
1902 /* Set the code model if no -mcmodel option was specified. */
1903 if (global_options_set.x_sparc_code_model)
1904 {
1905 if (TARGET_ARCH32)
1906 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1907 }
1908 else
1909 {
1910 if (TARGET_ARCH32)
1911 sparc_code_model = CM_32;
1912 else
1913 sparc_code_model = SPARC_DEFAULT_CMODEL;
1914 }
1915
1916 /* Set the memory model if no -mmemory-model option was specified. */
1917 if (!global_options_set.x_sparc_memory_model)
1918 {
1919 /* Choose the memory model for the operating system. */
1920 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1921 if (os_default != SMM_DEFAULT)
1922 sparc_memory_model = os_default;
1923 /* Choose the most relaxed model for the processor. */
1924 else if (TARGET_V9)
1925 sparc_memory_model = SMM_RMO;
1926 else if (TARGET_LEON3)
1927 sparc_memory_model = SMM_TSO;
1928 else if (TARGET_LEON)
1929 sparc_memory_model = SMM_SC;
1930 else if (TARGET_V8)
1931 sparc_memory_model = SMM_PSO;
1932 else
1933 sparc_memory_model = SMM_SC;
1934 }
1935
1936 /* Supply a default value for align_functions. */
1937 if (flag_align_functions && !str_align_functions)
1938 {
1939 if (sparc_cpu == PROCESSOR_ULTRASPARC
1940 || sparc_cpu == PROCESSOR_ULTRASPARC3
1941 || sparc_cpu == PROCESSOR_NIAGARA
1942 || sparc_cpu == PROCESSOR_NIAGARA2
1943 || sparc_cpu == PROCESSOR_NIAGARA3
1944 || sparc_cpu == PROCESSOR_NIAGARA4)
1945 str_align_functions = "32";
1946 else if (sparc_cpu == PROCESSOR_NIAGARA7
1947 || sparc_cpu == PROCESSOR_M8)
1948 str_align_functions = "64";
1949 }
1950
1951 /* Validate PCC_STRUCT_RETURN. */
1952 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1953 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1954
1955 /* Only use .uaxword when compiling for a 64-bit target. */
1956 if (!TARGET_ARCH64)
1957 targetm.asm_out.unaligned_op.di = NULL;
1958
1959 /* Set the processor costs. */
1960 switch (sparc_cpu)
1961 {
1962 case PROCESSOR_V7:
1963 case PROCESSOR_CYPRESS:
1964 sparc_costs = &cypress_costs;
1965 break;
1966 case PROCESSOR_V8:
1967 case PROCESSOR_SPARCLITE:
1968 case PROCESSOR_SUPERSPARC:
1969 sparc_costs = &supersparc_costs;
1970 break;
1971 case PROCESSOR_F930:
1972 case PROCESSOR_F934:
1973 case PROCESSOR_HYPERSPARC:
1974 case PROCESSOR_SPARCLITE86X:
1975 sparc_costs = &hypersparc_costs;
1976 break;
1977 case PROCESSOR_LEON:
1978 sparc_costs = &leon_costs;
1979 break;
1980 case PROCESSOR_LEON3:
1981 case PROCESSOR_LEON3V7:
1982 sparc_costs = &leon3_costs;
1983 break;
1984 case PROCESSOR_SPARCLET:
1985 case PROCESSOR_TSC701:
1986 sparc_costs = &sparclet_costs;
1987 break;
1988 case PROCESSOR_V9:
1989 case PROCESSOR_ULTRASPARC:
1990 sparc_costs = &ultrasparc_costs;
1991 break;
1992 case PROCESSOR_ULTRASPARC3:
1993 sparc_costs = &ultrasparc3_costs;
1994 break;
1995 case PROCESSOR_NIAGARA:
1996 sparc_costs = &niagara_costs;
1997 break;
1998 case PROCESSOR_NIAGARA2:
1999 sparc_costs = &niagara2_costs;
2000 break;
2001 case PROCESSOR_NIAGARA3:
2002 sparc_costs = &niagara3_costs;
2003 break;
2004 case PROCESSOR_NIAGARA4:
2005 sparc_costs = &niagara4_costs;
2006 break;
2007 case PROCESSOR_NIAGARA7:
2008 sparc_costs = &niagara7_costs;
2009 break;
2010 case PROCESSOR_M8:
2011 sparc_costs = &m8_costs;
2012 break;
2013 case PROCESSOR_NATIVE:
2014 gcc_unreachable ();
2015 }
2016
2017 /* param_simultaneous_prefetches is the number of prefetches that
2018 can run at the same time. More important, it is the threshold
2019 defining when additional prefetches will be dropped by the
2020 hardware.
2021
2022 The UltraSPARC-III features a documented prefetch queue with a
2023 size of 8. Additional prefetches issued in the cpu are
2024 dropped.
2025
2026 Niagara processors are different. In these processors prefetches
2027 are handled much like regular loads. The L1 miss buffer is 32
2028 entries, but prefetches start getting affected when 30 entries
2029 become occupied. That occupation could be a mix of regular loads
2030 and prefetches though. And that buffer is shared by all threads.
2031 Once the threshold is reached, if the core is running a single
2032 thread the prefetch will retry. If more than one thread is
2033 running, the prefetch will be dropped.
2034
2035 All this makes it very difficult to determine how many
2036 prefetches can be issued simultaneously, even in a
2037 single-threaded program. Experimental results show that setting
2038 this parameter to 32 works well when the number of threads is not
2039 high. */
2040 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2041 param_simultaneous_prefetches,
2042 ((sparc_cpu == PROCESSOR_ULTRASPARC
2043 || sparc_cpu == PROCESSOR_NIAGARA
2044 || sparc_cpu == PROCESSOR_NIAGARA2
2045 || sparc_cpu == PROCESSOR_NIAGARA3
2046 || sparc_cpu == PROCESSOR_NIAGARA4)
2047 ? 2
2048 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2049 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2050 || sparc_cpu == PROCESSOR_M8)
2051 ? 32 : 3))));
2052
2053 /* param_l1_cache_line_size is the size of the L1 cache line, in
2054 bytes.
2055
2056 The Oracle SPARC Architecture (previously the UltraSPARC
2057 Architecture) specification states that when a PREFETCH[A]
2058 instruction is executed an implementation-specific amount of data
2059 is prefetched, and that it is at least 64 bytes long (aligned to
2060 at least 64 bytes).
2061
2062 However, this is not correct. The M7 (and implementations prior
2063 to that) does not guarantee a 64B prefetch into a cache if the
2064 line size is smaller. A single cache line is all that is ever
2065 prefetched. So for the M7, where the L1D$ has 32B lines and the
2066 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2067 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2068 is a read_n prefetch, which is the only type which allocates to
2069 the L1.) */
2070 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2071 param_l1_cache_line_size,
2072 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2073
2074 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2075 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2076 Niagara processors feature a L1D$ of 16KB. */
2077 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2078 param_l1_cache_size,
2079 ((sparc_cpu == PROCESSOR_ULTRASPARC
2080 || sparc_cpu == PROCESSOR_ULTRASPARC3
2081 || sparc_cpu == PROCESSOR_NIAGARA
2082 || sparc_cpu == PROCESSOR_NIAGARA2
2083 || sparc_cpu == PROCESSOR_NIAGARA3
2084 || sparc_cpu == PROCESSOR_NIAGARA4
2085 || sparc_cpu == PROCESSOR_NIAGARA7
2086 || sparc_cpu == PROCESSOR_M8)
2087 ? 16 : 64));
2088
2089 /* param_l2_cache_size is the size of the L2 in kilobytes. Note
2090 that 512 is the default in params.def. */
2091 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2092 param_l2_cache_size,
2093 ((sparc_cpu == PROCESSOR_NIAGARA4
2094 || sparc_cpu == PROCESSOR_M8)
2095 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2096 ? 256 : 512)));
2097
2098
2099 /* Disable save slot sharing for call-clobbered registers by default.
2100 The IRA sharing algorithm works on single registers only and this
2101 pessimizes for double floating-point registers. */
2102 if (!global_options_set.x_flag_ira_share_save_slots)
2103 flag_ira_share_save_slots = 0;
2104
2105 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2106 redundant 32-to-64-bit extensions. */
2107 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2108 flag_ree = 0;
2109
2110 /* Do various machine dependent initializations. */
2111 sparc_init_modes ();
2112
2113 /* Set up function hooks. */
2114 init_machine_status = sparc_init_machine_status;
2115 }
2116 \f
2117 /* Miscellaneous utilities. */
2118
2119 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2120 or branch on register contents instructions. */
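/* These are exactly the comparisons against zero for which V9 provides
   branch-on-register (brz, brlez, brlz, brnz, brgz, brgez) and
   conditional-move-on-register (movr/fmovr) forms.  */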
2121
2122 int
2123 v9_regcmp_p (enum rtx_code code)
2124 {
2125 return (code == EQ || code == NE || code == GE || code == LT
2126 || code == LE || code == GT);
2127 }
2128
2129 /* Nonzero if OP is a floating point constant which can
2130 be loaded into an integer register using a single
2131 sethi instruction. */
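/* For example, the single-precision bit pattern of 1.5f is 0x3fc00000:
   it is too large for a signed 13-bit immediate, but its low 10 bits are
   clear, so a single sethi can materialize it.  */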
2132
2133 int
2134 fp_sethi_p (rtx op)
2135 {
2136 if (GET_CODE (op) == CONST_DOUBLE)
2137 {
2138 long i;
2139
2140 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2141 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2142 }
2143
2144 return 0;
2145 }
2146
2147 /* Nonzero if OP is a floating point constant which can
2148 be loaded into an integer register using a single
2149 mov instruction. */
2150
2151 int
2152 fp_mov_p (rtx op)
2153 {
2154 if (GET_CODE (op) == CONST_DOUBLE)
2155 {
2156 long i;
2157
2158 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2159 return SPARC_SIMM13_P (i);
2160 }
2161
2162 return 0;
2163 }
2164
2165 /* Nonzero if OP is a floating point constant which can
2166 be loaded into an integer register using a high/losum
2167 instruction sequence. */
2168
2169 int
2170 fp_high_losum_p (rtx op)
2171 {
2172 /* The constraints calling this should only be in
2173 SFmode move insns, so any constant which cannot
2174 be moved using a single insn will do. */
2175 if (GET_CODE (op) == CONST_DOUBLE)
2176 {
2177 long i;
2178
2179 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2180 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2181 }
2182
2183 return 0;
2184 }
2185
2186 /* Return true if the address of LABEL can be loaded by means of the
2187 mov{si,di}_pic_label_ref patterns in PIC mode. */
2188
2189 static bool
2190 can_use_mov_pic_label_ref (rtx label)
2191 {
2192 /* VxWorks does not impose a fixed gap between segments; the run-time
2193 gap can be different from the object-file gap. We therefore can't
2194 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2195 are absolutely sure that X is in the same segment as the GOT.
2196 Unfortunately, the flexibility of linker scripts means that we
2197 can't be sure of that in general, so assume that GOT-relative
2198 accesses are never valid on VxWorks. */
2199 if (TARGET_VXWORKS_RTP)
2200 return false;
2201
2202 /* Similarly, if the label is non-local, it might end up being placed
2203 in a different section than the current one; now mov_pic_label_ref
2204 requires the label and the code to be in the same section. */
2205 if (LABEL_REF_NONLOCAL_P (label))
2206 return false;
2207
2208 /* Finally, if we are reordering basic blocks and partitioning into hot
2209 and cold sections, this might happen for any label. */
2210 if (flag_reorder_blocks_and_partition)
2211 return false;
2212
2213 return true;
2214 }
2215
2216 /* Expand a move instruction. Return true if all work is done. */
2217
2218 bool
2219 sparc_expand_move (machine_mode mode, rtx *operands)
2220 {
2221 /* Handle sets of MEM first. */
2222 if (GET_CODE (operands[0]) == MEM)
2223 {
2224 /* 0 is a register (or a pair of registers) on SPARC. */
2225 if (register_or_zero_operand (operands[1], mode))
2226 return false;
2227
2228 if (!reload_in_progress)
2229 {
2230 operands[0] = validize_mem (operands[0]);
2231 operands[1] = force_reg (mode, operands[1]);
2232 }
2233 }
2234
2235 /* Fix up TLS cases. */
2236 if (TARGET_HAVE_TLS
2237 && CONSTANT_P (operands[1])
2238 && sparc_tls_referenced_p (operands [1]))
2239 {
2240 operands[1] = sparc_legitimize_tls_address (operands[1]);
2241 return false;
2242 }
2243
2244 /* Fix up PIC cases. */
2245 if (flag_pic && CONSTANT_P (operands[1]))
2246 {
2247 if (pic_address_needs_scratch (operands[1]))
2248 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2249
2250 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2251 if ((GET_CODE (operands[1]) == LABEL_REF
2252 && can_use_mov_pic_label_ref (operands[1]))
2253 || (GET_CODE (operands[1]) == CONST
2254 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2255 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2256 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2257 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2258 {
2259 if (mode == SImode)
2260 {
2261 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2262 return true;
2263 }
2264
2265 if (mode == DImode)
2266 {
2267 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2268 return true;
2269 }
2270 }
2271
2272 if (symbolic_operand (operands[1], mode))
2273 {
2274 operands[1]
2275 = sparc_legitimize_pic_address (operands[1],
2276 reload_in_progress
2277 ? operands[0] : NULL_RTX);
2278 return false;
2279 }
2280 }
2281
2282 /* If we are trying to toss an integer constant into FP registers,
2283 or loading an FP or vector constant, force it into memory. */
2284 if (CONSTANT_P (operands[1])
2285 && REG_P (operands[0])
2286 && (SPARC_FP_REG_P (REGNO (operands[0]))
2287 || SCALAR_FLOAT_MODE_P (mode)
2288 || VECTOR_MODE_P (mode)))
2289 {
2290 /* emit_group_store will send such bogosity to us when it is
2291 not storing directly into memory. So fix this up to avoid
2292 crashes in output_constant_pool. */
2293 if (operands [1] == const0_rtx)
2294 operands[1] = CONST0_RTX (mode);
2295
2296 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
2297 and we can always do so for the other registers. */
2298 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2299 && (const_zero_operand (operands[1], mode)
2300 || const_all_ones_operand (operands[1], mode)))
2301 return false;
2302
2303 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2304 /* We are able to build any SF constant in integer registers
2305 with at most 2 instructions. */
2306 && (mode == SFmode
2307 /* And any DF constant in integer registers if needed. */
2308 || (mode == DFmode && !can_create_pseudo_p ())))
2309 return false;
2310
2311 operands[1] = force_const_mem (mode, operands[1]);
2312 if (!reload_in_progress)
2313 operands[1] = validize_mem (operands[1]);
2314 return false;
2315 }
2316
2317 /* Accept non-constants and valid constants unmodified. */
2318 if (!CONSTANT_P (operands[1])
2319 || GET_CODE (operands[1]) == HIGH
2320 || input_operand (operands[1], mode))
2321 return false;
2322
2323 switch (mode)
2324 {
2325 case E_QImode:
2326 /* All QImode constants require only one insn, so proceed. */
2327 break;
2328
2329 case E_HImode:
2330 case E_SImode:
2331 sparc_emit_set_const32 (operands[0], operands[1]);
2332 return true;
2333
2334 case E_DImode:
2335 /* input_operand should have filtered out 32-bit mode. */
2336 sparc_emit_set_const64 (operands[0], operands[1]);
2337 return true;
2338
2339 case E_TImode:
2340 {
2341 rtx high, low;
2342 /* TImode isn't available in 32-bit mode. */
2343 split_double (operands[1], &high, &low);
2344 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2345 high));
2346 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2347 low));
2348 }
2349 return true;
2350
2351 default:
2352 gcc_unreachable ();
2353 }
2354
2355 return false;
2356 }
2357
2358 /* Load OP1, a 32-bit constant, into OP0, a register.
2359 We know it can't be done in one insn when we get
2360 here; the move expander guarantees this. */
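/* For example, op1 = (const_int 0x12345678) is split into
   temp = 0x12345400 followed by op0 = temp | 0x278, which the move
   patterns render as the usual sethi %hi()/or %lo() pair.  */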
2361
2362 static void
2363 sparc_emit_set_const32 (rtx op0, rtx op1)
2364 {
2365 machine_mode mode = GET_MODE (op0);
2366 rtx temp = op0;
2367
2368 if (can_create_pseudo_p ())
2369 temp = gen_reg_rtx (mode);
2370
2371 if (GET_CODE (op1) == CONST_INT)
2372 {
2373 gcc_assert (!small_int_operand (op1, mode)
2374 && !const_high_operand (op1, mode));
2375
2376 /* Emit them as real moves instead of a HIGH/LO_SUM,
2377 this way CSE can see everything and reuse intermediate
2378 values if it wants. */
2379 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2380 & ~(HOST_WIDE_INT) 0x3ff)));
2381
2382 emit_insn (gen_rtx_SET (op0,
2383 gen_rtx_IOR (mode, temp,
2384 GEN_INT (INTVAL (op1) & 0x3ff))));
2385 }
2386 else
2387 {
2388 /* A symbol, emit in the traditional way. */
2389 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2390 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2391 }
2392 }
2393
2394 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2395 If TEMP is nonzero, we are forbidden to use any other scratch
2396 registers. Otherwise, we are allowed to generate them as needed.
2397
2398 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2399 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2400
2401 void
2402 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2403 {
2404 rtx cst, temp1, temp2, temp3, temp4, temp5;
2405 rtx ti_temp = 0;
2406
2407 /* Deal with too large offsets. */
2408 if (GET_CODE (op1) == CONST
2409 && GET_CODE (XEXP (op1, 0)) == PLUS
2410 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2411 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2412 {
2413 gcc_assert (!temp);
2414 temp1 = gen_reg_rtx (DImode);
2415 temp2 = gen_reg_rtx (DImode);
2416 sparc_emit_set_const64 (temp2, cst);
2417 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2418 NULL_RTX);
2419 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2420 return;
2421 }
2422
2423 if (temp && GET_MODE (temp) == TImode)
2424 {
2425 ti_temp = temp;
2426 temp = gen_rtx_REG (DImode, REGNO (temp));
2427 }
2428
2429 /* SPARC-V9 code model support. */
2430 switch (sparc_code_model)
2431 {
2432 case CM_MEDLOW:
2433 /* The range spanned by all instructions in the object is less
2434 than 2^31 bytes (2GB) and the distance from any instruction
2435 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2436 than 2^31 bytes (2GB).
2437
2438 The executable must be in the low 4TB of the virtual address
2439 space.
2440
2441 sethi %hi(symbol), %temp1
2442 or %temp1, %lo(symbol), %reg */
2443 if (temp)
2444 temp1 = temp; /* op0 is allowed. */
2445 else
2446 temp1 = gen_reg_rtx (DImode);
2447
2448 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2449 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2450 break;
2451
2452 case CM_MEDMID:
2453 /* The range spanned by all instructions in the object is less
2454 than 2^31 bytes (2GB) and the distance from any instruction
2455 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2456 than 2^31 bytes (2GB).
2457
2458 The executable must be in the low 16TB of the virtual address
2459 space.
2460
2461 sethi %h44(symbol), %temp1
2462 or %temp1, %m44(symbol), %temp2
2463 sllx %temp2, 12, %temp3
2464 or %temp3, %l44(symbol), %reg */
2465 if (temp)
2466 {
2467 temp1 = op0;
2468 temp2 = op0;
2469 temp3 = temp; /* op0 is allowed. */
2470 }
2471 else
2472 {
2473 temp1 = gen_reg_rtx (DImode);
2474 temp2 = gen_reg_rtx (DImode);
2475 temp3 = gen_reg_rtx (DImode);
2476 }
2477
2478 emit_insn (gen_seth44 (temp1, op1));
2479 emit_insn (gen_setm44 (temp2, temp1, op1));
2480 emit_insn (gen_rtx_SET (temp3,
2481 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2482 emit_insn (gen_setl44 (op0, temp3, op1));
2483 break;
2484
2485 case CM_MEDANY:
2486 /* The range spanned by all instructions in the object is less
2487 than 2^31 bytes (2GB) and the distance from any instruction
2488 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2489 than 2^31 bytes (2GB).
2490
2491 The executable can be placed anywhere in the virtual address
2492 space.
2493
2494 sethi %hh(symbol), %temp1
2495 sethi %lm(symbol), %temp2
2496 or %temp1, %hm(symbol), %temp3
2497 sllx %temp3, 32, %temp4
2498 or %temp4, %temp2, %temp5
2499 or %temp5, %lo(symbol), %reg */
2500 if (temp)
2501 {
2502 /* It is possible that one of the registers we got for operands[2]
2503 might coincide with that of operands[0] (which is why we made
2504 it TImode). Pick the other one to use as our scratch. */
2505 if (rtx_equal_p (temp, op0))
2506 {
2507 gcc_assert (ti_temp);
2508 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2509 }
2510 temp1 = op0;
2511 temp2 = temp; /* op0 is _not_ allowed, see above. */
2512 temp3 = op0;
2513 temp4 = op0;
2514 temp5 = op0;
2515 }
2516 else
2517 {
2518 temp1 = gen_reg_rtx (DImode);
2519 temp2 = gen_reg_rtx (DImode);
2520 temp3 = gen_reg_rtx (DImode);
2521 temp4 = gen_reg_rtx (DImode);
2522 temp5 = gen_reg_rtx (DImode);
2523 }
2524
2525 emit_insn (gen_sethh (temp1, op1));
2526 emit_insn (gen_setlm (temp2, op1));
2527 emit_insn (gen_sethm (temp3, temp1, op1));
2528 emit_insn (gen_rtx_SET (temp4,
2529 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2530 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2531 emit_insn (gen_setlo (op0, temp5, op1));
2532 break;
2533
2534 case CM_EMBMEDANY:
2535 /* Old old old backwards compatibility kruft here.
2536 Essentially it is MEDLOW with a fixed 64-bit
2537 virtual base added to all data segment addresses.
2538 Text-segment stuff is computed like MEDANY, we can't
2539 reuse the code above because the relocation knobs
2540 look different.
2541
2542 Data segment: sethi %hi(symbol), %temp1
2543 add %temp1, EMBMEDANY_BASE_REG, %temp2
2544 or %temp2, %lo(symbol), %reg */
2545 if (data_segment_operand (op1, GET_MODE (op1)))
2546 {
2547 if (temp)
2548 {
2549 temp1 = temp; /* op0 is allowed. */
2550 temp2 = op0;
2551 }
2552 else
2553 {
2554 temp1 = gen_reg_rtx (DImode);
2555 temp2 = gen_reg_rtx (DImode);
2556 }
2557
2558 emit_insn (gen_embmedany_sethi (temp1, op1));
2559 emit_insn (gen_embmedany_brsum (temp2, temp1));
2560 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2561 }
2562
2563 /* Text segment: sethi %uhi(symbol), %temp1
2564 sethi %hi(symbol), %temp2
2565 or %temp1, %ulo(symbol), %temp3
2566 sllx %temp3, 32, %temp4
2567 or %temp4, %temp2, %temp5
2568 or %temp5, %lo(symbol), %reg */
2569 else
2570 {
2571 if (temp)
2572 {
2573 /* It is possible that one of the registers we got for operands[2]
2574 might coincide with that of operands[0] (which is why we made
2575 it TImode). Pick the other one to use as our scratch. */
2576 if (rtx_equal_p (temp, op0))
2577 {
2578 gcc_assert (ti_temp);
2579 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2580 }
2581 temp1 = op0;
2582 temp2 = temp; /* op0 is _not_ allowed, see above. */
2583 temp3 = op0;
2584 temp4 = op0;
2585 temp5 = op0;
2586 }
2587 else
2588 {
2589 temp1 = gen_reg_rtx (DImode);
2590 temp2 = gen_reg_rtx (DImode);
2591 temp3 = gen_reg_rtx (DImode);
2592 temp4 = gen_reg_rtx (DImode);
2593 temp5 = gen_reg_rtx (DImode);
2594 }
2595
2596 emit_insn (gen_embmedany_textuhi (temp1, op1));
2597 emit_insn (gen_embmedany_texthi (temp2, op1));
2598 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2599 emit_insn (gen_rtx_SET (temp4,
2600 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2601 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2602 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2603 }
2604 break;
2605
2606 default:
2607 gcc_unreachable ();
2608 }
2609 }
2610
2611 /* These avoid problems when cross compiling. If we do not
2612 go through all this hair then the optimizer will see
2613 invalid REG_EQUAL notes or in some cases none at all. */
2614 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2615 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2616 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2617 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2618
2619 /* The optimizer is not to assume anything about exactly
2620 which bits are set for a HIGH; they are unspecified.
2621 Unfortunately this leads to many missed optimizations
2622 during CSE. We mask out the non-HIGH bits so the result
2623 matches a plain movdi, which alleviates this problem. */
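/* For example, gen_safe_HIGH64 (dest, 0x12345678) yields
   (set dest (const_int 0x12345400)), which can still be emitted as a
   single sethi but gives CSE a fully specified value to work with.  */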
2624 static rtx
2625 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2626 {
2627 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2628 }
2629
2630 static rtx
2631 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2632 {
2633 return gen_rtx_SET (dest, GEN_INT (val));
2634 }
2635
2636 static rtx
2637 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2638 {
2639 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2640 }
2641
2642 static rtx
2643 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2644 {
2645 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2646 }
2647
2648 /* Worker routines for 64-bit constant formation on arch64.
2649 One of the key things to be doing in these emissions is
2650 to create as many temp REGs as possible. This makes it
2651 possible for half-built constants to be used later when
2652 such values are similar to something required later on.
2653 Without doing this, the optimizer cannot see such
2654 opportunities. */
2655
2656 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2657 unsigned HOST_WIDE_INT, int);
2658
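/* Handle the 2-insn cases where the high 32 bits of the constant are all
   zeros (IS_NEG == 0) or all ones (IS_NEG != 0).  For instance, for the
   constant 0xffffffff89abcdef we emit a sethi of ~0x89abcdef masked to
   0x76543000, then xor it with the sign-extended immediate
   (-0x400 | 0x1ef), which recovers the full 64-bit value.  */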
2659 static void
2660 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2661 unsigned HOST_WIDE_INT low_bits, int is_neg)
2662 {
2663 unsigned HOST_WIDE_INT high_bits;
2664
2665 if (is_neg)
2666 high_bits = (~low_bits) & 0xffffffff;
2667 else
2668 high_bits = low_bits;
2669
2670 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2671 if (!is_neg)
2672 {
2673 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2674 }
2675 else
2676 {
2677 /* If we are XOR'ing with -1, then we should emit a one's complement
2678 instead. This way the combiner will notice logical operations
2679 such as ANDN later on and substitute. */
2680 if ((low_bits & 0x3ff) == 0x3ff)
2681 {
2682 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2683 }
2684 else
2685 {
2686 emit_insn (gen_rtx_SET (op0,
2687 gen_safe_XOR64 (temp,
2688 (-(HOST_WIDE_INT)0x400
2689 | (low_bits & 0x3ff)))));
2690 }
2691 }
2692 }
2693
2694 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2695 unsigned HOST_WIDE_INT, int);
2696
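/* Build HIGH_BITS shifted left by SHIFT_COUNT and then OR in
   LOW_IMMEDIATE.  For instance, with HIGH_BITS = 0x12345678,
   SHIFT_COUNT = 32 and LOW_IMMEDIATE = 0x123, this emits roughly:
     sethi %hi(0x12345678), %tmp
     or    %tmp, 0x278, %dst
     sllx  %dst, 32, %dst
     or    %dst, 0x123, %dst  */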
2697 static void
2698 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2699 unsigned HOST_WIDE_INT high_bits,
2700 unsigned HOST_WIDE_INT low_immediate,
2701 int shift_count)
2702 {
2703 rtx temp2 = op0;
2704
2705 if ((high_bits & 0xfffffc00) != 0)
2706 {
2707 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2708 if ((high_bits & ~0xfffffc00) != 0)
2709 emit_insn (gen_rtx_SET (op0,
2710 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2711 else
2712 temp2 = temp;
2713 }
2714 else
2715 {
2716 emit_insn (gen_safe_SET64 (temp, high_bits));
2717 temp2 = temp;
2718 }
2719
2720 /* Now shift it up into place. */
2721 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2722 GEN_INT (shift_count))));
2723
2724 /* If there is a low immediate part piece, finish up by
2725 putting that in as well. */
2726 if (low_immediate != 0)
2727 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2728 }
2729
2730 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2731 unsigned HOST_WIDE_INT);
2732
2733 /* Full 64-bit constant decomposition. Even though this is the
2734 'worst' case, we still optimize a few things away. */
2735 static void
2736 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2737 unsigned HOST_WIDE_INT high_bits,
2738 unsigned HOST_WIDE_INT low_bits)
2739 {
2740 rtx sub_temp = op0;
2741
2742 if (can_create_pseudo_p ())
2743 sub_temp = gen_reg_rtx (DImode);
2744
2745 if ((high_bits & 0xfffffc00) != 0)
2746 {
2747 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2748 if ((high_bits & ~0xfffffc00) != 0)
2749 emit_insn (gen_rtx_SET (sub_temp,
2750 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2751 else
2752 sub_temp = temp;
2753 }
2754 else
2755 {
2756 emit_insn (gen_safe_SET64 (temp, high_bits));
2757 sub_temp = temp;
2758 }
2759
2760 if (can_create_pseudo_p ())
2761 {
2762 rtx temp2 = gen_reg_rtx (DImode);
2763 rtx temp3 = gen_reg_rtx (DImode);
2764 rtx temp4 = gen_reg_rtx (DImode);
2765
2766 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2767 GEN_INT (32))));
2768
2769 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2770 if ((low_bits & ~0xfffffc00) != 0)
2771 {
2772 emit_insn (gen_rtx_SET (temp3,
2773 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2774 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2775 }
2776 else
2777 {
2778 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2779 }
2780 }
2781 else
2782 {
2783 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2784 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2785 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2786 int to_shift = 12;
2787
2788 /* We are in the middle of reload, so this is really
2789 painful. However we do still make an attempt to
2790 avoid emitting truly stupid code. */
2791 if (low1 != const0_rtx)
2792 {
2793 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2794 GEN_INT (to_shift))));
2795 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2796 sub_temp = op0;
2797 to_shift = 12;
2798 }
2799 else
2800 {
2801 to_shift += 12;
2802 }
2803 if (low2 != const0_rtx)
2804 {
2805 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2806 GEN_INT (to_shift))));
2807 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2808 sub_temp = op0;
2809 to_shift = 8;
2810 }
2811 else
2812 {
2813 to_shift += 8;
2814 }
2815 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2816 GEN_INT (to_shift))));
2817 if (low3 != const0_rtx)
2818 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2819 /* phew... */
2820 }
2821 }
2822
2823 /* Analyze a 64-bit constant for certain properties. */
2824 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2825 unsigned HOST_WIDE_INT,
2826 int *, int *, int *);
2827
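/* Compute the positions of the highest and lowest set bits of the 64-bit
   constant HIGH_BITS:LOW_BITS and whether every bit in between is set.
   E.g. for 0x00000ff000000000 we get *LBSP = 36, *HBSP = 43 and
   *ABBASP = 1.  */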
2828 static void
2829 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2830 unsigned HOST_WIDE_INT low_bits,
2831 int *hbsp, int *lbsp, int *abbasp)
2832 {
2833 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2834 int i;
2835
2836 lowest_bit_set = highest_bit_set = -1;
2837 i = 0;
2838 do
2839 {
2840 if ((lowest_bit_set == -1)
2841 && ((low_bits >> i) & 1))
2842 lowest_bit_set = i;
2843 if ((highest_bit_set == -1)
2844 && ((high_bits >> (32 - i - 1)) & 1))
2845 highest_bit_set = (64 - i - 1);
2846 }
2847 while (++i < 32
2848 && ((highest_bit_set == -1)
2849 || (lowest_bit_set == -1)));
2850 if (i == 32)
2851 {
2852 i = 0;
2853 do
2854 {
2855 if ((lowest_bit_set == -1)
2856 && ((high_bits >> i) & 1))
2857 lowest_bit_set = i + 32;
2858 if ((highest_bit_set == -1)
2859 && ((low_bits >> (32 - i - 1)) & 1))
2860 highest_bit_set = 32 - i - 1;
2861 }
2862 while (++i < 32
2863 && ((highest_bit_set == -1)
2864 || (lowest_bit_set == -1)));
2865 }
2866 /* If there are no bits set this should have gone out
2867 as one instruction! */
2868 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2869 all_bits_between_are_set = 1;
2870 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2871 {
2872 if (i < 32)
2873 {
2874 if ((low_bits & (1 << i)) != 0)
2875 continue;
2876 }
2877 else
2878 {
2879 if ((high_bits & (1 << (i - 32))) != 0)
2880 continue;
2881 }
2882 all_bits_between_are_set = 0;
2883 break;
2884 }
2885 *hbsp = highest_bit_set;
2886 *lbsp = lowest_bit_set;
2887 *abbasp = all_bits_between_are_set;
2888 }
2889
2890 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2891
2892 static int
2893 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2894 unsigned HOST_WIDE_INT low_bits)
2895 {
2896 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2897
2898 if (high_bits == 0
2899 || high_bits == 0xffffffff)
2900 return 1;
2901
2902 analyze_64bit_constant (high_bits, low_bits,
2903 &highest_bit_set, &lowest_bit_set,
2904 &all_bits_between_are_set);
2905
2906 if ((highest_bit_set == 63
2907 || lowest_bit_set == 0)
2908 && all_bits_between_are_set != 0)
2909 return 1;
2910
2911 if ((highest_bit_set - lowest_bit_set) < 21)
2912 return 1;
2913
2914 return 0;
2915 }
2916
2917 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2918 unsigned HOST_WIDE_INT,
2919 int, int);
2920
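/* Return the contiguous bit field of HIGH_BITS:LOW_BITS that starts at
   LOWEST_BIT_SET, re-based at bit position SHIFT.  E.g. for the constant
   0x0000000000ff0000 (lowest set bit 16) and SHIFT = 10, the result is
   0xff << 10 = 0x3fc00, suitable as a sethi operand.  */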
2921 static unsigned HOST_WIDE_INT
2922 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2923 unsigned HOST_WIDE_INT low_bits,
2924 int lowest_bit_set, int shift)
2925 {
2926 HOST_WIDE_INT hi, lo;
2927
2928 if (lowest_bit_set < 32)
2929 {
2930 lo = (low_bits >> lowest_bit_set) << shift;
2931 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2932 }
2933 else
2934 {
2935 lo = 0;
2936 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2937 }
2938 gcc_assert (! (hi & lo));
2939 return (hi | lo);
2940 }
2941
2942 /* Here we are sure to be arch64 and this is an integer constant
2943 being loaded into a register. Emit the most efficient
2944 insn sequence possible. Detection of all the 1-insn cases
2945 has been done already. */
2946 static void
2947 sparc_emit_set_const64 (rtx op0, rtx op1)
2948 {
2949 unsigned HOST_WIDE_INT high_bits, low_bits;
2950 int lowest_bit_set, highest_bit_set;
2951 int all_bits_between_are_set;
2952 rtx temp = 0;
2953
2954 /* Sanity check that we know what we are working with. */
2955 gcc_assert (TARGET_ARCH64
2956 && (GET_CODE (op0) == SUBREG
2957 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2958
2959 if (! can_create_pseudo_p ())
2960 temp = op0;
2961
2962 if (GET_CODE (op1) != CONST_INT)
2963 {
2964 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2965 return;
2966 }
2967
2968 if (! temp)
2969 temp = gen_reg_rtx (DImode);
2970
2971 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2972 low_bits = (INTVAL (op1) & 0xffffffff);
2973
2974 /* low_bits bits 0 --> 31
2975 high_bits bits 32 --> 63 */
2976
2977 analyze_64bit_constant (high_bits, low_bits,
2978 &highest_bit_set, &lowest_bit_set,
2979 &all_bits_between_are_set);
2980
2981 /* First try for a 2-insn sequence. */
2982
2983 /* These situations are preferred because the optimizer can
2984 * do more things with them:
2985 * 1) mov -1, %reg
2986 * sllx %reg, shift, %reg
2987 * 2) mov -1, %reg
2988 * srlx %reg, shift, %reg
2989 * 3) mov some_small_const, %reg
2990 * sllx %reg, shift, %reg
2991 */
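  /* For instance, 0xfff0000000000000 (bits 52..63 all set) is emitted as
     "mov -1, %reg; sllx %reg, 52, %reg".  */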
2992 if (((highest_bit_set == 63
2993 || lowest_bit_set == 0)
2994 && all_bits_between_are_set != 0)
2995 || ((highest_bit_set - lowest_bit_set) < 12))
2996 {
2997 HOST_WIDE_INT the_const = -1;
2998 int shift = lowest_bit_set;
2999
3000 if ((highest_bit_set != 63
3001 && lowest_bit_set != 0)
3002 || all_bits_between_are_set == 0)
3003 {
3004 the_const =
3005 create_simple_focus_bits (high_bits, low_bits,
3006 lowest_bit_set, 0);
3007 }
3008 else if (lowest_bit_set == 0)
3009 shift = -(63 - highest_bit_set);
3010
3011 gcc_assert (SPARC_SIMM13_P (the_const));
3012 gcc_assert (shift != 0);
3013
3014 emit_insn (gen_safe_SET64 (temp, the_const));
3015 if (shift > 0)
3016 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3017 GEN_INT (shift))));
3018 else if (shift < 0)
3019 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3020 GEN_INT (-shift))));
3021 return;
3022 }
3023
3024 /* Now a range of 22 or fewer bits set somewhere.
3025 * 1) sethi %hi(focus_bits), %reg
3026 * sllx %reg, shift, %reg
3027 * 2) sethi %hi(focus_bits), %reg
3028 * srlx %reg, shift, %reg
3029 */
3030 if ((highest_bit_set - lowest_bit_set) < 21)
3031 {
3032 unsigned HOST_WIDE_INT focus_bits =
3033 create_simple_focus_bits (high_bits, low_bits,
3034 lowest_bit_set, 10);
3035
3036 gcc_assert (SPARC_SETHI_P (focus_bits));
3037 gcc_assert (lowest_bit_set != 10);
3038
3039 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3040
3041 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3042 if (lowest_bit_set < 10)
3043 emit_insn (gen_rtx_SET (op0,
3044 gen_rtx_LSHIFTRT (DImode, temp,
3045 GEN_INT (10 - lowest_bit_set))));
3046 else if (lowest_bit_set > 10)
3047 emit_insn (gen_rtx_SET (op0,
3048 gen_rtx_ASHIFT (DImode, temp,
3049 GEN_INT (lowest_bit_set - 10))));
3050 return;
3051 }
3052
3053 /* 1) sethi %hi(low_bits), %reg
3054 * or %reg, %lo(low_bits), %reg
3055 * 2) sethi %hi(~low_bits), %reg
3056 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3057 */
3058 if (high_bits == 0
3059 || high_bits == 0xffffffff)
3060 {
3061 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3062 (high_bits == 0xffffffff));
3063 return;
3064 }
3065
3066 /* Now, try 3-insn sequences. */
3067
3068 /* 1) sethi %hi(high_bits), %reg
3069 * or %reg, %lo(high_bits), %reg
3070 * sllx %reg, 32, %reg
3071 */
3072 if (low_bits == 0)
3073 {
3074 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3075 return;
3076 }
3077
3078 /* We may be able to do something quick
3079 when the constant is negated, so try that. */
3080 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3081 (~low_bits) & 0xfffffc00))
3082 {
3083 /* NOTE: The trailing bits get XOR'd so we need the
3084 non-negated bits, not the negated ones. */
3085 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3086
3087 if ((((~high_bits) & 0xffffffff) == 0
3088 && ((~low_bits) & 0x80000000) == 0)
3089 || (((~high_bits) & 0xffffffff) == 0xffffffff
3090 && ((~low_bits) & 0x80000000) != 0))
3091 {
3092 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3093
3094 if ((SPARC_SETHI_P (fast_int)
3095 && (~high_bits & 0xffffffff) == 0)
3096 || SPARC_SIMM13_P (fast_int))
3097 emit_insn (gen_safe_SET64 (temp, fast_int));
3098 else
3099 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3100 }
3101 else
3102 {
3103 rtx negated_const;
3104 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3105 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3106 sparc_emit_set_const64 (temp, negated_const);
3107 }
3108
3109 /* If we are XOR'ing with -1, then we should emit a one's complement
3110 instead. This way the combiner will notice logical operations
3111 such as ANDN later on and substitute. */
3112 if (trailing_bits == 0x3ff)
3113 {
3114 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3115 }
3116 else
3117 {
3118 emit_insn (gen_rtx_SET (op0,
3119 gen_safe_XOR64 (temp,
3120 (-0x400 | trailing_bits))));
3121 }
3122 return;
3123 }
3124
3125 /* 1) sethi %hi(xxx), %reg
3126 * or %reg, %lo(xxx), %reg
3127 * sllx %reg, yyy, %reg
3128 *
3129 * ??? This is just a generalized version of the low_bits==0
3130 * thing above, FIXME...
3131 */
3132 if ((highest_bit_set - lowest_bit_set) < 32)
3133 {
3134 unsigned HOST_WIDE_INT focus_bits =
3135 create_simple_focus_bits (high_bits, low_bits,
3136 lowest_bit_set, 0);
3137
3138 /* We can't get here in this state. */
3139 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3140
3141 /* So what we know is that the set bits straddle the
3142 middle of the 64-bit word. */
3143 sparc_emit_set_const64_quick2 (op0, temp,
3144 focus_bits, 0,
3145 lowest_bit_set);
3146 return;
3147 }
3148
3149 /* 1) sethi %hi(high_bits), %reg
3150 * or %reg, %lo(high_bits), %reg
3151 * sllx %reg, 32, %reg
3152 * or %reg, low_bits, %reg
3153 */
3154 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3155 {
3156 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3157 return;
3158 }
3159
3160 /* The easiest way when all else fails, is full decomposition. */
3161 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3162 }
3163
3164 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3165
3166 static bool
3167 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3168 {
3169 *p1 = SPARC_ICC_REG;
3170 *p2 = SPARC_FCC_REG;
3171 return true;
3172 }
3173
3174 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3175
3176 static unsigned int
3177 sparc_min_arithmetic_precision (void)
3178 {
3179 return 32;
3180 }
3181
3182 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3183 return the mode to be used for the comparison. For floating-point,
3184 CCFP[E]mode is used. CCNZmode should be used when the first operand
3185 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3186 processing is needed. */
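/* For instance, comparing (plus:SI x y) against zero selects CCNZmode,
   meaning only the negative and zero condition bits, which an addcc sets
   according to its result, are relied upon.  */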
3187
3188 machine_mode
3189 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3190 {
3191 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3192 {
3193 switch (op)
3194 {
3195 case EQ:
3196 case NE:
3197 case UNORDERED:
3198 case ORDERED:
3199 case UNLT:
3200 case UNLE:
3201 case UNGT:
3202 case UNGE:
3203 case UNEQ:
3204 return CCFPmode;
3205
3206 case LT:
3207 case LE:
3208 case GT:
3209 case GE:
3210 case LTGT:
3211 return CCFPEmode;
3212
3213 default:
3214 gcc_unreachable ();
3215 }
3216 }
3217 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3218 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3219 && y == const0_rtx)
3220 {
3221 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3222 return CCXNZmode;
3223 else
3224 return CCNZmode;
3225 }
3226 else
3227 {
3228 /* This is for the cmp<mode>_sne pattern. */
3229 if (GET_CODE (x) == NOT && y == constm1_rtx)
3230 {
3231 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3232 return CCXCmode;
3233 else
3234 return CCCmode;
3235 }
3236
3237 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3238 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3239 {
3240 if (GET_CODE (y) == UNSPEC
3241 && (XINT (y, 1) == UNSPEC_ADDV
3242 || XINT (y, 1) == UNSPEC_SUBV
3243 || XINT (y, 1) == UNSPEC_NEGV))
3244 return CCVmode;
3245 else
3246 return CCCmode;
3247 }
3248
3249 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3250 return CCXmode;
3251 else
3252 return CCmode;
3253 }
3254 }
3255
3256 /* Emit the compare insn and return the CC reg for a CODE comparison
3257 with operands X and Y. */
3258
3259 static rtx
3260 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3261 {
3262 machine_mode mode;
3263 rtx cc_reg;
3264
3265 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3266 return x;
3267
3268 mode = SELECT_CC_MODE (code, x, y);
3269
3270 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3271 fcc regs (cse can't tell they're really call clobbered regs and will
3272 remove a duplicate comparison even if there is an intervening function
3273 call - it will then try to reload the cc reg via an int reg which is why
3274 we need the movcc patterns). It is possible to provide the movcc
3275 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3276 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3277 to tell cse that CCFPE mode registers (even pseudos) are call
3278 clobbered. */
3279
3280 /* ??? This is an experiment. Rather than making changes to cse which may
3281 or may not be easy/clean, we do our own cse. This is possible because
3282 we will generate hard registers. Cse knows they're call clobbered (it
3283 doesn't know the same thing about pseudos). If we guess wrong, no big
3284 deal, but if we win, great! */
3285
3286 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3287 #if 1 /* experiment */
3288 {
3289 int reg;
3290 /* We cycle through the registers to ensure they're all exercised. */
3291 static int next_fcc_reg = 0;
3292 /* Previous x,y for each fcc reg. */
3293 static rtx prev_args[4][2];
3294
3295 /* Scan prev_args for x,y. */
3296 for (reg = 0; reg < 4; reg++)
3297 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3298 break;
3299 if (reg == 4)
3300 {
3301 reg = next_fcc_reg;
3302 prev_args[reg][0] = x;
3303 prev_args[reg][1] = y;
3304 next_fcc_reg = (next_fcc_reg + 1) & 3;
3305 }
3306 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3307 }
3308 #else
3309 cc_reg = gen_reg_rtx (mode);
3310 #endif /* ! experiment */
3311 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3312 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3313 else
3314 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3315
3316 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3317 will only result in an unrecognizable insn so no point in asserting. */
3318 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3319
3320 return cc_reg;
3321 }
3322
3323
3324 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3325
3326 rtx
3327 gen_compare_reg (rtx cmp)
3328 {
3329 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3330 }
3331
3332 /* This function is used for v9 only.
3333 DEST is the target of the Scc insn.
3334 CODE is the code for an Scc's comparison.
3335 X and Y are the values we compare.
3336
3337 This function is needed to turn
3338
3339 (set (reg:SI 110)
3340 (gt (reg:CCX 100 %icc)
3341 (const_int 0)))
3342 into
3343 (set (reg:SI 110)
3344 (gt:DI (reg:CCX 100 %icc)
3345 (const_int 0)))
3346
3347 IE: The instruction recognizer needs to see the mode of the comparison to
3348 find the right instruction. We could use "gt:DI" right in the
3349 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3350
3351 static int
3352 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3353 {
3354 if (! TARGET_ARCH64
3355 && (GET_MODE (x) == DImode
3356 || GET_MODE (dest) == DImode))
3357 return 0;
3358
3359 /* Try to use the movrCC insns. */
3360 if (TARGET_ARCH64
3361 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3362 && y == const0_rtx
3363 && v9_regcmp_p (compare_code))
3364 {
3365 rtx op0 = x;
3366 rtx temp;
3367
3368 /* Special case for op0 != 0. This can be done with one instruction if
3369 dest == x. */
3370
3371 if (compare_code == NE
3372 && GET_MODE (dest) == DImode
3373 && rtx_equal_p (op0, dest))
3374 {
3375 emit_insn (gen_rtx_SET (dest,
3376 gen_rtx_IF_THEN_ELSE (DImode,
3377 gen_rtx_fmt_ee (compare_code, DImode,
3378 op0, const0_rtx),
3379 const1_rtx,
3380 dest)));
3381 return 1;
3382 }
3383
3384 if (reg_overlap_mentioned_p (dest, op0))
3385 {
3386 /* Handle the case where dest == x.
3387 We "early clobber" the result. */
3388 op0 = gen_reg_rtx (GET_MODE (x));
3389 emit_move_insn (op0, x);
3390 }
3391
3392 emit_insn (gen_rtx_SET (dest, const0_rtx));
3393 if (GET_MODE (op0) != DImode)
3394 {
3395 temp = gen_reg_rtx (DImode);
3396 convert_move (temp, op0, 0);
3397 }
3398 else
3399 temp = op0;
3400 emit_insn (gen_rtx_SET (dest,
3401 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3402 gen_rtx_fmt_ee (compare_code, DImode,
3403 temp, const0_rtx),
3404 const1_rtx,
3405 dest)));
3406 return 1;
3407 }
3408 else
3409 {
3410 x = gen_compare_reg_1 (compare_code, x, y);
3411 y = const0_rtx;
3412
3413 emit_insn (gen_rtx_SET (dest, const0_rtx));
3414 emit_insn (gen_rtx_SET (dest,
3415 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3416 gen_rtx_fmt_ee (compare_code,
3417 GET_MODE (x), x, y),
3418 const1_rtx, dest)));
3419 return 1;
3420 }
3421 }
3422
3423
3424 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3425 without jumps using the addx/subx instructions. */
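/* For example, the classic SPARC sequence for "dest = (x < y)" unsigned
   is "subcc x, y, %g0; addx %g0, 0, dest": the carry bit produced by the
   subtraction is simply added into a zeroed destination.  */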
3426
3427 bool
3428 emit_scc_insn (rtx operands[])
3429 {
3430 rtx tem, x, y;
3431 enum rtx_code code;
3432 machine_mode mode;
3433
3434 /* The quad-word fp compare library routines all return nonzero to indicate
3435 true, which is different from the equivalent libgcc routines, so we must
3436 handle them specially here. */
3437 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3438 {
3439 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3440 GET_CODE (operands[1]));
3441 operands[2] = XEXP (operands[1], 0);
3442 operands[3] = XEXP (operands[1], 1);
3443 }
3444
3445 code = GET_CODE (operands[1]);
3446 x = operands[2];
3447 y = operands[3];
3448 mode = GET_MODE (x);
3449
3450 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3451 more applications). The exception to this is "reg != 0" which can
3452 be done in one instruction on v9 (so we do it). */
3453 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3454 {
3455 if (y != const0_rtx)
3456 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3457
3458 rtx pat = gen_rtx_SET (operands[0],
3459 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3460 x, const0_rtx));
3461
3462 /* If we can use addx/subx or addxc, add a clobber for CC. */
3463 if (mode == SImode || (code == NE && TARGET_VIS3))
3464 {
3465 rtx clobber
3466 = gen_rtx_CLOBBER (VOIDmode,
3467 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3468 SPARC_ICC_REG));
3469 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3470 }
3471
3472 emit_insn (pat);
3473 return true;
3474 }
3475
3476 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3477 if (TARGET_ARCH64
3478 && mode == DImode
3479 && !((code == LTU || code == GTU) && TARGET_VIS3)
3480 && gen_v9_scc (operands[0], code, x, y))
3481 return true;
3482
3483 /* We can do LTU and GEU using the addx/subx instructions too. And
3484 for GTU/LEU, if both operands are registers, swap them and fall
3485 back to the easy case. */
3486 if (code == GTU || code == LEU)
3487 {
3488 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3489 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3490 {
3491 tem = x;
3492 x = y;
3493 y = tem;
3494 code = swap_condition (code);
3495 }
3496 }
3497
3498 if (code == LTU || code == GEU)
3499 {
3500 emit_insn (gen_rtx_SET (operands[0],
3501 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3502 gen_compare_reg_1 (code, x, y),
3503 const0_rtx)));
3504 return true;
3505 }
3506
3507 /* All the possibilities to use addx/subx-based sequences have been
3508 exhausted; try for a 3-instruction sequence using v9 conditional
3509 moves. */
3510 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3511 return true;
3512
3513 /* Nope, do branches. */
3514 return false;
3515 }
3516
3517 /* Emit a conditional jump insn for the v9 architecture using comparison code
3518 CODE and jump target LABEL.
3519 This function exists to take advantage of the v9 brxx insns. */
3520
3521 static void
3522 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3523 {
3524 emit_jump_insn (gen_rtx_SET (pc_rtx,
3525 gen_rtx_IF_THEN_ELSE (VOIDmode,
3526 gen_rtx_fmt_ee (code, GET_MODE (op0),
3527 op0, const0_rtx),
3528 gen_rtx_LABEL_REF (VOIDmode, label),
3529 pc_rtx)));
3530 }
3531
3532 /* Emit a conditional jump insn for the UA2011 architecture using
3533 comparison code CODE and jump target LABEL. This function exists
3534 to take advantage of the UA2011 Compare and Branch insns. */
3535
3536 static void
3537 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3538 {
3539 rtx if_then_else;
3540
3541 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3542 gen_rtx_fmt_ee (code, GET_MODE (op0),
3543 op0, op1),
3544 gen_rtx_LABEL_REF (VOIDmode, label),
3545 pc_rtx);
3546
3547 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3548 }
3549
3550 void
3551 emit_conditional_branch_insn (rtx operands[])
3552 {
3553 /* The quad-word fp compare library routines all return nonzero to indicate
3554 true, which is different from the equivalent libgcc routines, so we must
3555 handle them specially here. */
3556 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3557 {
3558 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3559 GET_CODE (operands[0]));
3560 operands[1] = XEXP (operands[0], 0);
3561 operands[2] = XEXP (operands[0], 1);
3562 }
3563
3564 /* If we can tell early on that the comparison is against a constant that
3565 won't fit in the 5-bit signed immediate field of a cbcond (i.e. outside
3566 the range [-16, 15]), use one of the other v9 conditional branch sequences. */
3567 if (TARGET_CBCOND
3568 && GET_CODE (operands[1]) == REG
3569 && (GET_MODE (operands[1]) == SImode
3570 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3571 && (GET_CODE (operands[2]) != CONST_INT
3572 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3573 {
3574 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3575 return;
3576 }
3577
3578 if (TARGET_ARCH64 && operands[2] == const0_rtx
3579 && GET_CODE (operands[1]) == REG
3580 && GET_MODE (operands[1]) == DImode)
3581 {
3582 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3583 return;
3584 }
3585
3586 operands[1] = gen_compare_reg (operands[0]);
3587 operands[2] = const0_rtx;
3588 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3589 operands[1], operands[2]);
3590 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3591 operands[3]));
3592 }
3593
3594
3595 /* Generate a DFmode part of a hard TFmode register.
3596 REG is the TFmode hard register, LOW is 1 for the
3597 low 64 bits of the register and 0 otherwise. */
3599 rtx
3600 gen_df_reg (rtx reg, int low)
3601 {
3602 int regno = REGNO (reg);
3603
3604 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3605 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3606 return gen_rtx_REG (DFmode, regno);
3607 }
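
/* For example, with the big-endian word ordering normally in effect on
   SPARC, a TFmode value living in %f0..%f3 has its high DFmode part in
   %f0 (LOW == 0) and its low DFmode part in %f2 (LOW == 1). */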
3608 \f
3609 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3610 Unlike normal calls, TFmode operands are passed by reference. It is
3611 assumed that no more than 3 operands are required. */
3612
3613 static void
3614 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3615 {
3616 rtx ret_slot = NULL, arg[3], func_sym;
3617 int i;
3618
3619 /* We only expect to be called for conversions, unary, and binary ops. */
3620 gcc_assert (nargs == 2 || nargs == 3);
3621
3622 for (i = 0; i < nargs; ++i)
3623 {
3624 rtx this_arg = operands[i];
3625 rtx this_slot;
3626
3627 /* TFmode arguments and return values are passed by reference. */
3628 if (GET_MODE (this_arg) == TFmode)
3629 {
3630 int force_stack_temp;
3631
3632 force_stack_temp = 0;
3633 if (TARGET_BUGGY_QP_LIB && i == 0)
3634 force_stack_temp = 1;
3635
3636 if (GET_CODE (this_arg) == MEM
3637 && ! force_stack_temp)
3638 {
3639 tree expr = MEM_EXPR (this_arg);
3640 if (expr)
3641 mark_addressable (expr);
3642 this_arg = XEXP (this_arg, 0);
3643 }
3644 else if (CONSTANT_P (this_arg)
3645 && ! force_stack_temp)
3646 {
3647 this_slot = force_const_mem (TFmode, this_arg);
3648 this_arg = XEXP (this_slot, 0);
3649 }
3650 else
3651 {
3652 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3653
3654 /* Operand 0 is the return value. We'll copy it out later. */
3655 if (i > 0)
3656 emit_move_insn (this_slot, this_arg);
3657 else
3658 ret_slot = this_slot;
3659
3660 this_arg = XEXP (this_slot, 0);
3661 }
3662 }
3663
3664 arg[i] = this_arg;
3665 }
3666
3667 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3668
3669 if (GET_MODE (operands[0]) == TFmode)
3670 {
3671 if (nargs == 2)
3672 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3673 arg[0], GET_MODE (arg[0]),
3674 arg[1], GET_MODE (arg[1]));
3675 else
3676 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3677 arg[0], GET_MODE (arg[0]),
3678 arg[1], GET_MODE (arg[1]),
3679 arg[2], GET_MODE (arg[2]));
3680
3681 if (ret_slot)
3682 emit_move_insn (operands[0], ret_slot);
3683 }
3684 else
3685 {
3686 rtx ret;
3687
3688 gcc_assert (nargs == 2);
3689
3690 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3691 GET_MODE (operands[0]),
3692 arg[1], GET_MODE (arg[1]));
3693
3694 if (ret != operands[0])
3695 emit_move_insn (operands[0], ret);
3696 }
3697 }
3698
3699 /* Expand soft-float TFmode calls to sparc abi routines. */
3700
3701 static void
3702 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3703 {
3704 const char *func;
3705
3706 switch (code)
3707 {
3708 case PLUS:
3709 func = "_Qp_add";
3710 break;
3711 case MINUS:
3712 func = "_Qp_sub";
3713 break;
3714 case MULT:
3715 func = "_Qp_mul";
3716 break;
3717 case DIV:
3718 func = "_Qp_div";
3719 break;
3720 default:
3721 gcc_unreachable ();
3722 }
3723
3724 emit_soft_tfmode_libcall (func, 3, operands);
3725 }
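
/* For instance, a soft TFmode addition ends up as a call roughly
   equivalent to "_Qp_add (&result, &op1, &op2)": all three TFmode values
   are passed by reference, as arranged by emit_soft_tfmode_libcall above
   (illustrative description of the resulting call). */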
3726
3727 static void
3728 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3729 {
3730 const char *func;
3731
3732 gcc_assert (code == SQRT);
3733 func = "_Qp_sqrt";
3734
3735 emit_soft_tfmode_libcall (func, 2, operands);
3736 }
3737
3738 static void
3739 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3740 {
3741 const char *func;
3742
3743 switch (code)
3744 {
3745 case FLOAT_EXTEND:
3746 switch (GET_MODE (operands[1]))
3747 {
3748 case E_SFmode:
3749 func = "_Qp_stoq";
3750 break;
3751 case E_DFmode:
3752 func = "_Qp_dtoq";
3753 break;
3754 default:
3755 gcc_unreachable ();
3756 }
3757 break;
3758
3759 case FLOAT_TRUNCATE:
3760 switch (GET_MODE (operands[0]))
3761 {
3762 case E_SFmode:
3763 func = "_Qp_qtos";
3764 break;
3765 case E_DFmode:
3766 func = "_Qp_qtod";
3767 break;
3768 default:
3769 gcc_unreachable ();
3770 }
3771 break;
3772
3773 case FLOAT:
3774 switch (GET_MODE (operands[1]))
3775 {
3776 case E_SImode:
3777 func = "_Qp_itoq";
3778 if (TARGET_ARCH64)
3779 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3780 break;
3781 case E_DImode:
3782 func = "_Qp_xtoq";
3783 break;
3784 default:
3785 gcc_unreachable ();
3786 }
3787 break;
3788
3789 case UNSIGNED_FLOAT:
3790 switch (GET_MODE (operands[1]))
3791 {
3792 case E_SImode:
3793 func = "_Qp_uitoq";
3794 if (TARGET_ARCH64)
3795 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3796 break;
3797 case E_DImode:
3798 func = "_Qp_uxtoq";
3799 break;
3800 default:
3801 gcc_unreachable ();
3802 }
3803 break;
3804
3805 case FIX:
3806 switch (GET_MODE (operands[0]))
3807 {
3808 case E_SImode:
3809 func = "_Qp_qtoi";
3810 break;
3811 case E_DImode:
3812 func = "_Qp_qtox";
3813 break;
3814 default:
3815 gcc_unreachable ();
3816 }
3817 break;
3818
3819 case UNSIGNED_FIX:
3820 switch (GET_MODE (operands[0]))
3821 {
3822 case E_SImode:
3823 func = "_Qp_qtoui";
3824 break;
3825 case E_DImode:
3826 func = "_Qp_qtoux";
3827 break;
3828 default:
3829 gcc_unreachable ();
3830 }
3831 break;
3832
3833 default:
3834 gcc_unreachable ();
3835 }
3836
3837 emit_soft_tfmode_libcall (func, 2, operands);
3838 }
3839
3840 /* Expand a hard-float TFmode operation. All arguments must be in
3841 registers. */
3842
3843 static void
3844 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3845 {
3846 rtx op, dest;
3847
3848 if (GET_RTX_CLASS (code) == RTX_UNARY)
3849 {
3850 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3851 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3852 }
3853 else
3854 {
3855 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3856 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3857 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3858 operands[1], operands[2]);
3859 }
3860
3861 if (register_operand (operands[0], VOIDmode))
3862 dest = operands[0];
3863 else
3864 dest = gen_reg_rtx (GET_MODE (operands[0]));
3865
3866 emit_insn (gen_rtx_SET (dest, op));
3867
3868 if (dest != operands[0])
3869 emit_move_insn (operands[0], dest);
3870 }
3871
3872 void
3873 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3874 {
3875 if (TARGET_HARD_QUAD)
3876 emit_hard_tfmode_operation (code, operands);
3877 else
3878 emit_soft_tfmode_binop (code, operands);
3879 }
3880
3881 void
3882 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3883 {
3884 if (TARGET_HARD_QUAD)
3885 emit_hard_tfmode_operation (code, operands);
3886 else
3887 emit_soft_tfmode_unop (code, operands);
3888 }
3889
3890 void
3891 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3892 {
3893 if (TARGET_HARD_QUAD)
3894 emit_hard_tfmode_operation (code, operands);
3895 else
3896 emit_soft_tfmode_cvt (code, operands);
3897 }
3898 \f
3899 /* Return nonzero if a branch/jump/call instruction will be emitting a
3900 nop into its delay slot, i.e. the slot could not be filled. */
3901
3902 int
3903 empty_delay_slot (rtx_insn *insn)
3904 {
3905 rtx seq;
3906
3907 /* If no previous instruction (should not happen), return true. */
3908 if (PREV_INSN (insn) == NULL)
3909 return 1;
3910
3911 seq = NEXT_INSN (PREV_INSN (insn));
3912 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3913 return 0;
3914
3915 return 1;
3916 }
3917
3918 /* Return nonzero if we should emit a nop after a cbcond instruction.
3919 The cbcond instruction does not have a delay slot; however, there is
3920 a severe performance penalty if a control transfer appears right
3921 after a cbcond. Therefore we emit a nop when we detect this
3922 situation. */
3923
3924 int
3925 emit_cbcond_nop (rtx_insn *insn)
3926 {
3927 rtx next = next_active_insn (insn);
3928
3929 if (!next)
3930 return 1;
3931
3932 if (NONJUMP_INSN_P (next)
3933 && GET_CODE (PATTERN (next)) == SEQUENCE)
3934 next = XVECEXP (PATTERN (next), 0, 0);
3935 else if (CALL_P (next)
3936 && GET_CODE (PATTERN (next)) == PARALLEL)
3937 {
3938 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3939
3940 if (GET_CODE (delay) == RETURN)
3941 {
3942 /* It's a sibling call. Do not emit the nop if we're going
3943 to emit something other than the jump itself as the first
3944 instruction of the sibcall sequence. */
3945 if (sparc_leaf_function_p || TARGET_FLAT)
3946 return 0;
3947 }
3948 }
3949
3950 if (NONJUMP_INSN_P (next))
3951 return 0;
3952
3953 return 1;
3954 }
3955
3956 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3957 instruction. RETURN_P is true if the v9 variant 'return' is to be
3958 considered in the test too.
3959
3960 TRIAL must be a SET whose destination is a REG appropriate for the
3961 'restore' instruction or, if RETURN_P is true, for the 'return'
3962 instruction. */
3963
3964 static int
3965 eligible_for_restore_insn (rtx trial, bool return_p)
3966 {
3967 rtx pat = PATTERN (trial);
3968 rtx src = SET_SRC (pat);
3969 bool src_is_freg = false;
3970 rtx src_reg;
3971
3972 /* Since we now can do moves between float and integer registers when
3973 VIS3 is enabled, we have to catch this case. We can allow such
3974 moves when doing a 'return' however. */
3975 src_reg = src;
3976 if (GET_CODE (src_reg) == SUBREG)
3977 src_reg = SUBREG_REG (src_reg);
3978 if (GET_CODE (src_reg) == REG
3979 && SPARC_FP_REG_P (REGNO (src_reg)))
3980 src_is_freg = true;
3981
3982 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3983 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3984 && arith_operand (src, GET_MODE (src))
3985 && ! src_is_freg)
3986 {
3987 if (TARGET_ARCH64)
3988 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3989 else
3990 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3991 }
3992
3993 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3994 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3995 && arith_double_operand (src, GET_MODE (src))
3996 && ! src_is_freg)
3997 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3998
3999 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4000 else if (! TARGET_FPU && register_operand (src, SFmode))
4001 return 1;
4002
4003 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4004 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4005 return 1;
4006
4007 /* If we have the 'return' instruction, anything that does not use
4008 local or output registers and can go into a delay slot wins. */
4009 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4010 return 1;
4011
4012 /* The 'restore src1,src2,dest' pattern for SImode. */
4013 else if (GET_CODE (src) == PLUS
4014 && register_operand (XEXP (src, 0), SImode)
4015 && arith_operand (XEXP (src, 1), SImode))
4016 return 1;
4017
4018 /* The 'restore src1,src2,dest' pattern for DImode. */
4019 else if (GET_CODE (src) == PLUS
4020 && register_operand (XEXP (src, 0), DImode)
4021 && arith_double_operand (XEXP (src, 1), DImode))
4022 return 1;
4023
4024 /* The 'restore src1,%lo(src2),dest' pattern. */
4025 else if (GET_CODE (src) == LO_SUM
4026 && ! TARGET_CM_MEDMID
4027 && ((register_operand (XEXP (src, 0), SImode)
4028 && immediate_operand (XEXP (src, 1), SImode))
4029 || (TARGET_ARCH64
4030 && register_operand (XEXP (src, 0), DImode)
4031 && immediate_operand (XEXP (src, 1), DImode))))
4032 return 1;
4033
4034 /* The 'restore src,src,dest' pattern. */
4035 else if (GET_CODE (src) == ASHIFT
4036 && (register_operand (XEXP (src, 0), SImode)
4037 || register_operand (XEXP (src, 0), DImode))
4038 && XEXP (src, 1) == const1_rtx)
4039 return 1;
4040
4041 return 0;
4042 }
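
/* As an illustration, this is what lets the delay-slot insn of a return
   be folded into the restore, so that e.g. "return 1;" in a non-leaf
   function can come out as

	ret
	 restore %g0, 1, %o0

   instead of a separate move into %i0 (hypothetical output shown). */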
4043
4044 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4045
4046 int
4047 eligible_for_return_delay (rtx_insn *trial)
4048 {
4049 int regno;
4050 rtx pat;
4051
4052 /* If the function uses __builtin_eh_return, the eh_return machinery
4053 occupies the delay slot. */
4054 if (crtl->calls_eh_return)
4055 return 0;
4056
4057 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4058 return 0;
4059
4060 /* In the case of a leaf or flat function, anything can go into the slot. */
4061 if (sparc_leaf_function_p || TARGET_FLAT)
4062 return 1;
4063
4064 if (!NONJUMP_INSN_P (trial))
4065 return 0;
4066
4067 pat = PATTERN (trial);
4068 if (GET_CODE (pat) == PARALLEL)
4069 {
4070 int i;
4071
4072 if (! TARGET_V9)
4073 return 0;
4074 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4075 {
4076 rtx expr = XVECEXP (pat, 0, i);
4077 if (GET_CODE (expr) != SET)
4078 return 0;
4079 if (GET_CODE (SET_DEST (expr)) != REG)
4080 return 0;
4081 regno = REGNO (SET_DEST (expr));
4082 if (regno >= 8 && regno < 24)
4083 return 0;
4084 }
4085 return !epilogue_renumber (&pat, 1);
4086 }
4087
4088 if (GET_CODE (pat) != SET)
4089 return 0;
4090
4091 if (GET_CODE (SET_DEST (pat)) != REG)
4092 return 0;
4093
4094 regno = REGNO (SET_DEST (pat));
4095
4096 /* Otherwise, only operations which can be done in tandem with
4097 a `restore' or `return' insn can go into the delay slot. */
4098 if (regno >= 8 && regno < 24)
4099 return 0;
4100
4101 /* If this instruction sets up a floating-point register and we have a
4102 return instruction, it can probably go in. But restore will not work
4103 with FP_REGS. */
4104 if (! SPARC_INT_REG_P (regno))
4105 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4106
4107 return eligible_for_restore_insn (trial, true);
4108 }
4109
4110 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4111
4112 int
4113 eligible_for_sibcall_delay (rtx_insn *trial)
4114 {
4115 rtx pat;
4116
4117 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4118 return 0;
4119
4120 if (!NONJUMP_INSN_P (trial))
4121 return 0;
4122
4123 pat = PATTERN (trial);
4124
4125 if (sparc_leaf_function_p || TARGET_FLAT)
4126 {
4127 /* If the tail call is done using the call instruction,
4128 we have to restore %o7 in the delay slot. */
4129 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4130 return 0;
4131
4132 /* %g1 is used to build the function address. */
4133 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4134 return 0;
4135
4136 return 1;
4137 }
4138
4139 if (GET_CODE (pat) != SET)
4140 return 0;
4141
4142 /* Otherwise, only operations which can be done in tandem with
4143 a `restore' insn can go into the delay slot. */
4144 if (GET_CODE (SET_DEST (pat)) != REG
4145 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4146 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4147 return 0;
4148
4149 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4150 in most cases. */
4151 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4152 return 0;
4153
4154 return eligible_for_restore_insn (trial, false);
4155 }
4156 \f
4157 /* Determine if it's legal to put X into the constant pool. This
4158 is not possible if X contains the address of a symbol that is
4159 not constant (TLS) or not known at final link time (PIC). */
4160
4161 static bool
4162 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4163 {
4164 switch (GET_CODE (x))
4165 {
4166 case CONST_INT:
4167 case CONST_WIDE_INT:
4168 case CONST_DOUBLE:
4169 case CONST_VECTOR:
4170 /* Accept all non-symbolic constants. */
4171 return false;
4172
4173 case LABEL_REF:
4174 /* Labels are OK iff we are non-PIC. */
4175 return flag_pic != 0;
4176
4177 case SYMBOL_REF:
4178 /* 'Naked' TLS symbol references are never OK,
4179 non-TLS symbols are OK iff we are non-PIC. */
4180 if (SYMBOL_REF_TLS_MODEL (x))
4181 return true;
4182 else
4183 return flag_pic != 0;
4184
4185 case CONST:
4186 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4187 case PLUS:
4188 case MINUS:
4189 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4190 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4191 case UNSPEC:
4192 return true;
4193 default:
4194 gcc_unreachable ();
4195 }
4196 }
4197 \f
4198 /* Global Offset Table support. */
4199 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4200 static GTY(()) rtx got_register_rtx = NULL_RTX;
4201 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4202
4203 static GTY(()) bool got_helper_needed = false;
4204
4205 /* Return the SYMBOL_REF for the Global Offset Table. */
4206
4207 static rtx
4208 sparc_got (void)
4209 {
4210 if (!got_symbol_rtx)
4211 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4212
4213 return got_symbol_rtx;
4214 }
4215
4216 /* Output the load_pcrel_sym pattern. */
4217
4218 const char *
4219 output_load_pcrel_sym (rtx *operands)
4220 {
4221 if (flag_delayed_branch)
4222 {
4223 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4224 output_asm_insn ("call\t%a2", operands);
4225 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4226 }
4227 else
4228 {
4229 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4230 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4231 output_asm_insn ("call\t%a2", operands);
4232 output_asm_insn (" nop", NULL);
4233 }
4234
4235 if (operands[2] == got_helper_rtx)
4236 got_helper_needed = true;
4237
4238 return "";
4239 }
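
/* With operand 1 being the GOT symbol and operand 2 the PC thunk, the
   delayed-branch form above prints, for instance, as

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the return address in %o7, leaving the GOT
   address in %l7 (register and thunk name shown are only the usual
   defaults). */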
4240
4241 #ifdef HAVE_GAS_HIDDEN
4242 # define USE_HIDDEN_LINKONCE 1
4243 #else
4244 # define USE_HIDDEN_LINKONCE 0
4245 #endif
4246
4247 /* Emit code to load the GOT register. */
4248
4249 void
4250 load_got_register (void)
4251 {
4252 rtx insn;
4253
4254 if (TARGET_VXWORKS_RTP)
4255 {
4256 if (!got_register_rtx)
4257 got_register_rtx = pic_offset_table_rtx;
4258
4259 insn = gen_vxworks_load_got ();
4260 }
4261 else
4262 {
4263 if (!got_register_rtx)
4264 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4265
4266 /* The GOT symbol is subject to a PC-relative relocation so we need a
4267 helper function to add the PC value and thus get the final value. */
4268 if (!got_helper_rtx)
4269 {
4270 char name[32];
4271
4272 /* Skip the leading '%' as that cannot be used in a symbol name. */
4273 if (USE_HIDDEN_LINKONCE)
4274 sprintf (name, "__sparc_get_pc_thunk.%s",
4275 reg_names[REGNO (got_register_rtx)] + 1);
4276 else
4277 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4278 REGNO (got_register_rtx));
4279
4280 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4281 }
4282
4283 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4284 const int orig_flag_pic = flag_pic;
4285 flag_pic = 0;
4286 insn = gen_load_pcrel_sym (Pmode,
4287 got_register_rtx,
4288 sparc_got (),
4289 got_helper_rtx,
4290 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM));
4291 flag_pic = orig_flag_pic;
4292 }
4293
4294 emit_insn (insn);
4295 }
4296
4297 /* Ensure that we are not using patterns that are not OK with PIC. */
4298
4299 int
4300 check_pic (int i)
4301 {
4302 rtx op;
4303
4304 switch (flag_pic)
4305 {
4306 case 1:
4307 op = recog_data.operand[i];
4308 gcc_assert (GET_CODE (op) != SYMBOL_REF
4309 && (GET_CODE (op) != CONST
4310 || (GET_CODE (XEXP (op, 0)) == MINUS
4311 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4312 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4313 /* fallthrough */
4314 case 2:
4315 default:
4316 return 1;
4317 }
4318 }
4319
4320 /* Return true if X is an address which needs a temporary register when
4321 reloaded while generating PIC code. */
4322
4323 int
4324 pic_address_needs_scratch (rtx x)
4325 {
4326 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4327 if (GET_CODE (x) == CONST
4328 && GET_CODE (XEXP (x, 0)) == PLUS
4329 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4330 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4331 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4332 return 1;
4333
4334 return 0;
4335 }
4336
4337 /* Determine if a given RTX is a valid constant. We already know this
4338 satisfies CONSTANT_P. */
4339
4340 static bool
4341 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4342 {
4343 switch (GET_CODE (x))
4344 {
4345 case CONST:
4346 case SYMBOL_REF:
4347 if (sparc_tls_referenced_p (x))
4348 return false;
4349 break;
4350
4351 case CONST_DOUBLE:
4352 /* Floating point constants are generally not ok.
4353 The only exception is 0.0 and all-ones in VIS. */
4354 if (TARGET_VIS
4355 && SCALAR_FLOAT_MODE_P (mode)
4356 && (const_zero_operand (x, mode)
4357 || const_all_ones_operand (x, mode)))
4358 return true;
4359
4360 return false;
4361
4362 case CONST_VECTOR:
4363 /* Vector constants are generally not ok.
4364 The only exception is 0 or -1 in VIS. */
4365 if (TARGET_VIS
4366 && (const_zero_operand (x, mode)
4367 || const_all_ones_operand (x, mode)))
4368 return true;
4369
4370 return false;
4371
4372 default:
4373 break;
4374 }
4375
4376 return true;
4377 }
4378
4379 /* Determine if a given RTX is a valid constant address. */
4380
4381 bool
4382 constant_address_p (rtx x)
4383 {
4384 switch (GET_CODE (x))
4385 {
4386 case LABEL_REF:
4387 case CONST_INT:
4388 case HIGH:
4389 return true;
4390
4391 case CONST:
4392 if (flag_pic && pic_address_needs_scratch (x))
4393 return false;
4394 return sparc_legitimate_constant_p (Pmode, x);
4395
4396 case SYMBOL_REF:
4397 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4398
4399 default:
4400 return false;
4401 }
4402 }
4403
4404 /* Nonzero if the constant value X is a legitimate general operand
4405 when generating PIC code. It is given that flag_pic is on and
4406 that X satisfies CONSTANT_P. */
4407
4408 bool
4409 legitimate_pic_operand_p (rtx x)
4410 {
4411 if (pic_address_needs_scratch (x))
4412 return false;
4413 if (sparc_tls_referenced_p (x))
4414 return false;
4415 return true;
4416 }
4417
4418 /* Return true if X is a representation of the PIC register. */
4419
4420 static bool
4421 sparc_pic_register_p (rtx x)
4422 {
4423 if (!REG_P (x) || !pic_offset_table_rtx)
4424 return false;
4425
4426 if (x == pic_offset_table_rtx)
4427 return true;
4428
4429 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4430 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4431 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4432 return true;
4433
4434 return false;
4435 }
4436
4437 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4438 (CONST_INT_P (X) \
4439 && INTVAL (X) >= -0x1000 \
4440 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4441
4442 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4443 (CONST_INT_P (X) \
4444 && INTVAL (X) >= -0x1000 \
4445 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
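
/* The limits above reflect the 13-bit signed immediate field of memory
   accesses: a plain offset may range over [-4096, 4095] minus the access
   size, while an offset that is later combined with a %lo() value (which
   contributes up to 0x3ff) is capped at 0xc00 minus the access size so
   that the sum still fits in the field. */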
4446
4447 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4448
4449 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4450 ordinarily. This changes a bit when generating PIC. */
4451
4452 static bool
4453 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4454 {
4455 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4456
4457 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4458 rs1 = addr;
4459 else if (GET_CODE (addr) == PLUS)
4460 {
4461 rs1 = XEXP (addr, 0);
4462 rs2 = XEXP (addr, 1);
4463
4464 /* Canonicalize. REG comes first; if there are no regs,
4465 LO_SUM comes first. */
4466 if (!REG_P (rs1)
4467 && GET_CODE (rs1) != SUBREG
4468 && (REG_P (rs2)
4469 || GET_CODE (rs2) == SUBREG
4470 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4471 {
4472 rs1 = XEXP (addr, 1);
4473 rs2 = XEXP (addr, 0);
4474 }
4475
4476 if ((flag_pic == 1
4477 && sparc_pic_register_p (rs1)
4478 && !REG_P (rs2)
4479 && GET_CODE (rs2) != SUBREG
4480 && GET_CODE (rs2) != LO_SUM
4481 && GET_CODE (rs2) != MEM
4482 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4483 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4484 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4485 || ((REG_P (rs1)
4486 || GET_CODE (rs1) == SUBREG)
4487 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4488 {
4489 imm1 = rs2;
4490 rs2 = NULL;
4491 }
4492 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4493 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4494 {
4495 /* We prohibit REG + REG for TFmode when there are no quad move insns
4496 and we consequently need to split. We do this because REG+REG
4497 is not an offsettable address. If we get the situation in reload
4498 where source and destination of a movtf pattern are both MEMs with
4499 REG+REG address, then only one of them gets converted to an
4500 offsettable address. */
4501 if (mode == TFmode
4502 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4503 return 0;
4504
4505 /* Likewise for TImode, but in all cases. */
4506 if (mode == TImode)
4507 return 0;
4508
4509 /* We prohibit REG + REG on ARCH32 if not optimizing for
4510 DFmode/DImode because then mem_min_alignment is likely to be zero
4511 after reload and the forced split would lack a matching splitter
4512 pattern. */
4513 if (TARGET_ARCH32 && !optimize
4514 && (mode == DFmode || mode == DImode))
4515 return 0;
4516 }
4517 else if (USE_AS_OFFSETABLE_LO10
4518 && GET_CODE (rs1) == LO_SUM
4519 && TARGET_ARCH64
4520 && ! TARGET_CM_MEDMID
4521 && RTX_OK_FOR_OLO10_P (rs2, mode))
4522 {
4523 rs2 = NULL;
4524 imm1 = XEXP (rs1, 1);
4525 rs1 = XEXP (rs1, 0);
4526 if (!CONSTANT_P (imm1)
4527 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4528 return 0;
4529 }
4530 }
4531 else if (GET_CODE (addr) == LO_SUM)
4532 {
4533 rs1 = XEXP (addr, 0);
4534 imm1 = XEXP (addr, 1);
4535
4536 if (!CONSTANT_P (imm1)
4537 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4538 return 0;
4539
4540 /* We can't allow TFmode in 32-bit mode, because an offset greater
4541 than the alignment (8) may cause the LO_SUM to overflow. */
4542 if (mode == TFmode && TARGET_ARCH32)
4543 return 0;
4544
4545 /* During reload, accept the HIGH+LO_SUM construct generated by
4546 sparc_legitimize_reload_address. */
4547 if (reload_in_progress
4548 && GET_CODE (rs1) == HIGH
4549 && XEXP (rs1, 0) == imm1)
4550 return 1;
4551 }
4552 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4553 return 1;
4554 else
4555 return 0;
4556
4557 if (GET_CODE (rs1) == SUBREG)
4558 rs1 = SUBREG_REG (rs1);
4559 if (!REG_P (rs1))
4560 return 0;
4561
4562 if (rs2)
4563 {
4564 if (GET_CODE (rs2) == SUBREG)
4565 rs2 = SUBREG_REG (rs2);
4566 if (!REG_P (rs2))
4567 return 0;
4568 }
4569
4570 if (strict)
4571 {
4572 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4573 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4574 return 0;
4575 }
4576 else
4577 {
4578 if ((! SPARC_INT_REG_P (REGNO (rs1))
4579 && REGNO (rs1) != FRAME_POINTER_REGNUM
4580 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4581 || (rs2
4582 && (! SPARC_INT_REG_P (REGNO (rs2))
4583 && REGNO (rs2) != FRAME_POINTER_REGNUM
4584 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4585 return 0;
4586 }
4587 return 1;
4588 }
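
/* To summarize, the legitimate forms accepted above are essentially REG,
   REG+REG, REG+SIMM13 and LO_SUM(REG, imm), e.g. (illustrative) "[%o0]",
   "[%o0 + %o1]", "[%fp - 16]" and "[%o0 + %lo(sym)]", modulo the PIC,
   TFmode/TImode and OLO10 restrictions handled in the body. */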
4589
4590 /* Return the SYMBOL_REF for the tls_get_addr function. */
4591
4592 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4593
4594 static rtx
4595 sparc_tls_get_addr (void)
4596 {
4597 if (!sparc_tls_symbol)
4598 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4599
4600 return sparc_tls_symbol;
4601 }
4602
4603 /* Return the Global Offset Table to be used in TLS mode. */
4604
4605 static rtx
4606 sparc_tls_got (void)
4607 {
4608 /* In PIC mode, this is just the PIC offset table. */
4609 if (flag_pic)
4610 {
4611 crtl->uses_pic_offset_table = 1;
4612 return pic_offset_table_rtx;
4613 }
4614
4615 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4616 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4617 if (TARGET_SUN_TLS && TARGET_ARCH32)
4618 {
4619 load_got_register ();
4620 return got_register_rtx;
4621 }
4622
4623 /* In all other cases, we load a new pseudo with the GOT symbol. */
4624 return copy_to_reg (sparc_got ());
4625 }
4626
4627 /* Return true if X contains a thread-local symbol. */
4628
4629 static bool
4630 sparc_tls_referenced_p (rtx x)
4631 {
4632 if (!TARGET_HAVE_TLS)
4633 return false;
4634
4635 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4636 x = XEXP (XEXP (x, 0), 0);
4637
4638 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4639 return true;
4640
4641 /* That's all we handle in sparc_legitimize_tls_address for now. */
4642 return false;
4643 }
4644
4645 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4646 this (thread-local) address. */
4647
4648 static rtx
4649 sparc_legitimize_tls_address (rtx addr)
4650 {
4651 rtx temp1, temp2, temp3, ret, o0, got;
4652 rtx_insn *insn;
4653
4654 gcc_assert (can_create_pseudo_p ());
4655
4656 if (GET_CODE (addr) == SYMBOL_REF)
4657 /* Although the various sethi/or sequences generate SImode values, many of
4658 them can be transformed by the linker when relaxing and, if relaxing to
4659 local-exec, will become a sethi/xor pair, which is signed and therefore
4660 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4661 values be spilled onto the stack in 64-bit mode. */
4662 switch (SYMBOL_REF_TLS_MODEL (addr))
4663 {
4664 case TLS_MODEL_GLOBAL_DYNAMIC:
4665 start_sequence ();
4666 temp1 = gen_reg_rtx (Pmode);
4667 temp2 = gen_reg_rtx (Pmode);
4668 ret = gen_reg_rtx (Pmode);
4669 o0 = gen_rtx_REG (Pmode, 8);
4670 got = sparc_tls_got ();
4671 emit_insn (gen_tgd_hi22 (Pmode, temp1, addr));
4672 emit_insn (gen_tgd_lo10 (Pmode, temp2, temp1, addr));
4673 emit_insn (gen_tgd_add (Pmode, o0, got, temp2, addr));
4674 insn = emit_call_insn (gen_tgd_call (Pmode, o0, sparc_tls_get_addr (),
4675 addr, const1_rtx));
4676 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4677 RTL_CONST_CALL_P (insn) = 1;
4678 insn = get_insns ();
4679 end_sequence ();
4680 emit_libcall_block (insn, ret, o0, addr);
4681 break;
4682
4683 case TLS_MODEL_LOCAL_DYNAMIC:
4684 start_sequence ();
4685 temp1 = gen_reg_rtx (Pmode);
4686 temp2 = gen_reg_rtx (Pmode);
4687 temp3 = gen_reg_rtx (Pmode);
4688 ret = gen_reg_rtx (Pmode);
4689 o0 = gen_rtx_REG (Pmode, 8);
4690 got = sparc_tls_got ();
4691 emit_insn (gen_tldm_hi22 (Pmode, temp1));
4692 emit_insn (gen_tldm_lo10 (Pmode, temp2, temp1));
4693 emit_insn (gen_tldm_add (Pmode, o0, got, temp2));
4694 insn = emit_call_insn (gen_tldm_call (Pmode, o0, sparc_tls_get_addr (),
4695 const1_rtx));
4696 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4697 RTL_CONST_CALL_P (insn) = 1;
4698 insn = get_insns ();
4699 end_sequence ();
4700 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4701 share the LD_BASE result with other LD model accesses. */
4702 emit_libcall_block (insn, temp3, o0,
4703 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4704 UNSPEC_TLSLD_BASE));
4705 temp1 = gen_reg_rtx (Pmode);
4706 temp2 = gen_reg_rtx (Pmode);
4707 emit_insn (gen_tldo_hix22 (Pmode, temp1, addr));
4708 emit_insn (gen_tldo_lox10 (Pmode, temp2, temp1, addr));
4709 emit_insn (gen_tldo_add (Pmode, ret, temp3, temp2, addr));
4710 break;
4711
4712 case TLS_MODEL_INITIAL_EXEC:
4713 temp1 = gen_reg_rtx (Pmode);
4714 temp2 = gen_reg_rtx (Pmode);
4715 temp3 = gen_reg_rtx (Pmode);
4716 got = sparc_tls_got ();
4717 emit_insn (gen_tie_hi22 (Pmode, temp1, addr));
4718 emit_insn (gen_tie_lo10 (Pmode, temp2, temp1, addr));
4719 if (TARGET_ARCH32)
4720 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4721 else
4722 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4723 if (TARGET_SUN_TLS)
4724 {
4725 ret = gen_reg_rtx (Pmode);
4726 emit_insn (gen_tie_add (Pmode, ret, gen_rtx_REG (Pmode, 7),
4727 temp3, addr));
4728 }
4729 else
4730 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4731 break;
4732
4733 case TLS_MODEL_LOCAL_EXEC:
4734 temp1 = gen_reg_rtx (Pmode);
4735 temp2 = gen_reg_rtx (Pmode);
4736 emit_insn (gen_tle_hix22 (Pmode, temp1, addr));
4737 emit_insn (gen_tle_lox10 (Pmode, temp2, temp1, addr));
4738 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4739 break;
4740
4741 default:
4742 gcc_unreachable ();
4743 }
4744
4745 else if (GET_CODE (addr) == CONST)
4746 {
4747 rtx base, offset;
4748
4749 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4750
4751 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4752 offset = XEXP (XEXP (addr, 0), 1);
4753
4754 base = force_operand (base, NULL_RTX);
4755 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4756 offset = force_reg (Pmode, offset);
4757 ret = gen_rtx_PLUS (Pmode, base, offset);
4758 }
4759
4760 else
4761 gcc_unreachable (); /* for now ... */
4762
4763 return ret;
4764 }
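
/* As an illustration, the local-exec case above corresponds to a sequence
   along the lines of (register names hypothetical):

	sethi	%tle_hix22(sym), %g1
	xor	%g1, %tle_lox10(sym), %g1
	add	%g7, %g1, dest		! %g7 is the thread pointer

   and the other models likewise map onto the %tgd_*, %tldm_*/%tldo_* and
   %tie_* relocation operators used by the patterns invoked above. */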
4765
4766 /* Legitimize PIC addresses. If the address is already position-independent,
4767 we return ORIG. Newly generated position-independent addresses go into a
4768 reg. This is REG if nonzero, otherwise we allocate register(s) as
4769 necessary. */
4770
4771 static rtx
4772 sparc_legitimize_pic_address (rtx orig, rtx reg)
4773 {
4774 if (GET_CODE (orig) == SYMBOL_REF
4775 /* See the comment in sparc_expand_move. */
4776 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4777 {
4778 bool gotdata_op = false;
4779 rtx pic_ref, address;
4780 rtx_insn *insn;
4781
4782 if (!reg)
4783 {
4784 gcc_assert (can_create_pseudo_p ());
4785 reg = gen_reg_rtx (Pmode);
4786 }
4787
4788 if (flag_pic == 2)
4789 {
4790 /* If not during reload, allocate another temp reg here for loading
4791 in the address, so that these instructions can be optimized
4792 properly. */
4793 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4794
4795 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4796 won't get confused into thinking that these two instructions
4797 are loading in the true address of the symbol. If in the
4798 future a PIC rtx exists, that should be used instead. */
4799 if (TARGET_ARCH64)
4800 {
4801 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4802 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4803 }
4804 else
4805 {
4806 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4807 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4808 }
4809
4810 address = temp_reg;
4811 gotdata_op = true;
4812 }
4813 else
4814 address = orig;
4815
4816 crtl->uses_pic_offset_table = 1;
4817 if (gotdata_op)
4818 {
4819 if (TARGET_ARCH64)
4820 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4821 pic_offset_table_rtx,
4822 address, orig));
4823 else
4824 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4825 pic_offset_table_rtx,
4826 address, orig));
4827 }
4828 else
4829 {
4830 pic_ref
4831 = gen_const_mem (Pmode,
4832 gen_rtx_PLUS (Pmode,
4833 pic_offset_table_rtx, address));
4834 insn = emit_move_insn (reg, pic_ref);
4835 }
4836
4837 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4838 by loop. */
4839 set_unique_reg_note (insn, REG_EQUAL, orig);
4840 return reg;
4841 }
4842 else if (GET_CODE (orig) == CONST)
4843 {
4844 rtx base, offset;
4845
4846 if (GET_CODE (XEXP (orig, 0)) == PLUS
4847 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4848 return orig;
4849
4850 if (!reg)
4851 {
4852 gcc_assert (can_create_pseudo_p ());
4853 reg = gen_reg_rtx (Pmode);
4854 }
4855
4856 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4857 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4858 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4859 base == reg ? NULL_RTX : reg);
4860
4861 if (GET_CODE (offset) == CONST_INT)
4862 {
4863 if (SMALL_INT (offset))
4864 return plus_constant (Pmode, base, INTVAL (offset));
4865 else if (can_create_pseudo_p ())
4866 offset = force_reg (Pmode, offset);
4867 else
4868 /* If we reach here, then something is seriously wrong. */
4869 gcc_unreachable ();
4870 }
4871 return gen_rtx_PLUS (Pmode, base, offset);
4872 }
4873 else if (GET_CODE (orig) == LABEL_REF)
4874 /* ??? We ought to be checking that the register is live instead, in case
4875 it is eliminated. */
4876 crtl->uses_pic_offset_table = 1;
4877
4878 return orig;
4879 }
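
/* Descriptive note (see the mov*_pic_gotdata_op patterns for details):
   the flag_pic == 2 path above annotates the GOT load with the GOTDATA
   relocation operators, which lets the linker relax the indirect load
   through the PIC register into a direct reference when the symbol turns
   out to be locally bound. */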
4880
4881 /* Try machine-dependent ways of modifying an illegitimate address X
4882 to be legitimate. If we find one, return the new, valid address.
4883
4884 OLDX is the address as it was before break_out_memory_refs was called.
4885 In some cases it is useful to look at this to decide what needs to be done.
4886
4887 MODE is the mode of the operand pointed to by X.
4888
4889 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4890
4891 static rtx
4892 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4893 machine_mode mode)
4894 {
4895 rtx orig_x = x;
4896
4897 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4898 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4899 force_operand (XEXP (x, 0), NULL_RTX));
4900 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4901 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4902 force_operand (XEXP (x, 1), NULL_RTX));
4903 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4904 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4905 XEXP (x, 1));
4906 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4907 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4908 force_operand (XEXP (x, 1), NULL_RTX));
4909
4910 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4911 return x;
4912
4913 if (sparc_tls_referenced_p (x))
4914 x = sparc_legitimize_tls_address (x);
4915 else if (flag_pic)
4916 x = sparc_legitimize_pic_address (x, NULL_RTX);
4917 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4918 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4919 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4920 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4921 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4922 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4923 else if (GET_CODE (x) == SYMBOL_REF
4924 || GET_CODE (x) == CONST
4925 || GET_CODE (x) == LABEL_REF)
4926 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4927
4928 return x;
4929 }
4930
4931 /* Delegitimize an address that was legitimized by the above function. */
4932
4933 static rtx
4934 sparc_delegitimize_address (rtx x)
4935 {
4936 x = delegitimize_mem_from_attrs (x);
4937
4938 if (GET_CODE (x) == LO_SUM)
4939 x = XEXP (x, 1);
4940
4941 if (GET_CODE (x) == UNSPEC)
4942 switch (XINT (x, 1))
4943 {
4944 case UNSPEC_MOVE_PIC:
4945 case UNSPEC_TLSLE:
4946 x = XVECEXP (x, 0, 0);
4947 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4948 break;
4949 case UNSPEC_MOVE_GOTDATA:
4950 x = XVECEXP (x, 0, 2);
4951 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4952 break;
4953 default:
4954 break;
4955 }
4956
4957 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4958 if (GET_CODE (x) == MINUS
4959 && (XEXP (x, 0) == got_register_rtx
4960 || sparc_pic_register_p (XEXP (x, 0))))
4961 {
4962 rtx y = XEXP (x, 1);
4963
4964 if (GET_CODE (y) == LO_SUM)
4965 y = XEXP (y, 1);
4966
4967 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
4968 {
4969 x = XVECEXP (y, 0, 0);
4970 gcc_assert (GET_CODE (x) == LABEL_REF
4971 || (GET_CODE (x) == CONST
4972 && GET_CODE (XEXP (x, 0)) == PLUS
4973 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4974 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
4975 }
4976 }
4977
4978 return x;
4979 }
4980
4981 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4982 replace the input X, or the original X if no replacement is called for.
4983 The output parameter *WIN is 1 if the calling macro should goto WIN,
4984 0 if it should not.
4985
4986 For SPARC, we wish to handle addresses by splitting them into
4987 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4988 This cuts the number of extra insns by one.
4989
4990 Do nothing when generating PIC code and the address is a symbolic
4991 operand or requires a scratch register. */
4992
4993 rtx
4994 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4995 int opnum, int type,
4996 int ind_levels ATTRIBUTE_UNUSED, int *win)
4997 {
4998 /* Decompose SImode constants into HIGH+LO_SUM. */
4999 if (CONSTANT_P (x)
5000 && (mode != TFmode || TARGET_ARCH64)
5001 && GET_MODE (x) == SImode
5002 && GET_CODE (x) != LO_SUM
5003 && GET_CODE (x) != HIGH
5004 && sparc_code_model <= CM_MEDLOW
5005 && !(flag_pic
5006 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5007 {
5008 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5009 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5010 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5011 opnum, (enum reload_type)type);
5012 *win = 1;
5013 return x;
5014 }
5015
5016 /* We have to recognize what we have already generated above. */
5017 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5018 {
5019 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5020 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5021 opnum, (enum reload_type)type);
5022 *win = 1;
5023 return x;
5024 }
5025
5026 *win = 0;
5027 return x;
5028 }
5029
5030 /* Return true if ADDR (a legitimate address expression)
5031 has an effect that depends on the machine mode it is used for.
5032
5033 In PIC mode,
5034
5035 (mem:HI [%l7+a])
5036
5037 is not equivalent to
5038
5039 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5040
5041 because [%l7+a+1] is interpreted as the address of (a+1). */
5042
5043
5044 static bool
5045 sparc_mode_dependent_address_p (const_rtx addr,
5046 addr_space_t as ATTRIBUTE_UNUSED)
5047 {
5048 if (GET_CODE (addr) == PLUS
5049 && sparc_pic_register_p (XEXP (addr, 0))
5050 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5051 return true;
5052
5053 return false;
5054 }
5055
5056 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5057 address of the call target. */
5058
5059 void
5060 sparc_emit_call_insn (rtx pat, rtx addr)
5061 {
5062 rtx_insn *insn;
5063
5064 insn = emit_call_insn (pat);
5065
5066 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5067 if (TARGET_VXWORKS_RTP
5068 && flag_pic
5069 && GET_CODE (addr) == SYMBOL_REF
5070 && (SYMBOL_REF_DECL (addr)
5071 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5072 : !SYMBOL_REF_LOCAL_P (addr)))
5073 {
5074 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5075 crtl->uses_pic_offset_table = 1;
5076 }
5077 }
5078 \f
5079 /* Return 1 if RTX is a MEM which is known to be aligned to at
5080 least a DESIRED byte boundary. */
5081
5082 int
5083 mem_min_alignment (rtx mem, int desired)
5084 {
5085 rtx addr, base, offset;
5086
5087 /* If it's not a MEM we can't accept it. */
5088 if (GET_CODE (mem) != MEM)
5089 return 0;
5090
5091 /* Obviously... */
5092 if (!TARGET_UNALIGNED_DOUBLES
5093 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5094 return 1;
5095
5096 /* ??? The rest of the function predates MEM_ALIGN so
5097 there is probably a bit of redundancy. */
5098 addr = XEXP (mem, 0);
5099 base = offset = NULL_RTX;
5100 if (GET_CODE (addr) == PLUS)
5101 {
5102 if (GET_CODE (XEXP (addr, 0)) == REG)
5103 {
5104 base = XEXP (addr, 0);
5105
5106 /* What we are saying here is that if the base
5107 REG is aligned properly, the compiler will make
5108 sure any REG-based index upon it will be aligned
5109 as well. */
5110 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5111 offset = XEXP (addr, 1);
5112 else
5113 offset = const0_rtx;
5114 }
5115 }
5116 else if (GET_CODE (addr) == REG)
5117 {
5118 base = addr;
5119 offset = const0_rtx;
5120 }
5121
5122 if (base != NULL_RTX)
5123 {
5124 int regno = REGNO (base);
5125
5126 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5127 {
5128 /* Check if the compiler has recorded some information
5129 about the alignment of the base REG. If reload has
5130 completed, we already matched with proper alignments.
5131 If not running global_alloc, reload might give us an
5132 unaligned pointer to the local stack, though. */
5133 if (((cfun != 0
5134 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5135 || (optimize && reload_completed))
5136 && (INTVAL (offset) & (desired - 1)) == 0)
5137 return 1;
5138 }
5139 else
5140 {
5141 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5142 return 1;
5143 }
5144 }
5145 else if (! TARGET_UNALIGNED_DOUBLES
5146 || CONSTANT_P (addr)
5147 || GET_CODE (addr) == LO_SUM)
5148 {
5149 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5150 is true, in which case we can only assume that an access is aligned if
5151 it is to a constant address, or the address involves a LO_SUM. */
5152 return 1;
5153 }
5154
5155 /* An obviously unaligned address. */
5156 return 0;
5157 }
5158
5159 \f
5160 /* Vectors to keep interesting information about registers where it can easily
5161 be found. We used to use the actual mode value as the bit number, but there
5162 are more than 32 modes now. Instead we use two tables: one indexed by
5163 hard register number, and one indexed by mode. */
5164
5165 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5166 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5167 mapped into one sparc_mode_class mode. */
5168
5169 enum sparc_mode_class {
5170 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5171 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5172 CC_MODE, CCFP_MODE
5173 };
5174
5175 /* Modes for single-word and smaller quantities. */
5176 #define S_MODES \
5177 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5178
5179 /* Modes for double-word and smaller quantities. */
5180 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5181
5182 /* Modes for quad-word and smaller quantities. */
5183 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5184
5185 /* Modes for 8-word and smaller quantities. */
5186 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5187
5188 /* Modes for single-float quantities. */
5189 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5190
5191 /* Modes for double-float and smaller quantities. */
5192 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5193
5194 /* Modes for quad-float and smaller quantities. */
5195 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5196
5197 /* Modes for quad-float pairs and smaller quantities. */
5198 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5199
5200 /* Modes for double-float only quantities. */
5201 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5202
5203 /* Modes for quad-float and double-float only quantities. */
5204 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5205
5206 /* Modes for quad-float pairs and double-float only quantities. */
5207 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5208
5209 /* Modes for condition codes. */
5210 #define CC_MODES (1 << (int) CC_MODE)
5211 #define CCFP_MODES (1 << (int) CCFP_MODE)
5212
5213 /* Value is 1 if register/mode pair is acceptable on sparc.
5214
5215 The funny mixture of D and T modes is because integer operations
5216 do not specially operate on tetra quantities, so non-quad-aligned
5217 registers can hold quadword quantities (except %o4 and %i4 because
5218 they cross fixed registers).
5219
5220 ??? Note that, despite the settings, non-double-aligned parameter
5221 registers can hold double-word quantities in 32-bit mode. */
5222
5223 /* This points to either the 32-bit or the 64-bit version. */
5224 static const int *hard_regno_mode_classes;
5225
5226 static const int hard_32bit_mode_classes[] = {
5227 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5228 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5229 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5230 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5231
5232 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5233 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5234 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5235 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5236
5237 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5238 and none can hold SFmode/SImode values. */
5239 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5240 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5241 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5242 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5243
5244 /* %fcc[0123] */
5245 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5246
5247 /* %icc, %sfp, %gsr */
5248 CC_MODES, 0, D_MODES
5249 };
5250
5251 static const int hard_64bit_mode_classes[] = {
5252 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5253 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5254 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5255 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5256
5257 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5258 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5259 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5260 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5261
5262 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5263 and none can hold SFmode/SImode values. */
5264 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5265 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5266 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5267 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5268
5269 /* %fcc[0123] */
5270 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5271
5272 /* %icc, %sfp, %gsr */
5273 CC_MODES, 0, D_MODES
5274 };
5275
5276 static int sparc_mode_class [NUM_MACHINE_MODES];
5277
5278 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5279
5280 static void
5281 sparc_init_modes (void)
5282 {
5283 int i;
5284
5285 for (i = 0; i < NUM_MACHINE_MODES; i++)
5286 {
5287 machine_mode m = (machine_mode) i;
5288 unsigned int size = GET_MODE_SIZE (m);
5289
5290 switch (GET_MODE_CLASS (m))
5291 {
5292 case MODE_INT:
5293 case MODE_PARTIAL_INT:
5294 case MODE_COMPLEX_INT:
5295 if (size < 4)
5296 sparc_mode_class[i] = 1 << (int) H_MODE;
5297 else if (size == 4)
5298 sparc_mode_class[i] = 1 << (int) S_MODE;
5299 else if (size == 8)
5300 sparc_mode_class[i] = 1 << (int) D_MODE;
5301 else if (size == 16)
5302 sparc_mode_class[i] = 1 << (int) T_MODE;
5303 else if (size == 32)
5304 sparc_mode_class[i] = 1 << (int) O_MODE;
5305 else
5306 sparc_mode_class[i] = 0;
5307 break;
5308 case MODE_VECTOR_INT:
5309 if (size == 4)
5310 sparc_mode_class[i] = 1 << (int) SF_MODE;
5311 else if (size == 8)
5312 sparc_mode_class[i] = 1 << (int) DF_MODE;
5313 else
5314 sparc_mode_class[i] = 0;
5315 break;
5316 case MODE_FLOAT:
5317 case MODE_COMPLEX_FLOAT:
5318 if (size == 4)
5319 sparc_mode_class[i] = 1 << (int) SF_MODE;
5320 else if (size == 8)
5321 sparc_mode_class[i] = 1 << (int) DF_MODE;
5322 else if (size == 16)
5323 sparc_mode_class[i] = 1 << (int) TF_MODE;
5324 else if (size == 32)
5325 sparc_mode_class[i] = 1 << (int) OF_MODE;
5326 else
5327 sparc_mode_class[i] = 0;
5328 break;
5329 case MODE_CC:
5330 if (m == CCFPmode || m == CCFPEmode)
5331 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5332 else
5333 sparc_mode_class[i] = 1 << (int) CC_MODE;
5334 break;
5335 default:
5336 sparc_mode_class[i] = 0;
5337 break;
5338 }
5339 }
5340
5341 if (TARGET_ARCH64)
5342 hard_regno_mode_classes = hard_64bit_mode_classes;
5343 else
5344 hard_regno_mode_classes = hard_32bit_mode_classes;
5345
5346 /* Initialize the array used by REGNO_REG_CLASS. */
5347 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5348 {
5349 if (i < 16 && TARGET_V8PLUS)
5350 sparc_regno_reg_class[i] = I64_REGS;
5351 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5352 sparc_regno_reg_class[i] = GENERAL_REGS;
5353 else if (i < 64)
5354 sparc_regno_reg_class[i] = FP_REGS;
5355 else if (i < 96)
5356 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5357 else if (i < 100)
5358 sparc_regno_reg_class[i] = FPCC_REGS;
5359 else
5360 sparc_regno_reg_class[i] = NO_REGS;
5361 }
5362 }
5363 \f
5364 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5365
5366 static inline bool
5367 save_global_or_fp_reg_p (unsigned int regno,
5368 int leaf_function ATTRIBUTE_UNUSED)
5369 {
5370 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5371 }
5372
5373 /* Return whether the return address register (%i7) is needed. */
5374
5375 static inline bool
5376 return_addr_reg_needed_p (int leaf_function)
5377 {
5378 /* If it is live, for example because of __builtin_return_address (0). */
5379 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5380 return true;
5381
5382 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5383 if (!leaf_function
5384 /* Loading the GOT register clobbers %o7. */
5385 || crtl->uses_pic_offset_table
5386 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5387 return true;
5388
5389 return false;
5390 }
5391
5392 /* Return whether REGNO, a local or in register, must be saved/restored. */
5393
5394 static bool
5395 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5396 {
5397 /* General case: call-saved registers live at some point. */
5398 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5399 return true;
5400
5401 /* Frame pointer register (%fp) if needed. */
5402 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5403 return true;
5404
5405 /* Return address register (%i7) if needed. */
5406 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5407 return true;
5408
5409 /* GOT register (%l7) if needed. */
5410 if (got_register_rtx && regno == REGNO (got_register_rtx))
5411 return true;
5412
5413 /* If the function accesses prior frames, the frame pointer and the return
5414 address of the previous frame must be saved on the stack. */
5415 if (crtl->accesses_prior_frames
5416 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5417 return true;
5418
5419 return false;
5420 }
5421
5422 /* Compute the frame size required by the function. This function is called
5423 during the reload pass and also by sparc_expand_prologue. */
5424
5425 static HOST_WIDE_INT
5426 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5427 {
5428 HOST_WIDE_INT frame_size, apparent_frame_size;
5429 int args_size, n_global_fp_regs = 0;
5430 bool save_local_in_regs_p = false;
5431 unsigned int i;
5432
5433 /* If the function allocates dynamic stack space, the dynamic offset is
5434 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5435 if (leaf_function && !cfun->calls_alloca)
5436 args_size = 0;
5437 else
5438 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5439
5440 /* Calculate space needed for global registers. */
5441 if (TARGET_ARCH64)
5442 {
5443 for (i = 0; i < 8; i++)
5444 if (save_global_or_fp_reg_p (i, 0))
5445 n_global_fp_regs += 2;
5446 }
5447 else
5448 {
5449 for (i = 0; i < 8; i += 2)
5450 if (save_global_or_fp_reg_p (i, 0)
5451 || save_global_or_fp_reg_p (i + 1, 0))
5452 n_global_fp_regs += 2;
5453 }
5454
5455 /* In the flat window model, find out which local and in registers need to
5456 be saved. We don't reserve space in the current frame for them as they
5457 will be spilled into the register window save area of the caller's frame.
5458 However, as soon as we use this register window save area, we must create
5459 that of the current frame to make it the live one. */
5460 if (TARGET_FLAT)
5461 for (i = 16; i < 32; i++)
5462 if (save_local_or_in_reg_p (i, leaf_function))
5463 {
5464 save_local_in_regs_p = true;
5465 break;
5466 }
5467
5468 /* Calculate space needed for FP registers. */
5469 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5470 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5471 n_global_fp_regs += 2;
5472
5473 if (size == 0
5474 && n_global_fp_regs == 0
5475 && args_size == 0
5476 && !save_local_in_regs_p)
5477 frame_size = apparent_frame_size = 0;
5478 else
5479 {
5480 /* Start from the apparent frame size. */
5481 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5482
5483 /* We need to add the size of the outgoing argument area. */
5484 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5485
5486 /* And that of the register window save area. */
5487 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5488
5489 /* Finally, bump to the appropriate alignment. */
5490 frame_size = SPARC_STACK_ALIGN (frame_size);
5491 }
5492
5493 /* Set up values for use in prologue and epilogue. */
5494 sparc_frame_size = frame_size;
5495 sparc_apparent_frame_size = apparent_frame_size;
5496 sparc_n_global_fp_regs = n_global_fp_regs;
5497 sparc_save_local_in_regs_p = save_local_in_regs_p;
5498
5499 return frame_size;
5500 }
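
/* For illustration, take a non-leaf 32-bit function with 20 bytes of locals,
   24 bytes of outgoing arguments and no call-saved global or FP registers to
   spill (the usual case).  Assuming the standard 32-bit ABI layout, i.e. a
   FIRST_PARM_OFFSET of 68 bytes (the 16-word register window save area plus
   the hidden structure-return slot) and 8-byte stack alignment, the
   computation above yields roughly:

     apparent_frame_size = ROUND_UP (20, 8) + 0 * 4   =  24
     frame_size          = 24 + ROUND_UP (24, 8) + 68 = 116
     frame_size          = SPARC_STACK_ALIGN (116)    = 120  */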
5501
5502 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5503
5504 int
5505 sparc_initial_elimination_offset (int to)
5506 {
5507 int offset;
5508
5509 if (to == STACK_POINTER_REGNUM)
5510 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5511 else
5512 offset = 0;
5513
5514 offset += SPARC_STACK_BIAS;
5515 return offset;
5516 }
5517
5518 /* Output any necessary .register pseudo-ops. */
5519
5520 void
5521 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5522 {
5523 int i;
5524
5525 if (TARGET_ARCH32)
5526 return;
5527
5528 /* Check if %g[2367] were used without
5529 .register being printed for them already. */
5530 for (i = 2; i < 8; i++)
5531 {
5532 if (df_regs_ever_live_p (i)
5533 && ! sparc_hard_reg_printed [i])
5534 {
5535 sparc_hard_reg_printed [i] = 1;
5536 /* %g7 is used as TLS base register, use #ignore
5537 for it instead of #scratch. */
5538 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5539 i == 7 ? "ignore" : "scratch");
5540 }
5541 if (i == 3) i = 5;
5542 }
5543 }
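
/* For a 64-bit function that happens to use %g2, %g3 and %g7, the loop above
   would typically emit:

	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g7, #ignore

   %g4 and %g5 are skipped (the loop jumps from 3 to 6) and each directive is
   emitted at most once per register.  */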
5544
5545 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5546
5547 #if PROBE_INTERVAL > 4096
5548 #error Cannot use indexed addressing mode for stack probing
5549 #endif
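
/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL is
   1 << 12 = 4096 bytes, i.e. roughly one probe per page.  */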
5550
5551 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5552 inclusive. These are offsets from the current stack pointer.
5553
5554 Note that we don't use the REG+REG addressing mode for the probes because
5555    of the stack bias in 64-bit mode.  And it doesn't really buy us anything,
5556    so the advantage of having a single code path wins here.  */
5557
5558 static void
5559 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5560 {
5561 rtx g1 = gen_rtx_REG (Pmode, 1);
5562
5563 /* See if we have a constant small number of probes to generate. If so,
5564 that's the easy case. */
5565 if (size <= PROBE_INTERVAL)
5566 {
5567 emit_move_insn (g1, GEN_INT (first));
5568 emit_insn (gen_rtx_SET (g1,
5569 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5570 emit_stack_probe (plus_constant (Pmode, g1, -size));
5571 }
5572
5573 /* The run-time loop is made up of 9 insns in the generic case while the
5574      compile-time (unrolled) sequence is made up of 4+2*(n-2) insns for n intervals.  */
5575 else if (size <= 4 * PROBE_INTERVAL)
5576 {
5577 HOST_WIDE_INT i;
5578
5579 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5580 emit_insn (gen_rtx_SET (g1,
5581 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5582 emit_stack_probe (g1);
5583
5584 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5585 it exceeds SIZE. If only two probes are needed, this will not
5586 generate any code. Then probe at FIRST + SIZE. */
5587 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5588 {
5589 emit_insn (gen_rtx_SET (g1,
5590 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5591 emit_stack_probe (g1);
5592 }
5593
5594 emit_stack_probe (plus_constant (Pmode, g1,
5595 (i - PROBE_INTERVAL) - size));
5596 }
5597
5598 /* Otherwise, do the same as above, but in a loop. Note that we must be
5599 extra careful with variables wrapping around because we might be at
5600 the very top (or the very bottom) of the address space and we have
5601 to be able to handle this case properly; in particular, we use an
5602 equality test for the loop condition. */
5603 else
5604 {
5605 HOST_WIDE_INT rounded_size;
5606 rtx g4 = gen_rtx_REG (Pmode, 4);
5607
5608 emit_move_insn (g1, GEN_INT (first));
5609
5610
5611 /* Step 1: round SIZE to the previous multiple of the interval. */
5612
5613 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5614 emit_move_insn (g4, GEN_INT (rounded_size));
5615
5616
5617 /* Step 2: compute initial and final value of the loop counter. */
5618
5619 /* TEST_ADDR = SP + FIRST. */
5620 emit_insn (gen_rtx_SET (g1,
5621 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5622
5623 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5624 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5625
5626
5627 /* Step 3: the loop
5628
5629 while (TEST_ADDR != LAST_ADDR)
5630 {
5631 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5632 probe at TEST_ADDR
5633 }
5634
5635 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5636 until it is equal to ROUNDED_SIZE. */
5637
5638 emit_insn (gen_probe_stack_range (Pmode, g1, g1, g4));
5639
5640
5641 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5642 that SIZE is equal to ROUNDED_SIZE. */
5643
5644 if (size != rounded_size)
5645 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5646 }
5647
5648 /* Make sure nothing is scheduled before we are done. */
5649 emit_insn (gen_blockage ());
5650 }
5651
5652 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5653 absolute addresses. */
5654
5655 const char *
5656 output_probe_stack_range (rtx reg1, rtx reg2)
5657 {
5658 static int labelno = 0;
5659 char loop_lab[32];
5660 rtx xops[2];
5661
5662 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5663
5664 /* Loop. */
5665 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5666
5667 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5668 xops[0] = reg1;
5669 xops[1] = GEN_INT (-PROBE_INTERVAL);
5670 output_asm_insn ("add\t%0, %1, %0", xops);
5671
5672 /* Test if TEST_ADDR == LAST_ADDR. */
5673 xops[1] = reg2;
5674 output_asm_insn ("cmp\t%0, %1", xops);
5675
5676 /* Probe at TEST_ADDR and branch. */
5677 if (TARGET_ARCH64)
5678 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5679 else
5680 fputs ("\tbne\t", asm_out_file);
5681 assemble_name_raw (asm_out_file, loop_lab);
5682 fputc ('\n', asm_out_file);
5683 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5684 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5685
5686 return "";
5687 }
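
/* As a rough illustration, with the registers typically set up by
   sparc_emit_probe_stack_range (%g1 and %g4) and the default 4096-byte
   interval, the emitted loop looks like this in 32-bit mode (the 64-bit
   variant uses "bne,pt %xcc," and a stack bias of 2047 in the store):

   .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne	.LPSRL0
	 st	%g0, [%g1+0]  */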
5688
5689 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5690 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5691 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5692 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5693 the action to be performed if it returns false. Return the new offset. */
5694
5695 typedef bool (*sorr_pred_t) (unsigned int, int);
5696 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5697
5698 static int
5699 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5700 int offset, int leaf_function, sorr_pred_t save_p,
5701 sorr_act_t action_true, sorr_act_t action_false)
5702 {
5703 unsigned int i;
5704 rtx mem;
5705 rtx_insn *insn;
5706
5707 if (TARGET_ARCH64 && high <= 32)
5708 {
5709 int fp_offset = -1;
5710
5711 for (i = low; i < high; i++)
5712 {
5713 if (save_p (i, leaf_function))
5714 {
5715 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5716 base, offset));
5717 if (action_true == SORR_SAVE)
5718 {
5719 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5720 RTX_FRAME_RELATED_P (insn) = 1;
5721 }
5722 else /* action_true == SORR_RESTORE */
5723 {
5724 /* The frame pointer must be restored last since its old
5725 value may be used as base address for the frame. This
5726 is problematic in 64-bit mode only because of the lack
5727 of double-word load instruction. */
5728 if (i == HARD_FRAME_POINTER_REGNUM)
5729 fp_offset = offset;
5730 else
5731 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5732 }
5733 offset += 8;
5734 }
5735 else if (action_false == SORR_ADVANCE)
5736 offset += 8;
5737 }
5738
5739 if (fp_offset >= 0)
5740 {
5741 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5742 emit_move_insn (hard_frame_pointer_rtx, mem);
5743 }
5744 }
5745 else
5746 {
5747 for (i = low; i < high; i += 2)
5748 {
5749 bool reg0 = save_p (i, leaf_function);
5750 bool reg1 = save_p (i + 1, leaf_function);
5751 machine_mode mode;
5752 int regno;
5753
5754 if (reg0 && reg1)
5755 {
5756 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5757 regno = i;
5758 }
5759 else if (reg0)
5760 {
5761 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5762 regno = i;
5763 }
5764 else if (reg1)
5765 {
5766 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5767 regno = i + 1;
5768 offset += 4;
5769 }
5770 else
5771 {
5772 if (action_false == SORR_ADVANCE)
5773 offset += 8;
5774 continue;
5775 }
5776
5777 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5778 if (action_true == SORR_SAVE)
5779 {
5780 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5781 RTX_FRAME_RELATED_P (insn) = 1;
5782 if (mode == DImode)
5783 {
5784 rtx set1, set2;
5785 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5786 offset));
5787 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5788 RTX_FRAME_RELATED_P (set1) = 1;
5789 mem
5790 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5791 offset + 4));
5792 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5793 RTX_FRAME_RELATED_P (set2) = 1;
5794 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5795 gen_rtx_PARALLEL (VOIDmode,
5796 gen_rtvec (2, set1, set2)));
5797 }
5798 }
5799 else /* action_true == SORR_RESTORE */
5800 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5801
5802 /* Bump and round down to double word
5803 in case we already bumped by 4. */
5804 offset = ROUND_DOWN (offset + 8, 8);
5805 }
5806 }
5807
5808 return offset;
5809 }
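
/* For example, on 32-bit, if both %l0 and %l1 must be saved they are handled
   as a single DImode (std/ldd) access at the current offset, whereas if only
   %l1 must be saved it is handled as an SImode access at offset + 4; either
   way the offset then advances to the next double word.  */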
5810
5811 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5812
5813 static rtx
5814 emit_adjust_base_to_offset (rtx base, int offset)
5815 {
5816 /* ??? This might be optimized a little as %g1 might already have a
5817 value close enough that a single add insn will do. */
5818 /* ??? Although, all of this is probably only a temporary fix because
5819 if %g1 can hold a function result, then sparc_expand_epilogue will
5820 lose (the result will be clobbered). */
5821 rtx new_base = gen_rtx_REG (Pmode, 1);
5822 emit_move_insn (new_base, GEN_INT (offset));
5823 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5824 return new_base;
5825 }
5826
5827 /* Emit code to save/restore call-saved global and FP registers. */
5828
5829 static void
5830 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5831 {
5832 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5833 {
5834 base = emit_adjust_base_to_offset (base, offset);
5835 offset = 0;
5836 }
5837
5838 offset
5839 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5840 save_global_or_fp_reg_p, action, SORR_NONE);
5841 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5842 save_global_or_fp_reg_p, action, SORR_NONE);
5843 }
5844
5845 /* Emit code to save/restore call-saved local and in registers. */
5846
5847 static void
5848 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5849 {
5850 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5851 {
5852 base = emit_adjust_base_to_offset (base, offset);
5853 offset = 0;
5854 }
5855
5856 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5857 save_local_or_in_reg_p, action, SORR_ADVANCE);
5858 }
5859
5860 /* Emit a window_save insn. */
5861
5862 static rtx_insn *
5863 emit_window_save (rtx increment)
5864 {
5865 rtx_insn *insn = emit_insn (gen_window_save (increment));
5866 RTX_FRAME_RELATED_P (insn) = 1;
5867
5868 /* The incoming return address (%o7) is saved in %i7. */
5869 add_reg_note (insn, REG_CFA_REGISTER,
5870 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5871 gen_rtx_REG (Pmode,
5872 INCOMING_RETURN_ADDR_REGNUM)));
5873
5874 /* The window save event. */
5875 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5876
5877 /* The CFA is %fp, the hard frame pointer. */
5878 add_reg_note (insn, REG_CFA_DEF_CFA,
5879 plus_constant (Pmode, hard_frame_pointer_rtx,
5880 INCOMING_FRAME_SP_OFFSET));
5881
5882 return insn;
5883 }
5884
5885 /* Generate an increment for the stack pointer. */
5886
5887 static rtx
5888 gen_stack_pointer_inc (rtx increment)
5889 {
5890 return gen_rtx_SET (stack_pointer_rtx,
5891 gen_rtx_PLUS (Pmode,
5892 stack_pointer_rtx,
5893 increment));
5894 }
5895
5896 /* Expand the function prologue. The prologue is responsible for reserving
5897 storage for the frame, saving the call-saved registers and loading the
5898 GOT register if needed. */
5899
5900 void
5901 sparc_expand_prologue (void)
5902 {
5903 HOST_WIDE_INT size;
5904 rtx_insn *insn;
5905
5906 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5907 on the final value of the flag means deferring the prologue/epilogue
5908 expansion until just before the second scheduling pass, which is too
5909 late to emit multiple epilogues or return insns.
5910
5911 Of course we are making the assumption that the value of the flag
5912 will not change between now and its final value. Of the three parts
5913 of the formula, only the last one can reasonably vary. Let's take a
5914      closer look, after assuming that the first two are true
5915 (otherwise the last value is effectively silenced).
5916
5917 If only_leaf_regs_used returns false, the global predicate will also
5918 be false so the actual frame size calculated below will be positive.
5919 As a consequence, the save_register_window insn will be emitted in
5920 the instruction stream; now this insn explicitly references %fp
5921 which is not a leaf register so only_leaf_regs_used will always
5922 return false subsequently.
5923
5924 If only_leaf_regs_used returns true, we hope that the subsequent
5925 optimization passes won't cause non-leaf registers to pop up. For
5926 example, the regrename pass has special provisions to not rename to
5927 non-leaf registers in a leaf function. */
5928 sparc_leaf_function_p
5929 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5930
5931 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5932
5933 if (flag_stack_usage_info)
5934 current_function_static_stack_size = size;
5935
5936 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5937 || flag_stack_clash_protection)
5938 {
5939 if (crtl->is_leaf && !cfun->calls_alloca)
5940 {
5941 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5942 sparc_emit_probe_stack_range (get_stack_check_protect (),
5943 size - get_stack_check_protect ());
5944 }
5945 else if (size > 0)
5946 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5947 }
5948
5949 if (size == 0)
5950 ; /* do nothing. */
5951 else if (sparc_leaf_function_p)
5952 {
5953 rtx size_int_rtx = GEN_INT (-size);
5954
5955 if (size <= 4096)
5956 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5957 else if (size <= 8192)
5958 {
5959 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5960 RTX_FRAME_RELATED_P (insn) = 1;
5961
5962 /* %sp is still the CFA register. */
5963 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5964 }
5965 else
5966 {
5967 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5968 emit_move_insn (size_rtx, size_int_rtx);
5969 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5970 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5971 gen_stack_pointer_inc (size_int_rtx));
5972 }
5973
5974 RTX_FRAME_RELATED_P (insn) = 1;
5975 }
5976 else
5977 {
5978 rtx size_int_rtx = GEN_INT (-size);
5979
5980 if (size <= 4096)
5981 emit_window_save (size_int_rtx);
5982 else if (size <= 8192)
5983 {
5984 emit_window_save (GEN_INT (-4096));
5985
5986 /* %sp is not the CFA register anymore. */
5987 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5988
5989 /* Make sure no %fp-based store is issued until after the frame is
5990 established. The offset between the frame pointer and the stack
5991 pointer is calculated relative to the value of the stack pointer
5992 at the end of the function prologue, and moving instructions that
5993 access the stack via the frame pointer between the instructions
5994 that decrement the stack pointer could result in accessing the
5995 register window save area, which is volatile. */
5996 emit_insn (gen_frame_blockage ());
5997 }
5998 else
5999 {
6000 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6001 emit_move_insn (size_rtx, size_int_rtx);
6002 emit_window_save (size_rtx);
6003 }
6004 }
6005
6006 if (sparc_leaf_function_p)
6007 {
6008 sparc_frame_base_reg = stack_pointer_rtx;
6009 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6010 }
6011 else
6012 {
6013 sparc_frame_base_reg = hard_frame_pointer_rtx;
6014 sparc_frame_base_offset = SPARC_STACK_BIAS;
6015 }
6016
6017 if (sparc_n_global_fp_regs > 0)
6018 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6019 sparc_frame_base_offset
6020 - sparc_apparent_frame_size,
6021 SORR_SAVE);
6022
6023 /* Advertise that the data calculated just above are now valid. */
6024 sparc_prologue_data_valid_p = true;
6025 }
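
/* For a typical 32-bit non-leaf function with the minimum 96-byte frame, the
   code emitted above boils down to a single "save %sp, -96, %sp".  A leaf
   function that only needs stack space keeps its caller's register window
   and instead emits "add %sp, -SIZE, %sp" (or two adds, or a move through
   %g1, for the larger frames handled above).  */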
6026
6027 /* Expand the function prologue for the flat register window model.  The
6028    prologue is responsible for reserving storage for the frame, saving the
6029    call-saved registers and loading the GOT register if needed.  */
6030
6031 void
6032 sparc_flat_expand_prologue (void)
6033 {
6034 HOST_WIDE_INT size;
6035 rtx_insn *insn;
6036
6037 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6038
6039 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6040
6041 if (flag_stack_usage_info)
6042 current_function_static_stack_size = size;
6043
6044 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6045 || flag_stack_clash_protection)
6046 {
6047 if (crtl->is_leaf && !cfun->calls_alloca)
6048 {
6049 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6050 sparc_emit_probe_stack_range (get_stack_check_protect (),
6051 size - get_stack_check_protect ());
6052 }
6053 else if (size > 0)
6054 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6055 }
6056
6057 if (sparc_save_local_in_regs_p)
6058 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6059 SORR_SAVE);
6060
6061 if (size == 0)
6062 ; /* do nothing. */
6063 else
6064 {
6065 rtx size_int_rtx, size_rtx;
6066
6067 size_rtx = size_int_rtx = GEN_INT (-size);
6068
6069 /* We establish the frame (i.e. decrement the stack pointer) first, even
6070 if we use a frame pointer, because we cannot clobber any call-saved
6071 registers, including the frame pointer, if we haven't created a new
6072 register save area, for the sake of compatibility with the ABI. */
6073 if (size <= 4096)
6074 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6075 else if (size <= 8192 && !frame_pointer_needed)
6076 {
6077 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6078 RTX_FRAME_RELATED_P (insn) = 1;
6079 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6080 }
6081 else
6082 {
6083 size_rtx = gen_rtx_REG (Pmode, 1);
6084 emit_move_insn (size_rtx, size_int_rtx);
6085 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6086 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6087 gen_stack_pointer_inc (size_int_rtx));
6088 }
6089 RTX_FRAME_RELATED_P (insn) = 1;
6090
6091 /* Ensure nothing is scheduled until after the frame is established. */
6092 emit_insn (gen_blockage ());
6093
6094 if (frame_pointer_needed)
6095 {
6096 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6097 gen_rtx_MINUS (Pmode,
6098 stack_pointer_rtx,
6099 size_rtx)));
6100 RTX_FRAME_RELATED_P (insn) = 1;
6101
6102 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6103 gen_rtx_SET (hard_frame_pointer_rtx,
6104 plus_constant (Pmode, stack_pointer_rtx,
6105 size)));
6106 }
6107
6108 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6109 {
6110 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6111 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6112
6113 insn = emit_move_insn (i7, o7);
6114 RTX_FRAME_RELATED_P (insn) = 1;
6115
6116 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6117
6118 /* Prevent this instruction from ever being considered dead,
6119 even if this function has no epilogue. */
6120 emit_use (i7);
6121 }
6122 }
6123
6124 if (frame_pointer_needed)
6125 {
6126 sparc_frame_base_reg = hard_frame_pointer_rtx;
6127 sparc_frame_base_offset = SPARC_STACK_BIAS;
6128 }
6129 else
6130 {
6131 sparc_frame_base_reg = stack_pointer_rtx;
6132 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6133 }
6134
6135 if (sparc_n_global_fp_regs > 0)
6136 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6137 sparc_frame_base_offset
6138 - sparc_apparent_frame_size,
6139 SORR_SAVE);
6140
6141 /* Advertise that the data calculated just above are now valid. */
6142 sparc_prologue_data_valid_p = true;
6143 }
6144
6145 /* This function generates the assembly code for function entry, which boils
6146 down to emitting the necessary .register directives. */
6147
6148 static void
6149 sparc_asm_function_prologue (FILE *file)
6150 {
6151 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6152 if (!TARGET_FLAT)
6153 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6154
6155 sparc_output_scratch_registers (file);
6156 }
6157
6158 /* Expand the function epilogue, either normal or part of a sibcall.
6159 We emit all the instructions except the return or the call. */
6160
6161 void
6162 sparc_expand_epilogue (bool for_eh)
6163 {
6164 HOST_WIDE_INT size = sparc_frame_size;
6165
6166 if (cfun->calls_alloca)
6167 emit_insn (gen_frame_blockage ());
6168
6169 if (sparc_n_global_fp_regs > 0)
6170 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6171 sparc_frame_base_offset
6172 - sparc_apparent_frame_size,
6173 SORR_RESTORE);
6174
6175 if (size == 0 || for_eh)
6176 ; /* do nothing. */
6177 else if (sparc_leaf_function_p)
6178 {
6179 if (size <= 4096)
6180 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6181 else if (size <= 8192)
6182 {
6183 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6184 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6185 }
6186 else
6187 {
6188 rtx reg = gen_rtx_REG (Pmode, 1);
6189 emit_move_insn (reg, GEN_INT (size));
6190 emit_insn (gen_stack_pointer_inc (reg));
6191 }
6192 }
6193 }
6194
6195 /* Expand the function epilogue for the flat register window model, either
6196    normal or part of a sibcall.  We emit all the instructions except the return or the call.  */
6197
6198 void
6199 sparc_flat_expand_epilogue (bool for_eh)
6200 {
6201 HOST_WIDE_INT size = sparc_frame_size;
6202
6203 if (sparc_n_global_fp_regs > 0)
6204 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6205 sparc_frame_base_offset
6206 - sparc_apparent_frame_size,
6207 SORR_RESTORE);
6208
6209   /* If we have a frame pointer, we need both to restore it before the
6210      frame is destroyed and to use its current value while destroying the frame.
6211 Since we don't have an atomic way to do that in the flat window model,
6212 we save the current value into a temporary register (%g1). */
6213 if (frame_pointer_needed && !for_eh)
6214 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6215
6216 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6217 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6218 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6219
6220 if (sparc_save_local_in_regs_p)
6221 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6222 sparc_frame_base_offset,
6223 SORR_RESTORE);
6224
6225 if (size == 0 || for_eh)
6226 ; /* do nothing. */
6227 else if (frame_pointer_needed)
6228 {
6229 /* Make sure the frame is destroyed after everything else is done. */
6230 emit_insn (gen_blockage ());
6231
6232 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6233 }
6234 else
6235 {
6236 /* Likewise. */
6237 emit_insn (gen_blockage ());
6238
6239 if (size <= 4096)
6240 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6241 else if (size <= 8192)
6242 {
6243 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6244 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6245 }
6246 else
6247 {
6248 rtx reg = gen_rtx_REG (Pmode, 1);
6249 emit_move_insn (reg, GEN_INT (size));
6250 emit_insn (gen_stack_pointer_inc (reg));
6251 }
6252 }
6253 }
6254
6255 /* Return true if it is appropriate to emit `return' instructions in the
6256 body of a function. */
6257
6258 bool
6259 sparc_can_use_return_insn_p (void)
6260 {
6261 return sparc_prologue_data_valid_p
6262 && sparc_n_global_fp_regs == 0
6263 	 && (TARGET_FLAT
6264 	     ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6265 	     : (sparc_frame_size == 0 || !sparc_leaf_function_p));
6266 }
6267
6268 /* This function generates the assembly code for function exit. */
6269
6270 static void
6271 sparc_asm_function_epilogue (FILE *file)
6272 {
6273 /* If the last two instructions of a function are "call foo; dslot;"
6274 the return address might point to the first instruction in the next
6275 function and we have to output a dummy nop for the sake of sane
6276 backtraces in such cases. This is pointless for sibling calls since
6277 the return address is explicitly adjusted. */
6278
6279 rtx_insn *insn = get_last_insn ();
6280
6281 rtx last_real_insn = prev_real_insn (insn);
6282 if (last_real_insn
6283 && NONJUMP_INSN_P (last_real_insn)
6284 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6285 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6286
6287 if (last_real_insn
6288 && CALL_P (last_real_insn)
6289 && !SIBLING_CALL_P (last_real_insn))
6290 fputs("\tnop\n", file);
6291
6292 sparc_output_deferred_case_vectors ();
6293 }
6294
6295 /* Output a 'restore' instruction. */
6296
6297 static void
6298 output_restore (rtx pat)
6299 {
6300 rtx operands[3];
6301
6302 if (! pat)
6303 {
6304 fputs ("\t restore\n", asm_out_file);
6305 return;
6306 }
6307
6308 gcc_assert (GET_CODE (pat) == SET);
6309
6310 operands[0] = SET_DEST (pat);
6311 pat = SET_SRC (pat);
6312
6313 switch (GET_CODE (pat))
6314 {
6315 case PLUS:
6316 operands[1] = XEXP (pat, 0);
6317 operands[2] = XEXP (pat, 1);
6318 output_asm_insn (" restore %r1, %2, %Y0", operands);
6319 break;
6320 case LO_SUM:
6321 operands[1] = XEXP (pat, 0);
6322 operands[2] = XEXP (pat, 1);
6323 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6324 break;
6325 case ASHIFT:
6326 operands[1] = XEXP (pat, 0);
6327 gcc_assert (XEXP (pat, 1) == const1_rtx);
6328 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6329 break;
6330 default:
6331 operands[1] = pat;
6332 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6333 break;
6334 }
6335 }
6336
6337 /* Output a return. */
6338
6339 const char *
6340 output_return (rtx_insn *insn)
6341 {
6342 if (crtl->calls_eh_return)
6343 {
6344 /* If the function uses __builtin_eh_return, the eh_return
6345 machinery occupies the delay slot. */
6346 gcc_assert (!final_sequence);
6347
6348 if (flag_delayed_branch)
6349 {
6350 if (!TARGET_FLAT && TARGET_V9)
6351 fputs ("\treturn\t%i7+8\n", asm_out_file);
6352 else
6353 {
6354 if (!TARGET_FLAT)
6355 fputs ("\trestore\n", asm_out_file);
6356
6357 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6358 }
6359
6360 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6361 }
6362 else
6363 {
6364 if (!TARGET_FLAT)
6365 fputs ("\trestore\n", asm_out_file);
6366
6367 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6368 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6369 }
6370 }
6371 else if (sparc_leaf_function_p || TARGET_FLAT)
6372 {
6373 /* This is a leaf or flat function so we don't have to bother restoring
6374 the register window, which frees us from dealing with the convoluted
6375 semantics of restore/return. We simply output the jump to the
6376 return address and the insn in the delay slot (if any). */
6377
6378 return "jmp\t%%o7+%)%#";
6379 }
6380 else
6381 {
6382 /* This is a regular function so we have to restore the register window.
6383 We may have a pending insn for the delay slot, which will be either
6384 combined with the 'restore' instruction or put in the delay slot of
6385 the 'return' instruction. */
6386
6387 if (final_sequence)
6388 {
6389 rtx_insn *delay;
6390 rtx pat;
6391
6392 delay = NEXT_INSN (insn);
6393 gcc_assert (delay);
6394
6395 pat = PATTERN (delay);
6396
6397 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6398 {
6399 epilogue_renumber (&pat, 0);
6400 return "return\t%%i7+%)%#";
6401 }
6402 else
6403 {
6404 output_asm_insn ("jmp\t%%i7+%)", NULL);
6405
6406 /* We're going to output the insn in the delay slot manually.
6407 Make sure to output its source location first. */
6408 PATTERN (delay) = gen_blockage ();
6409 INSN_CODE (delay) = -1;
6410 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6411 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6412
6413 output_restore (pat);
6414 }
6415 }
6416 else
6417 {
6418 /* The delay slot is empty. */
6419 if (TARGET_V9)
6420 return "return\t%%i7+%)\n\t nop";
6421 else if (flag_delayed_branch)
6422 return "jmp\t%%i7+%)\n\t restore";
6423 else
6424 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6425 }
6426 }
6427
6428 return "";
6429 }
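
/* Illustrative output for the common cases handled above: a leaf or flat
   function ends with "jmp %o7+8" followed by the delay-slot insn (or a nop);
   a regular V9 function ends with "return %i7+8"; a regular pre-V9 function
   ends with "jmp %i7+8" with "restore" in the delay slot, possibly combined
   with a pending insn.  The return offset printed by %) is normally 8, or 12
   when the caller follows the call with an unimp insn for an aggregate
   return under the 32-bit ABI.  */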
6430
6431 /* Output a sibling call. */
6432
6433 const char *
6434 output_sibcall (rtx_insn *insn, rtx call_operand)
6435 {
6436 rtx operands[1];
6437
6438 gcc_assert (flag_delayed_branch);
6439
6440 operands[0] = call_operand;
6441
6442 if (sparc_leaf_function_p || TARGET_FLAT)
6443 {
6444 /* This is a leaf or flat function so we don't have to bother restoring
6445 the register window. We simply output the jump to the function and
6446 the insn in the delay slot (if any). */
6447
6448 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6449
6450 if (final_sequence)
6451 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6452 operands);
6453 else
6454 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6455 it into branch if possible. */
6456 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6457 operands);
6458 }
6459 else
6460 {
6461 /* This is a regular function so we have to restore the register window.
6462 We may have a pending insn for the delay slot, which will be combined
6463 with the 'restore' instruction. */
6464
6465 output_asm_insn ("call\t%a0, 0", operands);
6466
6467 if (final_sequence)
6468 {
6469 rtx_insn *delay;
6470 rtx pat;
6471
6472 delay = NEXT_INSN (insn);
6473 gcc_assert (delay);
6474
6475 pat = PATTERN (delay);
6476
6477 /* We're going to output the insn in the delay slot manually.
6478 Make sure to output its source location first. */
6479 PATTERN (delay) = gen_blockage ();
6480 INSN_CODE (delay) = -1;
6481 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6482 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6483
6484 output_restore (pat);
6485 }
6486 else
6487 output_restore (NULL_RTX);
6488 }
6489
6490 return "";
6491 }
6492 \f
6493 /* Functions for handling argument passing.
6494
6495 For 32-bit, the first 6 args are normally in registers and the rest are
6496 pushed. Any arg that starts within the first 6 words is at least
6497    partially passed in a register unless its data type forbids it.
6498
6499 For 64-bit, the argument registers are laid out as an array of 16 elements
6500 and arguments are added sequentially. The first 6 int args and up to the
6501 first 16 fp args (depending on size) are passed in regs.
6502
6503 Slot Stack Integral Float Float in structure Double Long Double
6504 ---- ----- -------- ----- ------------------ ------ -----------
6505 15 [SP+248] %f31 %f30,%f31 %d30
6506 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6507 13 [SP+232] %f27 %f26,%f27 %d26
6508 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6509 11 [SP+216] %f23 %f22,%f23 %d22
6510 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6511 9 [SP+200] %f19 %f18,%f19 %d18
6512 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6513 7 [SP+184] %f15 %f14,%f15 %d14
6514 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6515 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6516 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6517 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6518 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6519 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6520 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6521
6522 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6523
6524 Integral arguments are always passed as 64-bit quantities appropriately
6525 extended.
6526
6527 Passing of floating point values is handled as follows.
6528 If a prototype is in scope:
6529 If the value is in a named argument (i.e. not a stdarg function or a
6530 value not part of the `...') then the value is passed in the appropriate
6531 fp reg.
6532 If the value is part of the `...' and is passed in one of the first 6
6533 slots then the value is passed in the appropriate int reg.
6534 If the value is part of the `...' and is not passed in one of the first 6
6535 slots then the value is passed in memory.
6536 If a prototype is not in scope:
6537 If the value is one of the first 6 arguments the value is passed in the
6538 appropriate integer reg and the appropriate fp reg.
6539 If the value is not one of the first 6 arguments the value is passed in
6540 the appropriate fp reg and in memory.
6541
6542
6543 Summary of the calling conventions implemented by GCC on the SPARC:
6544
6545 32-bit ABI:
6546 size argument return value
6547
6548 small integer <4 int. reg. int. reg.
6549 word 4 int. reg. int. reg.
6550 double word 8 int. reg. int. reg.
6551
6552 _Complex small integer <8 int. reg. int. reg.
6553 _Complex word 8 int. reg. int. reg.
6554 _Complex double word 16 memory int. reg.
6555
6556 vector integer <=8 int. reg. FP reg.
6557 vector integer >8 memory memory
6558
6559 float 4 int. reg. FP reg.
6560 double 8 int. reg. FP reg.
6561 long double 16 memory memory
6562
6563 _Complex float 8 memory FP reg.
6564 _Complex double 16 memory FP reg.
6565 _Complex long double 32 memory FP reg.
6566
6567 vector float any memory memory
6568
6569 aggregate any memory memory
6570
6571
6572
6573 64-bit ABI:
6574 size argument return value
6575
6576 small integer <8 int. reg. int. reg.
6577 word 8 int. reg. int. reg.
6578 double word 16 int. reg. int. reg.
6579
6580 _Complex small integer <16 int. reg. int. reg.
6581 _Complex word 16 int. reg. int. reg.
6582 _Complex double word 32 memory int. reg.
6583
6584 vector integer <=16 FP reg. FP reg.
6585 vector integer 16<s<=32 memory FP reg.
6586 vector integer >32 memory memory
6587
6588 float 4 FP reg. FP reg.
6589 double 8 FP reg. FP reg.
6590 long double 16 FP reg. FP reg.
6591
6592 _Complex float 8 FP reg. FP reg.
6593 _Complex double 16 FP reg. FP reg.
6594 _Complex long double 32 memory FP reg.
6595
6596 vector float <=16 FP reg. FP reg.
6597 vector float 16<s<=32 memory FP reg.
6598 vector float >32 memory memory
6599
6600 aggregate <=16 reg. reg.
6601 aggregate 16<s<=32 memory reg.
6602 aggregate >32 memory memory
6603
6604
6605
6606 Note #1: complex floating-point types follow the extended SPARC ABIs as
6607 implemented by the Sun compiler.
6608
6609 Note #2: integer vector types follow the scalar floating-point types
6610 conventions to match what is implemented by the Sun VIS SDK.
6611
6612 Note #3: floating-point vector types follow the aggregate types
6613 conventions. */
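
/* As a concrete reading of the 64-bit table above, a prototyped call such as
   f (int i, double d, float s) passes i in %o0 (slot 0), d in %d2 (slot 1)
   and s in %f5 (slot 2, right-justified in the odd half of the slot); the
   same call without a prototype in scope would pass the floating-point
   values both in the FP registers and in %o1/%o2.  */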
6614
6615
6616 /* Maximum number of int regs for args. */
6617 #define SPARC_INT_ARG_MAX 6
6618 /* Maximum number of fp regs for args. */
6619 #define SPARC_FP_ARG_MAX 16
6620 /* Number of words (partially) occupied for a given size in units. */
6621 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
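/* E.g. under the 64-bit ABI (UNITS_PER_WORD == 8), CEIL_NWORDS (12) is 2.  */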
6622
6623 /* Handle the INIT_CUMULATIVE_ARGS macro.
6624 Initialize a variable CUM of type CUMULATIVE_ARGS
6625 for a call to a function whose data type is FNTYPE.
6626 For a library call, FNTYPE is 0. */
6627
6628 void
6629 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6630 {
6631 cum->words = 0;
6632 cum->prototype_p = fntype && prototype_p (fntype);
6633 cum->libcall_p = !fntype;
6634 }
6635
6636 /* Handle promotion of pointer and integer arguments. */
6637
6638 static machine_mode
6639 sparc_promote_function_mode (const_tree type, machine_mode mode,
6640 int *punsignedp, const_tree, int)
6641 {
6642 if (type && POINTER_TYPE_P (type))
6643 {
6644 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6645 return Pmode;
6646 }
6647
6648 /* Integral arguments are passed as full words, as per the ABI. */
6649 if (GET_MODE_CLASS (mode) == MODE_INT
6650 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6651 return word_mode;
6652
6653 return mode;
6654 }
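
/* So, for example, a 'short' argument is widened and passed as a full word
   (SImode in 32-bit mode, DImode in 64-bit mode), and a pointer is always
   passed in Pmode with the extension given by POINTERS_EXTEND_UNSIGNED.  */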
6655
6656 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6657
6658 static bool
6659 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6660 {
6661   return TARGET_ARCH64;
6662 }
6663
6664 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6665 Specify whether to pass the argument by reference. */
6666
6667 static bool
6668 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6669 {
6670 tree type = arg.type;
6671 machine_mode mode = arg.mode;
6672 if (TARGET_ARCH32)
6673 /* Original SPARC 32-bit ABI says that structures and unions,
6674 and quad-precision floats are passed by reference.
6675 All other base types are passed in registers.
6676
6677 Extended ABI (as implemented by the Sun compiler) says that all
6678 complex floats are passed by reference. Pass complex integers
6679 in registers up to 8 bytes. More generally, enforce the 2-word
6680 cap for passing arguments in registers.
6681
6682 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6683 vectors are passed like floats of the same size, that is in
6684 registers up to 8 bytes. Pass all vector floats by reference
6685 like structure and unions. */
6686 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6687 || mode == SCmode
6688 /* Catch CDImode, TFmode, DCmode and TCmode. */
6689 || GET_MODE_SIZE (mode) > 8
6690 || (type
6691 && VECTOR_TYPE_P (type)
6692 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6693 else
6694 /* Original SPARC 64-bit ABI says that structures and unions
6695 smaller than 16 bytes are passed in registers, as well as
6696 all other base types.
6697
6698 Extended ABI (as implemented by the Sun compiler) says that
6699 complex floats are passed in registers up to 16 bytes. Pass
6700 all complex integers in registers up to 16 bytes. More generally,
6701 enforce the 2-word cap for passing arguments in registers.
6702
6703 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6704 vectors are passed like floats of the same size, that is in
6705 registers (up to 16 bytes). Pass all vector floats like structure
6706 and unions. */
6707 return ((type
6708 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6709 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6710 /* Catch CTImode and TCmode. */
6711 || GET_MODE_SIZE (mode) > 16);
6712 }
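
/* Concretely: under the 32-bit ABI, structures, unions, vector floats,
   'long double' (TFmode) and '_Complex double' are all passed by reference,
   while 'double' and '_Complex int' still travel in registers; under the
   64-bit ABI only objects larger than 16 bytes, e.g. a 32-byte structure or
   '_Complex long double', are passed by reference.  */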
6713
6714 /* Traverse the record TYPE recursively and call FUNC on its fields.
6715 NAMED is true if this is for a named parameter. DATA is passed
6716 to FUNC for each field. OFFSET is the starting position and
6717 PACKED is true if we are inside a packed record. */
6718
6719 template <typename T, void Func (const_tree, int, bool, T*)>
6720 static void
6721 traverse_record_type (const_tree type, bool named, T *data,
6722 int offset = 0, bool packed = false)
6723 {
6724 /* The ABI obviously doesn't specify how packed structures are passed.
6725 These are passed in integer regs if possible, otherwise memory. */
6726 if (!packed)
6727 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6728 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6729 {
6730 packed = true;
6731 break;
6732 }
6733
6734 /* Walk the real fields, but skip those with no size or a zero size.
6735 ??? Fields with variable offset are handled as having zero offset. */
6736 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6737 if (TREE_CODE (field) == FIELD_DECL)
6738 {
6739 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6740 continue;
6741
6742 int bitpos = offset;
6743 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6744 bitpos += int_bit_position (field);
6745
6746 tree field_type = TREE_TYPE (field);
6747 if (TREE_CODE (field_type) == RECORD_TYPE)
6748 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6749 packed);
6750 else
6751 {
6752 const bool fp_type
6753 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6754 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6755 data);
6756 }
6757 }
6758 }
6759
6760 /* Handle recursive register classifying for structure layout. */
6761
6762 typedef struct
6763 {
6764 bool fp_regs; /* true if field eligible to FP registers. */
6765 bool fp_regs_in_first_word; /* true if such field in first word. */
6766 } classify_data_t;
6767
6768 /* A subroutine of function_arg_slotno. Classify the field. */
6769
6770 inline void
6771 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6772 {
6773 if (fp)
6774 {
6775 data->fp_regs = true;
6776 if (bitpos < BITS_PER_WORD)
6777 data->fp_regs_in_first_word = true;
6778 }
6779 }
6780
6781 /* Compute the slot number to pass an argument in.
6782 Return the slot number or -1 if passing on the stack.
6783
6784 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6785 the preceding args and about the function being called.
6786 MODE is the argument's machine mode.
6787 TYPE is the data type of the argument (as a tree).
6788 This is null for libcalls where that information may
6789 not be available.
6790 NAMED is nonzero if this argument is a named parameter
6791 (otherwise it is an extra parameter matching an ellipsis).
6792 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6793 *PREGNO records the register number to use if scalar type.
6794 *PPADDING records the amount of padding needed in words. */
6795
6796 static int
6797 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6798 const_tree type, bool named, bool incoming,
6799 int *pregno, int *ppadding)
6800 {
6801 const int regbase
6802 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6803 int slotno = cum->words, regno;
6804 enum mode_class mclass = GET_MODE_CLASS (mode);
6805
6806 /* Silence warnings in the callers. */
6807 *pregno = -1;
6808 *ppadding = -1;
6809
6810 if (type && TREE_ADDRESSABLE (type))
6811 return -1;
6812
6813 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6814 if (TARGET_ARCH64
6815 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6816 && (slotno & 1) != 0)
6817 {
6818 slotno++;
6819 *ppadding = 1;
6820 }
6821 else
6822 *ppadding = 0;
6823
6824 /* Vector types deserve special treatment because they are polymorphic wrt
6825 their mode, depending upon whether VIS instructions are enabled. */
6826 if (type && VECTOR_TYPE_P (type))
6827 {
6828 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6829 {
6830 /* The SPARC port defines no floating-point vector modes. */
6831 gcc_assert (mode == BLKmode);
6832 }
6833 else
6834 {
6835 /* Integer vector types should either have a vector
6836 mode or an integral mode, because we are guaranteed
6837 by pass_by_reference that their size is not greater
6838 than 16 bytes and TImode is 16-byte wide. */
6839 gcc_assert (mode != BLKmode);
6840
6841 /* Integer vectors are handled like floats as per
6842 the Sun VIS SDK. */
6843 mclass = MODE_FLOAT;
6844 }
6845 }
6846
6847 switch (mclass)
6848 {
6849 case MODE_FLOAT:
6850 case MODE_COMPLEX_FLOAT:
6851 case MODE_VECTOR_INT:
6852 if (TARGET_ARCH64 && TARGET_FPU && named)
6853 {
6854 /* If all arg slots are filled, then must pass on stack. */
6855 if (slotno >= SPARC_FP_ARG_MAX)
6856 return -1;
6857
6858 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6859 	  /* Arguments filling only a single FP register are
6860 right-justified in the outer double FP register. */
6861 if (GET_MODE_SIZE (mode) <= 4)
6862 regno++;
6863 break;
6864 }
6865 /* fallthrough */
6866
6867 case MODE_INT:
6868 case MODE_COMPLEX_INT:
6869 /* If all arg slots are filled, then must pass on stack. */
6870 if (slotno >= SPARC_INT_ARG_MAX)
6871 return -1;
6872
6873 regno = regbase + slotno;
6874 break;
6875
6876 case MODE_RANDOM:
6877 /* MODE is VOIDmode when generating the actual call. */
6878 if (mode == VOIDmode)
6879 return -1;
6880
6881 if (TARGET_64BIT && TARGET_FPU && named
6882 && type
6883 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6884 {
6885 /* If all arg slots are filled, then must pass on stack. */
6886 if (slotno >= SPARC_FP_ARG_MAX)
6887 return -1;
6888
6889 if (TREE_CODE (type) == RECORD_TYPE)
6890 {
6891 classify_data_t data = { false, false };
6892 traverse_record_type<classify_data_t, classify_registers>
6893 (type, named, &data);
6894
6895 if (data.fp_regs)
6896 {
6897 /* If all FP slots are filled except for the last one and
6898 there is no FP field in the first word, then must pass
6899 on stack. */
6900 if (slotno >= SPARC_FP_ARG_MAX - 1
6901 && !data.fp_regs_in_first_word)
6902 return -1;
6903 }
6904 else
6905 {
6906 /* If all int slots are filled, then must pass on stack. */
6907 if (slotno >= SPARC_INT_ARG_MAX)
6908 return -1;
6909 }
6910
6911 /* PREGNO isn't set since both int and FP regs can be used. */
6912 return slotno;
6913 }
6914
6915 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6916 }
6917 else
6918 {
6919 /* If all arg slots are filled, then must pass on stack. */
6920 if (slotno >= SPARC_INT_ARG_MAX)
6921 return -1;
6922
6923 regno = regbase + slotno;
6924 }
6925 break;
6926
6927 default :
6928 gcc_unreachable ();
6929 }
6930
6931 *pregno = regno;
6932 return slotno;
6933 }
6934
6935 /* Handle recursive register counting/assigning for structure layout. */
6936
6937 typedef struct
6938 {
6939 int slotno; /* slot number of the argument. */
6940 int regbase; /* regno of the base register. */
6941 int intoffset; /* offset of the first pending integer field. */
6942 int nregs; /* number of words passed in registers. */
6943 bool stack; /* true if part of the argument is on the stack. */
6944 rtx ret; /* return expression being built. */
6945 } assign_data_t;
6946
6947 /* A subroutine of function_arg_record_value. Compute the number of integer
6948 registers to be assigned between PARMS->intoffset and BITPOS. Return
6949 true if at least one integer register is assigned or false otherwise. */
6950
6951 static bool
6952 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
6953 {
6954 if (data->intoffset < 0)
6955 return false;
6956
6957 const int intoffset = data->intoffset;
6958 data->intoffset = -1;
6959
6960 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6961 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6962 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6963 int nregs = (endbit - startbit) / BITS_PER_WORD;
6964
6965 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6966 {
6967 nregs = SPARC_INT_ARG_MAX - this_slotno;
6968
6969 /* We need to pass this field (partly) on the stack. */
6970 data->stack = 1;
6971 }
6972
6973 if (nregs <= 0)
6974 return false;
6975
6976 *pnregs = nregs;
6977 return true;
6978 }
6979
6980 /* A subroutine of function_arg_record_value. Compute the number and the mode
6981 of the FP registers to be assigned for FIELD. Return true if at least one
6982 FP register is assigned or false otherwise. */
6983
6984 static bool
6985 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
6986 int *pnregs, machine_mode *pmode)
6987 {
6988 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6989 machine_mode mode = DECL_MODE (field);
6990 int nregs, nslots;
6991
6992 /* Slots are counted as words while regs are counted as having the size of
6993 the (inner) mode. */
6994 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
6995 {
6996 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6997 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6998 }
6999 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7000 {
7001 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7002 nregs = 2;
7003 }
7004 else
7005 nregs = 1;
7006
7007 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7008
7009 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7010 {
7011 nslots = SPARC_FP_ARG_MAX - this_slotno;
7012 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7013
7014 /* We need to pass this field (partly) on the stack. */
7015 data->stack = 1;
7016
7017 if (nregs <= 0)
7018 return false;
7019 }
7020
7021 *pnregs = nregs;
7022 *pmode = mode;
7023 return true;
7024 }
7025
7026 /* A subroutine of function_arg_record_value. Count the number of registers
7027 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7028
7029 inline void
7030 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7031 {
7032 if (fp)
7033 {
7034 int nregs;
7035 machine_mode mode;
7036
7037 if (compute_int_layout (bitpos, data, &nregs))
7038 data->nregs += nregs;
7039
7040 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7041 data->nregs += nregs;
7042 }
7043 else
7044 {
7045 if (data->intoffset < 0)
7046 data->intoffset = bitpos;
7047 }
7048 }
7049
7050 /* A subroutine of function_arg_record_value. Assign the bits of the
7051 structure between PARMS->intoffset and BITPOS to integer registers. */
7052
7053 static void
7054 assign_int_registers (int bitpos, assign_data_t *data)
7055 {
7056 int intoffset = data->intoffset;
7057 machine_mode mode;
7058 int nregs;
7059
7060 if (!compute_int_layout (bitpos, data, &nregs))
7061 return;
7062
7063 /* If this is the trailing part of a word, only load that much into
7064 the register. Otherwise load the whole register. Note that in
7065 the latter case we may pick up unwanted bits. It's not a problem
7066      at the moment, but we may wish to revisit this.  */
7067 if (intoffset % BITS_PER_WORD != 0)
7068 mode = smallest_int_mode_for_size (BITS_PER_WORD
7069 - intoffset % BITS_PER_WORD);
7070 else
7071 mode = word_mode;
7072
7073 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7074 unsigned int regno = data->regbase + this_slotno;
7075 intoffset /= BITS_PER_UNIT;
7076
7077 do
7078 {
7079 rtx reg = gen_rtx_REG (mode, regno);
7080 XVECEXP (data->ret, 0, data->stack + data->nregs)
7081 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7082 data->nregs += 1;
7083 mode = word_mode;
7084 regno += 1;
7085 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7086 }
7087 while (--nregs > 0);
7088 }
7089
7090 /* A subroutine of function_arg_record_value. Assign FIELD at position
7091 BITPOS to FP registers. */
7092
7093 static void
7094 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7095 {
7096 int nregs;
7097 machine_mode mode;
7098
7099 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7100 return;
7101
7102 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7103 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7104 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7105 regno++;
7106 int pos = bitpos / BITS_PER_UNIT;
7107
7108 do
7109 {
7110 rtx reg = gen_rtx_REG (mode, regno);
7111 XVECEXP (data->ret, 0, data->stack + data->nregs)
7112 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7113 data->nregs += 1;
7114 regno += GET_MODE_SIZE (mode) / 4;
7115 pos += GET_MODE_SIZE (mode);
7116 }
7117 while (--nregs > 0);
7118 }
7119
7120 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7121 the structure between PARMS->intoffset and BITPOS to registers. */
7122
7123 inline void
7124 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7125 {
7126 if (fp)
7127 {
7128 assign_int_registers (bitpos, data);
7129
7130 assign_fp_registers (field, bitpos, data);
7131 }
7132 else
7133 {
7134 if (data->intoffset < 0)
7135 data->intoffset = bitpos;
7136 }
7137 }
7138
7139 /* Used by function_arg and function_value to implement the complex
7140 conventions of the 64-bit ABI for passing and returning structures.
7141 Return an expression valid as a return value for the FUNCTION_ARG
7142 and TARGET_FUNCTION_VALUE.
7143
7144 TYPE is the data type of the argument (as a tree).
7145 This is null for libcalls where that information may
7146 not be available.
7147 MODE is the argument's machine mode.
7148 SLOTNO is the index number of the argument's slot in the parameter array.
7149 NAMED is true if this argument is a named parameter
7150 (otherwise it is an extra parameter matching an ellipsis).
7151 REGBASE is the regno of the base register for the parameter array. */
7152
7153 static rtx
7154 function_arg_record_value (const_tree type, machine_mode mode,
7155 int slotno, bool named, int regbase)
7156 {
7157 const int size = int_size_in_bytes (type);
7158 assign_data_t data;
7159 int nregs;
7160
7161 data.slotno = slotno;
7162 data.regbase = regbase;
7163
7164 /* Count how many registers we need. */
7165 data.nregs = 0;
7166 data.intoffset = 0;
7167 data.stack = false;
7168 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7169
7170 /* Take into account pending integer fields. */
7171 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7172 data.nregs += nregs;
7173
7174 /* Allocate the vector and handle some annoying special cases. */
7175 nregs = data.nregs;
7176
7177 if (nregs == 0)
7178 {
7179 /* ??? Empty structure has no value? Duh? */
7180 if (size <= 0)
7181 {
7182 /* Though there's nothing really to store, return a word register
7183 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7184 leads to breakage due to the fact that there are zero bytes to
7185 load. */
7186 return gen_rtx_REG (mode, regbase);
7187 }
7188
7189 /* ??? C++ has structures with no fields, and yet a size. Give up
7190 for now and pass everything back in integer registers. */
7191 nregs = CEIL_NWORDS (size);
7192 if (nregs + slotno > SPARC_INT_ARG_MAX)
7193 nregs = SPARC_INT_ARG_MAX - slotno;
7194 }
7195
7196 gcc_assert (nregs > 0);
7197
7198 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7199
7200 /* If at least one field must be passed on the stack, generate
7201 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7202 also be passed on the stack. We can't do much better because the
7203 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7204 of structures for which the fields passed exclusively in registers
7205 are not at the beginning of the structure. */
7206 if (data.stack)
7207 XVECEXP (data.ret, 0, 0)
7208 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7209
7210 /* Assign the registers. */
7211 data.nregs = 0;
7212 data.intoffset = 0;
7213 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7214
7215 /* Assign pending integer fields. */
7216 assign_int_registers (size * BITS_PER_UNIT, &data);
7217
7218 gcc_assert (data.nregs == nregs);
7219
7220 return data.ret;
7221 }
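
/* For instance, a named argument of type struct { float x; float y; }
   assigned to slot 0 would yield something along the lines of

     (parallel [(expr_list (reg:SF %f0) (const_int 0))
		(expr_list (reg:SF %f1) (const_int 4))])

   i.e. each float field lands in its own FP register, tagged with its byte
   offset within the structure.  */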
7222
7223 /* Used by function_arg and function_value to implement the conventions
7224 of the 64-bit ABI for passing and returning unions.
7225 Return an expression valid as a return value for the FUNCTION_ARG
7226 and TARGET_FUNCTION_VALUE.
7227
7228 SIZE is the size in bytes of the union.
7229 MODE is the argument's machine mode.
7230 SLOTNO is the index number of the argument's slot in the parameter array.
7231 REGNO is the hard register the union will be passed in. */
7232
7233 static rtx
7234 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7235 {
7236 unsigned int nwords;
7237
7238 /* See comment in function_arg_record_value for empty structures. */
7239 if (size <= 0)
7240 return gen_rtx_REG (mode, regno);
7241
7242 if (slotno == SPARC_INT_ARG_MAX - 1)
7243 nwords = 1;
7244 else
7245 nwords = CEIL_NWORDS (size);
7246
7247 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7248
7249 /* Unions are passed left-justified. */
7250 for (unsigned int i = 0; i < nwords; i++)
7251 XVECEXP (regs, 0, i)
7252 = gen_rtx_EXPR_LIST (VOIDmode,
7253 gen_rtx_REG (word_mode, regno + i),
7254 GEN_INT (UNITS_PER_WORD * i));
7255
7256 return regs;
7257 }
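
/* As an illustration of the layout built above, a 12-byte union passed in
   slot 0 of the outgoing parameter array (so REGNO is %o0) needs
   CEIL_NWORDS (12) = 2 words and comes out as

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])

   i.e. the first eight bytes in %o0 and the rest, left-justified, in %o1.  */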
7258
7259 /* Used by function_arg and function_value to implement the conventions
7260 of the 64-bit ABI for passing and returning BLKmode vectors.
7261 Return an expression valid as a return value for the FUNCTION_ARG
7262 and TARGET_FUNCTION_VALUE.
7263
7264 SIZE is the size in bytes of the vector.
7265 SLOTNO is the index number of the argument's slot in the parameter array.
7266 NAMED is true if this argument is a named parameter
7267 (otherwise it is an extra parameter matching an ellipsis).
7268 REGNO is the hard register the vector will be passed in. */
7269
7270 static rtx
7271 function_arg_vector_value (int size, int slotno, bool named, int regno)
7272 {
7273 const int mult = (named ? 2 : 1);
7274 unsigned int nwords;
7275
7276 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7277 nwords = 1;
7278 else
7279 nwords = CEIL_NWORDS (size);
7280
7281 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7282
7283 if (size < UNITS_PER_WORD)
7284 XVECEXP (regs, 0, 0)
7285 = gen_rtx_EXPR_LIST (VOIDmode,
7286 gen_rtx_REG (SImode, regno),
7287 const0_rtx);
7288 else
7289 for (unsigned int i = 0; i < nwords; i++)
7290 XVECEXP (regs, 0, i)
7291 = gen_rtx_EXPR_LIST (VOIDmode,
7292 gen_rtx_REG (word_mode, regno + i * mult),
7293 GEN_INT (i * UNITS_PER_WORD));
7294
7295 return regs;
7296 }
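
/* For example, a named 16-byte vector argument yields two word-sized
   pieces whose register numbers step by MULT = 2, so that each eight-byte
   piece starts on an even (double-precision) FP register:

     (parallel [(expr_list (reg:DI <regno>)   (const_int 0))
                (expr_list (reg:DI <regno+2>) (const_int 8))])

   An unnamed vector uses MULT = 1 and consecutive integer registers, and a
   vector smaller than a word is passed as a single SImode piece.  */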
7297
7298 /* Determine where to put an argument to a function.
7299 Value is zero to push the argument on the stack,
7300 or a hard register in which to store the argument.
7301
7302 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7303 the preceding args and about the function being called.
7304 ARG is a description of the argument.
7305 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7306 TARGET_FUNCTION_INCOMING_ARG. */
7307
7308 static rtx
7309 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7310 bool incoming)
7311 {
7312 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7313 const int regbase
7314 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7315 int slotno, regno, padding;
7316 tree type = arg.type;
7317 machine_mode mode = arg.mode;
7318 enum mode_class mclass = GET_MODE_CLASS (mode);
7319 bool named = arg.named;
7320
7321 slotno
7322 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7323 if (slotno == -1)
7324 return 0;
7325
7326 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7327 if (type && VECTOR_INTEGER_TYPE_P (type))
7328 mclass = MODE_FLOAT;
7329
7330 if (TARGET_ARCH32)
7331 return gen_rtx_REG (mode, regno);
7332
7333 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7334 and are promoted to registers if possible. */
7335 if (type && TREE_CODE (type) == RECORD_TYPE)
7336 {
7337 const int size = int_size_in_bytes (type);
7338 gcc_assert (size <= 16);
7339
7340 return function_arg_record_value (type, mode, slotno, named, regbase);
7341 }
7342
7343 /* Unions up to 16 bytes in size are passed in integer registers. */
7344 else if (type && TREE_CODE (type) == UNION_TYPE)
7345 {
7346 const int size = int_size_in_bytes (type);
7347 gcc_assert (size <= 16);
7348
7349 return function_arg_union_value (size, mode, slotno, regno);
7350 }
7351
7352 /* Floating-point vectors up to 16 bytes are passed in registers. */
7353 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7354 {
7355 const int size = int_size_in_bytes (type);
7356 gcc_assert (size <= 16);
7357
7358 return function_arg_vector_value (size, slotno, named, regno);
7359 }
7360
7361 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7362 but also have the slot allocated for them.
7363 If no prototype is in scope fp values in register slots get passed
7364 in two places, either fp regs and int regs or fp regs and memory. */
7365 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7366 && SPARC_FP_REG_P (regno))
7367 {
7368 rtx reg = gen_rtx_REG (mode, regno);
7369 if (cum->prototype_p || cum->libcall_p)
7370 return reg;
7371 else
7372 {
7373 rtx v0, v1;
7374
7375 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7376 {
7377 int intreg;
7378
7379 /* On incoming, we don't need to know that the value
7380 is passed in %f0 and %i0, and it confuses other parts
7381 causing needless spillage even on the simplest cases. */
7382 if (incoming)
7383 return reg;
7384
7385 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7386 + (regno - SPARC_FP_ARG_FIRST) / 2);
7387
7388 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7389 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7390 const0_rtx);
7391 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7392 }
7393 else
7394 {
7395 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7396 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7397 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7398 }
7399 }
7400 }
7401
7402 /* All other aggregate types are passed in an integer register in a mode
7403 corresponding to the size of the type. */
7404 else if (type && AGGREGATE_TYPE_P (type))
7405 {
7406 const int size = int_size_in_bytes (type);
7407 gcc_assert (size <= 16);
7408
7409 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7410 }
7411
7412 return gen_rtx_REG (mode, regno);
7413 }
7414
7415 /* Handle the TARGET_FUNCTION_ARG target hook. */
7416
7417 static rtx
7418 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7419 {
7420 return sparc_function_arg_1 (cum, arg, false);
7421 }
7422
7423 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7424
7425 static rtx
7426 sparc_function_incoming_arg (cumulative_args_t cum,
7427 const function_arg_info &arg)
7428 {
7429 return sparc_function_arg_1 (cum, arg, true);
7430 }
7431
7432 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7433
7434 static unsigned int
7435 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7436 {
7437 return ((TARGET_ARCH64
7438 && (GET_MODE_ALIGNMENT (mode) == 128
7439 || (type && TYPE_ALIGN (type) == 128)))
7440 ? 128
7441 : PARM_BOUNDARY);
7442 }
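
/* For instance, on TARGET_ARCH64 a long double argument (TFmode, 128-bit
   alignment) or a type declared with 16-byte alignment starts on a 128-bit
   boundary, i.e. an even-numbered argument slot; all other arguments use
   the default PARM_BOUNDARY.  */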
7443
7444 /* For an arg passed partly in registers and partly in memory,
7445 this is the number of bytes of registers used.
7446 For args passed entirely in registers or entirely in memory, zero.
7447
7448 Any arg that starts in the first 6 regs but won't entirely fit in them
7449 needs partial registers on v8. On v9, structures with integer
7450 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7451 values that begin in the last fp reg [where "last fp reg" varies with the
7452 mode] will be split between that reg and memory. */
7453
7454 static int
7455 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7456 {
7457 int slotno, regno, padding;
7458
7459 /* We pass false for incoming here; it doesn't matter. */
7460 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7461 arg.named, false, &regno, &padding);
7462
7463 if (slotno == -1)
7464 return 0;
7465
7466 if (TARGET_ARCH32)
7467 {
7468 /* We are guaranteed by pass_by_reference that the size of the
7469 argument is not greater than 8 bytes, so we only need to return
7470 one word if the argument is partially passed in registers. */
7471 const int size = GET_MODE_SIZE (arg.mode);
7472
7473 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7474 return UNITS_PER_WORD;
7475 }
7476 else
7477 {
7478 /* We are guaranteed by pass_by_reference that the size of the
7479 argument is not greater than 16 bytes, so we only need to return
7480 one word if the argument is partially passed in registers. */
7481 if (arg.aggregate_type_p ())
7482 {
7483 const int size = int_size_in_bytes (arg.type);
7484
7485 if (size > UNITS_PER_WORD
7486 && (slotno == SPARC_INT_ARG_MAX - 1
7487 || slotno == SPARC_FP_ARG_MAX - 1))
7488 return UNITS_PER_WORD;
7489 }
7490 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7491 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7492 || (arg.type && VECTOR_TYPE_P (arg.type)))
7493 && !(TARGET_FPU && arg.named)))
7494 {
7495 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7496 ? int_size_in_bytes (arg.type)
7497 : GET_MODE_SIZE (arg.mode);
7498
7499 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7500 return UNITS_PER_WORD;
7501 }
7502 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7503 || (arg.type && VECTOR_TYPE_P (arg.type)))
7504 {
7505 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7506 ? int_size_in_bytes (arg.type)
7507 : GET_MODE_SIZE (arg.mode);
7508
7509 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7510 return UNITS_PER_WORD;
7511 }
7512 }
7513
7514 return 0;
7515 }
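
/* Example: in the 32-bit ABI an 8-byte DImode argument assigned to slot
   SPARC_INT_ARG_MAX - 1, the last of the six register slots, has its first
   word passed in %o5 and its second word on the stack, so the hook reports
   UNITS_PER_WORD (4) bytes passed in registers.  */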
7516
7517 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7518 Update the data in CUM to advance over argument ARG. */
7519
7520 static void
7521 sparc_function_arg_advance (cumulative_args_t cum_v,
7522 const function_arg_info &arg)
7523 {
7524 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7525 tree type = arg.type;
7526 machine_mode mode = arg.mode;
7527 int regno, padding;
7528
7529 /* We pass false for incoming here; it doesn't matter. */
7530 function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7531
7532 /* If argument requires leading padding, add it. */
7533 cum->words += padding;
7534
7535 if (TARGET_ARCH32)
7536 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7537 else
7538 {
7539 /* For types that can have BLKmode, get the size from the type. */
7540 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7541 {
7542 const int size = int_size_in_bytes (type);
7543
7544 /* See comment in function_arg_record_value for empty structures. */
7545 if (size <= 0)
7546 cum->words++;
7547 else
7548 cum->words += CEIL_NWORDS (size);
7549 }
7550 else
7551 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7552 }
7553 }
7554
7555 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7556 are always stored left shifted in their argument slot. */
7557
7558 static pad_direction
7559 sparc_function_arg_padding (machine_mode mode, const_tree type)
7560 {
7561 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7562 return PAD_UPWARD;
7563
7564 /* Fall back to the default. */
7565 return default_function_arg_padding (mode, type);
7566 }
7567
7568 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7569 Specify whether to return the return value in memory. */
7570
7571 static bool
7572 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7573 {
7574 if (TARGET_ARCH32)
7575 /* Original SPARC 32-bit ABI says that structures and unions, and
7576 quad-precision floats are returned in memory. But note that the
7577 first part is implemented through -fpcc-struct-return being the
7578 default, so here we only implement -freg-struct-return instead.
7579 All other base types are returned in registers.
7580
7581 Extended ABI (as implemented by the Sun compiler) says that
7582 all complex floats are returned in registers (8 FP registers
7583 at most for '_Complex long double'). Return all complex integers
7584 in registers (4 at most for '_Complex long long').
7585
7586 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7587 integers are returned like floats of the same size, that is in
7588 registers up to 8 bytes and in memory otherwise. Return all
7589 vector floats in memory like structure and unions; note that
7590 they always have BLKmode like the latter. */
7591 return (TYPE_MODE (type) == BLKmode
7592 || TYPE_MODE (type) == TFmode
7593 || (TREE_CODE (type) == VECTOR_TYPE
7594 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7595 else
7596 /* Original SPARC 64-bit ABI says that structures and unions
7597 smaller than 32 bytes are returned in registers, as well as
7598 all other base types.
7599
7600 Extended ABI (as implemented by the Sun compiler) says that all
7601 complex floats are returned in registers (8 FP registers at most
7602 for '_Complex long double'). Return all complex integers in
7603 registers (4 at most for '_Complex TItype').
7604
7605 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7606 integers are returned like floats of the same size, that is in
7607 registers. Return all vector floats like structure and unions;
7608 note that they always have BLKmode like the latter. */
7609 return (TYPE_MODE (type) == BLKmode
7610 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7611 }
7612
7613 /* Handle the TARGET_STRUCT_VALUE target hook.
7614 Return where to find the structure return value address. */
7615
7616 static rtx
7617 sparc_struct_value_rtx (tree fndecl, int incoming)
7618 {
7619 if (TARGET_ARCH64)
7620 return NULL_RTX;
7621 else
7622 {
7623 rtx mem;
7624
7625 if (incoming)
7626 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7627 STRUCT_VALUE_OFFSET));
7628 else
7629 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7630 STRUCT_VALUE_OFFSET));
7631
7632 /* Only follow the SPARC ABI for fixed-size structure returns.
7633 Variable size structure returns are handled per the normal
7634 procedures in GCC. This is enabled by -mstd-struct-return. */
7635 if (incoming == 2
7636 && sparc_std_struct_return
7637 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7638 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7639 {
7640 /* We must check and adjust the return address, as it is optional
7641 as to whether the return object is really provided. */
7642 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7643 rtx scratch = gen_reg_rtx (SImode);
7644 rtx_code_label *endlab = gen_label_rtx ();
7645
7646 /* Calculate the return object size. */
7647 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7648 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7649 /* Construct a temporary return value. */
7650 rtx temp_val
7651 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7652
7653 /* Implement SPARC 32-bit psABI callee return struct checking:
7654
7655 Fetch the instruction where we will return to and see if
7656 it's an unimp instruction (the most significant 10 bits
7657 will be zero). */
7658 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7659 plus_constant (Pmode,
7660 ret_reg, 8)));
7661 /* Assume the size is valid and pre-adjust. */
7662 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7663 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7664 0, endlab);
7665 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7666 /* Write the address of the memory pointed to by temp_val into
7667 the memory pointed to by mem. */
7668 emit_move_insn (mem, XEXP (temp_val, 0));
7669 emit_label (endlab);
7670 }
7671
7672 return mem;
7673 }
7674 }
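
/* The check above follows the 32-bit psABI convention for struct returns:
   a caller that really provides a return object places an "unimp <size>"
   instruction right after the delay slot of the call, roughly

     call   foo
      nop
     unimp  8          ! for an 8-byte return object

   The callee loads the word at %o7 + 8; since an unimp has zero upper bits,
   that word equals the encoded size.  If it matches, the pre-adjusted
   return address (+4) stands and the unimp is skipped on return; if not,
   the adjustment is undone and the address of a scratch temporary is
   substituted for the missing return object.  */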
7675
7676 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7677 For v9, function return values are subject to the same rules as arguments,
7678 except that up to 32 bytes may be returned in registers. */
7679
7680 static rtx
7681 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7682 {
7683 /* Beware that the two values are swapped here wrt function_arg. */
7684 const int regbase
7685 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7686 enum mode_class mclass = GET_MODE_CLASS (mode);
7687 int regno;
7688
7689 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7690 Note that integer vectors larger than 16 bytes have BLKmode so
7691 they need to be handled like floating-point vectors below. */
7692 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7693 mclass = MODE_FLOAT;
7694
7695 if (TARGET_ARCH64 && type)
7696 {
7697 /* Structures up to 32 bytes in size are returned in registers. */
7698 if (TREE_CODE (type) == RECORD_TYPE)
7699 {
7700 const int size = int_size_in_bytes (type);
7701 gcc_assert (size <= 32);
7702
7703 return function_arg_record_value (type, mode, 0, true, regbase);
7704 }
7705
7706 /* Unions up to 32 bytes in size are returned in integer registers. */
7707 else if (TREE_CODE (type) == UNION_TYPE)
7708 {
7709 const int size = int_size_in_bytes (type);
7710 gcc_assert (size <= 32);
7711
7712 return function_arg_union_value (size, mode, 0, regbase);
7713 }
7714
7715 /* Vectors up to 32 bytes are returned in FP registers. */
7716 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7717 {
7718 const int size = int_size_in_bytes (type);
7719 gcc_assert (size <= 32);
7720
7721 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7722 }
7723
7724 /* Objects that require it are returned in FP registers. */
7725 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7726 ;
7727
7728 /* All other aggregate types are returned in an integer register in a
7729 mode corresponding to the size of the type. */
7730 else if (AGGREGATE_TYPE_P (type))
7731 {
7732 /* All other aggregate types are passed in an integer register
7733 in a mode corresponding to the size of the type. */
7734 const int size = int_size_in_bytes (type);
7735 gcc_assert (size <= 32);
7736
7737 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7738
7739 /* ??? We probably should have made the same ABI change in
7740 3.4.0 as the one we made for unions. The latter was
7741 required by the SCD though, while the former is not
7742 specified, so we favored compatibility and efficiency.
7743
7744 Now we're stuck for aggregates larger than 16 bytes,
7745 because OImode vanished in the meantime. Let's not
7746 try to be unduly clever, and simply follow the ABI
7747 for unions in that case. */
7748 if (mode == BLKmode)
7749 return function_arg_union_value (size, mode, 0, regbase);
7750 else
7751 mclass = MODE_INT;
7752 }
7753
7754 /* We should only have pointer and integer types at this point. This
7755 must match sparc_promote_function_mode. */
7756 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7757 mode = word_mode;
7758 }
7759
7760 /* We should only have pointer and integer types at this point, except with
7761 -freg-struct-return. This must match sparc_promote_function_mode. */
7762 else if (TARGET_ARCH32
7763 && !(type && AGGREGATE_TYPE_P (type))
7764 && mclass == MODE_INT
7765 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7766 mode = word_mode;
7767
7768 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7769 regno = SPARC_FP_ARG_FIRST;
7770 else
7771 regno = regbase;
7772
7773 return gen_rtx_REG (mode, regno);
7774 }
7775
7776 /* Handle TARGET_FUNCTION_VALUE.
7777 On the SPARC, the value is found in the first "output" register, but the
7778 called function leaves it in the first "input" register. */
7779
7780 static rtx
7781 sparc_function_value (const_tree valtype,
7782 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7783 bool outgoing)
7784 {
7785 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7786 }
7787
7788 /* Handle TARGET_LIBCALL_VALUE. */
7789
7790 static rtx
7791 sparc_libcall_value (machine_mode mode,
7792 const_rtx fun ATTRIBUTE_UNUSED)
7793 {
7794 return sparc_function_value_1 (NULL_TREE, mode, false);
7795 }
7796
7797 /* Handle FUNCTION_VALUE_REGNO_P.
7798 On the SPARC, the first "output" reg is used for integer values, and the
7799 first floating point register is used for floating point values. */
7800
7801 static bool
7802 sparc_function_value_regno_p (const unsigned int regno)
7803 {
7804 return (regno == 8 || (TARGET_FPU && regno == 32));
7805 }
7806
7807 /* Do what is necessary for `va_start'. We look at the current function
7808 to determine if stdarg or varargs is used and return the address of
7809 the first unnamed parameter. */
7810
7811 static rtx
7812 sparc_builtin_saveregs (void)
7813 {
7814 int first_reg = crtl->args.info.words;
7815 rtx address;
7816 int regno;
7817
7818 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7819 emit_move_insn (gen_rtx_MEM (word_mode,
7820 gen_rtx_PLUS (Pmode,
7821 frame_pointer_rtx,
7822 GEN_INT (FIRST_PARM_OFFSET (0)
7823 + (UNITS_PER_WORD
7824 * regno)))),
7825 gen_rtx_REG (word_mode,
7826 SPARC_INCOMING_INT_ARG_FIRST + regno));
7827
7828 address = gen_rtx_PLUS (Pmode,
7829 frame_pointer_rtx,
7830 GEN_INT (FIRST_PARM_OFFSET (0)
7831 + UNITS_PER_WORD * first_reg));
7832
7833 return address;
7834 }
7835
7836 /* Implement `va_start' for stdarg. */
7837
7838 static void
7839 sparc_va_start (tree valist, rtx nextarg)
7840 {
7841 nextarg = expand_builtin_saveregs ();
7842 std_expand_builtin_va_start (valist, nextarg);
7843 }
7844
7845 /* Implement `va_arg' for stdarg. */
7846
7847 static tree
7848 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7849 gimple_seq *post_p)
7850 {
7851 HOST_WIDE_INT size, rsize, align;
7852 tree addr, incr;
7853 bool indirect;
7854 tree ptrtype = build_pointer_type (type);
7855
7856 if (pass_va_arg_by_reference (type))
7857 {
7858 indirect = true;
7859 size = rsize = UNITS_PER_WORD;
7860 align = 0;
7861 }
7862 else
7863 {
7864 indirect = false;
7865 size = int_size_in_bytes (type);
7866 rsize = ROUND_UP (size, UNITS_PER_WORD);
7867 align = 0;
7868
7869 if (TARGET_ARCH64)
7870 {
7871 /* For SPARC64, objects requiring 16-byte alignment get it. */
7872 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7873 align = 2 * UNITS_PER_WORD;
7874
7875 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7876 are left-justified in their slots. */
7877 if (AGGREGATE_TYPE_P (type))
7878 {
7879 if (size == 0)
7880 size = rsize = UNITS_PER_WORD;
7881 else
7882 size = rsize;
7883 }
7884 }
7885 }
7886
7887 incr = valist;
7888 if (align)
7889 {
7890 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7891 incr = fold_convert (sizetype, incr);
7892 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7893 size_int (-align));
7894 incr = fold_convert (ptr_type_node, incr);
7895 }
7896
7897 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7898 addr = incr;
7899
7900 if (BYTES_BIG_ENDIAN && size < rsize)
7901 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7902
7903 if (indirect)
7904 {
7905 addr = fold_convert (build_pointer_type (ptrtype), addr);
7906 addr = build_va_arg_indirect_ref (addr);
7907 }
7908
7909 /* If the address isn't aligned properly for the type, we need a temporary.
7910 FIXME: This is inefficient, usually we can do this in registers. */
7911 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7912 {
7913 tree tmp = create_tmp_var (type, "va_arg_tmp");
7914 tree dest_addr = build_fold_addr_expr (tmp);
7915 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7916 3, dest_addr, addr, size_int (rsize));
7917 TREE_ADDRESSABLE (tmp) = 1;
7918 gimplify_and_add (copy, pre_p);
7919 addr = dest_addr;
7920 }
7921
7922 else
7923 addr = fold_convert (ptrtype, addr);
7924
7925 incr = fold_build_pointer_plus_hwi (incr, rsize);
7926 gimplify_assign (valist, incr, post_p);
7927
7928 return build_va_arg_indirect_ref (addr);
7929 }
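
/* Worked example of the big-endian adjustment above: on TARGET_ARCH64 an
   'int' has SIZE = 4 but occupies an RSIZE = 8 byte slot; scalars are
   right-justified in their slot, so the read address is advanced by
   RSIZE - SIZE = 4 bytes before dereferencing, while the va_list pointer
   itself still advances by the full 8 bytes.  */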
7930 \f
7931 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7932 Specify whether the vector mode is supported by the hardware. */
7933
7934 static bool
7935 sparc_vector_mode_supported_p (machine_mode mode)
7936 {
7937 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7938 }
7939 \f
7940 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7941
7942 static machine_mode
7943 sparc_preferred_simd_mode (scalar_mode mode)
7944 {
7945 if (TARGET_VIS)
7946 switch (mode)
7947 {
7948 case E_SImode:
7949 return V2SImode;
7950 case E_HImode:
7951 return V4HImode;
7952 case E_QImode:
7953 return V8QImode;
7954
7955 default:;
7956 }
7957
7958 return word_mode;
7959 }
7960 \f
7961 /* Implement TARGET_CAN_FOLLOW_JUMP. */
7962
7963 static bool
7964 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
7965 {
7966 /* Do not fold unconditional jumps that have been created for crossing
7967 partition boundaries. */
7968 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
7969 return false;
7970
7971 return true;
7972 }
7973
7974 /* Return the string to output an unconditional branch to LABEL, which is
7975 the operand number of the label.
7976
7977 DEST is the destination insn (i.e. the label), INSN is the source. */
7978
7979 const char *
7980 output_ubranch (rtx dest, rtx_insn *insn)
7981 {
7982 static char string[64];
7983 bool v9_form = false;
7984 int delta;
7985 char *p;
7986
7987 /* Even if we are trying to use cbcond for this, evaluate
7988 whether we can use V9 branches as our backup plan. */
7989 delta = 5000000;
7990 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
7991 delta = (INSN_ADDRESSES (INSN_UID (dest))
7992 - INSN_ADDRESSES (INSN_UID (insn)));
7993
7994 /* Leave some instructions for "slop". */
7995 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7996 v9_form = true;
7997
7998 if (TARGET_CBCOND)
7999 {
8000 bool emit_nop = emit_cbcond_nop (insn);
8001 bool far = false;
8002 const char *rval;
8003
8004 if (delta < -500 || delta > 500)
8005 far = true;
8006
8007 if (far)
8008 {
8009 if (v9_form)
8010 rval = "ba,a,pt\t%%xcc, %l0";
8011 else
8012 rval = "b,a\t%l0";
8013 }
8014 else
8015 {
8016 if (emit_nop)
8017 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8018 else
8019 rval = "cwbe\t%%g0, %%g0, %l0";
8020 }
8021 return rval;
8022 }
8023
8024 if (v9_form)
8025 strcpy (string, "ba%*,pt\t%%xcc, ");
8026 else
8027 strcpy (string, "b%*\t");
8028
8029 p = strchr (string, '\0');
8030 *p++ = '%';
8031 *p++ = 'l';
8032 *p++ = '0';
8033 *p++ = '%';
8034 *p++ = '(';
8035 *p = '\0';
8036
8037 return string;
8038 }
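
/* The templates built above expand roughly as follows for a label .L4:

     b        .L4             ! pre-V9 form, also the far fallback
     ba,pt    %xcc, .L4       ! V9 form, used when the target is in range
     cwbe     %g0, %g0, .L4   ! cbcond form, target within +-500 bytes here

   The %* and %( punctuation codes (see sparc_print_operand) append the
   ",a" annul flag or a trailing nop depending on whether the delay slot
   could be filled.  */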
8039
8040 /* Return the string to output a conditional branch to LABEL, which is
8041 the operand number of the label. OP is the conditional expression.
8042 XEXP (OP, 0) is assumed to be a condition code register (integer or
8043 floating point) and its mode specifies what kind of comparison we made.
8044
8045 DEST is the destination insn (i.e. the label), INSN is the source.
8046
8047 REVERSED is nonzero if we should reverse the sense of the comparison.
8048
8049 ANNUL is nonzero if we should generate an annulling branch. */
8050
8051 const char *
8052 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8053 rtx_insn *insn)
8054 {
8055 static char string[64];
8056 enum rtx_code code = GET_CODE (op);
8057 rtx cc_reg = XEXP (op, 0);
8058 machine_mode mode = GET_MODE (cc_reg);
8059 const char *labelno, *branch;
8060 int spaces = 8, far;
8061 char *p;
8062
8063 /* v9 branches are limited to +-1MB. If it is too far away,
8064 change
8065
8066 bne,pt %xcc, .LC30
8067
8068 to
8069
8070 be,pn %xcc, .+12
8071 nop
8072 ba .LC30
8073
8074 and
8075
8076 fbne,a,pn %fcc2, .LC29
8077
8078 to
8079
8080 fbe,pt %fcc2, .+16
8081 nop
8082 ba .LC29 */
8083
8084 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8085 if (reversed ^ far)
8086 {
8087 /* Reversal of FP compares takes care -- an ordered compare
8088 becomes an unordered compare and vice versa. */
8089 if (mode == CCFPmode || mode == CCFPEmode)
8090 code = reverse_condition_maybe_unordered (code);
8091 else
8092 code = reverse_condition (code);
8093 }
8094
8095 /* Start by writing the branch condition. */
8096 if (mode == CCFPmode || mode == CCFPEmode)
8097 {
8098 switch (code)
8099 {
8100 case NE:
8101 branch = "fbne";
8102 break;
8103 case EQ:
8104 branch = "fbe";
8105 break;
8106 case GE:
8107 branch = "fbge";
8108 break;
8109 case GT:
8110 branch = "fbg";
8111 break;
8112 case LE:
8113 branch = "fble";
8114 break;
8115 case LT:
8116 branch = "fbl";
8117 break;
8118 case UNORDERED:
8119 branch = "fbu";
8120 break;
8121 case ORDERED:
8122 branch = "fbo";
8123 break;
8124 case UNGT:
8125 branch = "fbug";
8126 break;
8127 case UNLT:
8128 branch = "fbul";
8129 break;
8130 case UNEQ:
8131 branch = "fbue";
8132 break;
8133 case UNGE:
8134 branch = "fbuge";
8135 break;
8136 case UNLE:
8137 branch = "fbule";
8138 break;
8139 case LTGT:
8140 branch = "fblg";
8141 break;
8142 default:
8143 gcc_unreachable ();
8144 }
8145
8146 /* ??? !v9: FP branches cannot be preceded by another floating point
8147 insn. Because there is currently no concept of pre-delay slots,
8148 we can fix this only by always emitting a nop before a floating
8149 point branch. */
8150
8151 string[0] = '\0';
8152 if (! TARGET_V9)
8153 strcpy (string, "nop\n\t");
8154 strcat (string, branch);
8155 }
8156 else
8157 {
8158 switch (code)
8159 {
8160 case NE:
8161 if (mode == CCVmode || mode == CCXVmode)
8162 branch = "bvs";
8163 else
8164 branch = "bne";
8165 break;
8166 case EQ:
8167 if (mode == CCVmode || mode == CCXVmode)
8168 branch = "bvc";
8169 else
8170 branch = "be";
8171 break;
8172 case GE:
8173 if (mode == CCNZmode || mode == CCXNZmode)
8174 branch = "bpos";
8175 else
8176 branch = "bge";
8177 break;
8178 case GT:
8179 branch = "bg";
8180 break;
8181 case LE:
8182 branch = "ble";
8183 break;
8184 case LT:
8185 if (mode == CCNZmode || mode == CCXNZmode)
8186 branch = "bneg";
8187 else
8188 branch = "bl";
8189 break;
8190 case GEU:
8191 branch = "bgeu";
8192 break;
8193 case GTU:
8194 branch = "bgu";
8195 break;
8196 case LEU:
8197 branch = "bleu";
8198 break;
8199 case LTU:
8200 branch = "blu";
8201 break;
8202 default:
8203 gcc_unreachable ();
8204 }
8205 strcpy (string, branch);
8206 }
8207 spaces -= strlen (branch);
8208 p = strchr (string, '\0');
8209
8210 /* Now add the annulling, the label, and a possible noop. */
8211 if (annul && ! far)
8212 {
8213 strcpy (p, ",a");
8214 p += 2;
8215 spaces -= 2;
8216 }
8217
8218 if (TARGET_V9)
8219 {
8220 rtx note;
8221 int v8 = 0;
8222
8223 if (! far && insn && INSN_ADDRESSES_SET_P ())
8224 {
8225 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8226 - INSN_ADDRESSES (INSN_UID (insn)));
8227 /* Leave some instructions for "slop". */
8228 if (delta < -260000 || delta >= 260000)
8229 v8 = 1;
8230 }
8231
8232 switch (mode)
8233 {
8234 case E_CCmode:
8235 case E_CCNZmode:
8236 case E_CCCmode:
8237 case E_CCVmode:
8238 labelno = "%%icc, ";
8239 if (v8)
8240 labelno = "";
8241 break;
8242 case E_CCXmode:
8243 case E_CCXNZmode:
8244 case E_CCXCmode:
8245 case E_CCXVmode:
8246 labelno = "%%xcc, ";
8247 gcc_assert (!v8);
8248 break;
8249 case E_CCFPmode:
8250 case E_CCFPEmode:
8251 {
8252 static char v9_fcc_labelno[] = "%%fccX, ";
8253 /* Set the char indicating the number of the fcc reg to use. */
8254 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8255 labelno = v9_fcc_labelno;
8256 if (v8)
8257 {
8258 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8259 labelno = "";
8260 }
8261 }
8262 break;
8263 default:
8264 gcc_unreachable ();
8265 }
8266
8267 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8268 {
8269 strcpy (p,
8270 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8271 >= profile_probability::even ()) ^ far)
8272 ? ",pt" : ",pn");
8273 p += 3;
8274 spaces -= 3;
8275 }
8276 }
8277 else
8278 labelno = "";
8279
8280 if (spaces > 0)
8281 *p++ = '\t';
8282 else
8283 *p++ = ' ';
8284 strcpy (p, labelno);
8285 p = strchr (p, '\0');
8286 if (far)
8287 {
8288 strcpy (p, ".+12\n\t nop\n\tb\t");
8289 /* Skip the next insn if requested or
8290 if we know that it will be a nop. */
8291 if (annul || ! final_sequence)
8292 p[3] = '6';
8293 p += 14;
8294 }
8295 *p++ = '%';
8296 *p++ = 'l';
8297 *p++ = label + '0';
8298 *p++ = '%';
8299 *p++ = '#';
8300 *p = '\0';
8301
8302 return string;
8303 }
8304
8305 /* Emit a library call comparison between floating point X and Y.
8306 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8307 Return the new operator to be used in the comparison sequence.
8308
8309 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8310 values as arguments instead of the TFmode registers themselves,
8311 that's why we cannot call emit_float_lib_cmp. */
8312
8313 rtx
8314 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8315 {
8316 const char *qpfunc;
8317 rtx slot0, slot1, result, tem, tem2, libfunc;
8318 machine_mode mode;
8319 enum rtx_code new_comparison;
8320
8321 switch (comparison)
8322 {
8323 case EQ:
8324 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8325 break;
8326
8327 case NE:
8328 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8329 break;
8330
8331 case GT:
8332 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8333 break;
8334
8335 case GE:
8336 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8337 break;
8338
8339 case LT:
8340 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8341 break;
8342
8343 case LE:
8344 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8345 break;
8346
8347 case ORDERED:
8348 case UNORDERED:
8349 case UNGT:
8350 case UNLT:
8351 case UNEQ:
8352 case UNGE:
8353 case UNLE:
8354 case LTGT:
8355 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8356 break;
8357
8358 default:
8359 gcc_unreachable ();
8360 }
8361
8362 if (TARGET_ARCH64)
8363 {
8364 if (MEM_P (x))
8365 {
8366 tree expr = MEM_EXPR (x);
8367 if (expr)
8368 mark_addressable (expr);
8369 slot0 = x;
8370 }
8371 else
8372 {
8373 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8374 emit_move_insn (slot0, x);
8375 }
8376
8377 if (MEM_P (y))
8378 {
8379 tree expr = MEM_EXPR (y);
8380 if (expr)
8381 mark_addressable (expr);
8382 slot1 = y;
8383 }
8384 else
8385 {
8386 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8387 emit_move_insn (slot1, y);
8388 }
8389
8390 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8391 emit_library_call (libfunc, LCT_NORMAL,
8392 DImode,
8393 XEXP (slot0, 0), Pmode,
8394 XEXP (slot1, 0), Pmode);
8395 mode = DImode;
8396 }
8397 else
8398 {
8399 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8400 emit_library_call (libfunc, LCT_NORMAL,
8401 SImode,
8402 x, TFmode, y, TFmode);
8403 mode = SImode;
8404 }
8405
8406
8407 /* Immediately move the result of the libcall into a pseudo
8408 register so reload doesn't clobber the value if it needs
8409 the return register for a spill reg. */
8410 result = gen_reg_rtx (mode);
8411 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8412
8413 switch (comparison)
8414 {
8415 default:
8416 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8417 case ORDERED:
8418 case UNORDERED:
8419 new_comparison = (comparison == UNORDERED ? EQ : NE);
8420 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8421 case UNGT:
8422 case UNGE:
8423 new_comparison = (comparison == UNGT ? GT : NE);
8424 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8425 case UNLE:
8426 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8427 case UNLT:
8428 tem = gen_reg_rtx (mode);
8429 if (TARGET_ARCH32)
8430 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8431 else
8432 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8433 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8434 case UNEQ:
8435 case LTGT:
8436 tem = gen_reg_rtx (mode);
8437 if (TARGET_ARCH32)
8438 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8439 else
8440 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8441 tem2 = gen_reg_rtx (mode);
8442 if (TARGET_ARCH32)
8443 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8444 else
8445 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8446 new_comparison = (comparison == UNEQ ? EQ : NE);
8447 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8448 }
8449
8450 gcc_unreachable ();
8451 }
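
/* The unordered cases above decode the result of _Q_cmp/_Qp_cmp, which
   returns 0 for equal, 1 for less, 2 for greater and 3 for unordered:

     UNORDERED / ORDERED   result == 3  /  result != 3
     UNGT                  result > 1         (greater or unordered)
     UNGE                  result != 1        (anything but less)
     UNLE                  result != 2        (anything but greater)
     UNLT                  (result & 1) != 0  (less or unordered)
     UNEQ / LTGT           ((result + 1) & 2) is zero only for 0 and 3.  */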
8452
8453 /* Generate an unsigned DImode to FP conversion. This is the same code
8454 optabs would emit if we didn't have TFmode patterns. */
8455
8456 void
8457 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8458 {
8459 rtx i0, i1, f0, in, out;
8460
8461 out = operands[0];
8462 in = force_reg (DImode, operands[1]);
8463 rtx_code_label *neglab = gen_label_rtx ();
8464 rtx_code_label *donelab = gen_label_rtx ();
8465 i0 = gen_reg_rtx (DImode);
8466 i1 = gen_reg_rtx (DImode);
8467 f0 = gen_reg_rtx (mode);
8468
8469 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8470
8471 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8472 emit_jump_insn (gen_jump (donelab));
8473 emit_barrier ();
8474
8475 emit_label (neglab);
8476
8477 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8478 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8479 emit_insn (gen_iordi3 (i0, i0, i1));
8480 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8481 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8482
8483 emit_label (donelab);
8484 }
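
/* The negative path above is the usual round-to-odd halving trick: a value
   with its top bit set is halved with the low bit folded back in, converted
   as a signed number, and then doubled, which keeps the final rounding
   correct.  A plain C sketch of the same idea, for illustration only:

     double u64_to_fp (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       return (double) (long long) half * 2.0;
     }  */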
8485
8486 /* Generate an FP to unsigned DImode conversion. This is the same code
8487 optabs would emit if we didn't have TFmode patterns. */
8488
8489 void
8490 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8491 {
8492 rtx i0, i1, f0, in, out, limit;
8493
8494 out = operands[0];
8495 in = force_reg (mode, operands[1]);
8496 rtx_code_label *neglab = gen_label_rtx ();
8497 rtx_code_label *donelab = gen_label_rtx ();
8498 i0 = gen_reg_rtx (DImode);
8499 i1 = gen_reg_rtx (DImode);
8500 limit = gen_reg_rtx (mode);
8501 f0 = gen_reg_rtx (mode);
8502
8503 emit_move_insn (limit,
8504 const_double_from_real_value (
8505 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8506 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8507
8508 emit_insn (gen_rtx_SET (out,
8509 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8510 emit_jump_insn (gen_jump (donelab));
8511 emit_barrier ();
8512
8513 emit_label (neglab);
8514
8515 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8516 emit_insn (gen_rtx_SET (i0,
8517 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8518 emit_insn (gen_movdi (i1, const1_rtx));
8519 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8520 emit_insn (gen_xordi3 (out, i0, i1));
8521
8522 emit_label (donelab);
8523 }
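
/* The out-of-range path above uses the standard offsetting trick: a value
   of at least 2^63 is reduced by 2^63 (the LIMIT constant), converted with
   the signed pattern, and the missing 2^63 is put back by XORing bit 63
   into the integer result.  A plain C sketch of the same idea, for
   illustration only:

     unsigned long long fp_to_u64 (double x)
     {
       if (x < 9223372036854775808.0)
         return (long long) x;
       return (unsigned long long) (long long) (x - 9223372036854775808.0)
              ^ (1ULL << 63);
     }  */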
8524
8525 /* Return the string to output a compare and branch instruction to DEST.
8526 DEST is the destination insn (i.e. the label), INSN is the source,
8527 and OP is the conditional expression. */
8528
8529 const char *
8530 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8531 {
8532 machine_mode mode = GET_MODE (XEXP (op, 0));
8533 enum rtx_code code = GET_CODE (op);
8534 const char *cond_str, *tmpl;
8535 int far, emit_nop, len;
8536 static char string[64];
8537 char size_char;
8538
8539 /* Compare and Branch is limited to +-2KB. If it is too far away,
8540 change
8541
8542 cxbne X, Y, .LC30
8543
8544 to
8545
8546 cxbe X, Y, .+16
8547 nop
8548 ba,pt xcc, .LC30
8549 nop */
8550
8551 len = get_attr_length (insn);
8552
8553 far = len == 4;
8554 emit_nop = len == 2;
8555
8556 if (far)
8557 code = reverse_condition (code);
8558
8559 size_char = ((mode == SImode) ? 'w' : 'x');
8560
8561 switch (code)
8562 {
8563 case NE:
8564 cond_str = "ne";
8565 break;
8566
8567 case EQ:
8568 cond_str = "e";
8569 break;
8570
8571 case GE:
8572 cond_str = "ge";
8573 break;
8574
8575 case GT:
8576 cond_str = "g";
8577 break;
8578
8579 case LE:
8580 cond_str = "le";
8581 break;
8582
8583 case LT:
8584 cond_str = "l";
8585 break;
8586
8587 case GEU:
8588 cond_str = "cc";
8589 break;
8590
8591 case GTU:
8592 cond_str = "gu";
8593 break;
8594
8595 case LEU:
8596 cond_str = "leu";
8597 break;
8598
8599 case LTU:
8600 cond_str = "cs";
8601 break;
8602
8603 default:
8604 gcc_unreachable ();
8605 }
8606
8607 if (far)
8608 {
8609 int veryfar = 1, delta;
8610
8611 if (INSN_ADDRESSES_SET_P ())
8612 {
8613 delta = (INSN_ADDRESSES (INSN_UID (dest))
8614 - INSN_ADDRESSES (INSN_UID (insn)));
8615 /* Leave some instructions for "slop". */
8616 if (delta >= -260000 && delta < 260000)
8617 veryfar = 0;
8618 }
8619
8620 if (veryfar)
8621 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8622 else
8623 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8624 }
8625 else
8626 {
8627 if (emit_nop)
8628 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8629 else
8630 tmpl = "c%cb%s\t%%1, %%2, %%3";
8631 }
8632
8633 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8634
8635 return string;
8636 }
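
/* For instance, an in-range SImode NE comparison comes out as
   "cwbne %o0, %o1, .L7" (followed by a nop when emit_cbcond_nop says one
   is needed), while DImode comparisons use the 'x' size letter, e.g.
   "cxbe".  */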
8637
8638 /* Return the string to output a conditional branch to LABEL, testing
8639 register REG. LABEL is the operand number of the label; REG is the
8640 operand number of the reg. OP is the conditional expression. The mode
8641 of REG says what kind of comparison we made.
8642
8643 DEST is the destination insn (i.e. the label), INSN is the source.
8644
8645 REVERSED is nonzero if we should reverse the sense of the comparison.
8646
8647 ANNUL is nonzero if we should generate an annulling branch. */
8648
8649 const char *
8650 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8651 int annul, rtx_insn *insn)
8652 {
8653 static char string[64];
8654 enum rtx_code code = GET_CODE (op);
8655 machine_mode mode = GET_MODE (XEXP (op, 0));
8656 rtx note;
8657 int far;
8658 char *p;
8659
8660 /* Branches on a register are limited to +-128KB. If it is too far away,
8661 change
8662
8663 brnz,pt %g1, .LC30
8664
8665 to
8666
8667 brz,pn %g1, .+12
8668 nop
8669 ba,pt %xcc, .LC30
8670
8671 and
8672
8673 brgez,a,pn %o1, .LC29
8674
8675 to
8676
8677 brlz,pt %o1, .+16
8678 nop
8679 ba,pt %xcc, .LC29 */
8680
8681 far = get_attr_length (insn) >= 3;
8682
8683 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8684 if (reversed ^ far)
8685 code = reverse_condition (code);
8686
8687 /* Only 64-bit versions of these instructions exist. */
8688 gcc_assert (mode == DImode);
8689
8690 /* Start by writing the branch condition. */
8691
8692 switch (code)
8693 {
8694 case NE:
8695 strcpy (string, "brnz");
8696 break;
8697
8698 case EQ:
8699 strcpy (string, "brz");
8700 break;
8701
8702 case GE:
8703 strcpy (string, "brgez");
8704 break;
8705
8706 case LT:
8707 strcpy (string, "brlz");
8708 break;
8709
8710 case LE:
8711 strcpy (string, "brlez");
8712 break;
8713
8714 case GT:
8715 strcpy (string, "brgz");
8716 break;
8717
8718 default:
8719 gcc_unreachable ();
8720 }
8721
8722 p = strchr (string, '\0');
8723
8724 /* Now add the annulling, reg, label, and nop. */
8725 if (annul && ! far)
8726 {
8727 strcpy (p, ",a");
8728 p += 2;
8729 }
8730
8731 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8732 {
8733 strcpy (p,
8734 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8735 >= profile_probability::even ()) ^ far)
8736 ? ",pt" : ",pn");
8737 p += 3;
8738 }
8739
8740 *p = p < string + 8 ? '\t' : ' ';
8741 p++;
8742 *p++ = '%';
8743 *p++ = '0' + reg;
8744 *p++ = ',';
8745 *p++ = ' ';
8746 if (far)
8747 {
8748 int veryfar = 1, delta;
8749
8750 if (INSN_ADDRESSES_SET_P ())
8751 {
8752 delta = (INSN_ADDRESSES (INSN_UID (dest))
8753 - INSN_ADDRESSES (INSN_UID (insn)));
8754 /* Leave some instructions for "slop". */
8755 if (delta >= -260000 && delta < 260000)
8756 veryfar = 0;
8757 }
8758
8759 strcpy (p, ".+12\n\t nop\n\t");
8760 /* Skip the next insn if requested or
8761 if we know that it will be a nop. */
8762 if (annul || ! final_sequence)
8763 p[3] = '6';
8764 p += 12;
8765 if (veryfar)
8766 {
8767 strcpy (p, "b\t");
8768 p += 2;
8769 }
8770 else
8771 {
8772 strcpy (p, "ba,pt\t%%xcc, ");
8773 p += 13;
8774 }
8775 }
8776 *p++ = '%';
8777 *p++ = 'l';
8778 *p++ = '0' + label;
8779 *p++ = '%';
8780 *p++ = '#';
8781 *p = '\0';
8782
8783 return string;
8784 }
8785
8786 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8787 Such instructions cannot be used in the delay slot of a return insn on v9.
8788 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8789 */
8790
8791 static int
8792 epilogue_renumber (rtx *where, int test)
8793 {
8794 const char *fmt;
8795 int i;
8796 enum rtx_code code;
8797
8798 if (*where == 0)
8799 return 0;
8800
8801 code = GET_CODE (*where);
8802
8803 switch (code)
8804 {
8805 case REG:
8806 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8807 return 1;
8808 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8809 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8810 /* fallthrough */
8811 case SCRATCH:
8812 case CC0:
8813 case PC:
8814 case CONST_INT:
8815 case CONST_WIDE_INT:
8816 case CONST_DOUBLE:
8817 return 0;
8818
8819 /* Do not replace the frame pointer with the stack pointer because
8820 it can cause the delayed instruction to load below the stack.
8821 This occurs when instructions like:
8822
8823 (set (reg/i:SI 24 %i0)
8824 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8825 (const_int -20 [0xffffffec])) 0))
8826
8827 are in the return delayed slot. */
8828 case PLUS:
8829 if (GET_CODE (XEXP (*where, 0)) == REG
8830 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8831 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8832 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8833 return 1;
8834 break;
8835
8836 case MEM:
8837 if (SPARC_STACK_BIAS
8838 && GET_CODE (XEXP (*where, 0)) == REG
8839 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8840 return 1;
8841 break;
8842
8843 default:
8844 break;
8845 }
8846
8847 fmt = GET_RTX_FORMAT (code);
8848
8849 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8850 {
8851 if (fmt[i] == 'E')
8852 {
8853 int j;
8854 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8855 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8856 return 1;
8857 }
8858 else if (fmt[i] == 'e'
8859 && epilogue_renumber (&(XEXP (*where, i)), test))
8860 return 1;
8861 }
8862 return 0;
8863 }
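
/* For example, with TEST == 0 the delay slot candidate

     (set (reg:SI %i0) (reg:SI %l1))

   is rejected because it uses %l1, whereas (set (reg:SI %i0) (const_int 0))
   is accepted and rewritten to set %o0: the V9 return instruction restores
   the register window before its delay slot executes, so the old frame's
   %l and %o registers are gone and the old %i registers are the caller's
   %o registers.  */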
8864 \f
8865 /* Leaf functions and non-leaf functions have different needs. */
8866
8867 static const int
8868 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8869
8870 static const int
8871 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8872
8873 static const int *const reg_alloc_orders[] = {
8874 reg_leaf_alloc_order,
8875 reg_nonleaf_alloc_order};
8876
8877 void
8878 order_regs_for_local_alloc (void)
8879 {
8880 static int last_order_nonleaf = 1;
8881
8882 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8883 {
8884 last_order_nonleaf = !last_order_nonleaf;
8885 memcpy ((char *) reg_alloc_order,
8886 (const char *) reg_alloc_orders[last_order_nonleaf],
8887 FIRST_PSEUDO_REGISTER * sizeof (int));
8888 }
8889 }
8890 \f
8891 /* Return 1 if REG and MEM are legitimate enough to allow the various
8892 MEM<-->REG splits to be run. */
8893
8894 int
8895 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8896 {
8897 /* Punt if we are here by mistake. */
8898 gcc_assert (reload_completed);
8899
8900 /* We must have an offsettable memory reference. */
8901 if (!offsettable_memref_p (mem))
8902 return 0;
8903
8904 /* If we have legitimate args for ldd/std, we do not want
8905 the split to happen. */
8906 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8907 return 0;
8908
8909 /* Success. */
8910 return 1;
8911 }
8912
8913 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8914
8915 void
8916 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8917 {
8918 rtx high_part = gen_highpart (mode, dest);
8919 rtx low_part = gen_lowpart (mode, dest);
8920 rtx word0 = adjust_address (src, mode, 0);
8921 rtx word1 = adjust_address (src, mode, 4);
8922
8923 if (reg_overlap_mentioned_p (high_part, word1))
8924 {
8925 emit_move_insn_1 (low_part, word1);
8926 emit_move_insn_1 (high_part, word0);
8927 }
8928 else
8929 {
8930 emit_move_insn_1 (high_part, word0);
8931 emit_move_insn_1 (low_part, word1);
8932 }
8933 }
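
/* For example, when splitting the DImode load

     (set (reg:DI %o0) (mem:DI (reg:SI %o0)))

   into two SImode moves, the high word %o0 overlaps the base register of
   WORD1, so the low word %o1 is loaded from [%o0+4] first and the high
   word from [%o0] last; in the non-overlapping case the high word is
   moved first.  */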
8934
8935 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8936
8937 void
8938 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8939 {
8940 rtx word0 = adjust_address (dest, mode, 0);
8941 rtx word1 = adjust_address (dest, mode, 4);
8942 rtx high_part = gen_highpart (mode, src);
8943 rtx low_part = gen_lowpart (mode, src);
8944
8945 emit_move_insn_1 (word0, high_part);
8946 emit_move_insn_1 (word1, low_part);
8947 }
8948
8949 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8950
8951 int
8952 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8953 {
8954 /* Punt if we are here by mistake. */
8955 gcc_assert (reload_completed);
8956
8957 if (GET_CODE (reg1) == SUBREG)
8958 reg1 = SUBREG_REG (reg1);
8959 if (GET_CODE (reg1) != REG)
8960 return 0;
8961 const int regno1 = REGNO (reg1);
8962
8963 if (GET_CODE (reg2) == SUBREG)
8964 reg2 = SUBREG_REG (reg2);
8965 if (GET_CODE (reg2) != REG)
8966 return 0;
8967 const int regno2 = REGNO (reg2);
8968
8969 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8970 return 1;
8971
8972 if (TARGET_VIS3)
8973 {
8974 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8975 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8976 return 1;
8977 }
8978
8979 return 0;
8980 }
8981
8982 /* Split a REG <--> REG move into a pair of moves in MODE. */
8983
8984 void
8985 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8986 {
8987 rtx dest1 = gen_highpart (mode, dest);
8988 rtx dest2 = gen_lowpart (mode, dest);
8989 rtx src1 = gen_highpart (mode, src);
8990 rtx src2 = gen_lowpart (mode, src);
8991
8992 /* Now emit using the real source and destination we found, swapping
8993 the order if we detect overlap. */
8994 if (reg_overlap_mentioned_p (dest1, src2))
8995 {
8996 emit_move_insn_1 (dest2, src2);
8997 emit_move_insn_1 (dest1, src1);
8998 }
8999 else
9000 {
9001 emit_move_insn_1 (dest1, src1);
9002 emit_move_insn_1 (dest2, src2);
9003 }
9004 }
9005
9006 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9007 This makes them candidates for using ldd and std insns.
9008
9009 Note reg1 and reg2 *must* be hard registers. */
9010
9011 int
9012 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9013 {
9014 /* We might have been passed a SUBREG. */
9015 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9016 return 0;
9017
9018 if (REGNO (reg1) % 2 != 0)
9019 return 0;
9020
9021 /* Integer ldd is deprecated in SPARC V9. */
9022 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9023 return 0;
9024
9025 return (REGNO (reg1) == REGNO (reg2) - 1);
9026 }
9027
9028 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9029 an ldd or std insn.
9030
9031 This can only happen when addr1 and addr2, the addresses in mem1
9032 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9033 addr1 must also be aligned on a 64-bit boundary.
9034
9035 Also iff dependent_reg_rtx is not null it should not be used to
9036 compute the address for mem1, i.e. we cannot optimize a sequence
9037 like:
9038 ld [%o0], %o0
9039 ld [%o0 + 4], %o1
9040 to
9041 ldd [%o0], %o0
9042 nor:
9043 ld [%g3 + 4], %g3
9044 ld [%g3], %g2
9045 to
9046 ldd [%g3], %g2
9047
9048 But, note that the transformation from:
9049 ld [%g2 + 4], %g3
9050 ld [%g2], %g2
9051 to
9052 ldd [%g2], %g2
9053 is perfectly fine. Thus, the peephole2 patterns always pass us
9054 the destination register of the first load, never the second one.
9055
9056 For stores we don't have a similar problem, so dependent_reg_rtx is
9057 NULL_RTX. */
9058
9059 int
9060 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9061 {
9062 rtx addr1, addr2;
9063 unsigned int reg1;
9064 HOST_WIDE_INT offset1;
9065
9066 /* The mems cannot be volatile. */
9067 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9068 return 0;
9069
9070 /* MEM1 should be aligned on a 64-bit boundary. */
9071 if (MEM_ALIGN (mem1) < 64)
9072 return 0;
9073
9074 addr1 = XEXP (mem1, 0);
9075 addr2 = XEXP (mem2, 0);
9076
9077 /* Extract a register number and offset (if used) from the first addr. */
9078 if (GET_CODE (addr1) == PLUS)
9079 {
9080 /* If not a REG, return zero. */
9081 if (GET_CODE (XEXP (addr1, 0)) != REG)
9082 return 0;
9083 else
9084 {
9085 reg1 = REGNO (XEXP (addr1, 0));
9086 /* The offset must be constant! */
9087 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9088 return 0;
9089 offset1 = INTVAL (XEXP (addr1, 1));
9090 }
9091 }
9092 else if (GET_CODE (addr1) != REG)
9093 return 0;
9094 else
9095 {
9096 reg1 = REGNO (addr1);
9097 /* This was a simple (mem (reg)) expression. Offset is 0. */
9098 offset1 = 0;
9099 }
9100
9101 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9102 if (GET_CODE (addr2) != PLUS)
9103 return 0;
9104
9105 if (GET_CODE (XEXP (addr2, 0)) != REG
9106 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9107 return 0;
9108
9109 if (reg1 != REGNO (XEXP (addr2, 0)))
9110 return 0;
9111
9112 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9113 return 0;
9114
9115 /* The first offset must be evenly divisible by 8 to ensure the
9116 address is 64-bit aligned. */
9117 if (offset1 % 8 != 0)
9118 return 0;
9119
9120 /* The offset for the second addr must be 4 more than the first addr. */
9121 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9122 return 0;
9123
9124 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9125 instructions. */
9126 return 1;
9127 }
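
/* For instance, [%o2 + 8] and [%o2 + 12] qualify provided MEM1 is known
   to be 64-bit aligned, whereas [%o2 + 4] and [%o2 + 8] do not (the first
   offset is not a multiple of 8), and neither do [%o2] and [%o3 + 4]
   (different base registers).  */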
9128
9129 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9130
9131 rtx
9132 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9133 {
9134 rtx x = widen_memory_access (mem1, mode, 0);
9135 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9136 return x;
9137 }
9138
9139 /* Return 1 if reg is a pseudo, or is the first register in
9140 a hard register pair. This makes it suitable for use in
9141 ldd and std insns. */
9142
9143 int
9144 register_ok_for_ldd (rtx reg)
9145 {
9146 /* We might have been passed a SUBREG. */
9147 if (!REG_P (reg))
9148 return 0;
9149
9150 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9151 return (REGNO (reg) % 2 == 0);
9152
9153 return 1;
9154 }
9155
9156 /* Return 1 if OP, a MEM, has an address which is known to be
9157 aligned to an 8-byte boundary. */
9158
9159 int
9160 memory_ok_for_ldd (rtx op)
9161 {
9162 if (!mem_min_alignment (op, 8))
9163 return 0;
9164
9165 /* We need to perform the job of a memory constraint. */
9166 if ((reload_in_progress || reload_completed)
9167 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9168 return 0;
9169
9170 if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
9171 return 0;
9172
9173 return 1;
9174 }
9175 \f
9176 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9177
9178 static bool
9179 sparc_print_operand_punct_valid_p (unsigned char code)
9180 {
9181 if (code == '#'
9182 || code == '*'
9183 || code == '('
9184 || code == ')'
9185 || code == '_'
9186 || code == '&')
9187 return true;
9188
9189 return false;
9190 }
9191
9192 /* Implement TARGET_PRINT_OPERAND.
9193 Print operand X (an rtx) in assembler syntax to file FILE.
9194 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9195 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9196
9197 static void
9198 sparc_print_operand (FILE *file, rtx x, int code)
9199 {
9200 const char *s;
9201
9202 switch (code)
9203 {
9204 case '#':
9205 /* Output an insn in a delay slot. */
9206 if (final_sequence)
9207 sparc_indent_opcode = 1;
9208 else
9209 fputs ("\n\t nop", file);
9210 return;
9211 case '*':
9212 /* Output an annul flag if there's nothing for the delay slot and we
9213 are optimizing. This is always used with '(' below.
9214 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9215 this is a dbx bug. So, we only do this when optimizing.
9216 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9217 Always emit a nop in case the next instruction is a branch. */
9218 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9219 fputs (",a", file);
9220 return;
9221 case '(':
9222 /* Output a 'nop' if there's nothing for the delay slot and we are
9223 not optimizing. This is always used with '*' above. */
9224 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9225 fputs ("\n\t nop", file);
9226 else if (final_sequence)
9227 sparc_indent_opcode = 1;
9228 return;
9229 case ')':
9230 /* Output the right displacement from the saved PC on function return.
9231 The caller may have placed an "unimp" insn immediately after the call
9232 so we have to account for it. This insn is used in the 32-bit ABI
9233 when calling a function that returns a non zero-sized structure. The
9234 64-bit ABI doesn't have it. Be careful to have this test be the same
9235 as that for the call. The exception is when sparc_std_struct_return
9236 is enabled, the psABI is followed exactly and the adjustment is made
9237 by the code in sparc_struct_value_rtx. The call emitted is the same
9238 when sparc_std_struct_return is enabled. */
9239 if (!TARGET_ARCH64
9240 && cfun->returns_struct
9241 && !sparc_std_struct_return
9242 && DECL_SIZE (DECL_RESULT (current_function_decl))
9243 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9244 == INTEGER_CST
9245 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9246 fputs ("12", file);
9247 else
9248 fputc ('8', file);
9249 return;
9250 case '_':
9251 /* Output the Embedded Medium/Anywhere code model base register. */
9252 fputs (EMBMEDANY_BASE_REG, file);
9253 return;
9254 case '&':
9255 /* Print some local dynamic TLS name. */
9256 if (const char *name = get_some_local_dynamic_name ())
9257 assemble_name (file, name);
9258 else
9259 output_operand_lossage ("'%%&' used without any "
9260 "local dynamic TLS references");
9261 return;
9262
9263 case 'Y':
9264 /* Adjust the operand to take into account a RESTORE operation. */
9265 if (GET_CODE (x) == CONST_INT)
9266 break;
9267 else if (GET_CODE (x) != REG)
9268 output_operand_lossage ("invalid %%Y operand");
9269 else if (REGNO (x) < 8)
9270 fputs (reg_names[REGNO (x)], file);
9271 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9272 fputs (reg_names[REGNO (x)-16], file);
9273 else
9274 output_operand_lossage ("invalid %%Y operand");
9275 return;
9276 case 'L':
9277 /* Print out the low order register name of a register pair. */
9278 if (WORDS_BIG_ENDIAN)
9279 fputs (reg_names[REGNO (x)+1], file);
9280 else
9281 fputs (reg_names[REGNO (x)], file);
9282 return;
9283 case 'H':
9284 /* Print out the high order register name of a register pair. */
9285 if (WORDS_BIG_ENDIAN)
9286 fputs (reg_names[REGNO (x)], file);
9287 else
9288 fputs (reg_names[REGNO (x)+1], file);
9289 return;
9290 case 'R':
9291 /* Print out the second register name of a register pair or quad.
9292 I.e., R (%o0) => %o1. */
9293 fputs (reg_names[REGNO (x)+1], file);
9294 return;
9295 case 'S':
9296 /* Print out the third register name of a register quad.
9297 I.e., S (%o0) => %o2. */
9298 fputs (reg_names[REGNO (x)+2], file);
9299 return;
9300 case 'T':
9301 /* Print out the fourth register name of a register quad.
9302 I.e., T (%o0) => %o3. */
9303 fputs (reg_names[REGNO (x)+3], file);
9304 return;
9305 case 'x':
9306 /* Print a condition code register. */
9307 if (REGNO (x) == SPARC_ICC_REG)
9308 {
9309 switch (GET_MODE (x))
9310 {
9311 case E_CCmode:
9312 case E_CCNZmode:
9313 case E_CCCmode:
9314 case E_CCVmode:
9315 s = "%icc";
9316 break;
9317 case E_CCXmode:
9318 case E_CCXNZmode:
9319 case E_CCXCmode:
9320 case E_CCXVmode:
9321 s = "%xcc";
9322 break;
9323 default:
9324 gcc_unreachable ();
9325 }
9326 fputs (s, file);
9327 }
9328 else
9329 /* %fccN register */
9330 fputs (reg_names[REGNO (x)], file);
9331 return;
9332 case 'm':
9333 /* Print the operand's address only. */
9334 output_address (GET_MODE (x), XEXP (x, 0));
9335 return;
9336 case 'r':
9337 /* In this case we need a register. Use %g0 if the
9338 operand is const0_rtx. */
9339 if (x == const0_rtx
9340 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9341 {
9342 fputs ("%g0", file);
9343 return;
9344 }
9345 else
9346 break;
9347
9348 case 'A':
9349 switch (GET_CODE (x))
9350 {
9351 case IOR:
9352 s = "or";
9353 break;
9354 case AND:
9355 s = "and";
9356 break;
9357 case XOR:
9358 s = "xor";
9359 break;
9360 default:
9361 output_operand_lossage ("invalid %%A operand");
9362 s = "";
9363 break;
9364 }
9365 fputs (s, file);
9366 return;
9367
9368 case 'B':
9369 switch (GET_CODE (x))
9370 {
9371 case IOR:
9372 s = "orn";
9373 break;
9374 case AND:
9375 s = "andn";
9376 break;
9377 case XOR:
9378 s = "xnor";
9379 break;
9380 default:
9381 output_operand_lossage ("invalid %%B operand");
9382 s = "";
9383 break;
9384 }
9385 fputs (s, file);
9386 return;
9387
9388 /* This is used by the conditional move instructions. */
9389 case 'C':
9390 {
9391 machine_mode mode = GET_MODE (XEXP (x, 0));
9392 switch (GET_CODE (x))
9393 {
9394 case NE:
9395 if (mode == CCVmode || mode == CCXVmode)
9396 s = "vs";
9397 else
9398 s = "ne";
9399 break;
9400 case EQ:
9401 if (mode == CCVmode || mode == CCXVmode)
9402 s = "vc";
9403 else
9404 s = "e";
9405 break;
9406 case GE:
9407 if (mode == CCNZmode || mode == CCXNZmode)
9408 s = "pos";
9409 else
9410 s = "ge";
9411 break;
9412 case GT:
9413 s = "g";
9414 break;
9415 case LE:
9416 s = "le";
9417 break;
9418 case LT:
9419 if (mode == CCNZmode || mode == CCXNZmode)
9420 s = "neg";
9421 else
9422 s = "l";
9423 break;
9424 case GEU:
9425 s = "geu";
9426 break;
9427 case GTU:
9428 s = "gu";
9429 break;
9430 case LEU:
9431 s = "leu";
9432 break;
9433 case LTU:
9434 s = "lu";
9435 break;
9436 case LTGT:
9437 s = "lg";
9438 break;
9439 case UNORDERED:
9440 s = "u";
9441 break;
9442 case ORDERED:
9443 s = "o";
9444 break;
9445 case UNLT:
9446 s = "ul";
9447 break;
9448 case UNLE:
9449 s = "ule";
9450 break;
9451 case UNGT:
9452 s = "ug";
9453 break;
9454 case UNGE:
9455 s = "uge";
9456 break;
9457 case UNEQ:
9458 s = "ue";
9459 break;
9460 default:
9461 output_operand_lossage ("invalid %%C operand");
9462 s = "";
9463 break;
9464 }
9465 fputs (s, file);
9466 return;
9467 }
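/* Together with the 'x' code above, this lets conditional-move templates
   print e.g. "movg %xcc, ..." for a GT test on the 64-bit condition
   codes (illustrative).  */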
9468
9469 /* These are used by the movr instruction pattern. */
9470 case 'D':
9471 {
9472 switch (GET_CODE (x))
9473 {
9474 case NE:
9475 s = "ne";
9476 break;
9477 case EQ:
9478 s = "e";
9479 break;
9480 case GE:
9481 s = "gez";
9482 break;
9483 case LT:
9484 s = "lz";
9485 break;
9486 case LE:
9487 s = "lez";
9488 break;
9489 case GT:
9490 s = "gz";
9491 break;
9492 default:
9493 output_operand_lossage ("invalid %%D operand");
9494 s = "";
9495 break;
9496 }
9497 fputs (s, file);
9498 return;
9499 }
9500
9501 case 'b':
9502 {
9503 /* Print a sign-extended character. */
9504 int i = trunc_int_for_mode (INTVAL (x), QImode);
9505 fprintf (file, "%d", i);
9506 return;
9507 }
9508
9509 case 'f':
9510 /* Operand must be a MEM; write its address. */
9511 if (GET_CODE (x) != MEM)
9512 output_operand_lossage ("invalid %%f operand");
9513 output_address (GET_MODE (x), XEXP (x, 0));
9514 return;
9515
9516 case 's':
9517 {
9518 /* Print a sign-extended 32-bit value. */
9519 HOST_WIDE_INT i;
9520 if (GET_CODE(x) == CONST_INT)
9521 i = INTVAL (x);
9522 else
9523 {
9524 output_operand_lossage ("invalid %%s operand");
9525 return;
9526 }
9527 i = trunc_int_for_mode (i, SImode);
9528 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9529 return;
9530 }
9531
9532 case 0:
9533 /* Do nothing special. */
9534 break;
9535
9536 default:
9537 /* Undocumented flag. */
9538 output_operand_lossage ("invalid operand output code");
9539 }
9540
9541 if (GET_CODE (x) == REG)
9542 fputs (reg_names[REGNO (x)], file);
9543 else if (GET_CODE (x) == MEM)
9544 {
9545 fputc ('[', file);
9546 /* Poor Sun assembler doesn't understand absolute addressing. */
9547 if (CONSTANT_P (XEXP (x, 0)))
9548 fputs ("%g0+", file);
9549 output_address (GET_MODE (x), XEXP (x, 0));
9550 fputc (']', file);
9551 }
9552 else if (GET_CODE (x) == HIGH)
9553 {
9554 fputs ("%hi(", file);
9555 output_addr_const (file, XEXP (x, 0));
9556 fputc (')', file);
9557 }
9558 else if (GET_CODE (x) == LO_SUM)
9559 {
9560 sparc_print_operand (file, XEXP (x, 0), 0);
9561 if (TARGET_CM_MEDMID)
9562 fputs ("+%l44(", file);
9563 else
9564 fputs ("+%lo(", file);
9565 output_addr_const (file, XEXP (x, 1));
9566 fputc (')', file);
9567 }
9568 else if (GET_CODE (x) == CONST_DOUBLE)
9569 output_operand_lossage ("floating-point constant not a valid immediate operand");
9570 else
9571 output_addr_const (file, x);
9572 }
9573
9574 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9575
9576 static void
9577 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9578 {
9579 rtx base, index = 0;
9580 int offset = 0;
9581 rtx addr = x;
9582
9583 if (REG_P (addr))
9584 fputs (reg_names[REGNO (addr)], file);
9585 else if (GET_CODE (addr) == PLUS)
9586 {
9587 if (CONST_INT_P (XEXP (addr, 0)))
9588 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9589 else if (CONST_INT_P (XEXP (addr, 1)))
9590 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9591 else
9592 base = XEXP (addr, 0), index = XEXP (addr, 1);
9593 if (GET_CODE (base) == LO_SUM)
9594 {
9595 gcc_assert (USE_AS_OFFSETABLE_LO10
9596 && TARGET_ARCH64
9597 && ! TARGET_CM_MEDMID);
9598 output_operand (XEXP (base, 0), 0);
9599 fputs ("+%lo(", file);
9600 output_address (VOIDmode, XEXP (base, 1));
9601 fprintf (file, ")+%d", offset);
9602 }
9603 else
9604 {
9605 fputs (reg_names[REGNO (base)], file);
9606 if (index == 0)
9607 fprintf (file, "%+d", offset);
9608 else if (REG_P (index))
9609 fprintf (file, "+%s", reg_names[REGNO (index)]);
9610 else if (GET_CODE (index) == SYMBOL_REF
9611 || GET_CODE (index) == LABEL_REF
9612 || GET_CODE (index) == CONST)
9613 fputc ('+', file), output_addr_const (file, index);
9614 else gcc_unreachable ();
9615 }
9616 }
9617 else if (GET_CODE (addr) == MINUS
9618 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9619 {
9620 output_addr_const (file, XEXP (addr, 0));
9621 fputs ("-(", file);
9622 output_addr_const (file, XEXP (addr, 1));
9623 fputs ("-.)", file);
9624 }
9625 else if (GET_CODE (addr) == LO_SUM)
9626 {
9627 output_operand (XEXP (addr, 0), 0);
9628 if (TARGET_CM_MEDMID)
9629 fputs ("+%l44(", file);
9630 else
9631 fputs ("+%lo(", file);
9632 output_address (VOIDmode, XEXP (addr, 1));
9633 fputc (')', file);
9634 }
9635 else if (flag_pic
9636 && GET_CODE (addr) == CONST
9637 && GET_CODE (XEXP (addr, 0)) == MINUS
9638 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9639 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9640 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9641 {
9642 addr = XEXP (addr, 0);
9643 output_addr_const (file, XEXP (addr, 0));
9644 /* Group the args of the second CONST in parenthesis. */
9645 fputs ("-(", file);
9646 /* Skip past the second CONST--it does nothing for us. */
9647 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9648 /* Close the parenthesis. */
9649 fputc (')', file);
9650 }
9651 else
9652 {
9653 output_addr_const (file, addr);
9654 }
9655 }
9656 \f
9657 /* Target hook for assembling integer objects. The sparc version has
9658 special handling for aligned DI-mode objects. */
9659
9660 static bool
9661 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9662 {
9663 /* ??? We only output .xword's for symbols and only then in environments
9664 where the assembler can handle them. */
9665 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9666 {
9667 if (TARGET_V9)
9668 {
9669 assemble_integer_with_op ("\t.xword\t", x);
9670 return true;
9671 }
9672 else
9673 {
9674 assemble_aligned_integer (4, const0_rtx);
9675 assemble_aligned_integer (4, x);
9676 return true;
9677 }
9678 }
9679 return default_assemble_integer (x, size, aligned_p);
9680 }
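/* E.g. an aligned 8-byte symbolic value is emitted as "\t.xword\tsym" on
   V9 and, on pre-V9 targets, as a zero 32-bit word followed by the 32-bit
   value (SPARC is big-endian, so the zero word comes first).  */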
9681 \f
9682 /* Return the value of a code used in the .proc pseudo-op that says
9683 what kind of result this function returns. For non-C types, we pick
9684 the closest C type. */
9685
9686 #ifndef SHORT_TYPE_SIZE
9687 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9688 #endif
9689
9690 #ifndef INT_TYPE_SIZE
9691 #define INT_TYPE_SIZE BITS_PER_WORD
9692 #endif
9693
9694 #ifndef LONG_TYPE_SIZE
9695 #define LONG_TYPE_SIZE BITS_PER_WORD
9696 #endif
9697
9698 #ifndef LONG_LONG_TYPE_SIZE
9699 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9700 #endif
9701
9702 #ifndef FLOAT_TYPE_SIZE
9703 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9704 #endif
9705
9706 #ifndef DOUBLE_TYPE_SIZE
9707 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9708 #endif
9709
9710 #ifndef LONG_DOUBLE_TYPE_SIZE
9711 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9712 #endif
9713
9714 unsigned long
9715 sparc_type_code (tree type)
9716 {
9717 unsigned long qualifiers = 0;
9718 unsigned shift;
9719
9720 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9721 setting more, since some assemblers will give an error for this. Also,
9722 we must be careful to avoid shifts of 32 bits or more to avoid getting
9723 unpredictable results. */
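/* A worked example (assuming a 32-bit int): for "unsigned int *" the first
   iteration records the pointer derivation as (1 << 6), and the second
   returns (qualifiers | 14) for the unsigned-int base type, i.e. 0x4e.  */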
9724
9725 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9726 {
9727 switch (TREE_CODE (type))
9728 {
9729 case ERROR_MARK:
9730 return qualifiers;
9731
9732 case ARRAY_TYPE:
9733 qualifiers |= (3 << shift);
9734 break;
9735
9736 case FUNCTION_TYPE:
9737 case METHOD_TYPE:
9738 qualifiers |= (2 << shift);
9739 break;
9740
9741 case POINTER_TYPE:
9742 case REFERENCE_TYPE:
9743 case OFFSET_TYPE:
9744 qualifiers |= (1 << shift);
9745 break;
9746
9747 case RECORD_TYPE:
9748 return (qualifiers | 8);
9749
9750 case UNION_TYPE:
9751 case QUAL_UNION_TYPE:
9752 return (qualifiers | 9);
9753
9754 case ENUMERAL_TYPE:
9755 return (qualifiers | 10);
9756
9757 case VOID_TYPE:
9758 return (qualifiers | 16);
9759
9760 case INTEGER_TYPE:
9761 /* If this is a range type, consider it to be the underlying
9762 type. */
9763 if (TREE_TYPE (type) != 0)
9764 break;
9765
9766 /* Carefully distinguish all the standard types of C,
9767 without messing up if the language is not C. We do this by
9768 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9769 look at both the names and the above fields, but that's redundant.
9770 Any type whose size is between two C types will be considered
9771 to be the wider of the two types. Also, we do not have a
9772 special code to use for "long long", so anything wider than
9773 long is treated the same. Note that we can't distinguish
9774 between "int" and "long" in this code if they are the same
9775 size, but that's fine, since neither can the assembler. */
9776
9777 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9778 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9779
9780 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9781 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9782
9783 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9784 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9785
9786 else
9787 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9788
9789 case REAL_TYPE:
9790 /* If this is a range type, consider it to be the underlying
9791 type. */
9792 if (TREE_TYPE (type) != 0)
9793 break;
9794
9795 /* Carefully distinguish all the standard types of C,
9796 without messing up if the language is not C. */
9797
9798 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9799 return (qualifiers | 6);
9800
9801 else
9802 return (qualifiers | 7);
9803
9804 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9805 /* ??? We need to distinguish between double and float complex types,
9806 but I don't know how yet because I can't reach this code from
9807 existing front-ends. */
9808 return (qualifiers | 7); /* Who knows? */
9809
9810 case VECTOR_TYPE:
9811 case BOOLEAN_TYPE: /* Boolean truth value type. */
9812 case LANG_TYPE:
9813 case NULLPTR_TYPE:
9814 return qualifiers;
9815
9816 default:
9817 gcc_unreachable (); /* Not a type! */
9818 }
9819 }
9820
9821 return qualifiers;
9822 }
9823 \f
9824 /* Nested function support. */
9825
9826 /* Emit RTL insns to initialize the variable parts of a trampoline.
9827 FNADDR is an RTX for the address of the function's pure code.
9828 CXT is an RTX for the static chain value for the function.
9829
9830 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9831 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9832 (to store insns). This is a bit excessive. Perhaps a different
9833 mechanism would be better here.
9834
9835 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9836
9837 static void
9838 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9839 {
9840 /* SPARC 32-bit trampoline:
9841
9842 sethi %hi(fn), %g1
9843 sethi %hi(static), %g2
9844 jmp %g1+%lo(fn)
9845 or %g2, %lo(static), %g2
9846
9847 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9848 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9849 */
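/* For illustration only (with a made-up address): if fn were 0x00012345,
   the first word below would be 0x03000000 | (0x00012345 >> 10)
   = 0x03000048, i.e. "sethi %hi(fn), %g1", and the low 10 bits 0x345
   would be OR'd into 0x81c06000 to give "jmp %g1+%lo(fn)".  */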
9850
9851 emit_move_insn
9852 (adjust_address (m_tramp, SImode, 0),
9853 expand_binop (SImode, ior_optab,
9854 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9855 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9856 NULL_RTX, 1, OPTAB_DIRECT));
9857
9858 emit_move_insn
9859 (adjust_address (m_tramp, SImode, 4),
9860 expand_binop (SImode, ior_optab,
9861 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9862 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9863 NULL_RTX, 1, OPTAB_DIRECT));
9864
9865 emit_move_insn
9866 (adjust_address (m_tramp, SImode, 8),
9867 expand_binop (SImode, ior_optab,
9868 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9869 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9870 NULL_RTX, 1, OPTAB_DIRECT));
9871
9872 emit_move_insn
9873 (adjust_address (m_tramp, SImode, 12),
9874 expand_binop (SImode, ior_optab,
9875 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9876 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9877 NULL_RTX, 1, OPTAB_DIRECT));
9878
9879 emit_insn
9880 (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 0))));
9881
9882 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9883 aligned on a 16 byte boundary so one flush clears it all. */
9884 if (sparc_cpu != PROCESSOR_ULTRASPARC
9885 && sparc_cpu != PROCESSOR_ULTRASPARC3
9886 && sparc_cpu != PROCESSOR_NIAGARA
9887 && sparc_cpu != PROCESSOR_NIAGARA2
9888 && sparc_cpu != PROCESSOR_NIAGARA3
9889 && sparc_cpu != PROCESSOR_NIAGARA4
9890 && sparc_cpu != PROCESSOR_NIAGARA7
9891 && sparc_cpu != PROCESSOR_M8)
9892 emit_insn
9893 (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 8))));
9894
9895 /* Call __enable_execute_stack after writing onto the stack to make sure
9896 the stack address is accessible. */
9897 #ifdef HAVE_ENABLE_EXECUTE_STACK
9898 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9899 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9900 #endif
9901
9902 }
9903
9904 /* The 64-bit version is simpler because it makes more sense to load the
9905 values as "immediate" data out of the trampoline. It's also easier since
9906 we can read the PC without clobbering a register. */
9907
9908 static void
9909 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9910 {
9911 /* SPARC 64-bit trampoline:
9912
9913 rd %pc, %g1
9914 ldx [%g1+24], %g5
9915 jmp %g5
9916 ldx [%g1+16], %g5
9917 +16 bytes data
9918 */
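/* The four instruction words occupy offsets 0-15; the static chain value
   is stored at offset 16 and the target address at offset 24 (see the two
   DImode moves below).  "rd %pc, %g1" lets the code address that data
   relative to the trampoline itself, and %g5, which is loaded twice, is
   both the jump scratch and the register this port expects the static
   chain in for TARGET_ARCH64.  */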
9919
9920 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9921 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9922 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9923 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9924 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9925 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9926 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9927 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9928 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9929 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9930 emit_insn
9931 (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 0))));
9932
9933 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9934 aligned on a 16 byte boundary so one flush clears it all. */
9935 if (sparc_cpu != PROCESSOR_ULTRASPARC
9936 && sparc_cpu != PROCESSOR_ULTRASPARC3
9937 && sparc_cpu != PROCESSOR_NIAGARA
9938 && sparc_cpu != PROCESSOR_NIAGARA2
9939 && sparc_cpu != PROCESSOR_NIAGARA3
9940 && sparc_cpu != PROCESSOR_NIAGARA4
9941 && sparc_cpu != PROCESSOR_NIAGARA7
9942 && sparc_cpu != PROCESSOR_M8)
9943 emit_insn
9944 (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 8))));
9945
9946 /* Call __enable_execute_stack after writing onto the stack to make sure
9947 the stack address is accessible. */
9948 #ifdef HAVE_ENABLE_EXECUTE_STACK
9949 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9950 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9951 #endif
9952 }
9953
9954 /* Worker for TARGET_TRAMPOLINE_INIT. */
9955
9956 static void
9957 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9958 {
9959 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9960 cxt = force_reg (Pmode, cxt);
9961 if (TARGET_ARCH64)
9962 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9963 else
9964 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9965 }
9966 \f
9967 /* Adjust the cost of a scheduling dependency. Return the new cost of
9968 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9969
9970 static int
9971 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9972 int cost)
9973 {
9974 enum attr_type insn_type;
9975
9976 if (recog_memoized (insn) < 0)
9977 return cost;
9978
9979 insn_type = get_attr_type (insn);
9980
9981 if (dep_type == 0)
9982 {
9983 /* Data dependency; DEP_INSN writes a register that INSN reads some
9984 cycles later. */
9985
9986 /* if a load, then the dependence must be on the memory address;
9987 add an extra "cycle". Note that the cost could be two cycles
9988 if the reg was written late in an instruction group; we can't tell
9989 here. */
9990 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9991 return cost + 3;
9992
9993 /* Get the delay only if the address of the store is the dependence. */
9994 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9995 {
9996 rtx pat = PATTERN(insn);
9997 rtx dep_pat = PATTERN (dep_insn);
9998
9999 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10000 return cost; /* This should not happen! */
10001
10002 /* The dependency between the two instructions was on the data that
10003 is being stored. Assume that this implies that the address of the
10004 store is not dependent. */
10005 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10006 return cost;
10007
10008 return cost + 3; /* An approximation. */
10009 }
10010
10011 /* A shift instruction cannot receive its data from an instruction
10012 in the same cycle; add a one cycle penalty. */
10013 if (insn_type == TYPE_SHIFT)
10014 return cost + 3; /* Split before cascade into shift. */
10015 }
10016 else
10017 {
10018 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10019 INSN writes some cycles later. */
10020
10021 /* These are only significant for the fpu unit; writing a fp reg before
10022 the fpu has finished with it stalls the processor. */
10023
10024 /* Reusing an integer register causes no problems. */
10025 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10026 return 0;
10027 }
10028
10029 return cost;
10030 }
10031
10032 static int
10033 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10034 int cost)
10035 {
10036 enum attr_type insn_type, dep_type;
10037 rtx pat = PATTERN(insn);
10038 rtx dep_pat = PATTERN (dep_insn);
10039
10040 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10041 return cost;
10042
10043 insn_type = get_attr_type (insn);
10044 dep_type = get_attr_type (dep_insn);
10045
10046 switch (dtype)
10047 {
10048 case 0:
10049 /* Data dependency; DEP_INSN writes a register that INSN reads some
10050 cycles later. */
10051
10052 switch (insn_type)
10053 {
10054 case TYPE_STORE:
10055 case TYPE_FPSTORE:
10056 /* Get the delay iff the address of the store is the dependence. */
10057 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10058 return cost;
10059
10060 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10061 return cost;
10062 return cost + 3;
10063
10064 case TYPE_LOAD:
10065 case TYPE_SLOAD:
10066 case TYPE_FPLOAD:
10067 /* If a load, then the dependence must be on the memory address. If
10068 the addresses aren't equal, then it might be a false dependency */
10069 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10070 {
10071 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10072 || GET_CODE (SET_DEST (dep_pat)) != MEM
10073 || GET_CODE (SET_SRC (pat)) != MEM
10074 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10075 XEXP (SET_SRC (pat), 0)))
10076 return cost + 2;
10077
10078 return cost + 8;
10079 }
10080 break;
10081
10082 case TYPE_BRANCH:
10083 /* Compare to branch latency is 0. There is no benefit from
10084 separating compare and branch. */
10085 if (dep_type == TYPE_COMPARE)
10086 return 0;
10087 /* Floating point compare to branch latency is less than
10088 compare to conditional move. */
10089 if (dep_type == TYPE_FPCMP)
10090 return cost - 1;
10091 break;
10092 default:
10093 break;
10094 }
10095 break;
10096
10097 case REG_DEP_ANTI:
10098 /* Anti-dependencies only penalize the fpu unit. */
10099 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10100 return 0;
10101 break;
10102
10103 default:
10104 break;
10105 }
10106
10107 return cost;
10108 }
10109
10110 static int
10111 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10112 unsigned int)
10113 {
10114 switch (sparc_cpu)
10115 {
10116 case PROCESSOR_SUPERSPARC:
10117 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10118 break;
10119 case PROCESSOR_HYPERSPARC:
10120 case PROCESSOR_SPARCLITE86X:
10121 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10122 break;
10123 default:
10124 break;
10125 }
10126 return cost;
10127 }
10128
10129 static void
10130 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10131 int sched_verbose ATTRIBUTE_UNUSED,
10132 int max_ready ATTRIBUTE_UNUSED)
10133 {}
10134
10135 static int
10136 sparc_use_sched_lookahead (void)
10137 {
10138 switch (sparc_cpu)
10139 {
10140 case PROCESSOR_ULTRASPARC:
10141 case PROCESSOR_ULTRASPARC3:
10142 return 4;
10143 case PROCESSOR_SUPERSPARC:
10144 case PROCESSOR_HYPERSPARC:
10145 case PROCESSOR_SPARCLITE86X:
10146 return 3;
10147 case PROCESSOR_NIAGARA4:
10148 case PROCESSOR_NIAGARA7:
10149 case PROCESSOR_M8:
10150 return 2;
10151 case PROCESSOR_NIAGARA:
10152 case PROCESSOR_NIAGARA2:
10153 case PROCESSOR_NIAGARA3:
10154 default:
10155 return 0;
10156 }
10157 }
10158
10159 static int
10160 sparc_issue_rate (void)
10161 {
10162 switch (sparc_cpu)
10163 {
10164 case PROCESSOR_ULTRASPARC:
10165 case PROCESSOR_ULTRASPARC3:
10166 case PROCESSOR_M8:
10167 return 4;
10168 case PROCESSOR_SUPERSPARC:
10169 return 3;
10170 case PROCESSOR_HYPERSPARC:
10171 case PROCESSOR_SPARCLITE86X:
10172 case PROCESSOR_V9:
10173 /* Assume V9 processors are capable of at least dual-issue. */
10174 case PROCESSOR_NIAGARA4:
10175 case PROCESSOR_NIAGARA7:
10176 return 2;
10177 case PROCESSOR_NIAGARA:
10178 case PROCESSOR_NIAGARA2:
10179 case PROCESSOR_NIAGARA3:
10180 default:
10181 return 1;
10182 }
10183 }
10184
10185 int
10186 sparc_branch_cost (bool speed_p, bool predictable_p)
10187 {
10188 if (!speed_p)
10189 return 2;
10190
10191 /* For pre-V9 processors we use a single value (usually 3) to take into
10192 account the potential annulling of the delay slot (which ends up being
10193 a bubble in the pipeline slot) plus a cycle to take into consideration
10194 the instruction cache effects.
10195
10196 On V9 and later processors, which have branch prediction facilities,
10197 we take into account whether the branch is (easily) predictable. */
10198 const int cost = sparc_costs->branch_cost;
10199
10200 switch (sparc_cpu)
10201 {
10202 case PROCESSOR_V9:
10203 case PROCESSOR_ULTRASPARC:
10204 case PROCESSOR_ULTRASPARC3:
10205 case PROCESSOR_NIAGARA:
10206 case PROCESSOR_NIAGARA2:
10207 case PROCESSOR_NIAGARA3:
10208 case PROCESSOR_NIAGARA4:
10209 case PROCESSOR_NIAGARA7:
10210 case PROCESSOR_M8:
10211 return cost + (predictable_p ? 0 : 2);
10212
10213 default:
10214 return cost;
10215 }
10216 }
10217
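/* Return 1 if the SET performed by INSN is known to leave the high 32 bits
   of its destination zero, -1 if it sign-extends a 32-bit value, and 0 if
   nothing can be deduced.  Helper for sparc_check_64 below.  */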
10218 static int
10219 set_extends (rtx_insn *insn)
10220 {
10221 rtx pat = PATTERN (insn);
10222
10223 switch (GET_CODE (SET_SRC (pat)))
10224 {
10225 /* Load and some shift instructions zero extend. */
10226 case MEM:
10227 case ZERO_EXTEND:
10228 /* sethi clears the high bits */
10229 case HIGH:
10230 /* LO_SUM is used with sethi. sethi cleared the high
10231 bits and the values used with lo_sum are positive */
10232 case LO_SUM:
10233 /* Store flag stores 0 or 1 */
10234 case LT: case LTU:
10235 case GT: case GTU:
10236 case LE: case LEU:
10237 case GE: case GEU:
10238 case EQ:
10239 case NE:
10240 return 1;
10241 case AND:
10242 {
10243 rtx op0 = XEXP (SET_SRC (pat), 0);
10244 rtx op1 = XEXP (SET_SRC (pat), 1);
10245 if (GET_CODE (op1) == CONST_INT)
10246 return INTVAL (op1) >= 0;
10247 if (GET_CODE (op0) != REG)
10248 return 0;
10249 if (sparc_check_64 (op0, insn) == 1)
10250 return 1;
10251 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10252 }
10253 case IOR:
10254 case XOR:
10255 {
10256 rtx op0 = XEXP (SET_SRC (pat), 0);
10257 rtx op1 = XEXP (SET_SRC (pat), 1);
10258 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10259 return 0;
10260 if (GET_CODE (op1) == CONST_INT)
10261 return INTVAL (op1) >= 0;
10262 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10263 }
10264 case LSHIFTRT:
10265 return GET_MODE (SET_SRC (pat)) == SImode;
10266 /* Positive integers leave the high bits zero. */
10267 case CONST_INT:
10268 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10269 case ASHIFTRT:
10270 case SIGN_EXTEND:
10271 return - (GET_MODE (SET_SRC (pat)) == SImode);
10272 case REG:
10273 return sparc_check_64 (SET_SRC (pat), insn);
10274 default:
10275 return 0;
10276 }
10277 }
10278
10279 /* We _ought_ to have only one kind per function, but... */
10280 static GTY(()) rtx sparc_addr_diff_list;
10281 static GTY(()) rtx sparc_addr_list;
10282
10283 void
10284 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10285 {
10286 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10287 if (diff)
10288 sparc_addr_diff_list
10289 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10290 else
10291 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10292 }
10293
10294 static void
10295 sparc_output_addr_vec (rtx vec)
10296 {
10297 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10298 int idx, vlen = XVECLEN (body, 0);
10299
10300 #ifdef ASM_OUTPUT_ADDR_VEC_START
10301 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10302 #endif
10303
10304 #ifdef ASM_OUTPUT_CASE_LABEL
10305 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10306 NEXT_INSN (lab));
10307 #else
10308 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10309 #endif
10310
10311 for (idx = 0; idx < vlen; idx++)
10312 {
10313 ASM_OUTPUT_ADDR_VEC_ELT
10314 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10315 }
10316
10317 #ifdef ASM_OUTPUT_ADDR_VEC_END
10318 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10319 #endif
10320 }
10321
10322 static void
10323 sparc_output_addr_diff_vec (rtx vec)
10324 {
10325 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10326 rtx base = XEXP (XEXP (body, 0), 0);
10327 int idx, vlen = XVECLEN (body, 1);
10328
10329 #ifdef ASM_OUTPUT_ADDR_VEC_START
10330 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10331 #endif
10332
10333 #ifdef ASM_OUTPUT_CASE_LABEL
10334 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10335 NEXT_INSN (lab));
10336 #else
10337 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10338 #endif
10339
10340 for (idx = 0; idx < vlen; idx++)
10341 {
10342 ASM_OUTPUT_ADDR_DIFF_ELT
10343 (asm_out_file,
10344 body,
10345 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10346 CODE_LABEL_NUMBER (base));
10347 }
10348
10349 #ifdef ASM_OUTPUT_ADDR_VEC_END
10350 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10351 #endif
10352 }
10353
10354 static void
10355 sparc_output_deferred_case_vectors (void)
10356 {
10357 rtx t;
10358 int align;
10359
10360 if (sparc_addr_list == NULL_RTX
10361 && sparc_addr_diff_list == NULL_RTX)
10362 return;
10363
10364 /* Align to cache line in the function's code section. */
10365 switch_to_section (current_function_section ());
10366
10367 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10368 if (align > 0)
10369 ASM_OUTPUT_ALIGN (asm_out_file, align);
10370
10371 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10372 sparc_output_addr_vec (XEXP (t, 0));
10373 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10374 sparc_output_addr_diff_vec (XEXP (t, 0));
10375
10376 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10377 }
10378
10379 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10380 unknown. Return 1 if the high bits are zero, -1 if the register is
10381 sign extended. */
10382 int
10383 sparc_check_64 (rtx x, rtx_insn *insn)
10384 {
10385 /* If a register is set only once it is safe to ignore insns this
10386 code does not know how to handle. The loop will either recognize
10387 the single set and return the correct value or fail to recognize
10388 it and return 0. */
10389 int set_once = 0;
10390 rtx y = x;
10391
10392 gcc_assert (GET_CODE (x) == REG);
10393
10394 if (GET_MODE (x) == DImode)
10395 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10396
10397 if (flag_expensive_optimizations
10398 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10399 set_once = 1;
10400
10401 if (insn == 0)
10402 {
10403 if (set_once)
10404 insn = get_last_insn_anywhere ();
10405 else
10406 return 0;
10407 }
10408
10409 while ((insn = PREV_INSN (insn)))
10410 {
10411 switch (GET_CODE (insn))
10412 {
10413 case JUMP_INSN:
10414 case NOTE:
10415 break;
10416 case CODE_LABEL:
10417 case CALL_INSN:
10418 default:
10419 if (! set_once)
10420 return 0;
10421 break;
10422 case INSN:
10423 {
10424 rtx pat = PATTERN (insn);
10425 if (GET_CODE (pat) != SET)
10426 return 0;
10427 if (rtx_equal_p (x, SET_DEST (pat)))
10428 return set_extends (insn);
10429 if (y && rtx_equal_p (y, SET_DEST (pat)))
10430 return set_extends (insn);
10431 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10432 return 0;
10433 }
10434 }
10435 }
10436 return 0;
10437 }
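/* output_v8plus_shift below uses this to decide whether the low word
   still needs an explicit "srl %L1, 0, %L1" to clear its high bits
   before being merged into a 64-bit value.  */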
10438
10439 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10440 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10441
10442 const char *
10443 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10444 {
10445 static char asm_code[60];
10446
10447 /* The scratch register is only required when the destination
10448 register is not a 64-bit global or out register. */
10449 if (which_alternative != 2)
10450 operands[3] = operands[0];
10451
10452 /* We can only shift by constants <= 63. */
10453 if (GET_CODE (operands[2]) == CONST_INT)
10454 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10455
10456 if (GET_CODE (operands[1]) == CONST_INT)
10457 {
10458 output_asm_insn ("mov\t%1, %3", operands);
10459 }
10460 else
10461 {
10462 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10463 if (sparc_check_64 (operands[1], insn) <= 0)
10464 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10465 output_asm_insn ("or\t%L1, %3, %3", operands);
10466 }
10467
10468 strcpy (asm_code, opcode);
10469
10470 if (which_alternative != 2)
10471 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10472 else
10473 return
10474 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10475 }
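/* For the alternatives that do not need the scratch register, the emitted
   sequence is roughly:

     sllx   %H1, 32, %3
     srl    %L1, 0, %L1     (omitted when sparc_check_64 proves it safe)
     or     %L1, %3, %3
     <op>   %0, %2, %L0
     srlx   %L0, 32, %H0

   i.e. the 64-bit value is assembled in one register, shifted, and split
   back into the V8+ register pair.  */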
10476 \f
10477 /* Output rtl to increment the profiler label LABELNO
10478 for profiling a function entry. */
10479
10480 void
10481 sparc_profile_hook (int labelno)
10482 {
10483 char buf[32];
10484 rtx lab, fun;
10485
10486 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10487 if (NO_PROFILE_COUNTERS)
10488 {
10489 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10490 }
10491 else
10492 {
10493 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10494 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10495 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10496 }
10497 }
10498 \f
10499 #ifdef TARGET_SOLARIS
10500 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10501
10502 static void
10503 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10504 tree decl ATTRIBUTE_UNUSED)
10505 {
10506 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10507 {
10508 solaris_elf_asm_comdat_section (name, flags, decl);
10509 return;
10510 }
10511
10512 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10513
10514 if (!(flags & SECTION_DEBUG))
10515 fputs (",#alloc", asm_out_file);
10516 #if HAVE_GAS_SECTION_EXCLUDE
10517 if (flags & SECTION_EXCLUDE)
10518 fputs (",#exclude", asm_out_file);
10519 #endif
10520 if (flags & SECTION_WRITE)
10521 fputs (",#write", asm_out_file);
10522 if (flags & SECTION_TLS)
10523 fputs (",#tls", asm_out_file);
10524 if (flags & SECTION_CODE)
10525 fputs (",#execinstr", asm_out_file);
10526
10527 if (flags & SECTION_NOTYPE)
10528 ;
10529 else if (flags & SECTION_BSS)
10530 fputs (",#nobits", asm_out_file);
10531 else
10532 fputs (",#progbits", asm_out_file);
10533
10534 fputc ('\n', asm_out_file);
10535 }
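/* E.g. an ordinary writable data section comes out as
	.section	"my_section",#alloc,#write,#progbits
   while a BSS-style section ends in #nobits instead (section name is
   illustrative).  */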
10536 #endif /* TARGET_SOLARIS */
10537
10538 /* We do not allow indirect calls to be optimized into sibling calls.
10539
10540 We cannot use sibling calls when delayed branches are disabled
10541 because they will likely require the call delay slot to be filled.
10542
10543 Also, on SPARC 32-bit we cannot emit a sibling call when the
10544 current function returns a structure. This is because the "unimp
10545 after call" convention would cause the callee to return to the
10546 wrong place. The generic code already disallows cases where the
10547 function being called returns a structure.
10548
10549 It may seem strange how this last case could occur. Usually there
10550 is code after the call which jumps to epilogue code which dumps the
10551 return value into the struct return area. That ought to invalidate
10552 the sibling call right? Well, in the C++ case we can end up passing
10553 the pointer to the struct return area to a constructor (which returns
10554 void) and then nothing else happens. Such a sibling call would look
10555 valid without the added check here.
10556
10557 VxWorks PIC PLT entries require the global pointer to be initialized
10558 on entry. We therefore can't emit sibling calls to them. */
10559 static bool
10560 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10561 {
10562 return (decl
10563 && flag_delayed_branch
10564 && (TARGET_ARCH64 || ! cfun->returns_struct)
10565 && !(TARGET_VXWORKS_RTP
10566 && flag_pic
10567 && !targetm.binds_local_p (decl)));
10568 }
10569 \f
10570 /* libfunc renaming. */
10571
10572 static void
10573 sparc_init_libfuncs (void)
10574 {
10575 if (TARGET_ARCH32)
10576 {
10577 /* Use the subroutines that Sun's library provides for integer
10578 multiply and divide. The `*' prevents an underscore from
10579 being prepended by the compiler. .umul is a little faster
10580 than .mul. */
10581 set_optab_libfunc (smul_optab, SImode, "*.umul");
10582 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10583 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10584 set_optab_libfunc (smod_optab, SImode, "*.rem");
10585 set_optab_libfunc (umod_optab, SImode, "*.urem");
10586
10587 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10588 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10589 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10590 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10591 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10592 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10593
10594 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10595 is because with soft-float, the SFmode and DFmode sqrt
10596 instructions will be absent, and the compiler will notice and
10597 try to use the TFmode sqrt instruction for calls to the
10598 builtin function sqrt, but this fails. */
10599 if (TARGET_FPU)
10600 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10601
10602 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10603 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10604 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10605 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10606 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10607 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10608
10609 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10610 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10611 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10612 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10613
10614 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10615 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10616 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10617 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10618
10619 if (DITF_CONVERSION_LIBFUNCS)
10620 {
10621 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10622 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10623 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10624 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10625 }
10626
10627 if (SUN_CONVERSION_LIBFUNCS)
10628 {
10629 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10630 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10631 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10632 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10633 }
10634 }
10635 if (TARGET_ARCH64)
10636 {
10637 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10638 do not exist in the library. Make sure the compiler does not
10639 emit calls to them by accident. (It should always use the
10640 hardware instructions.) */
10641 set_optab_libfunc (smul_optab, SImode, 0);
10642 set_optab_libfunc (sdiv_optab, SImode, 0);
10643 set_optab_libfunc (udiv_optab, SImode, 0);
10644 set_optab_libfunc (smod_optab, SImode, 0);
10645 set_optab_libfunc (umod_optab, SImode, 0);
10646
10647 if (SUN_INTEGER_MULTIPLY_64)
10648 {
10649 set_optab_libfunc (smul_optab, DImode, "__mul64");
10650 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10651 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10652 set_optab_libfunc (smod_optab, DImode, "__rem64");
10653 set_optab_libfunc (umod_optab, DImode, "__urem64");
10654 }
10655
10656 if (SUN_CONVERSION_LIBFUNCS)
10657 {
10658 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10659 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10660 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10661 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10662 }
10663 }
10664 }
10665 \f
10666 /* SPARC builtins. */
10667 enum sparc_builtins
10668 {
10669 /* FPU builtins. */
10670 SPARC_BUILTIN_LDFSR,
10671 SPARC_BUILTIN_STFSR,
10672
10673 /* VIS 1.0 builtins. */
10674 SPARC_BUILTIN_FPACK16,
10675 SPARC_BUILTIN_FPACK32,
10676 SPARC_BUILTIN_FPACKFIX,
10677 SPARC_BUILTIN_FEXPAND,
10678 SPARC_BUILTIN_FPMERGE,
10679 SPARC_BUILTIN_FMUL8X16,
10680 SPARC_BUILTIN_FMUL8X16AU,
10681 SPARC_BUILTIN_FMUL8X16AL,
10682 SPARC_BUILTIN_FMUL8SUX16,
10683 SPARC_BUILTIN_FMUL8ULX16,
10684 SPARC_BUILTIN_FMULD8SUX16,
10685 SPARC_BUILTIN_FMULD8ULX16,
10686 SPARC_BUILTIN_FALIGNDATAV4HI,
10687 SPARC_BUILTIN_FALIGNDATAV8QI,
10688 SPARC_BUILTIN_FALIGNDATAV2SI,
10689 SPARC_BUILTIN_FALIGNDATADI,
10690 SPARC_BUILTIN_WRGSR,
10691 SPARC_BUILTIN_RDGSR,
10692 SPARC_BUILTIN_ALIGNADDR,
10693 SPARC_BUILTIN_ALIGNADDRL,
10694 SPARC_BUILTIN_PDIST,
10695 SPARC_BUILTIN_EDGE8,
10696 SPARC_BUILTIN_EDGE8L,
10697 SPARC_BUILTIN_EDGE16,
10698 SPARC_BUILTIN_EDGE16L,
10699 SPARC_BUILTIN_EDGE32,
10700 SPARC_BUILTIN_EDGE32L,
10701 SPARC_BUILTIN_FCMPLE16,
10702 SPARC_BUILTIN_FCMPLE32,
10703 SPARC_BUILTIN_FCMPNE16,
10704 SPARC_BUILTIN_FCMPNE32,
10705 SPARC_BUILTIN_FCMPGT16,
10706 SPARC_BUILTIN_FCMPGT32,
10707 SPARC_BUILTIN_FCMPEQ16,
10708 SPARC_BUILTIN_FCMPEQ32,
10709 SPARC_BUILTIN_FPADD16,
10710 SPARC_BUILTIN_FPADD16S,
10711 SPARC_BUILTIN_FPADD32,
10712 SPARC_BUILTIN_FPADD32S,
10713 SPARC_BUILTIN_FPSUB16,
10714 SPARC_BUILTIN_FPSUB16S,
10715 SPARC_BUILTIN_FPSUB32,
10716 SPARC_BUILTIN_FPSUB32S,
10717 SPARC_BUILTIN_ARRAY8,
10718 SPARC_BUILTIN_ARRAY16,
10719 SPARC_BUILTIN_ARRAY32,
10720
10721 /* VIS 2.0 builtins. */
10722 SPARC_BUILTIN_EDGE8N,
10723 SPARC_BUILTIN_EDGE8LN,
10724 SPARC_BUILTIN_EDGE16N,
10725 SPARC_BUILTIN_EDGE16LN,
10726 SPARC_BUILTIN_EDGE32N,
10727 SPARC_BUILTIN_EDGE32LN,
10728 SPARC_BUILTIN_BMASK,
10729 SPARC_BUILTIN_BSHUFFLEV4HI,
10730 SPARC_BUILTIN_BSHUFFLEV8QI,
10731 SPARC_BUILTIN_BSHUFFLEV2SI,
10732 SPARC_BUILTIN_BSHUFFLEDI,
10733
10734 /* VIS 3.0 builtins. */
10735 SPARC_BUILTIN_CMASK8,
10736 SPARC_BUILTIN_CMASK16,
10737 SPARC_BUILTIN_CMASK32,
10738 SPARC_BUILTIN_FCHKSM16,
10739 SPARC_BUILTIN_FSLL16,
10740 SPARC_BUILTIN_FSLAS16,
10741 SPARC_BUILTIN_FSRL16,
10742 SPARC_BUILTIN_FSRA16,
10743 SPARC_BUILTIN_FSLL32,
10744 SPARC_BUILTIN_FSLAS32,
10745 SPARC_BUILTIN_FSRL32,
10746 SPARC_BUILTIN_FSRA32,
10747 SPARC_BUILTIN_PDISTN,
10748 SPARC_BUILTIN_FMEAN16,
10749 SPARC_BUILTIN_FPADD64,
10750 SPARC_BUILTIN_FPSUB64,
10751 SPARC_BUILTIN_FPADDS16,
10752 SPARC_BUILTIN_FPADDS16S,
10753 SPARC_BUILTIN_FPSUBS16,
10754 SPARC_BUILTIN_FPSUBS16S,
10755 SPARC_BUILTIN_FPADDS32,
10756 SPARC_BUILTIN_FPADDS32S,
10757 SPARC_BUILTIN_FPSUBS32,
10758 SPARC_BUILTIN_FPSUBS32S,
10759 SPARC_BUILTIN_FUCMPLE8,
10760 SPARC_BUILTIN_FUCMPNE8,
10761 SPARC_BUILTIN_FUCMPGT8,
10762 SPARC_BUILTIN_FUCMPEQ8,
10763 SPARC_BUILTIN_FHADDS,
10764 SPARC_BUILTIN_FHADDD,
10765 SPARC_BUILTIN_FHSUBS,
10766 SPARC_BUILTIN_FHSUBD,
10767 SPARC_BUILTIN_FNHADDS,
10768 SPARC_BUILTIN_FNHADDD,
10769 SPARC_BUILTIN_UMULXHI,
10770 SPARC_BUILTIN_XMULX,
10771 SPARC_BUILTIN_XMULXHI,
10772
10773 /* VIS 4.0 builtins. */
10774 SPARC_BUILTIN_FPADD8,
10775 SPARC_BUILTIN_FPADDS8,
10776 SPARC_BUILTIN_FPADDUS8,
10777 SPARC_BUILTIN_FPADDUS16,
10778 SPARC_BUILTIN_FPCMPLE8,
10779 SPARC_BUILTIN_FPCMPGT8,
10780 SPARC_BUILTIN_FPCMPULE16,
10781 SPARC_BUILTIN_FPCMPUGT16,
10782 SPARC_BUILTIN_FPCMPULE32,
10783 SPARC_BUILTIN_FPCMPUGT32,
10784 SPARC_BUILTIN_FPMAX8,
10785 SPARC_BUILTIN_FPMAX16,
10786 SPARC_BUILTIN_FPMAX32,
10787 SPARC_BUILTIN_FPMAXU8,
10788 SPARC_BUILTIN_FPMAXU16,
10789 SPARC_BUILTIN_FPMAXU32,
10790 SPARC_BUILTIN_FPMIN8,
10791 SPARC_BUILTIN_FPMIN16,
10792 SPARC_BUILTIN_FPMIN32,
10793 SPARC_BUILTIN_FPMINU8,
10794 SPARC_BUILTIN_FPMINU16,
10795 SPARC_BUILTIN_FPMINU32,
10796 SPARC_BUILTIN_FPSUB8,
10797 SPARC_BUILTIN_FPSUBS8,
10798 SPARC_BUILTIN_FPSUBUS8,
10799 SPARC_BUILTIN_FPSUBUS16,
10800
10801 /* VIS 4.0B builtins. */
10802
10803 /* Note that all the DICTUNPACK* entries should be kept
10804 contiguous. */
10805 SPARC_BUILTIN_FIRST_DICTUNPACK,
10806 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10807 SPARC_BUILTIN_DICTUNPACK16,
10808 SPARC_BUILTIN_DICTUNPACK32,
10809 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10810
10811 /* Note that all the FPCMP*SHL entries should be kept
10812 contiguous. */
10813 SPARC_BUILTIN_FIRST_FPCMPSHL,
10814 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10815 SPARC_BUILTIN_FPCMPGT8SHL,
10816 SPARC_BUILTIN_FPCMPEQ8SHL,
10817 SPARC_BUILTIN_FPCMPNE8SHL,
10818 SPARC_BUILTIN_FPCMPLE16SHL,
10819 SPARC_BUILTIN_FPCMPGT16SHL,
10820 SPARC_BUILTIN_FPCMPEQ16SHL,
10821 SPARC_BUILTIN_FPCMPNE16SHL,
10822 SPARC_BUILTIN_FPCMPLE32SHL,
10823 SPARC_BUILTIN_FPCMPGT32SHL,
10824 SPARC_BUILTIN_FPCMPEQ32SHL,
10825 SPARC_BUILTIN_FPCMPNE32SHL,
10826 SPARC_BUILTIN_FPCMPULE8SHL,
10827 SPARC_BUILTIN_FPCMPUGT8SHL,
10828 SPARC_BUILTIN_FPCMPULE16SHL,
10829 SPARC_BUILTIN_FPCMPUGT16SHL,
10830 SPARC_BUILTIN_FPCMPULE32SHL,
10831 SPARC_BUILTIN_FPCMPUGT32SHL,
10832 SPARC_BUILTIN_FPCMPDE8SHL,
10833 SPARC_BUILTIN_FPCMPDE16SHL,
10834 SPARC_BUILTIN_FPCMPDE32SHL,
10835 SPARC_BUILTIN_FPCMPUR8SHL,
10836 SPARC_BUILTIN_FPCMPUR16SHL,
10837 SPARC_BUILTIN_FPCMPUR32SHL,
10838 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10839
10840 SPARC_BUILTIN_MAX
10841 };
10842
10843 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10844 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10845
10846 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10847 The instruction should require a constant operand of some sort. The
10848 function prints an error if OPVAL is not valid. */
10849
10850 static int
10851 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10852 {
10853 if (GET_CODE (opval) != CONST_INT)
10854 {
10855 error ("%qs expects a constant argument", insn_data[icode].name);
10856 return false;
10857 }
10858
10859 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10860 {
10861 error ("constant argument out of range for %qs", insn_data[icode].name);
10862 return false;
10863 }
10864 return true;
10865 }
10866
10867 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10868 function decl or NULL_TREE if the builtin was not added. */
10869
10870 static tree
10871 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10872 tree type)
10873 {
10874 tree t
10875 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10876
10877 if (t)
10878 {
10879 sparc_builtins[code] = t;
10880 sparc_builtins_icode[code] = icode;
10881 }
10882
10883 return t;
10884 }
10885
10886 /* Likewise, but also marks the function as "const". */
10887
10888 static tree
10889 def_builtin_const (const char *name, enum insn_code icode,
10890 enum sparc_builtins code, tree type)
10891 {
10892 tree t = def_builtin (name, icode, code, type);
10893
10894 if (t)
10895 TREE_READONLY (t) = 1;
10896
10897 return t;
10898 }
10899
10900 /* Implement the TARGET_INIT_BUILTINS target hook.
10901 Create builtin functions for special SPARC instructions. */
10902
10903 static void
10904 sparc_init_builtins (void)
10905 {
10906 if (TARGET_FPU)
10907 sparc_fpu_init_builtins ();
10908
10909 if (TARGET_VIS)
10910 sparc_vis_init_builtins ();
10911 }
10912
10913 /* Create builtin functions for FPU instructions. */
10914
10915 static void
10916 sparc_fpu_init_builtins (void)
10917 {
10918 tree ftype
10919 = build_function_type_list (void_type_node,
10920 build_pointer_type (unsigned_type_node), 0);
10921 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10922 SPARC_BUILTIN_LDFSR, ftype);
10923 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10924 SPARC_BUILTIN_STFSR, ftype);
10925 }
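/* These correspond to the ldfsr/stfsr instructions.  A rough usage sketch
   from user code (illustrative only):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   copies %fsr out to memory
     ... adjust bits in fsr ...
     __builtin_load_fsr (&fsr);    loads %fsr back from memory  */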
10926
10927 /* Create builtin functions for VIS instructions. */
10928
10929 static void
10930 sparc_vis_init_builtins (void)
10931 {
10932 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10933 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10934 tree v4hi = build_vector_type (intHI_type_node, 4);
10935 tree v2hi = build_vector_type (intHI_type_node, 2);
10936 tree v2si = build_vector_type (intSI_type_node, 2);
10937 tree v1si = build_vector_type (intSI_type_node, 1);
10938
10939 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10940 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10941 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10942 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10943 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10944 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10945 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10946 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10947 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10948 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10949 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10950 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10951 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10952 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10953 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10954 v8qi, v8qi,
10955 intDI_type_node, 0);
10956 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10957 v8qi, v8qi, 0);
10958 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10959 v8qi, v8qi, 0);
10960 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10961 intSI_type_node, 0);
10962 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10963 intSI_type_node, 0);
10964 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10965 intSI_type_node, 0);
10966 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10967 intDI_type_node,
10968 intDI_type_node, 0);
10969 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10970 intSI_type_node,
10971 intSI_type_node, 0);
10972 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10973 ptr_type_node,
10974 intSI_type_node, 0);
10975 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10976 ptr_type_node,
10977 intDI_type_node, 0);
10978 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10979 ptr_type_node,
10980 ptr_type_node, 0);
10981 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10982 ptr_type_node,
10983 ptr_type_node, 0);
10984 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10985 v4hi, v4hi, 0);
10986 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10987 v2si, v2si, 0);
10988 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10989 v4hi, v4hi, 0);
10990 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10991 v2si, v2si, 0);
10992 tree void_ftype_di = build_function_type_list (void_type_node,
10993 intDI_type_node, 0);
10994 tree di_ftype_void = build_function_type_list (intDI_type_node,
10995 void_type_node, 0);
10996 tree void_ftype_si = build_function_type_list (void_type_node,
10997 intSI_type_node, 0);
10998 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10999 float_type_node,
11000 float_type_node, 0);
11001 tree df_ftype_df_df = build_function_type_list (double_type_node,
11002 double_type_node,
11003 double_type_node, 0);
11004
11005 /* Packing and expanding vectors. */
11006 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11007 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11008 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11009 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11010 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11011 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11012 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11013 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11014 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11015 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11016
11017 /* Multiplications. */
11018 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11019 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11020 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11021 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11022 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11023 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11024 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11025 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11026 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11027 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11028 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11029 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11030 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11031 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11032
11033 /* Data aligning. */
11034 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11035 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11036 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11037 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11038 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11039 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11040 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11041 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11042
11043 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11044 SPARC_BUILTIN_WRGSR, void_ftype_di);
11045 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11046 SPARC_BUILTIN_RDGSR, di_ftype_void);
11047
11048 if (TARGET_ARCH64)
11049 {
11050 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11051 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11052 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11053 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11054 }
11055 else
11056 {
11057 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11058 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11059 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11060 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11061 }
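/* A typical VIS idiom (sketched, not a drop-in recipe): call
   __builtin_vis_alignaddr (ptr, 0) to set the GSR alignment field and
   obtain an 8-byte aligned pointer, load two adjacent 8-byte chunks from
   it, then merge them with one of the faligndata builtins above, e.g.
   __builtin_vis_faligndatav8qi (d0, d1), to recover misaligned data.  */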
11062
11063 /* Pixel distance. */
11064 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11065 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11066
11067 /* Edge handling. */
11068 if (TARGET_ARCH64)
11069 {
11070 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11071 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11072 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11073 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11074 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11075 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11076 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11077 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11078 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11079 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11080 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11081 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11082 }
11083 else
11084 {
11085 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11086 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11087 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11088 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11089 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11090 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11091 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11092 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11093 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11094 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11095 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11096 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11097 }
11098
11099 /* Pixel compare. */
11100 if (TARGET_ARCH64)
11101 {
11102 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11103 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11104 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11105 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11106 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11107 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11108 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11109 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11110 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11111 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11112 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11113 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11114 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11115 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11116 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11117 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11118 }
11119 else
11120 {
11121 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11122 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11123 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11124 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11125 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11126 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11127 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11128 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11129 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11130 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11131 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11132 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11133 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11134 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11135 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11136 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11137 }
11138
11139 /* Addition and subtraction. */
11140 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11141 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11142 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11143 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11144 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11145 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11146 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11147 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11148 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11149 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11150 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11151 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11152 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11153 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11154 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11155 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
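
/* A minimal usage sketch for one of the builtins above (the typedef follows
   the style of the GCC manual's SPARC VIS documentation; the variable names
   are purely illustrative):

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi sum = __builtin_vis_fpadd16 (a, b);   // four partitioned 16-bit adds
   */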
11156
11157 /* Three-dimensional array addressing. */
11158 if (TARGET_ARCH64)
11159 {
11160 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11161 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11162 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11163 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11164 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11165 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11166 }
11167 else
11168 {
11169 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11170 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11171 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11172 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11173 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11174 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11175 }
11176
11177 if (TARGET_VIS2)
11178 {
11179 /* Edge handling. */
11180 if (TARGET_ARCH64)
11181 {
11182 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11183 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11184 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11185 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11186 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11187 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11188 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11189 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11190 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11191 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11192 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11193 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11194 }
11195 else
11196 {
11197 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11198 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11199 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11200 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11201 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11202 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11203 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11204 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11205 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11206 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11207 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11208 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11209 }
11210
11211 /* Byte mask and shuffle. */
11212 if (TARGET_ARCH64)
11213 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11214 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11215 else
11216 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11217 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11218 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11219 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11220 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11221 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11222 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11223 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11224 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11225 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11226 }
11227
11228 if (TARGET_VIS3)
11229 {
11230 if (TARGET_ARCH64)
11231 {
11232 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11233 SPARC_BUILTIN_CMASK8, void_ftype_di);
11234 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11235 SPARC_BUILTIN_CMASK16, void_ftype_di);
11236 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11237 SPARC_BUILTIN_CMASK32, void_ftype_di);
11238 }
11239 else
11240 {
11241 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11242 SPARC_BUILTIN_CMASK8, void_ftype_si);
11243 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11244 SPARC_BUILTIN_CMASK16, void_ftype_si);
11245 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11246 SPARC_BUILTIN_CMASK32, void_ftype_si);
11247 }
11248
11249 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11250 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11251
11252 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11253 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11254 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11255 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11256 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11257 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11258 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11259 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11260 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11261 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11262 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11263 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11264 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11265 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11266 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11267 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11268
11269 if (TARGET_ARCH64)
11270 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11271 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11272 else
11273 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11274 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11275
11276 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11277 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11278 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11279 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11280 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11281 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11282
11283 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11284 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11285 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11286 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11287 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11288 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11289 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11290 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11291 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11292 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11293 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11294 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11295 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11296 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11297 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11298 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11299
11300 if (TARGET_ARCH64)
11301 {
11302 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11303 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11304 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11305 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11306 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11307 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11308 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11309 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11310 }
11311 else
11312 {
11313 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11314 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11315 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11316 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11317 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11318 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11319 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11320 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11321 }
11322
11323 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11324 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11325 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11326 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11327 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11328 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11329 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11330 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11331 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11332 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11333 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11334 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11335
11336 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11337 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11338 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11339 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11340 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11341 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11342 }
11343
11344 if (TARGET_VIS4)
11345 {
11346 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11347 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11348 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11349 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11350 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11351 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11352 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11353 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11354
11355
11356 if (TARGET_ARCH64)
11357 {
11358 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11359 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11360 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11361 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11362 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11363 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11364 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11365 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11366 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11367 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11368 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11369 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11370 }
11371 else
11372 {
11373 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11374 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11375 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11376 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11377 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11378 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11379 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11380 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11381 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11382 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11383 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11384 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11385 }
11386
11387 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11388 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11389 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11390 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11391 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11392 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11393 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11394 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11395 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11396 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11397 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11398 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11399 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11400 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11401 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11402 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11403 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11404 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11405 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11406 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11407 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11408 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11409 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11410 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11411 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11412 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11413 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11414 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11415 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11416 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11417 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11418 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11419 }
11420
11421 if (TARGET_VIS4B)
11422 {
11423 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11424 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11425 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11426 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11427 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11428 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11429
11430 if (TARGET_ARCH64)
11431 {
11432 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11433 v8qi, v8qi,
11434 intSI_type_node, 0);
11435 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11436 v4hi, v4hi,
11437 intSI_type_node, 0);
11438 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11439 v2si, v2si,
11440 intSI_type_node, 0);
11441
11442 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11443 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11444 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11445 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11446 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11447 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11448 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11449 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11450
11451 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11452 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11453 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11454 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11455 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11456 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11457 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11458 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11459
11460 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11461 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11462 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11463 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11464 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11465 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11466 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11467 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11468
11469
11470 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11471 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11472 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11473 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11474
11475 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11476 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11477 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11478 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11479
11480 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11481 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11482 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11483 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11484
11485 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11486 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11487 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11488 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11489 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11490 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11491
11492 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11493 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11494 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11495 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11496 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11497 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11498
11499 }
11500 else
11501 {
11502 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11503 v8qi, v8qi,
11504 intSI_type_node, 0);
11505 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11506 v4hi, v4hi,
11507 intSI_type_node, 0);
11508 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11509 v2si, v2si,
11510 intSI_type_node, 0);
11511
11512 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11513 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11514 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11515 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11516 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11517 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11518 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11519 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11520
11521 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11522 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11523 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11524 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11525 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11526 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11527 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11528 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11529
11530 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11531 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11532 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11533 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11534 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11535 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11536 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11537 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11538
11539
11540 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11541 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11542 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11543 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11544
11545 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11546 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11547 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11548 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11549
11550 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11551 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11552 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11553 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11554
11555 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11556 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11557 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11558 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11559 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11560 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11561
11562 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11563 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11564 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11565 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11566 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11567 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11568 }
11569 }
11570 }
11571
11572 /* Implement TARGET_BUILTIN_DECL hook. */
11573
11574 static tree
11575 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11576 {
11577 if (code >= SPARC_BUILTIN_MAX)
11578 return error_mark_node;
11579
11580 return sparc_builtins[code];
11581 }
11582
11583 /* Implement TARGET_EXPAND_BUILTIN hook. */
11584
11585 static rtx
11586 sparc_expand_builtin (tree exp, rtx target,
11587 rtx subtarget ATTRIBUTE_UNUSED,
11588 machine_mode tmode ATTRIBUTE_UNUSED,
11589 int ignore ATTRIBUTE_UNUSED)
11590 {
11591 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11592 enum sparc_builtins code
11593 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11594 enum insn_code icode = sparc_builtins_icode[code];
11595 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11596 call_expr_arg_iterator iter;
11597 int arg_count = 0;
11598 rtx pat, op[4];
11599 tree arg;
11600
11601 if (nonvoid)
11602 {
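/* Reuse TARGET for the result when its mode and predicate match the
   insn's output operand; otherwise allocate a fresh pseudo. */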
11603 machine_mode tmode = insn_data[icode].operand[0].mode;
11604 if (!target
11605 || GET_MODE (target) != tmode
11606 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11607 op[0] = gen_reg_rtx (tmode);
11608 else
11609 op[0] = target;
11610 }
11611 else
11612 op[0] = NULL_RTX;
11613
11614 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11615 {
11616 const struct insn_operand_data *insn_op;
11617 int idx;
11618
11619 if (arg == error_mark_node)
11620 return NULL_RTX;
11621
11622 arg_count++;
11623 idx = arg_count - !nonvoid;
11624 insn_op = &insn_data[icode].operand[idx];
11625 op[arg_count] = expand_normal (arg);
11626
11627 /* Some of the builtins require constant arguments. We check
11628 for this here. */
11629 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11630 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11631 && arg_count == 3)
11632 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11633 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11634 && arg_count == 2))
11635 {
11636 if (!check_constant_argument (icode, idx, op[arg_count]))
11637 return const0_rtx;
11638 }
11639
11640 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11641 {
11642 if (!address_operand (op[arg_count], SImode))
11643 {
11644 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11645 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11646 }
11647 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11648 }
11649
11650 else if (insn_op->mode == V1DImode
11651 && GET_MODE (op[arg_count]) == DImode)
11652 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11653
11654 else if (insn_op->mode == V1SImode
11655 && GET_MODE (op[arg_count]) == SImode)
11656 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11657
11658 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11659 insn_op->mode))
11660 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11661 }
11662
11663 switch (arg_count)
11664 {
11665 case 0:
11666 pat = GEN_FCN (icode) (op[0]);
11667 break;
11668 case 1:
11669 if (nonvoid)
11670 pat = GEN_FCN (icode) (op[0], op[1]);
11671 else
11672 pat = GEN_FCN (icode) (op[1]);
11673 break;
11674 case 2:
11675 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11676 break;
11677 case 3:
11678 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11679 break;
11680 default:
11681 gcc_unreachable ();
11682 }
11683
11684 if (!pat)
11685 return NULL_RTX;
11686
11687 emit_insn (pat);
11688
11689 return (nonvoid ? op[0] : const0_rtx);
11690 }
11691
11692 /* Return the upper 16 bits of the 8x16 multiplication. */
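/* For example (values picked purely for illustration), e8 = 200 and
   e16 = 300 yield (200 * 300 + 128) / 256 = 234, i.e. the 24-bit product
   scaled down by 256 with rounding to nearest. */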
11693
11694 static int
11695 sparc_vis_mul8x16 (int e8, int e16)
11696 {
11697 return (e8 * e16 + 128) / 256;
11698 }
11699
11700 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11701 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11702
11703 static void
11704 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11705 tree inner_type, tree cst0, tree cst1)
11706 {
11707 unsigned i, num = VECTOR_CST_NELTS (cst0);
11708 int scale;
11709
11710 switch (fncode)
11711 {
11712 case SPARC_BUILTIN_FMUL8X16:
11713 for (i = 0; i < num; ++i)
11714 {
11715 int val
11716 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11717 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11718 n_elts->quick_push (build_int_cst (inner_type, val));
11719 }
11720 break;
11721
11722 case SPARC_BUILTIN_FMUL8X16AU:
11723 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11724
11725 for (i = 0; i < num; ++i)
11726 {
11727 int val
11728 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11729 scale);
11730 n_elts->quick_push (build_int_cst (inner_type, val));
11731 }
11732 break;
11733
11734 case SPARC_BUILTIN_FMUL8X16AL:
11735 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11736
11737 for (i = 0; i < num; ++i)
11738 {
11739 int val
11740 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11741 scale);
11742 n_elts->quick_push (build_int_cst (inner_type, val));
11743 }
11744 break;
11745
11746 default:
11747 gcc_unreachable ();
11748 }
11749 }
11750
11751 /* Implement TARGET_FOLD_BUILTIN hook.
11752
11753 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11754 result of the function call is ignored. NULL_TREE is returned if the
11755 function could not be folded. */
11756
11757 static tree
11758 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11759 tree *args, bool ignore)
11760 {
11761 enum sparc_builtins code
11762 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11763 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11764 tree arg0, arg1, arg2;
11765
11766 if (ignore)
11767 switch (code)
11768 {
11769 case SPARC_BUILTIN_LDFSR:
11770 case SPARC_BUILTIN_STFSR:
11771 case SPARC_BUILTIN_ALIGNADDR:
11772 case SPARC_BUILTIN_WRGSR:
11773 case SPARC_BUILTIN_BMASK:
11774 case SPARC_BUILTIN_CMASK8:
11775 case SPARC_BUILTIN_CMASK16:
11776 case SPARC_BUILTIN_CMASK32:
11777 break;
11778
11779 default:
11780 return build_zero_cst (rtype);
11781 }
11782
11783 switch (code)
11784 {
11785 case SPARC_BUILTIN_FEXPAND:
11786 arg0 = args[0];
11787 STRIP_NOPS (arg0);
11788
11789 if (TREE_CODE (arg0) == VECTOR_CST)
11790 {
11791 tree inner_type = TREE_TYPE (rtype);
11792 unsigned i;
11793
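/* fexpand converts each unsigned byte into a 16-bit fixed-point value by
   shifting it left by 4 bits, which is what the constant folding below
   replicates. */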
11794 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11795 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11796 {
11797 unsigned HOST_WIDE_INT val
11798 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11799 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11800 }
11801 return n_elts.build ();
11802 }
11803 break;
11804
11805 case SPARC_BUILTIN_FMUL8X16:
11806 case SPARC_BUILTIN_FMUL8X16AU:
11807 case SPARC_BUILTIN_FMUL8X16AL:
11808 arg0 = args[0];
11809 arg1 = args[1];
11810 STRIP_NOPS (arg0);
11811 STRIP_NOPS (arg1);
11812
11813 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11814 {
11815 tree inner_type = TREE_TYPE (rtype);
11816 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11817 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11818 return n_elts.build ();
11819 }
11820 break;
11821
11822 case SPARC_BUILTIN_FPMERGE:
11823 arg0 = args[0];
11824 arg1 = args[1];
11825 STRIP_NOPS (arg0);
11826 STRIP_NOPS (arg1);
11827
11828 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11829 {
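/* fpmerge interleaves the elements of its two operands, so the folded
   vector alternates elements taken from arg0 and arg1. */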
11830 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11831 unsigned i;
11832 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11833 {
11834 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11835 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11836 }
11837
11838 return n_elts.build ();
11839 }
11840 break;
11841
11842 case SPARC_BUILTIN_PDIST:
11843 case SPARC_BUILTIN_PDISTN:
11844 arg0 = args[0];
11845 arg1 = args[1];
11846 STRIP_NOPS (arg0);
11847 STRIP_NOPS (arg1);
11848 if (code == SPARC_BUILTIN_PDIST)
11849 {
11850 arg2 = args[2];
11851 STRIP_NOPS (arg2);
11852 }
11853 else
11854 arg2 = integer_zero_node;
11855
11856 if (TREE_CODE (arg0) == VECTOR_CST
11857 && TREE_CODE (arg1) == VECTOR_CST
11858 && TREE_CODE (arg2) == INTEGER_CST)
11859 {
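/* pdist accumulates the sum of the absolute differences of the byte
   elements into the accumulator, which is arg2 for PDIST and zero
   for PDISTN. */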
11860 bool overflow = false;
11861 widest_int result = wi::to_widest (arg2);
11862 widest_int tmp;
11863 unsigned i;
11864
11865 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11866 {
11867 tree e0 = VECTOR_CST_ELT (arg0, i);
11868 tree e1 = VECTOR_CST_ELT (arg1, i);
11869
11870 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11871
11872 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11873 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11874 if (wi::neg_p (tmp))
11875 tmp = wi::neg (tmp, &neg2_ovf);
11876 else
11877 neg2_ovf = wi::OVF_NONE;
11878 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11879 overflow |= ((neg1_ovf != wi::OVF_NONE)
11880 | (neg2_ovf != wi::OVF_NONE)
11881 | (add1_ovf != wi::OVF_NONE)
11882 | (add2_ovf != wi::OVF_NONE));
11883 }
11884
11885 gcc_assert (!overflow);
11886
11887 return wide_int_to_tree (rtype, result);
11888 }
11889
11890 default:
11891 break;
11892 }
11893
11894 return NULL_TREE;
11895 }
11896 \f
11897 /* ??? This duplicates information provided to the compiler by the
11898 ??? scheduler description. Some day, teach genautomata to output
11899 ??? the latencies and then CSE will just use that. */
11900
11901 static bool
11902 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11903 int opno ATTRIBUTE_UNUSED,
11904 int *total, bool speed ATTRIBUTE_UNUSED)
11905 {
11906 int code = GET_CODE (x);
11907 bool float_mode_p = FLOAT_MODE_P (mode);
11908
11909 switch (code)
11910 {
11911 case CONST_INT:
11912 if (SMALL_INT (x))
11913 *total = 0;
11914 else
11915 *total = 2;
11916 return true;
11917
11918 case CONST_WIDE_INT:
11919 *total = 0;
11920 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11921 *total += 2;
11922 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11923 *total += 2;
11924 return true;
11925
11926 case HIGH:
11927 *total = 2;
11928 return true;
11929
11930 case CONST:
11931 case LABEL_REF:
11932 case SYMBOL_REF:
11933 *total = 4;
11934 return true;
11935
11936 case CONST_DOUBLE:
11937 *total = 8;
11938 return true;
11939
11940 case MEM:
11941 /* If outer-code was a sign or zero extension, a cost
11942 of COSTS_N_INSNS (1) was already added in. This is
11943 why we are subtracting it back out. */
11944 if (outer_code == ZERO_EXTEND)
11945 {
11946 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11947 }
11948 else if (outer_code == SIGN_EXTEND)
11949 {
11950 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11951 }
11952 else if (float_mode_p)
11953 {
11954 *total = sparc_costs->float_load;
11955 }
11956 else
11957 {
11958 *total = sparc_costs->int_load;
11959 }
11960
11961 return true;
11962
11963 case PLUS:
11964 case MINUS:
11965 if (float_mode_p)
11966 *total = sparc_costs->float_plusminus;
11967 else
11968 *total = COSTS_N_INSNS (1);
11969 return false;
11970
11971 case FMA:
11972 {
11973 rtx sub;
11974
11975 gcc_assert (float_mode_p);
11976 *total = sparc_costs->float_mul;
11977
11978 sub = XEXP (x, 0);
11979 if (GET_CODE (sub) == NEG)
11980 sub = XEXP (sub, 0);
11981 *total += rtx_cost (sub, mode, FMA, 0, speed);
11982
11983 sub = XEXP (x, 2);
11984 if (GET_CODE (sub) == NEG)
11985 sub = XEXP (sub, 0);
11986 *total += rtx_cost (sub, mode, FMA, 2, speed);
11987 return true;
11988 }
11989
11990 case MULT:
11991 if (float_mode_p)
11992 *total = sparc_costs->float_mul;
11993 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11994 *total = COSTS_N_INSNS (25);
11995 else
11996 {
11997 int bit_cost;
11998
11999 bit_cost = 0;
12000 if (sparc_costs->int_mul_bit_factor)
12001 {
12002 int nbits;
12003
12004 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12005 {
12006 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
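/* Count the set bits of the constant multiplier: each iteration of
   value &= value - 1 clears the lowest set bit. */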
12007 for (nbits = 0; value != 0; value &= value - 1)
12008 nbits++;
12009 }
12010 else
12011 nbits = 7;
12012
12013 if (nbits < 3)
12014 nbits = 3;
12015 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12016 bit_cost = COSTS_N_INSNS (bit_cost);
12017 }
12018
12019 if (mode == DImode || !TARGET_HARD_MUL)
12020 *total = sparc_costs->int_mulX + bit_cost;
12021 else
12022 *total = sparc_costs->int_mul + bit_cost;
12023 }
12024 return false;
12025
12026 case ASHIFT:
12027 case ASHIFTRT:
12028 case LSHIFTRT:
12029 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12030 return false;
12031
12032 case DIV:
12033 case UDIV:
12034 case MOD:
12035 case UMOD:
12036 if (float_mode_p)
12037 {
12038 if (mode == DFmode)
12039 *total = sparc_costs->float_div_df;
12040 else
12041 *total = sparc_costs->float_div_sf;
12042 }
12043 else
12044 {
12045 if (mode == DImode)
12046 *total = sparc_costs->int_divX;
12047 else
12048 *total = sparc_costs->int_div;
12049 }
12050 return false;
12051
12052 case NEG:
12053 if (! float_mode_p)
12054 {
12055 *total = COSTS_N_INSNS (1);
12056 return false;
12057 }
12058 /* FALLTHRU */
12059
12060 case ABS:
12061 case FLOAT:
12062 case UNSIGNED_FLOAT:
12063 case FIX:
12064 case UNSIGNED_FIX:
12065 case FLOAT_EXTEND:
12066 case FLOAT_TRUNCATE:
12067 *total = sparc_costs->float_move;
12068 return false;
12069
12070 case SQRT:
12071 if (mode == DFmode)
12072 *total = sparc_costs->float_sqrt_df;
12073 else
12074 *total = sparc_costs->float_sqrt_sf;
12075 return false;
12076
12077 case COMPARE:
12078 if (float_mode_p)
12079 *total = sparc_costs->float_cmp;
12080 else
12081 *total = COSTS_N_INSNS (1);
12082 return false;
12083
12084 case IF_THEN_ELSE:
12085 if (float_mode_p)
12086 *total = sparc_costs->float_cmove;
12087 else
12088 *total = sparc_costs->int_cmove;
12089 return false;
12090
12091 case IOR:
12092 /* Handle the NAND vector patterns. */
12093 if (sparc_vector_mode_supported_p (mode)
12094 && GET_CODE (XEXP (x, 0)) == NOT
12095 && GET_CODE (XEXP (x, 1)) == NOT)
12096 {
12097 *total = COSTS_N_INSNS (1);
12098 return true;
12099 }
12100 else
12101 return false;
12102
12103 default:
12104 return false;
12105 }
12106 }
12107
12108 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12109
12110 static inline bool
12111 general_or_i64_p (reg_class_t rclass)
12112 {
12113 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12114 }
12115
12116 /* Implement TARGET_REGISTER_MOVE_COST. */
12117
12118 static int
12119 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12120 reg_class_t from, reg_class_t to)
12121 {
12122 bool need_memory = false;
12123
12124 /* This helps postreload CSE to eliminate redundant comparisons. */
12125 if (from == NO_REGS || to == NO_REGS)
12126 return 100;
12127
12128 if (from == FPCC_REGS || to == FPCC_REGS)
12129 need_memory = true;
12130 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12131 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12132 {
12133 if (TARGET_VIS3)
12134 {
12135 int size = GET_MODE_SIZE (mode);
12136 if (size == 8 || size == 4)
12137 {
12138 if (! TARGET_ARCH32 || size == 4)
12139 return 4;
12140 else
12141 return 6;
12142 }
12143 }
12144 need_memory = true;
12145 }
12146
12147 if (need_memory)
12148 {
12149 if (sparc_cpu == PROCESSOR_ULTRASPARC
12150 || sparc_cpu == PROCESSOR_ULTRASPARC3
12151 || sparc_cpu == PROCESSOR_NIAGARA
12152 || sparc_cpu == PROCESSOR_NIAGARA2
12153 || sparc_cpu == PROCESSOR_NIAGARA3
12154 || sparc_cpu == PROCESSOR_NIAGARA4
12155 || sparc_cpu == PROCESSOR_NIAGARA7
12156 || sparc_cpu == PROCESSOR_M8)
12157 return 12;
12158
12159 return 6;
12160 }
12161
12162 return 2;
12163 }
12164
12165 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12166 This is achieved by means of a manual dynamic stack space allocation in
12167 the current frame. We make the assumption that SEQ doesn't contain any
12168 function calls, with the possible exception of calls to the GOT helper. */
12169
12170 static void
12171 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12172 {
12173 /* We must preserve the lowest 16 words for the register save area. */
12174 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12175 /* We really need only 2 words of fresh stack space. */
12176 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12177
12178 rtx slot
12179 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12180 SPARC_STACK_BIAS + offset));
12181
12182 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12183 emit_insn (gen_rtx_SET (slot, reg));
12184 if (reg2)
12185 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12186 reg2));
12187 emit_insn (seq);
12188 if (reg2)
12189 emit_insn (gen_rtx_SET (reg2,
12190 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12191 emit_insn (gen_rtx_SET (reg, slot));
12192 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12193 }
12194
12195 /* Output the assembler code for a thunk function. THUNK_DECL is the
12196 declaration for the thunk function itself, FUNCTION is the decl for
12197 the target function. DELTA is an immediate constant offset to be
12198 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12199 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12200
12201 static void
12202 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12203 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12204 tree function)
12205 {
12206 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12207 rtx this_rtx, funexp;
12208 rtx_insn *insn;
12209 unsigned int int_arg_first;
12210
12211 reload_completed = 1;
12212 epilogue_completed = 1;
12213
12214 emit_note (NOTE_INSN_PROLOGUE_END);
12215
12216 if (TARGET_FLAT)
12217 {
12218 sparc_leaf_function_p = 1;
12219
12220 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12221 }
12222 else if (flag_delayed_branch)
12223 {
12224 /* We will emit a regular sibcall below, so we need to instruct
12225 output_sibcall that we are in a leaf function. */
12226 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12227
12228 /* This will cause final.c to invoke leaf_renumber_regs so we
12229 must behave as if we were in a not-yet-leafified function. */
12230 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12231 }
12232 else
12233 {
12234 /* We will emit the sibcall manually below, so we will need to
12235 manually spill non-leaf registers. */
12236 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12237
12238 /* We really are in a leaf function. */
12239 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12240 }
12241
12242 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12243 returns a structure, the structure return pointer is there instead. */
12244 if (TARGET_ARCH64
12245 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12246 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12247 else
12248 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12249
12250 /* Add DELTA. When possible use a plain add, otherwise load it into
12251 a register first. */
12252 if (delta)
12253 {
12254 rtx delta_rtx = GEN_INT (delta);
12255
12256 if (! SPARC_SIMM13_P (delta))
12257 {
12258 rtx scratch = gen_rtx_REG (Pmode, 1);
12259 emit_move_insn (scratch, delta_rtx);
12260 delta_rtx = scratch;
12261 }
12262
12263 /* THIS_RTX += DELTA. */
12264 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12265 }
12266
12267 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12268 if (vcall_offset)
12269 {
12270 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12271 rtx scratch = gen_rtx_REG (Pmode, 1);
12272
12273 gcc_assert (vcall_offset < 0);
12274
12275 /* SCRATCH = *THIS_RTX. */
12276 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12277
12278 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12279 may not have any available scratch register at this point. */
12280 if (SPARC_SIMM13_P (vcall_offset))
12281 ;
12282 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12283 else if (! fixed_regs[5]
12284 /* The below sequence is made up of at least 2 insns,
12285 while the default method may need only one. */
12286 && vcall_offset < -8192)
12287 {
12288 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12289 emit_move_insn (scratch2, vcall_offset_rtx);
12290 vcall_offset_rtx = scratch2;
12291 }
12292 else
12293 {
12294 rtx increment = GEN_INT (-4096);
12295
12296 /* VCALL_OFFSET is a negative number whose typical range can be
12297 estimated as -32768..0 in 32-bit mode. In almost all cases
12298 it is therefore cheaper to emit multiple add insns than
12299 spilling and loading the constant into a register (at least
12300 6 insns). */
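/* For instance, a hypothetical VCALL_OFFSET of -20000 takes four add
   insns of -4096 each, leaving -3616, which fits in a SIMM13. */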
12301 while (! SPARC_SIMM13_P (vcall_offset))
12302 {
12303 emit_insn (gen_add2_insn (scratch, increment));
12304 vcall_offset += 4096;
12305 }
12306 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12307 }
12308
12309 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12310 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12311 gen_rtx_PLUS (Pmode,
12312 scratch,
12313 vcall_offset_rtx)));
12314
12315 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12316 emit_insn (gen_add2_insn (this_rtx, scratch));
12317 }
12318
12319 /* Generate a tail call to the target function. */
12320 if (! TREE_USED (function))
12321 {
12322 assemble_external (function);
12323 TREE_USED (function) = 1;
12324 }
12325 funexp = XEXP (DECL_RTL (function), 0);
12326
12327 if (flag_delayed_branch)
12328 {
12329 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12330 insn = emit_call_insn (gen_sibcall (funexp));
12331 SIBLING_CALL_P (insn) = 1;
12332 }
12333 else
12334 {
12335 /* The hoops we have to jump through in order to generate a sibcall
12336 without using delay slots... */
12337 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12338
12339 if (flag_pic)
12340 {
12341 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12342 start_sequence ();
12343 load_got_register (); /* clobbers %o7 */
12344 if (!TARGET_VXWORKS_RTP)
12345 pic_offset_table_rtx = got_register_rtx;
12346 scratch = sparc_legitimize_pic_address (funexp, scratch);
12347 seq = get_insns ();
12348 end_sequence ();
12349 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12350 }
12351 else if (TARGET_ARCH32)
12352 {
12353 emit_insn (gen_rtx_SET (scratch,
12354 gen_rtx_HIGH (SImode, funexp)));
12355 emit_insn (gen_rtx_SET (scratch,
12356 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12357 }
12358 else /* TARGET_ARCH64 */
12359 {
12360 switch (sparc_code_model)
12361 {
12362 case CM_MEDLOW:
12363 case CM_MEDMID:
12364 /* The destination can serve as a temporary. */
12365 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12366 break;
12367
12368 case CM_MEDANY:
12369 case CM_EMBMEDANY:
12370 /* The destination cannot serve as a temporary. */
12371 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12372 start_sequence ();
12373 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12374 seq = get_insns ();
12375 end_sequence ();
12376 emit_and_preserve (seq, spill_reg, 0);
12377 break;
12378
12379 default:
12380 gcc_unreachable ();
12381 }
12382 }
12383
12384 emit_jump_insn (gen_indirect_jump (scratch));
12385 }
12386
12387 emit_barrier ();
12388
12389 /* Run just enough of rest_of_compilation to get the insns emitted.
12390 There's not really enough bulk here to make other passes such as
12391 instruction scheduling worth while. */
12392 insn = get_insns ();
12393 shorten_branches (insn);
12394 assemble_start_function (thunk_fndecl, fnname);
12395 final_start_function (insn, file, 1);
12396 final (insn, file, 1);
12397 final_end_function ();
12398 assemble_end_function (thunk_fndecl, fnname);
12399
12400 reload_completed = 0;
12401 epilogue_completed = 0;
12402 }
12403
12404 /* Return true if sparc_output_mi_thunk would be able to output the
12405 assembler code for the thunk function specified by the arguments
12406 it is passed, and false otherwise. */
12407 static bool
12408 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12409 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12410 HOST_WIDE_INT vcall_offset,
12411 const_tree function ATTRIBUTE_UNUSED)
12412 {
12413 /* Bound the loop used in the default method above. */
12414 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12415 }
12416
12417 /* How to allocate a 'struct machine_function'. */
12418
12419 static struct machine_function *
12420 sparc_init_machine_status (void)
12421 {
12422 return ggc_cleared_alloc<machine_function> ();
12423 }
12424 \f
12425 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12426
12427 static unsigned HOST_WIDE_INT
12428 sparc_asan_shadow_offset (void)
12429 {
12430 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12431 }
12432 \f
12433 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12434 We need to emit DTP-relative relocations. */
12435
12436 static void
12437 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12438 {
12439 switch (size)
12440 {
12441 case 4:
12442 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12443 break;
12444 case 8:
12445 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12446 break;
12447 default:
12448 gcc_unreachable ();
12449 }
12450 output_addr_const (file, x);
12451 fputs (")", file);
12452 }
12453
12454 /* Do whatever processing is required at the end of a file. */
12455
12456 static void
12457 sparc_file_end (void)
12458 {
12459 /* If we need to emit the special GOT helper function, do so now. */
12460 if (got_helper_needed)
12461 {
12462 const char *name = XSTR (got_helper_rtx, 0);
12463 #ifdef DWARF2_UNWIND_INFO
12464 bool do_cfi;
12465 #endif
12466
12467 if (USE_HIDDEN_LINKONCE)
12468 {
12469 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12470 get_identifier (name),
12471 build_function_type_list (void_type_node,
12472 NULL_TREE));
12473 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12474 NULL_TREE, void_type_node);
12475 TREE_PUBLIC (decl) = 1;
12476 TREE_STATIC (decl) = 1;
12477 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12478 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12479 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12480 resolve_unique_section (decl, 0, flag_function_sections);
12481 allocate_struct_function (decl, true);
12482 cfun->is_thunk = 1;
12483 current_function_decl = decl;
12484 init_varasm_status ();
12485 assemble_start_function (decl, name);
12486 }
12487 else
12488 {
12489 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12490 switch_to_section (text_section);
12491 if (align > 0)
12492 ASM_OUTPUT_ALIGN (asm_out_file, align);
12493 ASM_OUTPUT_LABEL (asm_out_file, name);
12494 }
12495
12496 #ifdef DWARF2_UNWIND_INFO
12497 do_cfi = dwarf2out_do_cfi_asm ();
12498 if (do_cfi)
12499 output_asm_insn (".cfi_startproc", NULL);
12500 #endif
12501 if (flag_delayed_branch)
12502 {
12503 output_asm_insn ("jmp\t%%o7+8", NULL);
12504 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12505 }
12506 else
12507 {
12508 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12509 output_asm_insn ("jmp\t%%o7+8", NULL);
12510 output_asm_insn (" nop", NULL);
12511 }
12512 #ifdef DWARF2_UNWIND_INFO
12513 if (do_cfi)
12514 output_asm_insn (".cfi_endproc", NULL);
12515 #endif
12516 }
12517
12518 if (NEED_INDICATE_EXEC_STACK)
12519 file_end_indicate_exec_stack ();
12520
12521 #ifdef TARGET_SOLARIS
12522 solaris_file_end ();
12523 #endif
12524 }
12525
12526 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12527 /* Implement TARGET_MANGLE_TYPE. */
12528
12529 static const char *
12530 sparc_mangle_type (const_tree type)
12531 {
12532 if (TARGET_ARCH32
12533 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12534 && TARGET_LONG_DOUBLE_128)
12535 return "g";
12536
12537 /* For all other types, use normal C++ mangling. */
12538 return NULL;
12539 }
12540 #endif
12541
12542 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12543 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12544 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
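/* For example, the compare-and-swap expander below calls
   sparc_emit_membar_for_model (model, 3, 1) to request a barrier covering
   both loads and stores before the atomic operation. */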
12545
12546 void
12547 sparc_emit_membar_for_model (enum memmodel model,
12548 int load_store, int before_after)
12549 {
12550 /* Bits for the MEMBAR mmask field. */
12551 const int LoadLoad = 1;
12552 const int StoreLoad = 2;
12553 const int LoadStore = 4;
12554 const int StoreStore = 8;
12555
12556 int mm = 0, implied = 0;
12557
12558 switch (sparc_memory_model)
12559 {
12560 case SMM_SC:
12561 /* Sequential Consistency. All memory transactions are immediately
12562 visible in sequential execution order. No barriers needed. */
12563 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12564 break;
12565
12566 case SMM_TSO:
12567 /* Total Store Ordering: all memory transactions with store semantics
12568 are followed by an implied StoreStore. */
12569 implied |= StoreStore;
12570
11571 /* If we're not looking for a raw barrier (before+after), then atomic
12572 operations get the benefit of being both load and store. */
12573 if (load_store == 3 && before_after == 1)
12574 implied |= StoreLoad;
12575 /* FALLTHRU */
12576
12577 case SMM_PSO:
12578 /* Partial Store Ordering: all memory transactions with load semantics
12579 are followed by an implied LoadLoad | LoadStore. */
12580 implied |= LoadLoad | LoadStore;
12581
11582 /* If we're not looking for a raw barrier (before+after), then atomic
12583 operations get the benefit of being both load and store. */
12584 if (load_store == 3 && before_after == 2)
12585 implied |= StoreLoad | StoreStore;
12586 /* FALLTHRU */
12587
12588 case SMM_RMO:
12589 /* Relaxed Memory Ordering: no implicit bits. */
12590 break;
12591
12592 default:
12593 gcc_unreachable ();
12594 }
12595
12596 if (before_after & 1)
12597 {
12598 if (is_mm_release (model) || is_mm_acq_rel (model)
12599 || is_mm_seq_cst (model))
12600 {
12601 if (load_store & 1)
12602 mm |= LoadLoad | StoreLoad;
12603 if (load_store & 2)
12604 mm |= LoadStore | StoreStore;
12605 }
12606 }
12607 if (before_after & 2)
12608 {
12609 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12610 || is_mm_seq_cst (model))
12611 {
12612 if (load_store & 1)
12613 mm |= LoadLoad | LoadStore;
12614 if (load_store & 2)
12615 mm |= StoreLoad | StoreStore;
12616 }
12617 }
12618
12619 /* Remove the bits implied by the system memory model. */
12620 mm &= ~implied;
12621
12622 /* For raw barriers (before+after), always emit a barrier.
12623 This will become a compile-time barrier if needed. */
12624 if (mm || before_after == 3)
12625 emit_insn (gen_membar (GEN_INT (mm)));
12626 }
12627
12628 /* Expand code to perform an 8 or 16-bit compare and swap by doing a 32-bit
12629 compare and swap on the word containing the byte or half-word. */
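/* The containing word is located by masking the low address bits (addr & -4),
   the sub-word is isolated with a shift amount derived from those bits and an
   0xff or 0xffff mask, and the 32-bit CAS is retried until the untouched
   bytes of the word are stable. */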
12630
12631 static void
12632 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12633 rtx oldval, rtx newval)
12634 {
12635 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12636 rtx addr = gen_reg_rtx (Pmode);
12637 rtx off = gen_reg_rtx (SImode);
12638 rtx oldv = gen_reg_rtx (SImode);
12639 rtx newv = gen_reg_rtx (SImode);
12640 rtx oldvalue = gen_reg_rtx (SImode);
12641 rtx newvalue = gen_reg_rtx (SImode);
12642 rtx res = gen_reg_rtx (SImode);
12643 rtx resv = gen_reg_rtx (SImode);
12644 rtx memsi, val, mask, cc;
12645
12646 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12647
12648 if (Pmode != SImode)
12649 addr1 = gen_lowpart (SImode, addr1);
12650 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12651
12652 memsi = gen_rtx_MEM (SImode, addr);
12653 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12654 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12655
12656 val = copy_to_reg (memsi);
12657
12658 emit_insn (gen_rtx_SET (off,
12659 gen_rtx_XOR (SImode, off,
12660 GEN_INT (GET_MODE (mem) == QImode
12661 ? 3 : 2))));
12662
12663 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12664
12665 if (GET_MODE (mem) == QImode)
12666 mask = force_reg (SImode, GEN_INT (0xff));
12667 else
12668 mask = force_reg (SImode, GEN_INT (0xffff));
12669
12670 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12671
12672 emit_insn (gen_rtx_SET (val,
12673 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12674 val)));
12675
12676 oldval = gen_lowpart (SImode, oldval);
12677 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12678
12679 newval = gen_lowpart_common (SImode, newval);
12680 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12681
12682 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12683
12684 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12685
12686 rtx_code_label *end_label = gen_label_rtx ();
12687 rtx_code_label *loop_label = gen_label_rtx ();
12688 emit_label (loop_label);
12689
12690 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12691
12692 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12693
12694 emit_move_insn (bool_result, const1_rtx);
12695
12696 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12697
12698 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12699
12700 emit_insn (gen_rtx_SET (resv,
12701 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12702 res)));
12703
12704 emit_move_insn (bool_result, const0_rtx);
12705
12706 cc = gen_compare_reg_1 (NE, resv, val);
12707 emit_insn (gen_rtx_SET (val, resv));
12708
12709 /* Use cbranchcc4 to separate the compare and branch! */
12710 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12711 cc, const0_rtx, loop_label));
12712
12713 emit_label (end_label);
12714
12715 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12716
12717 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12718
12719 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12720 }
12721
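/* Editorial sketch (not part of the original sources): the same technique
   expressed in freestanding C, for readers unfamiliar with the RTL above.
   It is a close analogue of the loop, not a line-for-line transcription;
   the shift uses big-endian byte numbering as on SPARC, which is what the
   XOR with 3 in the offset computation achieves.

     #include <stdint.h>
     #include <stdbool.h>

     static bool
     byte_cas_via_word (uint8_t *p, uint8_t expected, uint8_t desired)
     {
       uint32_t *word = (uint32_t *) ((uintptr_t) p & ~(uintptr_t) 3);
       unsigned shift = (((uintptr_t) p & 3) ^ 3) * 8;
       uint32_t mask = (uint32_t) 0xff << shift;
       uint32_t old_word = *word;

       for (;;)
         {
           uint32_t oldv = (old_word & ~mask) | ((uint32_t) expected << shift);
           uint32_t newv = (old_word & ~mask) | ((uint32_t) desired << shift);
           uint32_t prev = oldv;
           if (__atomic_compare_exchange_n (word, &prev, newv, false,
                                            __ATOMIC_RELAXED, __ATOMIC_RELAXED))
             return true;
           if ((prev & mask) != ((uint32_t) expected << shift))
             return false;           // the byte itself differed
           old_word = prev;          // only surrounding bytes changed; retry
         }
     }
*/
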
12722 /* Expand code to perform a compare-and-swap. */
12723
12724 void
12725 sparc_expand_compare_and_swap (rtx operands[])
12726 {
12727 rtx bval, retval, mem, oldval, newval;
12728 machine_mode mode;
12729 enum memmodel model;
12730
12731 bval = operands[0];
12732 retval = operands[1];
12733 mem = operands[2];
12734 oldval = operands[3];
12735 newval = operands[4];
12736 model = (enum memmodel) INTVAL (operands[6]);
12737 mode = GET_MODE (mem);
12738
12739 sparc_emit_membar_for_model (model, 3, 1);
12740
12741 if (reg_overlap_mentioned_p (retval, oldval))
12742 oldval = copy_to_reg (oldval);
12743
12744 if (mode == QImode || mode == HImode)
12745 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12746 else
12747 {
12748 rtx (*gen) (rtx, rtx, rtx, rtx);
12749 rtx x;
12750
12751 if (mode == SImode)
12752 gen = gen_atomic_compare_and_swapsi_1;
12753 else
12754 gen = gen_atomic_compare_and_swapdi_1;
12755 emit_insn (gen (retval, mem, oldval, newval));
12756
12757 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12758 if (x != bval)
12759 convert_move (bval, x, 1);
12760 }
12761
12762 sparc_emit_membar_for_model (model, 3, 2);
12763 }
12764
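/* Editorial illustration (not part of the original sources): both accesses
   below reach the expander above through the compare-and-swap optab; the
   16-bit one is routed to sparc_expand_compare_and_swap_12, while the
   32-bit one maps directly onto the native word-sized CAS pattern.

     #include <stdint.h>

     _Bool cas16 (uint16_t *p, uint16_t o, uint16_t n)
     {
       return __atomic_compare_exchange_n (p, &o, n, 0,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
     }

     _Bool cas32 (uint32_t *p, uint32_t o, uint32_t n)
     {
       return __atomic_compare_exchange_n (p, &o, n, 0,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
     }
*/
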
12765 void
12766 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12767 {
12768 rtx t_1, t_2, t_3;
12769
12770 sel = gen_lowpart (DImode, sel);
12771 switch (vmode)
12772 {
12773 case E_V2SImode:
12774 /* inp = xxxxxxxAxxxxxxxB */
12775 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12776 NULL_RTX, 1, OPTAB_DIRECT);
12777 /* t_1 = ....xxxxxxxAxxx. */
12778 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12779 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12780 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12781 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12782 /* sel = .......B */
12783 /* t_1 = ...A.... */
12784 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12785 /* sel = ...A...B */
12786 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12787 /* sel = AAAABBBB * 4 */
12788 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12789 /* sel = { A*4, A*4+1, A*4+2, ... } */
12790 break;
12791
12792 case E_V4HImode:
12793 /* inp = xxxAxxxBxxxCxxxD */
12794 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12795 NULL_RTX, 1, OPTAB_DIRECT);
12796 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12797 NULL_RTX, 1, OPTAB_DIRECT);
12798 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12799 NULL_RTX, 1, OPTAB_DIRECT);
12800 /* t_1 = ..xxxAxxxBxxxCxx */
12801 /* t_2 = ....xxxAxxxBxxxC */
12802 /* t_3 = ......xxxAxxxBxx */
12803 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12804 GEN_INT (0x07),
12805 NULL_RTX, 1, OPTAB_DIRECT);
12806 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12807 GEN_INT (0x0700),
12808 NULL_RTX, 1, OPTAB_DIRECT);
12809 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12810 GEN_INT (0x070000),
12811 NULL_RTX, 1, OPTAB_DIRECT);
12812 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12813 GEN_INT (0x07000000),
12814 NULL_RTX, 1, OPTAB_DIRECT);
12815 /* sel = .......D */
12816 /* t_1 = .....C.. */
12817 /* t_2 = ...B.... */
12818 /* t_3 = .A...... */
12819 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12820 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12821 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12822 /* sel = .A.B.C.D */
12823 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12824 /* sel = AABBCCDD * 2 */
12825 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12826 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12827 break;
12828
12829 case E_V8QImode:
12830 /* input = xAxBxCxDxExFxGxH */
12831 sel = expand_simple_binop (DImode, AND, sel,
12832 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12833 | 0x0f0f0f0f),
12834 NULL_RTX, 1, OPTAB_DIRECT);
12835 /* sel = .A.B.C.D.E.F.G.H */
12836 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12837 NULL_RTX, 1, OPTAB_DIRECT);
12838 /* t_1 = ..A.B.C.D.E.F.G. */
12839 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12840 NULL_RTX, 1, OPTAB_DIRECT);
12841 /* sel = .AABBCCDDEEFFGGH */
12842 sel = expand_simple_binop (DImode, AND, sel,
12843 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12844 | 0xff00ff),
12845 NULL_RTX, 1, OPTAB_DIRECT);
12846 /* sel = ..AB..CD..EF..GH */
12847 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12848 NULL_RTX, 1, OPTAB_DIRECT);
12849 /* t_1 = ....AB..CD..EF.. */
12850 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12851 NULL_RTX, 1, OPTAB_DIRECT);
12852 /* sel = ..ABABCDCDEFEFGH */
12853 sel = expand_simple_binop (DImode, AND, sel,
12854 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12855 NULL_RTX, 1, OPTAB_DIRECT);
12856 /* sel = ....ABCD....EFGH */
12857 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12858 NULL_RTX, 1, OPTAB_DIRECT);
12859 /* t_1 = ........ABCD.... */
12860 sel = gen_lowpart (SImode, sel);
12861 t_1 = gen_lowpart (SImode, t_1);
12862 break;
12863
12864 default:
12865 gcc_unreachable ();
12866 }
12867
12868 /* Always perform the final addition/merge within the bmask insn. */
12869 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12870 }
12871
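/* Editorial sketch (not part of the original sources): the V8QImode arm
   above is a standard nibble-compression.  In host C it reads roughly as
   follows; the final addition is performed by the BMASK instruction
   itself (see the emit_insn at the end of the function).

     #include <stdint.h>

     static uint32_t
     pack_v8qi_selector (uint64_t sel)
     {
       sel &= 0x0f0f0f0f0f0f0f0fULL;   // keep one 4-bit index per byte
       sel |= sel >> 4;
       sel &= 0x00ff00ff00ff00ffULL;   // pairs of indices per 16 bits
       sel |= sel >> 8;
       sel &= 0x0000ffff0000ffffULL;   // quadruples per 32 bits
       uint64_t hi = sel >> 16;
       return (uint32_t) sel + (uint32_t) hi;  // 0xABCD0000 + 0x0000EFGH
     }
*/
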
12872 /* Implement TARGET_VEC_PERM_CONST. */
12873
12874 static bool
12875 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12876 rtx op1, const vec_perm_indices &sel)
12877 {
12878 if (!TARGET_VIS2)
12879 return false;
12880
12881 /* All permutes are supported. */
12882 if (!target)
12883 return true;
12884
12885 /* Force target-independent code to convert constant permutations on other
12886 modes down to V8QI. Rely on this to avoid the complexity of the byte
12887 order of the permutation. */
12888 if (vmode != V8QImode)
12889 return false;
12890
12891 rtx nop0 = force_reg (vmode, op0);
12892 if (op0 == op1)
12893 op1 = nop0;
12894 op0 = nop0;
12895 op1 = force_reg (vmode, op1);
12896
12897 unsigned int i, mask;
12898 for (i = mask = 0; i < 8; ++i)
12899 mask |= (sel[i] & 0xf) << (28 - i*4);
12900 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12901
12902 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12903 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12904 return true;
12905 }
12906
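/* Editorial illustration (not part of the original sources): with -mvis2,
   a constant permutation written with the GNU vector extension, such as

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi reverse (v8qi x)
     {
       const v8qi sel = { 7, 6, 5, 4, 3, 2, 1, 0 };
       return __builtin_shuffle (x, sel);
     }

   is accepted by the hook above and lowered to a BMASK/BSHUFFLE pair.  */
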
12907 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12908
12909 static bool
12910 sparc_frame_pointer_required (void)
12911 {
12912 /* If the stack pointer is dynamically modified in the function, it cannot
12913 serve as the frame pointer. */
12914 if (cfun->calls_alloca)
12915 return true;
12916
12917 /* If the function receives nonlocal gotos, it needs to save the frame
12918 pointer in the nonlocal_goto_save_area object. */
12919 if (cfun->has_nonlocal_label)
12920 return true;
12921
12922 /* In flat mode, that's it. */
12923 if (TARGET_FLAT)
12924 return false;
12925
12926 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12927 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12928 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12929 }
12930
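/* Editorial illustration (not part of the original sources): any function
   that calls __builtin_alloca (or uses a variable-length array) modifies
   the stack pointer dynamically, so the calls_alloca test above forces a
   frame pointer for it, e.g.

     void fill (unsigned n)
     {
       char *p = __builtin_alloca (n);
       __builtin_memset (p, 0, n);
     }
*/
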
12931 /* The way this is structured, we can't eliminate SFP in favor of SP
12932 if the frame pointer is required: we want to use the SFP->HFP elimination
12933 in that case. But the test in update_eliminables doesn't know we are
12934 assuming below that we only do the former elimination. */
12935
12936 static bool
12937 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12938 {
12939 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12940 }
12941
12942 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12943 they won't be allocated. */
12944
12945 static void
12946 sparc_conditional_register_usage (void)
12947 {
12948 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12949 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12950 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12951 then honor it. */
12952 if (TARGET_ARCH32 && fixed_regs[5])
12953 fixed_regs[5] = 1;
12954 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12955 fixed_regs[5] = 0;
12956 if (! TARGET_V9)
12957 {
12958 int regno;
12959 for (regno = SPARC_FIRST_V9_FP_REG;
12960 regno <= SPARC_LAST_V9_FP_REG;
12961 regno++)
12962 fixed_regs[regno] = 1;
12963 /* %fcc0 is used by v8 and v9. */
12964 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12965 regno <= SPARC_LAST_V9_FCC_REG;
12966 regno++)
12967 fixed_regs[regno] = 1;
12968 }
12969 if (! TARGET_FPU)
12970 {
12971 int regno;
12972 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12973 fixed_regs[regno] = 1;
12974 }
12975 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12976 then honor it. Likewise with g3 and g4. */
12977 if (fixed_regs[2] == 2)
12978 fixed_regs[2] = ! TARGET_APP_REGS;
12979 if (fixed_regs[3] == 2)
12980 fixed_regs[3] = ! TARGET_APP_REGS;
12981 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12982 fixed_regs[4] = ! TARGET_APP_REGS;
12983 else if (TARGET_CM_EMBMEDANY)
12984 fixed_regs[4] = 1;
12985 else if (fixed_regs[4] == 2)
12986 fixed_regs[4] = 0;
12987 if (TARGET_FLAT)
12988 {
12989 int regno;
12990 /* Disable leaf functions. */
12991 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12992 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12993 leaf_reg_remap [regno] = regno;
12994 }
12995 if (TARGET_VIS)
12996 global_regs[SPARC_GSR_REG] = 1;
12997 }
12998
12999 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13000
13001 static bool
13002 sparc_use_pseudo_pic_reg (void)
13003 {
13004 return !TARGET_VXWORKS_RTP && flag_pic;
13005 }
13006
13007 /* Implement TARGET_INIT_PIC_REG. */
13008
13009 static void
13010 sparc_init_pic_reg (void)
13011 {
13012 edge entry_edge;
13013 rtx_insn *seq;
13014
13015 /* In PIC mode, we need to always initialize the PIC register if optimization
13016 is enabled, because we are called from IRA and LRA may later force things
13017 to the constant pool for optimization purposes. */
13018 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13019 return;
13020
13021 start_sequence ();
13022 load_got_register ();
13023 if (!TARGET_VXWORKS_RTP)
13024 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13025 seq = get_insns ();
13026 end_sequence ();
13027
13028 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13029 insert_insn_on_edge (seq, entry_edge);
13030 commit_one_edge_insertion (entry_edge);
13031 }
13032
13033 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13034
13035 - We can't load constants into FP registers.
13036 - We can't load FP constants into integer registers when soft-float,
13037 because there is no soft-float pattern with a r/F constraint.
13038 - We can't load FP constants into integer registers for TFmode unless
13039 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13040 - Try to reload integer constants (symbolic or otherwise) back into
13041 registers directly, rather than having them dumped to memory. */
13042
13043 static reg_class_t
13044 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13045 {
13046 machine_mode mode = GET_MODE (x);
13047 if (CONSTANT_P (x))
13048 {
13049 if (FP_REG_CLASS_P (rclass)
13050 || rclass == GENERAL_OR_FP_REGS
13051 || rclass == GENERAL_OR_EXTRA_FP_REGS
13052 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13053 || (mode == TFmode && ! const_zero_operand (x, mode)))
13054 return NO_REGS;
13055
13056 if (GET_MODE_CLASS (mode) == MODE_INT)
13057 return GENERAL_REGS;
13058
13059 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13060 {
13061 if (! FP_REG_CLASS_P (rclass)
13062 || !(const_zero_operand (x, mode)
13063 || const_all_ones_operand (x, mode)))
13064 return NO_REGS;
13065 }
13066 }
13067
13068 if (TARGET_VIS3
13069 && ! TARGET_ARCH64
13070 && (rclass == EXTRA_FP_REGS
13071 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13072 {
13073 int regno = true_regnum (x);
13074
13075 if (SPARC_INT_REG_P (regno))
13076 return (rclass == EXTRA_FP_REGS
13077 ? FP_REGS : GENERAL_OR_FP_REGS);
13078 }
13079
13080 return rclass;
13081 }
13082
13083 /* Return true if we use LRA instead of the reload pass. */
13084
13085 static bool
13086 sparc_lra_p (void)
13087 {
13088 return TARGET_LRA;
13089 }
13090
13091 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13092 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13093
13094 const char *
13095 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13096 {
13097 char mulstr[32];
13098
13099 gcc_assert (! TARGET_ARCH64);
13100
13101 if (sparc_check_64 (operands[1], insn) <= 0)
13102 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13103 if (which_alternative == 1)
13104 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13105 if (GET_CODE (operands[2]) == CONST_INT)
13106 {
13107 if (which_alternative == 1)
13108 {
13109 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13110 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13111 output_asm_insn (mulstr, operands);
13112 return "srlx\t%L0, 32, %H0";
13113 }
13114 else
13115 {
13116 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13117 output_asm_insn ("or\t%L1, %3, %3", operands);
13118 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13119 output_asm_insn (mulstr, operands);
13120 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13121 return "mov\t%3, %L0";
13122 }
13123 }
13124 else if (rtx_equal_p (operands[1], operands[2]))
13125 {
13126 if (which_alternative == 1)
13127 {
13128 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13129 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13130 output_asm_insn (mulstr, operands);
13131 return "srlx\t%L0, 32, %H0";
13132 }
13133 else
13134 {
13135 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13136 output_asm_insn ("or\t%L1, %3, %3", operands);
13137 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13138 output_asm_insn (mulstr, operands);
13139 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13140 return "mov\t%3, %L0";
13141 }
13142 }
13143 if (sparc_check_64 (operands[2], insn) <= 0)
13144 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13145 if (which_alternative == 1)
13146 {
13147 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13148 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13149 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13150 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13151 output_asm_insn (mulstr, operands);
13152 return "srlx\t%L0, 32, %H0";
13153 }
13154 else
13155 {
13156 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13157 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13158 output_asm_insn ("or\t%L1, %3, %3", operands);
13159 output_asm_insn ("or\t%L2, %4, %4", operands);
13160 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13161 output_asm_insn (mulstr, operands);
13162 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13163 return "mov\t%3, %L0";
13164 }
13165 }
13166
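/* Editorial note (not part of the original sources): for the general
   register/register case (alternative 0) the routine above emits,
   schematically,

     srl    %L1, 0, %L1        ! zero-extend the low halves if needed
     srl    %L2, 0, %L2
     sllx   %H1, 32, %3        ! rebuild the full 64-bit operands
     sllx   %H2, 32, %4
     or     %L1, %3, %3
     or     %L2, %4, %4
     <op>   %3, %4, %3         ! the wide multiply itself
     srlx   %3, 32, %H0        ! split the product back into halves
     mov    %3, %L0
*/
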
13167 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13168 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
13169 and INNER_MODE are the modes describing TARGET. */
13170
13171 static void
13172 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13173 machine_mode inner_mode)
13174 {
13175 rtx t1, final_insn, sel;
13176 int bmask;
13177
13178 t1 = gen_reg_rtx (mode);
13179
13180 elt = convert_modes (SImode, inner_mode, elt, true);
13181 emit_move_insn (gen_lowpart (SImode, t1), elt);
13182
13183 switch (mode)
13184 {
13185 case E_V2SImode:
13186 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13187 bmask = 0x45674567;
13188 break;
13189 case E_V4HImode:
13190 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13191 bmask = 0x67676767;
13192 break;
13193 case E_V8QImode:
13194 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13195 bmask = 0x77777777;
13196 break;
13197 default:
13198 gcc_unreachable ();
13199 }
13200
13201 sel = force_reg (SImode, GEN_INT (bmask));
13202 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13203 emit_insn (final_insn);
13204 }
13205
13206 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13207 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
13208
13209 static void
13210 vector_init_fpmerge (rtx target, rtx elt)
13211 {
13212 rtx t1, t2, t2_low, t3, t3_low;
13213
13214 t1 = gen_reg_rtx (V4QImode);
13215 elt = convert_modes (SImode, QImode, elt, true);
13216 emit_move_insn (gen_lowpart (SImode, t1), elt);
13217
13218 t2 = gen_reg_rtx (V8QImode);
13219 t2_low = gen_lowpart (V4QImode, t2);
13220 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13221
13222 t3 = gen_reg_rtx (V8QImode);
13223 t3_low = gen_lowpart (V4QImode, t3);
13224 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13225
13226 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13227 }
13228
13229 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13230 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
13231
13232 static void
13233 vector_init_faligndata (rtx target, rtx elt)
13234 {
13235 rtx t1 = gen_reg_rtx (V4HImode);
13236 int i;
13237
13238 elt = convert_modes (SImode, HImode, elt, true);
13239 emit_move_insn (gen_lowpart (SImode, t1), elt);
13240
13241 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13242 force_reg (SImode, GEN_INT (6)),
13243 const0_rtx));
13244
13245 for (i = 0; i < 4; i++)
13246 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13247 }
13248
13249 /* Emit code to initialize TARGET to values for individual fields VALS. */
13250
13251 void
13252 sparc_expand_vector_init (rtx target, rtx vals)
13253 {
13254 const machine_mode mode = GET_MODE (target);
13255 const machine_mode inner_mode = GET_MODE_INNER (mode);
13256 const int n_elts = GET_MODE_NUNITS (mode);
13257 int i, n_var = 0;
13258 bool all_same = true;
13259 rtx mem;
13260
13261 for (i = 0; i < n_elts; i++)
13262 {
13263 rtx x = XVECEXP (vals, 0, i);
13264 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13265 n_var++;
13266
13267 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13268 all_same = false;
13269 }
13270
13271 if (n_var == 0)
13272 {
13273 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13274 return;
13275 }
13276
13277 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13278 {
13279 if (GET_MODE_SIZE (inner_mode) == 4)
13280 {
13281 emit_move_insn (gen_lowpart (SImode, target),
13282 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13283 return;
13284 }
13285 else if (GET_MODE_SIZE (inner_mode) == 8)
13286 {
13287 emit_move_insn (gen_lowpart (DImode, target),
13288 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13289 return;
13290 }
13291 }
13292 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13293 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13294 {
13295 emit_move_insn (gen_highpart (word_mode, target),
13296 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13297 emit_move_insn (gen_lowpart (word_mode, target),
13298 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13299 return;
13300 }
13301
13302 if (all_same && GET_MODE_SIZE (mode) == 8)
13303 {
13304 if (TARGET_VIS2)
13305 {
13306 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13307 return;
13308 }
13309 if (mode == V8QImode)
13310 {
13311 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13312 return;
13313 }
13314 if (mode == V4HImode)
13315 {
13316 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13317 return;
13318 }
13319 }
13320
13321 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13322 for (i = 0; i < n_elts; i++)
13323 emit_move_insn (adjust_address_nv (mem, inner_mode,
13324 i * GET_MODE_SIZE (inner_mode)),
13325 XVECEXP (vals, 0, i));
13326 emit_move_insn (target, mem);
13327 }
13328
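/* Editorial illustration (not part of the original sources): a variable
   splat written with the GNU vector extension, e.g.

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi splat (short x)
     {
       return (v4hi) { x, x, x, x };
     }

   takes the all_same path above: BSHUFFLE when VIS2 is available,
   otherwise FALIGNDATA for V4HImode (or FPMERGE for V8QImode).  */
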
13329 /* Implement TARGET_SECONDARY_RELOAD. */
13330
13331 static reg_class_t
13332 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13333 machine_mode mode, secondary_reload_info *sri)
13334 {
13335 enum reg_class rclass = (enum reg_class) rclass_i;
13336
13337 sri->icode = CODE_FOR_nothing;
13338 sri->extra_cost = 0;
13339
13340 /* We need a temporary when loading/storing a HImode/QImode value
13341 between memory and the FPU registers. This can happen when combine puts
13342 a paradoxical subreg in a float/fix conversion insn. */
13343 if (FP_REG_CLASS_P (rclass)
13344 && (mode == HImode || mode == QImode)
13345 && (GET_CODE (x) == MEM
13346 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13347 && true_regnum (x) == -1)))
13348 return GENERAL_REGS;
13349
13350 /* On 32-bit we need a temporary when loading/storing a DFmode value
13351 between unaligned memory and the upper FPU registers. */
13352 if (TARGET_ARCH32
13353 && rclass == EXTRA_FP_REGS
13354 && mode == DFmode
13355 && GET_CODE (x) == MEM
13356 && ! mem_min_alignment (x, 8))
13357 return FP_REGS;
13358
13359 if (((TARGET_CM_MEDANY
13360 && symbolic_operand (x, mode))
13361 || (TARGET_CM_EMBMEDANY
13362 && text_segment_operand (x, mode)))
13363 && ! flag_pic)
13364 {
13365 if (in_p)
13366 sri->icode = direct_optab_handler (reload_in_optab, mode);
13367 else
13368 sri->icode = direct_optab_handler (reload_out_optab, mode);
13369 return NO_REGS;
13370 }
13371
13372 if (TARGET_VIS3 && TARGET_ARCH32)
13373 {
13374 int regno = true_regnum (x);
13375
13376 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13377 to move 8-byte values in 4-byte pieces. This only works via
13378 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13379 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13380 an FP_REGS intermediate move. */
13381 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13382 || ((general_or_i64_p (rclass)
13383 || rclass == GENERAL_OR_FP_REGS)
13384 && SPARC_FP_REG_P (regno)))
13385 {
13386 sri->extra_cost = 2;
13387 return FP_REGS;
13388 }
13389 }
13390
13391 return NO_REGS;
13392 }
13393
13394 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13395
13396 On SPARC when not VIS3 it is not possible to directly move data
13397 between GENERAL_REGS and FP_REGS. */
13398
13399 static bool
13400 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13401 reg_class_t class2)
13402 {
13403 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13404 && (! TARGET_VIS3
13405 || GET_MODE_SIZE (mode) > 8
13406 || GET_MODE_SIZE (mode) < 4));
13407 }
13408
13409 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13410
13411 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13412 because the movsi and movsf patterns don't handle r/f moves.
13413 For v8 we copy the default definition. */
13414
13415 static machine_mode
13416 sparc_secondary_memory_needed_mode (machine_mode mode)
13417 {
13418 if (TARGET_ARCH64)
13419 {
13420 if (GET_MODE_BITSIZE (mode) < 32)
13421 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13422 return mode;
13423 }
13424 else
13425 {
13426 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13427 return mode_for_size (BITS_PER_WORD,
13428 GET_MODE_CLASS (mode), 0).require ();
13429 return mode;
13430 }
13431 }
13432
13433 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13434 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13435
13436 bool
13437 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13438 {
13439 enum rtx_code rc = GET_CODE (operands[1]);
13440 machine_mode cmp_mode;
13441 rtx cc_reg, dst, cmp;
13442
13443 cmp = operands[1];
13444 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13445 return false;
13446
13447 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13448 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13449
13450 cmp_mode = GET_MODE (XEXP (cmp, 0));
13451 rc = GET_CODE (cmp);
13452
13453 dst = operands[0];
13454 if (! rtx_equal_p (operands[2], dst)
13455 && ! rtx_equal_p (operands[3], dst))
13456 {
13457 if (reg_overlap_mentioned_p (dst, cmp))
13458 dst = gen_reg_rtx (mode);
13459
13460 emit_move_insn (dst, operands[3]);
13461 }
13462 else if (operands[2] == dst)
13463 {
13464 operands[2] = operands[3];
13465
13466 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13467 rc = reverse_condition_maybe_unordered (rc);
13468 else
13469 rc = reverse_condition (rc);
13470 }
13471
13472 if (XEXP (cmp, 1) == const0_rtx
13473 && GET_CODE (XEXP (cmp, 0)) == REG
13474 && cmp_mode == DImode
13475 && v9_regcmp_p (rc))
13476 cc_reg = XEXP (cmp, 0);
13477 else
13478 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13479
13480 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13481
13482 emit_insn (gen_rtx_SET (dst,
13483 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13484
13485 if (dst != operands[0])
13486 emit_move_insn (operands[0], dst);
13487
13488 return true;
13489 }
13490
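/* Editorial illustration (not part of the original sources): on V9 this
   expander turns a source-level selection such as

     long pick (long a, long b, long c, long d)
     {
       return a < b ? c : d;
     }

   into a compare followed by a MOVcc, or a MOVr when the condition is a
   register-against-zero test accepted by v9_regcmp_p.  */
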
13491 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13492 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13493 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13494 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13495 code to be used for the condition mask. */
13496
13497 void
13498 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13499 {
13500 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13501 enum rtx_code code = GET_CODE (operands[3]);
13502
13503 mask = gen_reg_rtx (Pmode);
13504 cop0 = operands[4];
13505 cop1 = operands[5];
13506 if (code == LT || code == GE)
13507 {
13508 rtx t;
13509
13510 code = swap_condition (code);
13511 t = cop0; cop0 = cop1; cop1 = t;
13512 }
13513
13514 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13515
13516 fcmp = gen_rtx_UNSPEC (Pmode,
13517 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13518 fcode);
13519
13520 cmask = gen_rtx_UNSPEC (DImode,
13521 gen_rtvec (2, mask, gsr),
13522 ccode);
13523
13524 bshuf = gen_rtx_UNSPEC (mode,
13525 gen_rtvec (3, operands[1], operands[2], gsr),
13526 UNSPEC_BSHUFFLE);
13527
13528 emit_insn (gen_rtx_SET (mask, fcmp));
13529 emit_insn (gen_rtx_SET (gsr, cmask));
13530
13531 emit_insn (gen_rtx_SET (operands[0], bshuf));
13532 }
13533
13534 /* On the SPARC, any mode which naturally allocates into the single float
13535 registers should return 4 here. */
13536
13537 unsigned int
13538 sparc_regmode_natural_size (machine_mode mode)
13539 {
13540 const enum mode_class cl = GET_MODE_CLASS (mode);
13541
13542 if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4)
13543 return 4;
13544
13545 return UNITS_PER_WORD;
13546 }
13547
13548 /* Implement TARGET_HARD_REGNO_NREGS.
13549
13550 On SPARC, ordinary registers hold 32 bits worth; this means both
13551 integer and floating point registers. On v9, integer regs hold 64
13552 bits worth; floating point regs hold 32 bits worth (this includes the
13553 new fp regs as even the odd ones are included in the hard register
13554 count). */
13555
13556 static unsigned int
13557 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13558 {
13559 if (regno == SPARC_GSR_REG)
13560 return 1;
13561 if (TARGET_ARCH64)
13562 {
13563 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13564 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13565 return CEIL (GET_MODE_SIZE (mode), 4);
13566 }
13567 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13568 }
13569
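/* Editorial example: under TARGET_ARCH64, a DFmode value (8 bytes) thus
   occupies CEIL (8, UNITS_PER_WORD) = 1 integer register but
   CEIL (8, 4) = 2 consecutive floating-point registers, reflecting the
   32-bit granularity of the FP register numbering described above.  */
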
13570 /* Implement TARGET_HARD_REGNO_MODE_OK.
13571
13572 ??? Because of the funny way we pass parameters we should allow certain
13573 ??? types of float/complex values to be in integer registers during
13574 ??? RTL generation. This only matters on arch32. */
13575
13576 static bool
13577 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13578 {
13579 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13580 }
13581
13582 /* Implement TARGET_MODES_TIEABLE_P.
13583
13584 For V9 we have to deal with the fact that only the lower 32 floating
13585 point registers are 32-bit addressable. */
13586
13587 static bool
13588 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13589 {
13590 enum mode_class mclass1, mclass2;
13591 unsigned short size1, size2;
13592
13593 if (mode1 == mode2)
13594 return true;
13595
13596 mclass1 = GET_MODE_CLASS (mode1);
13597 mclass2 = GET_MODE_CLASS (mode2);
13598 if (mclass1 != mclass2)
13599 return false;
13600
13601 if (! TARGET_V9)
13602 return true;
13603
13604 /* Classes are the same and we are V9 so we have to deal with upper
13605 vs. lower floating point registers. If one of the modes is a
13606 4-byte mode, and the other is not, we have to mark them as not
13607 tieable because only the lower 32 floating point registers are
13608 addressable 32 bits at a time.
13609
13610 We can't just test explicitly for SFmode, otherwise we won't
13611 cover the vector mode cases properly. */
13612
13613 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13614 return true;
13615
13616 size1 = GET_MODE_SIZE (mode1);
13617 size2 = GET_MODE_SIZE (mode2);
13618 if ((size1 > 4 && size2 == 4)
13619 || (size2 > 4 && size1 == 4))
13620 return false;
13621
13622 return true;
13623 }
13624
13625 /* Implement TARGET_CSTORE_MODE. */
13626
13627 static scalar_int_mode
13628 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13629 {
13630 return (TARGET_ARCH64 ? DImode : SImode);
13631 }
13632
13633 /* Return the compound expression made of T1 and T2. */
13634
13635 static inline tree
13636 compound_expr (tree t1, tree t2)
13637 {
13638 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13639 }
13640
13641 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13642
13643 static void
13644 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13645 {
13646 if (!TARGET_FPU)
13647 return;
13648
13649 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13650 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13651
13652 /* We generate the equivalent of feholdexcept (&fenv_var):
13653
13654 unsigned int fenv_var;
13655 __builtin_store_fsr (&fenv_var);
13656
13657 unsigned int tmp1_var;
13658 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13659
13660 __builtin_load_fsr (&tmp1_var); */
13661
13662 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13663 TREE_ADDRESSABLE (fenv_var) = 1;
13664 tree fenv_addr = build_fold_addr_expr (fenv_var);
13665 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13666 tree hold_stfsr
13667 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13668 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13669
13670 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13671 TREE_ADDRESSABLE (tmp1_var) = 1;
13672 tree masked_fenv_var
13673 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13674 build_int_cst (unsigned_type_node,
13675 ~(accrued_exception_mask | trap_enable_mask)));
13676 tree hold_mask
13677 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13678 NULL_TREE, NULL_TREE);
13679
13680 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13681 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13682 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13683
13684 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13685
13686 /* We reload the value of tmp1_var to clear the exceptions:
13687
13688 __builtin_load_fsr (&tmp1_var); */
13689
13690 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13691
13692 /* We generate the equivalent of feupdateenv (&fenv_var):
13693
13694 unsigned int tmp2_var;
13695 __builtin_store_fsr (&tmp2_var);
13696
13697 __builtin_load_fsr (&fenv_var);
13698
13699 if (SPARC_LOW_FE_EXCEPT_VALUES)
13700 tmp2_var >>= 5;
13701 __atomic_feraiseexcept ((int) tmp2_var); */
13702
13703 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13704 TREE_ADDRESSABLE (tmp2_var) = 1;
13705 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13706 tree update_stfsr
13707 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13708 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13709
13710 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13711
13712 tree atomic_feraiseexcept
13713 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13714 tree update_call
13715 = build_call_expr (atomic_feraiseexcept, 1,
13716 fold_convert (integer_type_node, tmp2_var));
13717
13718 if (SPARC_LOW_FE_EXCEPT_VALUES)
13719 {
13720 tree shifted_tmp2_var
13721 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13722 build_int_cst (unsigned_type_node, 5));
13723 tree update_shift
13724 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13725 update_call = compound_expr (update_shift, update_call);
13726 }
13727
13728 *update
13729 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13730 }
13731
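/* Editorial illustration (not part of the original sources): this hook
   supports atomic compound assignment on floating-point objects, e.g.

     _Atomic float total;

     void add_sample (float x)
     {
       total += x;
     }

   where the FSR save/mask/restore sequences built above bracket the
   compare-and-swap loop so that FP exceptions raised by discarded loop
   iterations are not spuriously reported.  */
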
13732 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13733
13734 SImode loads to floating-point registers are not zero-extended.
13735 The definition for LOAD_EXTEND_OP specifies that integer loads
13736 narrower than BITS_PER_WORD will be zero-extended. As a result,
13737 we inhibit changes from SImode unless they are to a mode that is
13738 identical in size.
13739
13740 Likewise for SFmode, since word-mode paradoxical subregs are
13741 problematic on big-endian architectures. */
13742
13743 static bool
13744 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13745 reg_class_t rclass)
13746 {
13747 if (TARGET_ARCH64
13748 && GET_MODE_SIZE (from) == 4
13749 && GET_MODE_SIZE (to) != 4)
13750 return !reg_classes_intersect_p (rclass, FP_REGS);
13751 return true;
13752 }
13753
13754 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13755
13756 static HOST_WIDE_INT
13757 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13758 {
13759 if (TREE_CODE (exp) == STRING_CST)
13760 return MAX (align, FASTEST_ALIGNMENT);
13761 return align;
13762 }
13763
13764 /* Implement TARGET_ZERO_CALL_USED_REGS.
13765
13766 Generate a sequence of instructions that zero registers specified by
13767 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
13768 zeroed. */
13769
13770 static HARD_REG_SET
13771 sparc_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
13772 {
13773 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13774 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
13775 {
13776 /* Do not touch the CC registers or the FP registers if no VIS. */
13777 if (regno >= SPARC_FCC_REG
13778 || (regno >= SPARC_FIRST_FP_REG && !TARGET_VIS))
13779 CLEAR_HARD_REG_BIT (need_zeroed_hardregs, regno);
13780
13781 /* Do not access the odd upper FP registers individually. */
13782 else if (regno >= SPARC_FIRST_V9_FP_REG && (regno & 1))
13783 ;
13784
13785 /* Use the most natural mode for the registers, which is not given by
13786 regno_reg_rtx/reg_raw_mode for the FP registers on the SPARC. */
13787 else
13788 {
13789 machine_mode mode;
13790 rtx reg;
13791
13792 if (regno < SPARC_FIRST_FP_REG)
13793 {
13794 reg = regno_reg_rtx[regno];
13795 mode = GET_MODE (reg);
13796 }
13797 else
13798 {
13799 mode = regno < SPARC_FIRST_V9_FP_REG ? SFmode : DFmode;
13800 reg = gen_raw_REG (mode, regno);
13801 }
13802
13803 emit_move_insn (reg, CONST0_RTX (mode));
13804 }
13805 }
13806
13807 return need_zeroed_hardregs;
13808 }
13809
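/* Editorial illustration (not part of the original sources): this is the
   hook behind -fzero-call-used-regs and the corresponding function
   attribute, e.g.

     int __attribute__ ((zero_call_used_regs ("used-gpr")))
     f (int x)
     {
       return x + 1;
     }

   which asks for the call-clobbered integer registers actually used by f
   to be cleared before returning.  */
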
13810 #include "gt-sparc.h"