1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "params.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "tree-vector-builder.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127 A value of zero indicates that the multiply cost is fixed
128 and not variable.  (A worked example follows this struct.)  */
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142 };
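/* Worked example of the variable multiply cost formula above (the operand
   value is a made-up illustration): with int_mul = COSTS_N_INSNS (4) and
   int_mul_bit_factor = 2, as in ultrasparc_costs below, a multiply whose
   rs1 operand has its highest set bit at position 11 costs

     COSTS_N_INSNS (4) + (11 - 3) / 2 = COSTS_N_INSNS (4) + 4

   whereas a table with int_mul_bit_factor = 0 charges a flat int_mul.  */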
143
144 static const
145 struct processor_costs cypress_costs = {
146 COSTS_N_INSNS (2), /* int load */
147 COSTS_N_INSNS (2), /* int signed load */
148 COSTS_N_INSNS (2), /* int zeroed load */
149 COSTS_N_INSNS (2), /* float load */
150 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
151 COSTS_N_INSNS (5), /* fadd, fsub */
152 COSTS_N_INSNS (1), /* fcmp */
153 COSTS_N_INSNS (1), /* fmov, fmovr */
154 COSTS_N_INSNS (7), /* fmul */
155 COSTS_N_INSNS (37), /* fdivs */
156 COSTS_N_INSNS (37), /* fdivd */
157 COSTS_N_INSNS (63), /* fsqrts */
158 COSTS_N_INSNS (63), /* fsqrtd */
159 COSTS_N_INSNS (1), /* imul */
160 COSTS_N_INSNS (1), /* imulX */
161 0, /* imul bit factor */
162 COSTS_N_INSNS (1), /* idiv */
163 COSTS_N_INSNS (1), /* idivX */
164 COSTS_N_INSNS (1), /* movcc/movr */
165 0, /* shift penalty */
166 };
167
168 static const
169 struct processor_costs supersparc_costs = {
170 COSTS_N_INSNS (1), /* int load */
171 COSTS_N_INSNS (1), /* int signed load */
172 COSTS_N_INSNS (1), /* int zeroed load */
173 COSTS_N_INSNS (0), /* float load */
174 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
175 COSTS_N_INSNS (3), /* fadd, fsub */
176 COSTS_N_INSNS (3), /* fcmp */
177 COSTS_N_INSNS (1), /* fmov, fmovr */
178 COSTS_N_INSNS (3), /* fmul */
179 COSTS_N_INSNS (6), /* fdivs */
180 COSTS_N_INSNS (9), /* fdivd */
181 COSTS_N_INSNS (12), /* fsqrts */
182 COSTS_N_INSNS (12), /* fsqrtd */
183 COSTS_N_INSNS (4), /* imul */
184 COSTS_N_INSNS (4), /* imulX */
185 0, /* imul bit factor */
186 COSTS_N_INSNS (4), /* idiv */
187 COSTS_N_INSNS (4), /* idivX */
188 COSTS_N_INSNS (1), /* movcc/movr */
189 1, /* shift penalty */
190 };
191
192 static const
193 struct processor_costs hypersparc_costs = {
194 COSTS_N_INSNS (1), /* int load */
195 COSTS_N_INSNS (1), /* int signed load */
196 COSTS_N_INSNS (1), /* int zeroed load */
197 COSTS_N_INSNS (1), /* float load */
198 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
199 COSTS_N_INSNS (1), /* fadd, fsub */
200 COSTS_N_INSNS (1), /* fcmp */
201 COSTS_N_INSNS (1), /* fmov, fmovr */
202 COSTS_N_INSNS (1), /* fmul */
203 COSTS_N_INSNS (8), /* fdivs */
204 COSTS_N_INSNS (12), /* fdivd */
205 COSTS_N_INSNS (17), /* fsqrts */
206 COSTS_N_INSNS (17), /* fsqrtd */
207 COSTS_N_INSNS (17), /* imul */
208 COSTS_N_INSNS (17), /* imulX */
209 0, /* imul bit factor */
210 COSTS_N_INSNS (17), /* idiv */
211 COSTS_N_INSNS (17), /* idivX */
212 COSTS_N_INSNS (1), /* movcc/movr */
213 0, /* shift penalty */
214 };
215
216 static const
217 struct processor_costs leon_costs = {
218 COSTS_N_INSNS (1), /* int load */
219 COSTS_N_INSNS (1), /* int signed load */
220 COSTS_N_INSNS (1), /* int zeroed load */
221 COSTS_N_INSNS (1), /* float load */
222 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
223 COSTS_N_INSNS (1), /* fadd, fsub */
224 COSTS_N_INSNS (1), /* fcmp */
225 COSTS_N_INSNS (1), /* fmov, fmovr */
226 COSTS_N_INSNS (1), /* fmul */
227 COSTS_N_INSNS (15), /* fdivs */
228 COSTS_N_INSNS (15), /* fdivd */
229 COSTS_N_INSNS (23), /* fsqrts */
230 COSTS_N_INSNS (23), /* fsqrtd */
231 COSTS_N_INSNS (5), /* imul */
232 COSTS_N_INSNS (5), /* imulX */
233 0, /* imul bit factor */
234 COSTS_N_INSNS (5), /* idiv */
235 COSTS_N_INSNS (5), /* idivX */
236 COSTS_N_INSNS (1), /* movcc/movr */
237 0, /* shift penalty */
238 };
239
240 static const
241 struct processor_costs leon3_costs = {
242 COSTS_N_INSNS (1), /* int load */
243 COSTS_N_INSNS (1), /* int signed load */
244 COSTS_N_INSNS (1), /* int zeroed load */
245 COSTS_N_INSNS (1), /* float load */
246 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
247 COSTS_N_INSNS (1), /* fadd, fsub */
248 COSTS_N_INSNS (1), /* fcmp */
249 COSTS_N_INSNS (1), /* fmov, fmovr */
250 COSTS_N_INSNS (1), /* fmul */
251 COSTS_N_INSNS (14), /* fdivs */
252 COSTS_N_INSNS (15), /* fdivd */
253 COSTS_N_INSNS (22), /* fsqrts */
254 COSTS_N_INSNS (23), /* fsqrtd */
255 COSTS_N_INSNS (5), /* imul */
256 COSTS_N_INSNS (5), /* imulX */
257 0, /* imul bit factor */
258 COSTS_N_INSNS (35), /* idiv */
259 COSTS_N_INSNS (35), /* idivX */
260 COSTS_N_INSNS (1), /* movcc/movr */
261 0, /* shift penalty */
262 };
263
264 static const
265 struct processor_costs sparclet_costs = {
266 COSTS_N_INSNS (3), /* int load */
267 COSTS_N_INSNS (3), /* int signed load */
268 COSTS_N_INSNS (1), /* int zeroed load */
269 COSTS_N_INSNS (1), /* float load */
270 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
271 COSTS_N_INSNS (1), /* fadd, fsub */
272 COSTS_N_INSNS (1), /* fcmp */
273 COSTS_N_INSNS (1), /* fmov, fmovr */
274 COSTS_N_INSNS (1), /* fmul */
275 COSTS_N_INSNS (1), /* fdivs */
276 COSTS_N_INSNS (1), /* fdivd */
277 COSTS_N_INSNS (1), /* fsqrts */
278 COSTS_N_INSNS (1), /* fsqrtd */
279 COSTS_N_INSNS (5), /* imul */
280 COSTS_N_INSNS (5), /* imulX */
281 0, /* imul bit factor */
282 COSTS_N_INSNS (5), /* idiv */
283 COSTS_N_INSNS (5), /* idivX */
284 COSTS_N_INSNS (1), /* movcc/movr */
285 0, /* shift penalty */
286 };
287
288 static const
289 struct processor_costs ultrasparc_costs = {
290 COSTS_N_INSNS (2), /* int load */
291 COSTS_N_INSNS (3), /* int signed load */
292 COSTS_N_INSNS (2), /* int zeroed load */
293 COSTS_N_INSNS (2), /* float load */
294 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
295 COSTS_N_INSNS (4), /* fadd, fsub */
296 COSTS_N_INSNS (1), /* fcmp */
297 COSTS_N_INSNS (2), /* fmov, fmovr */
298 COSTS_N_INSNS (4), /* fmul */
299 COSTS_N_INSNS (13), /* fdivs */
300 COSTS_N_INSNS (23), /* fdivd */
301 COSTS_N_INSNS (13), /* fsqrts */
302 COSTS_N_INSNS (23), /* fsqrtd */
303 COSTS_N_INSNS (4), /* imul */
304 COSTS_N_INSNS (4), /* imulX */
305 2, /* imul bit factor */
306 COSTS_N_INSNS (37), /* idiv */
307 COSTS_N_INSNS (68), /* idivX */
308 COSTS_N_INSNS (2), /* movcc/movr */
309 2, /* shift penalty */
310 };
311
312 static const
313 struct processor_costs ultrasparc3_costs = {
314 COSTS_N_INSNS (2), /* int load */
315 COSTS_N_INSNS (3), /* int signed load */
316 COSTS_N_INSNS (3), /* int zeroed load */
317 COSTS_N_INSNS (2), /* float load */
318 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
319 COSTS_N_INSNS (4), /* fadd, fsub */
320 COSTS_N_INSNS (5), /* fcmp */
321 COSTS_N_INSNS (3), /* fmov, fmovr */
322 COSTS_N_INSNS (4), /* fmul */
323 COSTS_N_INSNS (17), /* fdivs */
324 COSTS_N_INSNS (20), /* fdivd */
325 COSTS_N_INSNS (20), /* fsqrts */
326 COSTS_N_INSNS (29), /* fsqrtd */
327 COSTS_N_INSNS (6), /* imul */
328 COSTS_N_INSNS (6), /* imulX */
329 0, /* imul bit factor */
330 COSTS_N_INSNS (40), /* idiv */
331 COSTS_N_INSNS (71), /* idivX */
332 COSTS_N_INSNS (2), /* movcc/movr */
333 0, /* shift penalty */
334 };
335
336 static const
337 struct processor_costs niagara_costs = {
338 COSTS_N_INSNS (3), /* int load */
339 COSTS_N_INSNS (3), /* int signed load */
340 COSTS_N_INSNS (3), /* int zeroed load */
341 COSTS_N_INSNS (9), /* float load */
342 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
343 COSTS_N_INSNS (8), /* fadd, fsub */
344 COSTS_N_INSNS (26), /* fcmp */
345 COSTS_N_INSNS (8), /* fmov, fmovr */
346 COSTS_N_INSNS (29), /* fmul */
347 COSTS_N_INSNS (54), /* fdivs */
348 COSTS_N_INSNS (83), /* fdivd */
349 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
350 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
351 COSTS_N_INSNS (11), /* imul */
352 COSTS_N_INSNS (11), /* imulX */
353 0, /* imul bit factor */
354 COSTS_N_INSNS (72), /* idiv */
355 COSTS_N_INSNS (72), /* idivX */
356 COSTS_N_INSNS (1), /* movcc/movr */
357 0, /* shift penalty */
358 };
359
360 static const
361 struct processor_costs niagara2_costs = {
362 COSTS_N_INSNS (3), /* int load */
363 COSTS_N_INSNS (3), /* int signed load */
364 COSTS_N_INSNS (3), /* int zeroed load */
365 COSTS_N_INSNS (3), /* float load */
366 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
367 COSTS_N_INSNS (6), /* fadd, fsub */
368 COSTS_N_INSNS (6), /* fcmp */
369 COSTS_N_INSNS (6), /* fmov, fmovr */
370 COSTS_N_INSNS (6), /* fmul */
371 COSTS_N_INSNS (19), /* fdivs */
372 COSTS_N_INSNS (33), /* fdivd */
373 COSTS_N_INSNS (19), /* fsqrts */
374 COSTS_N_INSNS (33), /* fsqrtd */
375 COSTS_N_INSNS (5), /* imul */
376 COSTS_N_INSNS (5), /* imulX */
377 0, /* imul bit factor */
378 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
379 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
380 COSTS_N_INSNS (1), /* movcc/movr */
381 0, /* shift penalty */
382 };
383
384 static const
385 struct processor_costs niagara3_costs = {
386 COSTS_N_INSNS (3), /* int load */
387 COSTS_N_INSNS (3), /* int signed load */
388 COSTS_N_INSNS (3), /* int zeroed load */
389 COSTS_N_INSNS (3), /* float load */
390 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
391 COSTS_N_INSNS (9), /* fadd, fsub */
392 COSTS_N_INSNS (9), /* fcmp */
393 COSTS_N_INSNS (9), /* fmov, fmovr */
394 COSTS_N_INSNS (9), /* fmul */
395 COSTS_N_INSNS (23), /* fdivs */
396 COSTS_N_INSNS (37), /* fdivd */
397 COSTS_N_INSNS (23), /* fsqrts */
398 COSTS_N_INSNS (37), /* fsqrtd */
399 COSTS_N_INSNS (9), /* imul */
400 COSTS_N_INSNS (9), /* imulX */
401 0, /* imul bit factor */
402 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
403 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
404 COSTS_N_INSNS (1), /* movcc/movr */
405 0, /* shift penalty */
406 };
407
408 static const
409 struct processor_costs niagara4_costs = {
410 COSTS_N_INSNS (5), /* int load */
411 COSTS_N_INSNS (5), /* int signed load */
412 COSTS_N_INSNS (5), /* int zeroed load */
413 COSTS_N_INSNS (5), /* float load */
414 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
415 COSTS_N_INSNS (11), /* fadd, fsub */
416 COSTS_N_INSNS (11), /* fcmp */
417 COSTS_N_INSNS (11), /* fmov, fmovr */
418 COSTS_N_INSNS (11), /* fmul */
419 COSTS_N_INSNS (24), /* fdivs */
420 COSTS_N_INSNS (37), /* fdivd */
421 COSTS_N_INSNS (24), /* fsqrts */
422 COSTS_N_INSNS (37), /* fsqrtd */
423 COSTS_N_INSNS (12), /* imul */
424 COSTS_N_INSNS (12), /* imulX */
425 0, /* imul bit factor */
426 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
427 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
428 COSTS_N_INSNS (1), /* movcc/movr */
429 0, /* shift penalty */
430 };
431
432 static const
433 struct processor_costs niagara7_costs = {
434 COSTS_N_INSNS (5), /* int load */
435 COSTS_N_INSNS (5), /* int signed load */
436 COSTS_N_INSNS (5), /* int zeroed load */
437 COSTS_N_INSNS (5), /* float load */
438 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
439 COSTS_N_INSNS (11), /* fadd, fsub */
440 COSTS_N_INSNS (11), /* fcmp */
441 COSTS_N_INSNS (11), /* fmov, fmovr */
442 COSTS_N_INSNS (11), /* fmul */
443 COSTS_N_INSNS (24), /* fdivs */
444 COSTS_N_INSNS (37), /* fdivd */
445 COSTS_N_INSNS (24), /* fsqrts */
446 COSTS_N_INSNS (37), /* fsqrtd */
447 COSTS_N_INSNS (12), /* imul */
448 COSTS_N_INSNS (12), /* imulX */
449 0, /* imul bit factor */
450 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
451 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
452 COSTS_N_INSNS (1), /* movcc/movr */
453 0, /* shift penalty */
454 };
455
456 static const
457 struct processor_costs m8_costs = {
458 COSTS_N_INSNS (3), /* int load */
459 COSTS_N_INSNS (3), /* int signed load */
460 COSTS_N_INSNS (3), /* int zeroed load */
461 COSTS_N_INSNS (3), /* float load */
462 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
463 COSTS_N_INSNS (9), /* fadd, fsub */
464 COSTS_N_INSNS (9), /* fcmp */
465 COSTS_N_INSNS (9), /* fmov, fmovr */
466 COSTS_N_INSNS (9), /* fmul */
467 COSTS_N_INSNS (26), /* fdivs */
468 COSTS_N_INSNS (30), /* fdivd */
469 COSTS_N_INSNS (33), /* fsqrts */
470 COSTS_N_INSNS (41), /* fsqrtd */
471 COSTS_N_INSNS (12), /* imul */
472 COSTS_N_INSNS (10), /* imulX */
473 0, /* imul bit factor */
474 COSTS_N_INSNS (57), /* udiv/sdiv */
475 COSTS_N_INSNS (30), /* udivx/sdivx */
476 COSTS_N_INSNS (1), /* movcc/movr */
477 0, /* shift penalty */
478 };
479
480 static const struct processor_costs *sparc_costs = &cypress_costs;
481
482 #ifdef HAVE_AS_RELAX_OPTION
483 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
484 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
485 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
486 anything branches into the code between the sethi and the jmp. */
487 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
488 #else
489 #define LEAF_SIBCALL_SLOT_RESERVED_P \
490 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
491 #endif
492
493 /* Vector to say how input registers are mapped to output registers.
494 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
495 eliminate it. You must use -fomit-frame-pointer to get that. */
496 char leaf_reg_remap[] =
497 { 0, 1, 2, 3, 4, 5, 6, 7,
498 -1, -1, -1, -1, -1, -1, 14, -1,
499 -1, -1, -1, -1, -1, -1, -1, -1,
500 8, 9, 10, 11, 12, 13, -1, 15,
501
502 32, 33, 34, 35, 36, 37, 38, 39,
503 40, 41, 42, 43, 44, 45, 46, 47,
504 48, 49, 50, 51, 52, 53, 54, 55,
505 56, 57, 58, 59, 60, 61, 62, 63,
506 64, 65, 66, 67, 68, 69, 70, 71,
507 72, 73, 74, 75, 76, 77, 78, 79,
508 80, 81, 82, 83, 84, 85, 86, 87,
509 88, 89, 90, 91, 92, 93, 94, 95,
510 96, 97, 98, 99, 100, 101, 102};
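/* For instance, reading the table above: the incoming register %i0
   (hard reg 24) is remapped to the outgoing register %o0 (hard reg 8)
   and %i7 (31) to %o7 (15), while the global and floating-point
   registers map to themselves.  */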
511
512 /* Vector, indexed by hard register number, which contains 1
513 for a register that is allowable in a candidate for leaf
514 function treatment. */
515 char sparc_leaf_regs[] =
516 { 1, 1, 1, 1, 1, 1, 1, 1,
517 0, 0, 0, 0, 0, 0, 1, 0,
518 0, 0, 0, 0, 0, 0, 0, 0,
519 1, 1, 1, 1, 1, 1, 0, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1, 1,
525 1, 1, 1, 1, 1, 1, 1, 1,
526 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1, 1,
528 1, 1, 1, 1, 1, 1, 1};
529
530 struct GTY(()) machine_function
531 {
532 /* Size of the frame of the function. */
533 HOST_WIDE_INT frame_size;
534
535 /* Size of the frame of the function minus the register window save area
536 and the outgoing argument area. */
537 HOST_WIDE_INT apparent_frame_size;
538
539 /* Register we pretend the frame pointer is allocated to. Normally, this
540 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
541 record "offset" separately as it may be too big for (reg + disp). */
542 rtx frame_base_reg;
543 HOST_WIDE_INT frame_base_offset;
544
545 /* Number of global or FP registers to be saved (as 4-byte quantities). */
546 int n_global_fp_regs;
547
548 /* True if the current function is leaf and uses only leaf regs,
549 so that the SPARC leaf function optimization can be applied.
550 Private version of crtl->uses_only_leaf_regs, see
551 sparc_expand_prologue for the rationale. */
552 int leaf_function_p;
553
554 /* True if the prologue saves local or in registers. */
555 bool save_local_in_regs_p;
556
557 /* True if the data calculated by sparc_expand_prologue are valid. */
558 bool prologue_data_valid_p;
559 };
560
561 #define sparc_frame_size cfun->machine->frame_size
562 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
563 #define sparc_frame_base_reg cfun->machine->frame_base_reg
564 #define sparc_frame_base_offset cfun->machine->frame_base_offset
565 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
566 #define sparc_leaf_function_p cfun->machine->leaf_function_p
567 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
568 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
569
570 /* 1 if the next opcode is to be specially indented. */
571 int sparc_indent_opcode = 0;
572
573 static void sparc_option_override (void);
574 static void sparc_init_modes (void);
575 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
576 const_tree, bool, bool, int *, int *);
577
578 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
579 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
580
581 static void sparc_emit_set_const32 (rtx, rtx);
582 static void sparc_emit_set_const64 (rtx, rtx);
583 static void sparc_output_addr_vec (rtx);
584 static void sparc_output_addr_diff_vec (rtx);
585 static void sparc_output_deferred_case_vectors (void);
586 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
587 static bool sparc_legitimate_constant_p (machine_mode, rtx);
588 static rtx sparc_builtin_saveregs (void);
589 static int epilogue_renumber (rtx *, int);
590 static bool sparc_assemble_integer (rtx, unsigned int, int);
591 static int set_extends (rtx_insn *);
592 static void sparc_asm_function_prologue (FILE *);
593 static void sparc_asm_function_epilogue (FILE *);
594 #ifdef TARGET_SOLARIS
595 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
596 tree) ATTRIBUTE_UNUSED;
597 #endif
598 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
599 static int sparc_issue_rate (void);
600 static void sparc_sched_init (FILE *, int, int);
601 static int sparc_use_sched_lookahead (void);
602
603 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
604 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
605 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
606 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
607 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
608
609 static bool sparc_function_ok_for_sibcall (tree, tree);
610 static void sparc_init_libfuncs (void);
611 static void sparc_init_builtins (void);
612 static void sparc_fpu_init_builtins (void);
613 static void sparc_vis_init_builtins (void);
614 static tree sparc_builtin_decl (unsigned, bool);
615 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
616 static tree sparc_fold_builtin (tree, int, tree *, bool);
617 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
618 HOST_WIDE_INT, tree);
619 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
620 HOST_WIDE_INT, const_tree);
621 static struct machine_function * sparc_init_machine_status (void);
622 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
623 static rtx sparc_tls_get_addr (void);
624 static rtx sparc_tls_got (void);
625 static int sparc_register_move_cost (machine_mode,
626 reg_class_t, reg_class_t);
627 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
628 static rtx sparc_function_value (const_tree, const_tree, bool);
629 static rtx sparc_libcall_value (machine_mode, const_rtx);
630 static bool sparc_function_value_regno_p (const unsigned int);
631 static rtx sparc_struct_value_rtx (tree, int);
632 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
633 int *, const_tree, int);
634 static bool sparc_return_in_memory (const_tree, const_tree);
635 static bool sparc_strict_argument_naming (cumulative_args_t);
636 static void sparc_va_start (tree, rtx);
637 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
638 static bool sparc_vector_mode_supported_p (machine_mode);
639 static bool sparc_tls_referenced_p (rtx);
640 static rtx sparc_legitimize_tls_address (rtx);
641 static rtx sparc_legitimize_pic_address (rtx, rtx);
642 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
643 static rtx sparc_delegitimize_address (rtx);
644 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
645 static bool sparc_pass_by_reference (cumulative_args_t,
646 machine_mode, const_tree, bool);
647 static void sparc_function_arg_advance (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_arg_1 (cumulative_args_t,
650 machine_mode, const_tree, bool, bool);
651 static rtx sparc_function_arg (cumulative_args_t,
652 machine_mode, const_tree, bool);
653 static rtx sparc_function_incoming_arg (cumulative_args_t,
654 machine_mode, const_tree, bool);
655 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
656 static unsigned int sparc_function_arg_boundary (machine_mode,
657 const_tree);
658 static int sparc_arg_partial_bytes (cumulative_args_t,
659 machine_mode, tree, bool);
660 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
661 static void sparc_file_end (void);
662 static bool sparc_frame_pointer_required (void);
663 static bool sparc_can_eliminate (const int, const int);
664 static rtx sparc_builtin_setjmp_frame_value (void);
665 static void sparc_conditional_register_usage (void);
666 static bool sparc_use_pseudo_pic_reg (void);
667 static void sparc_init_pic_reg (void);
668 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
669 static const char *sparc_mangle_type (const_tree);
670 #endif
671 static void sparc_trampoline_init (rtx, tree, rtx);
672 static machine_mode sparc_preferred_simd_mode (scalar_mode);
673 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
674 static bool sparc_lra_p (void);
675 static bool sparc_print_operand_punct_valid_p (unsigned char);
676 static void sparc_print_operand (FILE *, rtx, int);
677 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
678 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
679 machine_mode,
680 secondary_reload_info *);
681 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
682 reg_class_t);
683 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
684 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
685 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
686 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
687 static unsigned int sparc_min_arithmetic_precision (void);
688 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
689 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
690 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
691 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
692 reg_class_t);
693 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
694 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
695 const vec_perm_indices &);
696 \f
697 #ifdef SUBTARGET_ATTRIBUTE_TABLE
698 /* Table of valid machine attributes. */
699 static const struct attribute_spec sparc_attribute_table[] =
700 {
701 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
702 do_diagnostic, handler, exclude } */
703 SUBTARGET_ATTRIBUTE_TABLE,
704 { NULL, 0, 0, false, false, false, false, NULL, NULL }
705 };
706 #endif
707 \f
708 /* Option handling. */
709
710 /* Parsed value. */
711 enum cmodel sparc_cmodel;
712
713 char sparc_hard_reg_printed[8];
714
715 /* Initialize the GCC target structure. */
716
717 /* The default is to use .half rather than .short for aligned HI objects. */
718 #undef TARGET_ASM_ALIGNED_HI_OP
719 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
720
721 #undef TARGET_ASM_UNALIGNED_HI_OP
722 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
723 #undef TARGET_ASM_UNALIGNED_SI_OP
724 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
725 #undef TARGET_ASM_UNALIGNED_DI_OP
726 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
727
728 /* The target hook has to handle DI-mode values. */
729 #undef TARGET_ASM_INTEGER
730 #define TARGET_ASM_INTEGER sparc_assemble_integer
731
732 #undef TARGET_ASM_FUNCTION_PROLOGUE
733 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
734 #undef TARGET_ASM_FUNCTION_EPILOGUE
735 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
736
737 #undef TARGET_SCHED_ADJUST_COST
738 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
739 #undef TARGET_SCHED_ISSUE_RATE
740 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
741 #undef TARGET_SCHED_INIT
742 #define TARGET_SCHED_INIT sparc_sched_init
743 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
744 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
745
746 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
747 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
748
749 #undef TARGET_INIT_LIBFUNCS
750 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
751
752 #undef TARGET_LEGITIMIZE_ADDRESS
753 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
754 #undef TARGET_DELEGITIMIZE_ADDRESS
755 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
756 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
757 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
758
759 #undef TARGET_INIT_BUILTINS
760 #define TARGET_INIT_BUILTINS sparc_init_builtins
761 #undef TARGET_BUILTIN_DECL
762 #define TARGET_BUILTIN_DECL sparc_builtin_decl
763 #undef TARGET_EXPAND_BUILTIN
764 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
765 #undef TARGET_FOLD_BUILTIN
766 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
767
768 #if TARGET_TLS
769 #undef TARGET_HAVE_TLS
770 #define TARGET_HAVE_TLS true
771 #endif
772
773 #undef TARGET_CANNOT_FORCE_CONST_MEM
774 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
775
776 #undef TARGET_ASM_OUTPUT_MI_THUNK
777 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
778 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
779 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
780
781 #undef TARGET_RTX_COSTS
782 #define TARGET_RTX_COSTS sparc_rtx_costs
783 #undef TARGET_ADDRESS_COST
784 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
785 #undef TARGET_REGISTER_MOVE_COST
786 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
787
788 #undef TARGET_PROMOTE_FUNCTION_MODE
789 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
790
791 #undef TARGET_FUNCTION_VALUE
792 #define TARGET_FUNCTION_VALUE sparc_function_value
793 #undef TARGET_LIBCALL_VALUE
794 #define TARGET_LIBCALL_VALUE sparc_libcall_value
795 #undef TARGET_FUNCTION_VALUE_REGNO_P
796 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
797
798 #undef TARGET_STRUCT_VALUE_RTX
799 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
800 #undef TARGET_RETURN_IN_MEMORY
801 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
802 #undef TARGET_MUST_PASS_IN_STACK
803 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
804 #undef TARGET_PASS_BY_REFERENCE
805 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
806 #undef TARGET_ARG_PARTIAL_BYTES
807 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
808 #undef TARGET_FUNCTION_ARG_ADVANCE
809 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
810 #undef TARGET_FUNCTION_ARG
811 #define TARGET_FUNCTION_ARG sparc_function_arg
812 #undef TARGET_FUNCTION_INCOMING_ARG
813 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
814 #undef TARGET_FUNCTION_ARG_PADDING
815 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
816 #undef TARGET_FUNCTION_ARG_BOUNDARY
817 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
818
819 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
820 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
821 #undef TARGET_STRICT_ARGUMENT_NAMING
822 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
823
824 #undef TARGET_EXPAND_BUILTIN_VA_START
825 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
826 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
827 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
828
829 #undef TARGET_VECTOR_MODE_SUPPORTED_P
830 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
831
832 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
833 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
834
835 #ifdef SUBTARGET_INSERT_ATTRIBUTES
836 #undef TARGET_INSERT_ATTRIBUTES
837 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
838 #endif
839
840 #ifdef SUBTARGET_ATTRIBUTE_TABLE
841 #undef TARGET_ATTRIBUTE_TABLE
842 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
843 #endif
844
845 #undef TARGET_OPTION_OVERRIDE
846 #define TARGET_OPTION_OVERRIDE sparc_option_override
847
848 #ifdef TARGET_THREAD_SSP_OFFSET
849 #undef TARGET_STACK_PROTECT_GUARD
850 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
851 #endif
852
853 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
854 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
855 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
856 #endif
857
858 #undef TARGET_ASM_FILE_END
859 #define TARGET_ASM_FILE_END sparc_file_end
860
861 #undef TARGET_FRAME_POINTER_REQUIRED
862 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
863
864 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
865 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
866
867 #undef TARGET_CAN_ELIMINATE
868 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
869
870 #undef TARGET_PREFERRED_RELOAD_CLASS
871 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
872
873 #undef TARGET_SECONDARY_RELOAD
874 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
875 #undef TARGET_SECONDARY_MEMORY_NEEDED
876 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
877 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
878 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
879
880 #undef TARGET_CONDITIONAL_REGISTER_USAGE
881 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
882
883 #undef TARGET_INIT_PIC_REG
884 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
885
886 #undef TARGET_USE_PSEUDO_PIC_REG
887 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
888
889 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
890 #undef TARGET_MANGLE_TYPE
891 #define TARGET_MANGLE_TYPE sparc_mangle_type
892 #endif
893
894 #undef TARGET_LRA_P
895 #define TARGET_LRA_P sparc_lra_p
896
897 #undef TARGET_LEGITIMATE_ADDRESS_P
898 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
899
900 #undef TARGET_LEGITIMATE_CONSTANT_P
901 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
902
903 #undef TARGET_TRAMPOLINE_INIT
904 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
905
906 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
907 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
908 #undef TARGET_PRINT_OPERAND
909 #define TARGET_PRINT_OPERAND sparc_print_operand
910 #undef TARGET_PRINT_OPERAND_ADDRESS
911 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
912
913 /* The value stored by LDSTUB. */
914 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
915 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
916
917 #undef TARGET_CSTORE_MODE
918 #define TARGET_CSTORE_MODE sparc_cstore_mode
919
920 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
921 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
922
923 #undef TARGET_FIXED_CONDITION_CODE_REGS
924 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
925
926 #undef TARGET_MIN_ARITHMETIC_PRECISION
927 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
928
929 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
930 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
931
932 #undef TARGET_HARD_REGNO_NREGS
933 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
934 #undef TARGET_HARD_REGNO_MODE_OK
935 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
936
937 #undef TARGET_MODES_TIEABLE_P
938 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
939
940 #undef TARGET_CAN_CHANGE_MODE_CLASS
941 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
942
943 #undef TARGET_CONSTANT_ALIGNMENT
944 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
945
946 #undef TARGET_VECTORIZE_VEC_PERM_CONST
947 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
948
949 struct gcc_target targetm = TARGET_INITIALIZER;
950
951 /* Return the memory reference contained in X if any, zero otherwise. */
952
953 static rtx
954 mem_ref (rtx x)
955 {
956 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
957 x = XEXP (x, 0);
958
959 if (MEM_P (x))
960 return x;
961
962 return NULL_RTX;
963 }
964
965 /* True if any of INSN's source register(s) is REG. */
966
967 static bool
968 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
969 {
970 extract_insn (insn);
971 return ((REG_P (recog_data.operand[1])
972 && REGNO (recog_data.operand[1]) == reg)
973 || (recog_data.n_operands == 3
974 && REG_P (recog_data.operand[2])
975 && REGNO (recog_data.operand[2]) == reg));
976 }
977
978 /* True if INSN is a floating-point division or square-root. */
979
980 static bool
981 div_sqrt_insn_p (rtx_insn *insn)
982 {
983 if (GET_CODE (PATTERN (insn)) != SET)
984 return false;
985
986 switch (get_attr_type (insn))
987 {
988 case TYPE_FPDIVS:
989 case TYPE_FPSQRTS:
990 case TYPE_FPDIVD:
991 case TYPE_FPSQRTD:
992 return true;
993 default:
994 return false;
995 }
996 }
997
998 /* True if INSN is a floating-point instruction. */
999
1000 static bool
1001 fpop_insn_p (rtx_insn *insn)
1002 {
1003 if (GET_CODE (PATTERN (insn)) != SET)
1004 return false;
1005
1006 switch (get_attr_type (insn))
1007 {
1008 case TYPE_FPMOVE:
1009 case TYPE_FPCMOVE:
1010 case TYPE_FP:
1011 case TYPE_FPCMP:
1012 case TYPE_FPMUL:
1013 case TYPE_FPDIVS:
1014 case TYPE_FPSQRTS:
1015 case TYPE_FPDIVD:
1016 case TYPE_FPSQRTD:
1017 return true;
1018 default:
1019 return false;
1020 }
1021 }
1022
1023 /* True if INSN is an atomic instruction. */
1024
1025 static bool
1026 atomic_insn_for_leon3_p (rtx_insn *insn)
1027 {
1028 switch (INSN_CODE (insn))
1029 {
1030 case CODE_FOR_swapsi:
1031 case CODE_FOR_ldstub:
1032 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1033 return true;
1034 default:
1035 return false;
1036 }
1037 }
1038
1039 /* We use a machine specific pass to enable workarounds for errata.
1040
1041 We need to have the (essentially) final form of the insn stream in order
1042 to properly detect the various hazards. Therefore, this machine specific
1043 pass runs as late as possible. */
1044
1045 /* True if INSN is a md pattern or asm statement. */
1046 #define USEFUL_INSN_P(INSN) \
1047 (NONDEBUG_INSN_P (INSN) \
1048 && GET_CODE (PATTERN (INSN)) != USE \
1049 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1050
1051 static unsigned int
1052 sparc_do_work_around_errata (void)
1053 {
1054 rtx_insn *insn, *next;
1055
1056 /* Force all instructions to be split into their final form. */
1057 split_all_insns_noflow ();
1058
1059 /* Now look for specific patterns in the insn stream. */
1060 for (insn = get_insns (); insn; insn = next)
1061 {
1062 bool insert_nop = false;
1063 rtx set;
1064 rtx_insn *jump;
1065 rtx_sequence *seq;
1066
1067 /* Look into the instruction in a delay slot. */
1068 if (NONJUMP_INSN_P (insn)
1069 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1070 {
1071 jump = seq->insn (0);
1072 insn = seq->insn (1);
1073 }
1074 else if (JUMP_P (insn))
1075 jump = insn;
1076 else
1077 jump = NULL;
1078
1079 /* Place a NOP at the branch target of an integer branch if it is a
1080 floating-point operation or a floating-point branch. */
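/* A hypothetical instance (register choices and labels are made up):

     be   .L1              ! integer branch on %icc
     ...
   .L1:
     fadds %f1, %f2, %f3   ! floating-point operation at the target

   The code below emits a nop immediately before the fadds.  */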
1081 if (sparc_fix_gr712rc
1082 && jump
1083 && jump_to_label_p (jump)
1084 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1085 {
1086 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1087 if (target
1088 && (fpop_insn_p (target)
1089 || (JUMP_P (target)
1090 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1091 emit_insn_before (gen_nop (), target);
1092 }
1093
1094 /* Insert a NOP between load instruction and atomic instruction. Insert
1095 a NOP at branch target if there is a load in delay slot and an atomic
1096 instruction at branch target. */
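/* A hypothetical instance of the first case (register choices are
   made up):

     ld     [%o0], %g1     ! load
     ldstub [%o1], %g2     ! atomic instruction

   The code below emits a nop between the two instructions.  */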
1097 if (sparc_fix_ut700
1098 && NONJUMP_INSN_P (insn)
1099 && (set = single_set (insn)) != NULL_RTX
1100 && mem_ref (SET_SRC (set))
1101 && REG_P (SET_DEST (set)))
1102 {
1103 if (jump && jump_to_label_p (jump))
1104 {
1105 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1106 if (target && atomic_insn_for_leon3_p (target))
1107 emit_insn_before (gen_nop (), target);
1108 }
1109
1110 next = next_active_insn (insn);
1111 if (!next)
1112 break;
1113
1114 if (atomic_insn_for_leon3_p (next))
1115 insert_nop = true;
1116 }
1117
1118 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1119 ends with another fdiv or fsqrt instruction with no dependencies on
1120 the former, along with an appropriate pattern in between. */
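/* A hypothetical instance (register choices are made up):

     fdivs %f1, %f2, %f3
     fadds %f4, %f5, %f6
     fmuls %f7, %f8, %f9
     fdivs %f10, %f11, %f12

   Neither intervening operation depends on %f3, so the scan below flags
   the sequence and nops are emitted right after the first fdivs.  */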
1121 if (sparc_fix_lost_divsqrt
1122 && NONJUMP_INSN_P (insn)
1123 && div_sqrt_insn_p (insn))
1124 {
1125 int i;
1126 int fp_found = 0;
1127 rtx_insn *after;
1128
1129 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1130
1131 next = next_active_insn (insn);
1132 if (!next)
1133 break;
1134
1135 for (after = next, i = 0; i < 4; i++)
1136 {
1137 /* Count floating-point operations. */
1138 if (i != 3 && fpop_insn_p (after))
1139 {
1140 /* If the insn uses the destination register of
1141 the div/sqrt, then it cannot be problematic. */
1142 if (insn_uses_reg_p (after, dest_reg))
1143 break;
1144 fp_found++;
1145 }
1146
1147 /* Count floating-point loads. */
1148 if (i != 3
1149 && (set = single_set (after)) != NULL_RTX
1150 && REG_P (SET_DEST (set))
1151 && REGNO (SET_DEST (set)) > 31)
1152 {
1153 /* If the insn uses the destination register of
1154 the div/sqrt, then it cannot be problematic. */
1155 if (REGNO (SET_DEST (set)) == dest_reg)
1156 break;
1157 fp_found++;
1158 }
1159
1160 /* Check if this is a problematic sequence. */
1161 if (i > 1
1162 && fp_found >= 2
1163 && div_sqrt_insn_p (after))
1164 {
1165 /* If this is the short version of the problematic
1166 sequence we add two NOPs in a row to also prevent
1167 the long version. */
1168 if (i == 2)
1169 emit_insn_before (gen_nop (), next);
1170 insert_nop = true;
1171 break;
1172 }
1173
1174 /* No need to scan past a second div/sqrt. */
1175 if (div_sqrt_insn_p (after))
1176 break;
1177
1178 /* Insert NOP before branch. */
1179 if (i < 3
1180 && (!NONJUMP_INSN_P (after)
1181 || GET_CODE (PATTERN (after)) == SEQUENCE))
1182 {
1183 insert_nop = true;
1184 break;
1185 }
1186
1187 after = next_active_insn (after);
1188 if (!after)
1189 break;
1190 }
1191 }
1192
1193 /* Look for either of these two sequences:
1194
1195 Sequence A:
1196 1. store of word size or less (e.g. st / stb / sth / stf)
1197 2. any single instruction that is not a load or store
1198 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1199
1200 Sequence B:
1201 1. store of double word size (e.g. std / stdf)
1202 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
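/* A hypothetical instance of sequence A (register choices are made up):

     st  %g1, [%sp + 64]   ! 1. word store
     add %g2, %g3, %g4     ! 2. neither a load nor a store
     st  %g4, [%sp + 68]   ! 3. another store

   The code below breaks the pattern by emitting a nop immediately after
   the first store.  */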
1203 if (sparc_fix_b2bst
1204 && NONJUMP_INSN_P (insn)
1205 && (set = single_set (insn)) != NULL_RTX
1206 && MEM_P (SET_DEST (set)))
1207 {
1208 /* Sequence B begins with a double-word store. */
1209 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1210 rtx_insn *after;
1211 int i;
1212
1213 next = next_active_insn (insn);
1214 if (!next)
1215 break;
1216
1217 for (after = next, i = 0; i < 2; i++)
1218 {
1219 /* Skip empty assembly statements. */
1220 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1221 || (USEFUL_INSN_P (after)
1222 && (asm_noperands (PATTERN (after))>=0)
1223 && !strcmp (decode_asm_operands (PATTERN (after),
1224 NULL, NULL, NULL,
1225 NULL, NULL), "")))
1226 after = next_active_insn (after);
1227 if (!after)
1228 break;
1229
1230 /* If the insn is a branch, then it cannot be problematic. */
1231 if (!NONJUMP_INSN_P (after)
1232 || GET_CODE (PATTERN (after)) == SEQUENCE)
1233 break;
1234
1235 /* Sequence B is only two instructions long. */
1236 if (seq_b)
1237 {
1238 /* Add NOP if followed by a store. */
1239 if ((set = single_set (after)) != NULL_RTX
1240 && MEM_P (SET_DEST (set)))
1241 insert_nop = true;
1242
1243 /* Otherwise it is ok. */
1244 break;
1245 }
1246
1247 /* If the second instruction is a load or a store,
1248 then the sequence cannot be problematic. */
1249 if (i == 0)
1250 {
1251 if ((set = single_set (after)) != NULL_RTX
1252 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1253 break;
1254
1255 after = next_active_insn (after);
1256 if (!after)
1257 break;
1258 }
1259
1260 /* Add NOP if third instruction is a store. */
1261 if (i == 1
1262 && (set = single_set (after)) != NULL_RTX
1263 && MEM_P (SET_DEST (set)))
1264 insert_nop = true;
1265 }
1266 }
1267
1268 /* Look for a single-word load into an odd-numbered FP register. */
1269 else if (sparc_fix_at697f
1270 && NONJUMP_INSN_P (insn)
1271 && (set = single_set (insn)) != NULL_RTX
1272 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1273 && mem_ref (SET_SRC (set))
1274 && REG_P (SET_DEST (set))
1275 && REGNO (SET_DEST (set)) > 31
1276 && REGNO (SET_DEST (set)) % 2 != 0)
1277 {
1278 /* The wrong dependency is on the enclosing double register. */
1279 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1280 unsigned int src1, src2, dest;
1281 int code;
1282
1283 next = next_active_insn (insn);
1284 if (!next)
1285 break;
1286 /* If the insn is a branch, then it cannot be problematic. */
1287 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1288 continue;
1289
1290 extract_insn (next);
1291 code = INSN_CODE (next);
1292
1293 switch (code)
1294 {
1295 case CODE_FOR_adddf3:
1296 case CODE_FOR_subdf3:
1297 case CODE_FOR_muldf3:
1298 case CODE_FOR_divdf3:
1299 dest = REGNO (recog_data.operand[0]);
1300 src1 = REGNO (recog_data.operand[1]);
1301 src2 = REGNO (recog_data.operand[2]);
1302 if (src1 != src2)
1303 {
1304 /* Case [1-4]:
1305 ld [address], %fx+1
1306 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1307 if ((src1 == x || src2 == x)
1308 && (dest == src1 || dest == src2))
1309 insert_nop = true;
1310 }
1311 else
1312 {
1313 /* Case 5:
1314 ld [address], %fx+1
1315 FPOPd %fx, %fx, %fx */
1316 if (src1 == x
1317 && dest == src1
1318 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1319 insert_nop = true;
1320 }
1321 break;
1322
1323 case CODE_FOR_sqrtdf2:
1324 dest = REGNO (recog_data.operand[0]);
1325 src1 = REGNO (recog_data.operand[1]);
1326 /* Case 6:
1327 ld [address], %fx+1
1328 fsqrtd %fx, %fx */
1329 if (src1 == x && dest == src1)
1330 insert_nop = true;
1331 break;
1332
1333 default:
1334 break;
1335 }
1336 }
1337
1338 /* Look for a single-word load into an integer register. */
1339 else if (sparc_fix_ut699
1340 && NONJUMP_INSN_P (insn)
1341 && (set = single_set (insn)) != NULL_RTX
1342 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1343 && (mem_ref (SET_SRC (set)) != NULL_RTX
1344 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1345 && REG_P (SET_DEST (set))
1346 && REGNO (SET_DEST (set)) < 32)
1347 {
1348 /* There is no problem if the second memory access has a data
1349 dependency on the first single-cycle load. */
1350 rtx x = SET_DEST (set);
1351
1352 next = next_active_insn (insn);
1353 if (!next)
1354 break;
1355 /* If the insn is a branch, then it cannot be problematic. */
1356 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1357 continue;
1358
1359 /* Look for a second memory access to/from an integer register. */
1360 if ((set = single_set (next)) != NULL_RTX)
1361 {
1362 rtx src = SET_SRC (set);
1363 rtx dest = SET_DEST (set);
1364 rtx mem;
1365
1366 /* LDD is affected. */
1367 if ((mem = mem_ref (src)) != NULL_RTX
1368 && REG_P (dest)
1369 && REGNO (dest) < 32
1370 && !reg_mentioned_p (x, XEXP (mem, 0)))
1371 insert_nop = true;
1372
1373 /* STD is *not* affected. */
1374 else if (MEM_P (dest)
1375 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1376 && (src == CONST0_RTX (GET_MODE (dest))
1377 || (REG_P (src)
1378 && REGNO (src) < 32
1379 && REGNO (src) != REGNO (x)))
1380 && !reg_mentioned_p (x, XEXP (dest, 0)))
1381 insert_nop = true;
1382
1383 /* GOT accesses use LD. */
1384 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1385 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1386 insert_nop = true;
1387 }
1388 }
1389
1390 /* Look for a single-word load/operation into an FP register. */
1391 else if (sparc_fix_ut699
1392 && NONJUMP_INSN_P (insn)
1393 && (set = single_set (insn)) != NULL_RTX
1394 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1395 && REG_P (SET_DEST (set))
1396 && REGNO (SET_DEST (set)) > 31)
1397 {
1398 /* Number of instructions in the problematic window. */
1399 const int n_insns = 4;
1400 /* The problematic combination is with the sibling FP register. */
1401 const unsigned int x = REGNO (SET_DEST (set));
1402 const unsigned int y = x ^ 1;
1403 rtx_insn *after;
1404 int i;
1405
1406 next = next_active_insn (insn);
1407 if (!next)
1408 break;
1409 /* If the insn is a branch, then it cannot be problematic. */
1410 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1411 continue;
1412
1413 /* Look for a second load/operation into the sibling FP register. */
1414 if (!((set = single_set (next)) != NULL_RTX
1415 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1416 && REG_P (SET_DEST (set))
1417 && REGNO (SET_DEST (set)) == y))
1418 continue;
1419
1420 /* Look for a (possible) store from the FP register in the next N
1421 instructions, but bail out if it is again modified or if there
1422 is a store from the sibling FP register before this store. */
1423 for (after = next, i = 0; i < n_insns; i++)
1424 {
1425 bool branch_p;
1426
1427 after = next_active_insn (after);
1428 if (!after)
1429 break;
1430
1431 /* This is a branch with an empty delay slot. */
1432 if (!NONJUMP_INSN_P (after))
1433 {
1434 if (++i == n_insns)
1435 break;
1436 branch_p = true;
1437 after = NULL;
1438 }
1439 /* This is a branch with a filled delay slot. */
1440 else if (rtx_sequence *seq =
1441 dyn_cast <rtx_sequence *> (PATTERN (after)))
1442 {
1443 if (++i == n_insns)
1444 break;
1445 branch_p = true;
1446 after = seq->insn (1);
1447 }
1448 /* This is a regular instruction. */
1449 else
1450 branch_p = false;
1451
1452 if (after && (set = single_set (after)) != NULL_RTX)
1453 {
1454 const rtx src = SET_SRC (set);
1455 const rtx dest = SET_DEST (set);
1456 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1457
1458 /* If the FP register is again modified before the store,
1459 then the store isn't affected. */
1460 if (REG_P (dest)
1461 && (REGNO (dest) == x
1462 || (REGNO (dest) == y && size == 8)))
1463 break;
1464
1465 if (MEM_P (dest) && REG_P (src))
1466 {
1467 /* If there is a store from the sibling FP register
1468 before the store, then the store is not affected. */
1469 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1470 break;
1471
1472 /* Otherwise, the store is affected. */
1473 if (REGNO (src) == x && size == 4)
1474 {
1475 insert_nop = true;
1476 break;
1477 }
1478 }
1479 }
1480
1481 /* If we have a branch in the first M instructions, then we
1482 cannot see the (M+2)th instruction so we play safe. */
1483 if (branch_p && i <= (n_insns - 2))
1484 {
1485 insert_nop = true;
1486 break;
1487 }
1488 }
1489 }
1490
1491 else
1492 next = NEXT_INSN (insn);
1493
1494 if (insert_nop)
1495 emit_insn_before (gen_nop (), next);
1496 }
1497
1498 return 0;
1499 }
1500
1501 namespace {
1502
1503 const pass_data pass_data_work_around_errata =
1504 {
1505 RTL_PASS, /* type */
1506 "errata", /* name */
1507 OPTGROUP_NONE, /* optinfo_flags */
1508 TV_MACH_DEP, /* tv_id */
1509 0, /* properties_required */
1510 0, /* properties_provided */
1511 0, /* properties_destroyed */
1512 0, /* todo_flags_start */
1513 0, /* todo_flags_finish */
1514 };
1515
1516 class pass_work_around_errata : public rtl_opt_pass
1517 {
1518 public:
1519 pass_work_around_errata(gcc::context *ctxt)
1520 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1521 {}
1522
1523 /* opt_pass methods: */
1524 virtual bool gate (function *)
1525 {
1526 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
1527 || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
1528 }
1529
1530 virtual unsigned int execute (function *)
1531 {
1532 return sparc_do_work_around_errata ();
1533 }
1534
1535 }; // class pass_work_around_errata
1536
1537 } // anon namespace
1538
1539 rtl_opt_pass *
1540 make_pass_work_around_errata (gcc::context *ctxt)
1541 {
1542 return new pass_work_around_errata (ctxt);
1543 }
1544
1545 /* Helpers for TARGET_DEBUG_OPTIONS. */
1546 static void
1547 dump_target_flag_bits (const int flags)
1548 {
1549 if (flags & MASK_64BIT)
1550 fprintf (stderr, "64BIT ");
1551 if (flags & MASK_APP_REGS)
1552 fprintf (stderr, "APP_REGS ");
1553 if (flags & MASK_FASTER_STRUCTS)
1554 fprintf (stderr, "FASTER_STRUCTS ");
1555 if (flags & MASK_FLAT)
1556 fprintf (stderr, "FLAT ");
1557 if (flags & MASK_FMAF)
1558 fprintf (stderr, "FMAF ");
1559 if (flags & MASK_FSMULD)
1560 fprintf (stderr, "FSMULD ");
1561 if (flags & MASK_FPU)
1562 fprintf (stderr, "FPU ");
1563 if (flags & MASK_HARD_QUAD)
1564 fprintf (stderr, "HARD_QUAD ");
1565 if (flags & MASK_POPC)
1566 fprintf (stderr, "POPC ");
1567 if (flags & MASK_PTR64)
1568 fprintf (stderr, "PTR64 ");
1569 if (flags & MASK_STACK_BIAS)
1570 fprintf (stderr, "STACK_BIAS ");
1571 if (flags & MASK_UNALIGNED_DOUBLES)
1572 fprintf (stderr, "UNALIGNED_DOUBLES ");
1573 if (flags & MASK_V8PLUS)
1574 fprintf (stderr, "V8PLUS ");
1575 if (flags & MASK_VIS)
1576 fprintf (stderr, "VIS ");
1577 if (flags & MASK_VIS2)
1578 fprintf (stderr, "VIS2 ");
1579 if (flags & MASK_VIS3)
1580 fprintf (stderr, "VIS3 ");
1581 if (flags & MASK_VIS4)
1582 fprintf (stderr, "VIS4 ");
1583 if (flags & MASK_VIS4B)
1584 fprintf (stderr, "VIS4B ");
1585 if (flags & MASK_CBCOND)
1586 fprintf (stderr, "CBCOND ");
1587 if (flags & MASK_DEPRECATED_V8_INSNS)
1588 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1589 if (flags & MASK_SPARCLET)
1590 fprintf (stderr, "SPARCLET ");
1591 if (flags & MASK_SPARCLITE)
1592 fprintf (stderr, "SPARCLITE ");
1593 if (flags & MASK_V8)
1594 fprintf (stderr, "V8 ");
1595 if (flags & MASK_V9)
1596 fprintf (stderr, "V9 ");
1597 }
1598
1599 static void
1600 dump_target_flags (const char *prefix, const int flags)
1601 {
1602 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1603 dump_target_flag_bits (flags);
1604 fprintf(stderr, "]\n");
1605 }
1606
1607 /* Validate and override various options, and do some machine dependent
1608 initialization. */
1609
1610 static void
1611 sparc_option_override (void)
1612 {
1613 static struct code_model {
1614 const char *const name;
1615 const enum cmodel value;
1616 } const cmodels[] = {
1617 { "32", CM_32 },
1618 { "medlow", CM_MEDLOW },
1619 { "medmid", CM_MEDMID },
1620 { "medany", CM_MEDANY },
1621 { "embmedany", CM_EMBMEDANY },
1622 { NULL, (enum cmodel) 0 }
1623 };
1624 const struct code_model *cmodel;
1625 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1626 static struct cpu_default {
1627 const int cpu;
1628 const enum processor_type processor;
1629 } const cpu_default[] = {
1630 /* There must be one entry here for each TARGET_CPU value. */
1631 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1632 { TARGET_CPU_v8, PROCESSOR_V8 },
1633 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1634 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1635 { TARGET_CPU_leon, PROCESSOR_LEON },
1636 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1637 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1638 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1639 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1640 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1641 { TARGET_CPU_v9, PROCESSOR_V9 },
1642 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1643 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1644 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1645 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1646 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1647 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1648 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1649 { TARGET_CPU_m8, PROCESSOR_M8 },
1650 { -1, PROCESSOR_V7 }
1651 };
1652 const struct cpu_default *def;
1653 /* Table of values for -m{cpu,tune}=. This must match the order of
1654 the enum processor_type in sparc-opts.h. */
1655 static struct cpu_table {
1656 const char *const name;
1657 const int disable;
1658 const int enable;
1659 } const cpu_table[] = {
1660 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1661 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1662 { "v8", MASK_ISA, MASK_V8 },
1663 /* TI TMS390Z55 supersparc */
1664 { "supersparc", MASK_ISA, MASK_V8 },
1665 { "hypersparc", MASK_ISA, MASK_V8 },
1666 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1667 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1668 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1669 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1670 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1671 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1672 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1673 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1674 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1675 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1676 /* TEMIC sparclet */
1677 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1678 { "v9", MASK_ISA, MASK_V9 },
1679 /* UltraSPARC I, II, IIi */
1680 { "ultrasparc", MASK_ISA,
1681 /* Although insns using %y are deprecated, it is a clear win. */
1682 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1683 /* UltraSPARC III */
1684 /* ??? Check if %y issue still holds true. */
1685 { "ultrasparc3", MASK_ISA,
1686 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1687 /* UltraSPARC T1 */
1688 { "niagara", MASK_ISA,
1689 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1690 /* UltraSPARC T2 */
1691 { "niagara2", MASK_ISA,
1692 MASK_V9|MASK_POPC|MASK_VIS2 },
1693 /* UltraSPARC T3 */
1694 { "niagara3", MASK_ISA,
1695 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1696 /* UltraSPARC T4 */
1697 { "niagara4", MASK_ISA,
1698 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1699 /* UltraSPARC M7 */
1700 { "niagara7", MASK_ISA,
1701 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1702 /* UltraSPARC M8 */
1703 { "m8", MASK_ISA,
1704 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1705 };
1706 const struct cpu_table *cpu;
1707 unsigned int i;
1708
1709 if (sparc_debug_string != NULL)
1710 {
1711 const char *q;
1712 char *p;
1713
1714 p = ASTRDUP (sparc_debug_string);
1715 while ((q = strtok (p, ",")) != NULL)
1716 {
1717 bool invert;
1718 int mask;
1719
1720 p = NULL;
1721 if (*q == '!')
1722 {
1723 invert = true;
1724 q++;
1725 }
1726 else
1727 invert = false;
1728
1729 if (! strcmp (q, "all"))
1730 mask = MASK_DEBUG_ALL;
1731 else if (! strcmp (q, "options"))
1732 mask = MASK_DEBUG_OPTIONS;
1733 else
1734 error ("unknown -mdebug-%s switch", q);
1735
1736 if (invert)
1737 sparc_debug &= ~mask;
1738 else
1739 sparc_debug |= mask;
1740 }
1741 }
1742
1743 /* Enable the FsMULd instruction by default if not explicitly specified by
1744 the user. It may be later disabled by the CPU (explicitly or not). */
1745 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1746 target_flags |= MASK_FSMULD;
1747
1748 if (TARGET_DEBUG_OPTIONS)
1749 {
1750 dump_target_flags ("Initial target_flags", target_flags);
1751 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1752 }
1753
1754 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1755 SUBTARGET_OVERRIDE_OPTIONS;
1756 #endif
1757
1758 #ifndef SPARC_BI_ARCH
1759 /* Check for unsupported architecture size. */
1760 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1761 error ("%s is not supported by this configuration",
1762 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1763 #endif
1764
1765 /* We force all 64-bit archs to use 128-bit long double. */
1766 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1767 {
1768 error ("-mlong-double-64 not allowed with -m64");
1769 target_flags |= MASK_LONG_DOUBLE_128;
1770 }
1771
1772 /* Code model selection. */
1773 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1774
1775 #ifdef SPARC_BI_ARCH
1776 if (TARGET_ARCH32)
1777 sparc_cmodel = CM_32;
1778 #endif
1779
1780 if (sparc_cmodel_string != NULL)
1781 {
1782 if (TARGET_ARCH64)
1783 {
1784 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1785 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1786 break;
1787 if (cmodel->name == NULL)
1788 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1789 else
1790 sparc_cmodel = cmodel->value;
1791 }
1792 else
1793 error ("-mcmodel= is not supported on 32-bit systems");
1794 }
1795
1796 /* Check that -fcall-saved-REG wasn't specified for out registers. */
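/* The out registers %o0-%o7 occupy hard registers 8 to 15. */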
1797 for (i = 8; i < 16; i++)
1798 if (!call_used_regs [i])
1799 {
1800 error ("-fcall-saved-REG is not supported for out registers");
1801 call_used_regs [i] = 1;
1802 }
1803
1804 /* Set the default CPU if no -mcpu option was specified. */
1805 if (!global_options_set.x_sparc_cpu_and_features)
1806 {
1807 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1808 if (def->cpu == TARGET_CPU_DEFAULT)
1809 break;
1810 gcc_assert (def->cpu != -1);
1811 sparc_cpu_and_features = def->processor;
1812 }
1813
1814 /* Set the default CPU if no -mtune option was specified. */
1815 if (!global_options_set.x_sparc_cpu)
1816 sparc_cpu = sparc_cpu_and_features;
1817
1818 cpu = &cpu_table[(int) sparc_cpu_and_features];
1819
1820 if (TARGET_DEBUG_OPTIONS)
1821 {
1822 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1823 dump_target_flags ("cpu->disable", cpu->disable);
1824 dump_target_flags ("cpu->enable", cpu->enable);
1825 }
1826
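/* Apply the selected CPU's disable and enable masks, but never enable a
feature the assembler cannot handle (the HAVE_AS_* guards below) nor
override an ISA feature the user set explicitly on the command line. */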
1827 target_flags &= ~cpu->disable;
1828 target_flags |= (cpu->enable
1829 #ifndef HAVE_AS_FMAF_HPC_VIS3
1830 & ~(MASK_FMAF | MASK_VIS3)
1831 #endif
1832 #ifndef HAVE_AS_SPARC4
1833 & ~MASK_CBCOND
1834 #endif
1835 #ifndef HAVE_AS_SPARC5_VIS4
1836 & ~(MASK_VIS4 | MASK_SUBXC)
1837 #endif
1838 #ifndef HAVE_AS_SPARC6
1839 & ~(MASK_VIS4B)
1840 #endif
1841 #ifndef HAVE_AS_LEON
1842 & ~(MASK_LEON | MASK_LEON3)
1843 #endif
1844 & ~(target_flags_explicit & MASK_FEATURES)
1845 );
1846
1847 /* -mvis2 implies -mvis. */
1848 if (TARGET_VIS2)
1849 target_flags |= MASK_VIS;
1850
1851 /* -mvis3 implies -mvis2 and -mvis. */
1852 if (TARGET_VIS3)
1853 target_flags |= MASK_VIS2 | MASK_VIS;
1854
1855 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1856 if (TARGET_VIS4)
1857 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1858
1859 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis. */
1860 if (TARGET_VIS4B)
1861 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1862
1863 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1864 FPU is disabled. */
1865 if (!TARGET_FPU)
1866 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1867 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1868
1869 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1870 are available; -m64 also implies v9. */
1871 if (TARGET_VIS || TARGET_ARCH64)
1872 {
1873 target_flags |= MASK_V9;
1874 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1875 }
1876
1877 /* -mvis also implies -mv8plus on 32-bit. */
1878 if (TARGET_VIS && !TARGET_ARCH64)
1879 target_flags |= MASK_V8PLUS;
1880
1881 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1882 if (TARGET_V9 && TARGET_ARCH32)
1883 target_flags |= MASK_DEPRECATED_V8_INSNS;
1884
1885 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1886 if (!TARGET_V9 || TARGET_ARCH64)
1887 target_flags &= ~MASK_V8PLUS;
1888
1889 /* Don't use stack biasing in 32-bit mode. */
1890 if (TARGET_ARCH32)
1891 target_flags &= ~MASK_STACK_BIAS;
1892
1893 /* Use LRA instead of reload, unless otherwise instructed. */
1894 if (!(target_flags_explicit & MASK_LRA))
1895 target_flags |= MASK_LRA;
1896
1897 /* Enable applicable errata workarounds for LEON3FT. */
1898 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1899 {
1900 sparc_fix_b2bst = 1;
1901 sparc_fix_lost_divsqrt = 1;
1902 }
1903
1904 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1905 if (sparc_fix_ut699)
1906 target_flags &= ~MASK_FSMULD;
1907
1908 /* Supply a default value for align_functions. */
1909 if (flag_align_functions && !str_align_functions)
1910 {
1911 if (sparc_cpu == PROCESSOR_ULTRASPARC
1912 || sparc_cpu == PROCESSOR_ULTRASPARC3
1913 || sparc_cpu == PROCESSOR_NIAGARA
1914 || sparc_cpu == PROCESSOR_NIAGARA2
1915 || sparc_cpu == PROCESSOR_NIAGARA3
1916 || sparc_cpu == PROCESSOR_NIAGARA4)
1917 str_align_functions = "32";
1918 else if (sparc_cpu == PROCESSOR_NIAGARA7
1919 || sparc_cpu == PROCESSOR_M8)
1920 str_align_functions = "64";
1921 }
1922
1923 /* Validate PCC_STRUCT_RETURN. */
1924 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1925 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1926
1927 /* Only use .uaxword when compiling for a 64-bit target. */
1928 if (!TARGET_ARCH64)
1929 targetm.asm_out.unaligned_op.di = NULL;
1930
1931 /* Do various machine dependent initializations. */
1932 sparc_init_modes ();
1933
1934 /* Set up function hooks. */
1935 init_machine_status = sparc_init_machine_status;
1936
1937 switch (sparc_cpu)
1938 {
1939 case PROCESSOR_V7:
1940 case PROCESSOR_CYPRESS:
1941 sparc_costs = &cypress_costs;
1942 break;
1943 case PROCESSOR_V8:
1944 case PROCESSOR_SPARCLITE:
1945 case PROCESSOR_SUPERSPARC:
1946 sparc_costs = &supersparc_costs;
1947 break;
1948 case PROCESSOR_F930:
1949 case PROCESSOR_F934:
1950 case PROCESSOR_HYPERSPARC:
1951 case PROCESSOR_SPARCLITE86X:
1952 sparc_costs = &hypersparc_costs;
1953 break;
1954 case PROCESSOR_LEON:
1955 sparc_costs = &leon_costs;
1956 break;
1957 case PROCESSOR_LEON3:
1958 case PROCESSOR_LEON3V7:
1959 sparc_costs = &leon3_costs;
1960 break;
1961 case PROCESSOR_SPARCLET:
1962 case PROCESSOR_TSC701:
1963 sparc_costs = &sparclet_costs;
1964 break;
1965 case PROCESSOR_V9:
1966 case PROCESSOR_ULTRASPARC:
1967 sparc_costs = &ultrasparc_costs;
1968 break;
1969 case PROCESSOR_ULTRASPARC3:
1970 sparc_costs = &ultrasparc3_costs;
1971 break;
1972 case PROCESSOR_NIAGARA:
1973 sparc_costs = &niagara_costs;
1974 break;
1975 case PROCESSOR_NIAGARA2:
1976 sparc_costs = &niagara2_costs;
1977 break;
1978 case PROCESSOR_NIAGARA3:
1979 sparc_costs = &niagara3_costs;
1980 break;
1981 case PROCESSOR_NIAGARA4:
1982 sparc_costs = &niagara4_costs;
1983 break;
1984 case PROCESSOR_NIAGARA7:
1985 sparc_costs = &niagara7_costs;
1986 break;
1987 case PROCESSOR_M8:
1988 sparc_costs = &m8_costs;
1989 break;
1990 case PROCESSOR_NATIVE:
1991 gcc_unreachable ();
1992 };
1993
1994 if (sparc_memory_model == SMM_DEFAULT)
1995 {
1996 /* Choose the memory model for the operating system. */
1997 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1998 if (os_default != SMM_DEFAULT)
1999 sparc_memory_model = os_default;
2000 /* Choose the most relaxed model for the processor. */
2001 else if (TARGET_V9)
2002 sparc_memory_model = SMM_RMO;
2003 else if (TARGET_LEON3)
2004 sparc_memory_model = SMM_TSO;
2005 else if (TARGET_LEON)
2006 sparc_memory_model = SMM_SC;
2007 else if (TARGET_V8)
2008 sparc_memory_model = SMM_PSO;
2009 else
2010 sparc_memory_model = SMM_SC;
2011 }
2012
2013 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
2014 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
2015 target_flags |= MASK_LONG_DOUBLE_128;
2016 #endif
2017
2018 if (TARGET_DEBUG_OPTIONS)
2019 dump_target_flags ("Final target_flags", target_flags);
2020
2021 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2022 can run at the same time. More important, it is the threshold
2023 defining when additional prefetches will be dropped by the
2024 hardware.
2025
2026 The UltraSPARC-III features a documented prefetch queue with a
2027 size of 8. Additional prefetches issued in the cpu are
2028 dropped.
2029
2030 Niagara processors are different. In these processors prefetches
2031 are handled much like regular loads. The L1 miss buffer is 32
2032 entries, but prefetches start getting affected when 30 entries
2033 become occupied. That occupation could be a mix of regular loads
2034 and prefetches though. And that buffer is shared by all threads.
2035 Once the threshold is reached, if the core is running a single
2036 thread the prefetch will retry. If more than one thread is
2037 running, the prefetch will be dropped.
2038
2039 All this makes it very difficult to determine how many
2040 prefetches can be issued simultaneously, even in a
2041 single-threaded program. Experimental results show that setting
2042 this parameter to 32 works well when the number of threads is not
2043 high. */
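/* I.e. 2 for UltraSPARC-I/II and the Niagara line up to the T4, 8 for
UltraSPARC-III, 32 for the M7 (niagara7) and M8, and 3 otherwise. */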
2044 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2045 ((sparc_cpu == PROCESSOR_ULTRASPARC
2046 || sparc_cpu == PROCESSOR_NIAGARA
2047 || sparc_cpu == PROCESSOR_NIAGARA2
2048 || sparc_cpu == PROCESSOR_NIAGARA3
2049 || sparc_cpu == PROCESSOR_NIAGARA4)
2050 ? 2
2051 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2052 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2053 || sparc_cpu == PROCESSOR_M8)
2054 ? 32 : 3))),
2055 global_options.x_param_values,
2056 global_options_set.x_param_values);
2057
2058 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2059 bytes.
2060
2061 The Oracle SPARC Architecture (previously the UltraSPARC
2062 Architecture) specification states that when a PREFETCH[A]
2063 instruction is executed an implementation-specific amount of data
2064 is prefetched, and that it is at least 64 bytes long (aligned to
2065 at least 64 bytes).
2066
2067 However, this is not correct. The M7 (and implementations prior
2068 to that) does not guarantee a 64B prefetch into a cache if the
2069 line size is smaller. A single cache line is all that is ever
2070 prefetched. So for the M7, where the L1D$ has 32B lines and the
2071 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2072 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2073 is a read_n prefetch, which is the only type which allocates to
2074 the L1.) */
2075 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2076 (sparc_cpu == PROCESSOR_M8
2077 ? 64 : 32),
2078 global_options.x_param_values,
2079 global_options_set.x_param_values);
2080
2081 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2082 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2083 Niagara processors feature an L1D$ of 16KB. */
2084 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2085 ((sparc_cpu == PROCESSOR_ULTRASPARC
2086 || sparc_cpu == PROCESSOR_ULTRASPARC3
2087 || sparc_cpu == PROCESSOR_NIAGARA
2088 || sparc_cpu == PROCESSOR_NIAGARA2
2089 || sparc_cpu == PROCESSOR_NIAGARA3
2090 || sparc_cpu == PROCESSOR_NIAGARA4
2091 || sparc_cpu == PROCESSOR_NIAGARA7
2092 || sparc_cpu == PROCESSOR_M8)
2093 ? 16 : 64),
2094 global_options.x_param_values,
2095 global_options_set.x_param_values);
2096
2097
2098 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
2099 that 512 is the default in params.def. */
2100 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2101 ((sparc_cpu == PROCESSOR_NIAGARA4
2102 || sparc_cpu == PROCESSOR_M8)
2103 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2104 ? 256 : 512)),
2105 global_options.x_param_values,
2106 global_options_set.x_param_values);
2107
2108
2109 /* Disable save slot sharing for call-clobbered registers by default.
2110 The IRA sharing algorithm works on single registers only and this
2111 pessimizes for double floating-point registers. */
2112 if (!global_options_set.x_flag_ira_share_save_slots)
2113 flag_ira_share_save_slots = 0;
2114
2115 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2116 redundant 32-to-64-bit extensions. */
2117 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2118 flag_ree = 0;
2119 }
2120 \f
2121 /* Miscellaneous utilities. */
2122
2123 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2124 or branch on register contents instructions. */
2125
2126 int
2127 v9_regcmp_p (enum rtx_code code)
2128 {
2129 return (code == EQ || code == NE || code == GE || code == LT
2130 || code == LE || code == GT);
2131 }
2132
2133 /* Nonzero if OP is a floating point constant which can
2134 be loaded into an integer register using a single
2135 sethi instruction. */
2136
2137 int
2138 fp_sethi_p (rtx op)
2139 {
2140 if (GET_CODE (op) == CONST_DOUBLE)
2141 {
2142 long i;
2143
2144 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2145 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2146 }
2147
2148 return 0;
2149 }
2150
2151 /* Nonzero if OP is a floating point constant which can
2152 be loaded into an integer register using a single
2153 mov instruction. */
2154
2155 int
2156 fp_mov_p (rtx op)
2157 {
2158 if (GET_CODE (op) == CONST_DOUBLE)
2159 {
2160 long i;
2161
2162 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2163 return SPARC_SIMM13_P (i);
2164 }
2165
2166 return 0;
2167 }
2168
2169 /* Nonzero if OP is a floating point constant which can
2170 be loaded into an integer register using a high/losum
2171 instruction sequence. */
2172
2173 int
2174 fp_high_losum_p (rtx op)
2175 {
2176 /* The constraints calling this should only be in
2177 SFmode move insns, so any constant which cannot
2178 be moved using a single insn will do. */
2179 if (GET_CODE (op) == CONST_DOUBLE)
2180 {
2181 long i;
2182
2183 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2184 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2185 }
2186
2187 return 0;
2188 }
2189
2190 /* Return true if the address of LABEL can be loaded by means of the
2191 mov{si,di}_pic_label_ref patterns in PIC mode. */
2192
2193 static bool
2194 can_use_mov_pic_label_ref (rtx label)
2195 {
2196 /* VxWorks does not impose a fixed gap between segments; the run-time
2197 gap can be different from the object-file gap. We therefore can't
2198 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2199 are absolutely sure that X is in the same segment as the GOT.
2200 Unfortunately, the flexibility of linker scripts means that we
2201 can't be sure of that in general, so assume that GOT-relative
2202 accesses are never valid on VxWorks. */
2203 if (TARGET_VXWORKS_RTP)
2204 return false;
2205
2206 /* Similarly, if the label is non-local, it might end up being placed
2207 in a different section than the current one; now mov_pic_label_ref
2208 requires the label and the code to be in the same section. */
2209 if (LABEL_REF_NONLOCAL_P (label))
2210 return false;
2211
2212 /* Finally, if we are reordering basic blocks and partitioning into hot
2213 and cold sections, this might happen for any label. */
2214 if (flag_reorder_blocks_and_partition)
2215 return false;
2216
2217 return true;
2218 }
2219
2220 /* Expand a move instruction. Return true if all work is done. */
2221
2222 bool
2223 sparc_expand_move (machine_mode mode, rtx *operands)
2224 {
2225 /* Handle sets of MEM first. */
2226 if (GET_CODE (operands[0]) == MEM)
2227 {
2228 /* 0 is a register (or a pair of registers) on SPARC. */
2229 if (register_or_zero_operand (operands[1], mode))
2230 return false;
2231
2232 if (!reload_in_progress)
2233 {
2234 operands[0] = validize_mem (operands[0]);
2235 operands[1] = force_reg (mode, operands[1]);
2236 }
2237 }
2238
2239 /* Fix up TLS cases. */
2240 if (TARGET_HAVE_TLS
2241 && CONSTANT_P (operands[1])
2242 && sparc_tls_referenced_p (operands [1]))
2243 {
2244 operands[1] = sparc_legitimize_tls_address (operands[1]);
2245 return false;
2246 }
2247
2248 /* Fix up PIC cases. */
2249 if (flag_pic && CONSTANT_P (operands[1]))
2250 {
2251 if (pic_address_needs_scratch (operands[1]))
2252 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2253
2254 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2255 if ((GET_CODE (operands[1]) == LABEL_REF
2256 && can_use_mov_pic_label_ref (operands[1]))
2257 || (GET_CODE (operands[1]) == CONST
2258 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2259 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2260 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2261 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2262 {
2263 if (mode == SImode)
2264 {
2265 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2266 return true;
2267 }
2268
2269 if (mode == DImode)
2270 {
2271 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2272 return true;
2273 }
2274 }
2275
2276 if (symbolic_operand (operands[1], mode))
2277 {
2278 operands[1]
2279 = sparc_legitimize_pic_address (operands[1],
2280 reload_in_progress
2281 ? operands[0] : NULL_RTX);
2282 return false;
2283 }
2284 }
2285
2286 /* If we are trying to toss an integer constant into FP registers,
2287 or loading a FP or vector constant, force it into memory. */
2288 if (CONSTANT_P (operands[1])
2289 && REG_P (operands[0])
2290 && (SPARC_FP_REG_P (REGNO (operands[0]))
2291 || SCALAR_FLOAT_MODE_P (mode)
2292 || VECTOR_MODE_P (mode)))
2293 {
2294 /* emit_group_store will send such bogosity to us when it is
2295 not storing directly into memory. So fix this up to avoid
2296 crashes in output_constant_pool. */
2297 if (operands [1] == const0_rtx)
2298 operands[1] = CONST0_RTX (mode);
2299
2300 /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
2301 non-FP registers can always be handled this way. */
2302 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2303 && (const_zero_operand (operands[1], mode)
2304 || const_all_ones_operand (operands[1], mode)))
2305 return false;
2306
2307 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2308 /* We are able to build any SF constant in integer registers
2309 with at most 2 instructions. */
2310 && (mode == SFmode
2311 /* And any DF constant in integer registers if needed. */
2312 || (mode == DFmode && !can_create_pseudo_p ())))
2313 return false;
2314
2315 operands[1] = force_const_mem (mode, operands[1]);
2316 if (!reload_in_progress)
2317 operands[1] = validize_mem (operands[1]);
2318 return false;
2319 }
2320
2321 /* Accept non-constants and valid constants unmodified. */
2322 if (!CONSTANT_P (operands[1])
2323 || GET_CODE (operands[1]) == HIGH
2324 || input_operand (operands[1], mode))
2325 return false;
2326
2327 switch (mode)
2328 {
2329 case E_QImode:
2330 /* All QImode constants require only one insn, so proceed. */
2331 break;
2332
2333 case E_HImode:
2334 case E_SImode:
2335 sparc_emit_set_const32 (operands[0], operands[1]);
2336 return true;
2337
2338 case E_DImode:
2339 /* input_operand should have filtered out 32-bit mode. */
2340 sparc_emit_set_const64 (operands[0], operands[1]);
2341 return true;
2342
2343 case E_TImode:
2344 {
2345 rtx high, low;
2346 /* TImode isn't available in 32-bit mode. */
2347 split_double (operands[1], &high, &low);
2348 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2349 high));
2350 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2351 low));
2352 }
2353 return true;
2354
2355 default:
2356 gcc_unreachable ();
2357 }
2358
2359 return false;
2360 }
2361
2362 /* Load OP1, a 32-bit constant, into OP0, a register.
2363 We know it can't be done in one insn when we get
2364 here; the move expander guarantees this. */
2365
2366 static void
2367 sparc_emit_set_const32 (rtx op0, rtx op1)
2368 {
2369 machine_mode mode = GET_MODE (op0);
2370 rtx temp = op0;
2371
2372 if (can_create_pseudo_p ())
2373 temp = gen_reg_rtx (mode);
2374
2375 if (GET_CODE (op1) == CONST_INT)
2376 {
2377 gcc_assert (!small_int_operand (op1, mode)
2378 && !const_high_operand (op1, mode));
2379
2380 /* Emit them as real moves instead of a HIGH/LO_SUM,
2381 this way CSE can see everything and reuse intermediate
2382 values if it wants. */
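/* For example, 0x12345678 is built as a move of 0x12345400 into TEMP
followed by an IOR of TEMP with 0x278, the low 10 bits. */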
2383 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2384 & ~(HOST_WIDE_INT) 0x3ff)));
2385
2386 emit_insn (gen_rtx_SET (op0,
2387 gen_rtx_IOR (mode, temp,
2388 GEN_INT (INTVAL (op1) & 0x3ff))));
2389 }
2390 else
2391 {
2392 /* A symbol, emit in the traditional way. */
2393 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2394 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2395 }
2396 }
2397
2398 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2399 If TEMP is nonzero, we are forbidden to use any other scratch
2400 registers. Otherwise, we are allowed to generate them as needed.
2401
2402 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2403 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2404
2405 void
2406 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2407 {
2408 rtx cst, temp1, temp2, temp3, temp4, temp5;
2409 rtx ti_temp = 0;
2410
2411 /* Deal with too large offsets. */
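/* I.e. when the offset added to the symbol does not fit in a signed 32-bit
value, build the symbol and the offset in separate registers and add them,
rather than relying on the code-model relocations to absorb the offset. */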
2412 if (GET_CODE (op1) == CONST
2413 && GET_CODE (XEXP (op1, 0)) == PLUS
2414 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2415 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2416 {
2417 gcc_assert (!temp);
2418 temp1 = gen_reg_rtx (DImode);
2419 temp2 = gen_reg_rtx (DImode);
2420 sparc_emit_set_const64 (temp2, cst);
2421 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2422 NULL_RTX);
2423 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2424 return;
2425 }
2426
2427 if (temp && GET_MODE (temp) == TImode)
2428 {
2429 ti_temp = temp;
2430 temp = gen_rtx_REG (DImode, REGNO (temp));
2431 }
2432
2433 /* SPARC-V9 code-model support. */
2434 switch (sparc_cmodel)
2435 {
2436 case CM_MEDLOW:
2437 /* The range spanned by all instructions in the object is less
2438 than 2^31 bytes (2GB) and the distance from any instruction
2439 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2440 than 2^31 bytes (2GB).
2441
2442 The executable must be in the low 4TB of the virtual address
2443 space.
2444
2445 sethi %hi(symbol), %temp1
2446 or %temp1, %lo(symbol), %reg */
2447 if (temp)
2448 temp1 = temp; /* op0 is allowed. */
2449 else
2450 temp1 = gen_reg_rtx (DImode);
2451
2452 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2453 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2454 break;
2455
2456 case CM_MEDMID:
2457 /* The range spanned by all instructions in the object is less
2458 than 2^31 bytes (2GB) and the distance from any instruction
2459 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2460 than 2^31 bytes (2GB).
2461
2462 The executable must be in the low 16TB of the virtual address
2463 space.
2464
2465 sethi %h44(symbol), %temp1
2466 or %temp1, %m44(symbol), %temp2
2467 sllx %temp2, 12, %temp3
2468 or %temp3, %l44(symbol), %reg */
2469 if (temp)
2470 {
2471 temp1 = op0;
2472 temp2 = op0;
2473 temp3 = temp; /* op0 is allowed. */
2474 }
2475 else
2476 {
2477 temp1 = gen_reg_rtx (DImode);
2478 temp2 = gen_reg_rtx (DImode);
2479 temp3 = gen_reg_rtx (DImode);
2480 }
2481
2482 emit_insn (gen_seth44 (temp1, op1));
2483 emit_insn (gen_setm44 (temp2, temp1, op1));
2484 emit_insn (gen_rtx_SET (temp3,
2485 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2486 emit_insn (gen_setl44 (op0, temp3, op1));
2487 break;
2488
2489 case CM_MEDANY:
2490 /* The range spanned by all instructions in the object is less
2491 than 2^31 bytes (2GB) and the distance from any instruction
2492 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2493 than 2^31 bytes (2GB).
2494
2495 The executable can be placed anywhere in the virtual address
2496 space.
2497
2498 sethi %hh(symbol), %temp1
2499 sethi %lm(symbol), %temp2
2500 or %temp1, %hm(symbol), %temp3
2501 sllx %temp3, 32, %temp4
2502 or %temp4, %temp2, %temp5
2503 or %temp5, %lo(symbol), %reg */
2504 if (temp)
2505 {
2506 /* It is possible that one of the registers we got for operands[2]
2507 might coincide with that of operands[0] (which is why we made
2508 it TImode). Pick the other one to use as our scratch. */
2509 if (rtx_equal_p (temp, op0))
2510 {
2511 gcc_assert (ti_temp);
2512 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2513 }
2514 temp1 = op0;
2515 temp2 = temp; /* op0 is _not_ allowed, see above. */
2516 temp3 = op0;
2517 temp4 = op0;
2518 temp5 = op0;
2519 }
2520 else
2521 {
2522 temp1 = gen_reg_rtx (DImode);
2523 temp2 = gen_reg_rtx (DImode);
2524 temp3 = gen_reg_rtx (DImode);
2525 temp4 = gen_reg_rtx (DImode);
2526 temp5 = gen_reg_rtx (DImode);
2527 }
2528
2529 emit_insn (gen_sethh (temp1, op1));
2530 emit_insn (gen_setlm (temp2, op1));
2531 emit_insn (gen_sethm (temp3, temp1, op1));
2532 emit_insn (gen_rtx_SET (temp4,
2533 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2534 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2535 emit_insn (gen_setlo (op0, temp5, op1));
2536 break;
2537
2538 case CM_EMBMEDANY:
2539 /* Old old old backwards compatibility cruft here.
2540 Essentially it is MEDLOW with a fixed 64-bit
2541 virtual base added to all data segment addresses.
2542 Text-segment stuff is computed like MEDANY, we can't
2543 reuse the code above because the relocation knobs
2544 look different.
2545
2546 Data segment: sethi %hi(symbol), %temp1
2547 add %temp1, EMBMEDANY_BASE_REG, %temp2
2548 or %temp2, %lo(symbol), %reg */
2549 if (data_segment_operand (op1, GET_MODE (op1)))
2550 {
2551 if (temp)
2552 {
2553 temp1 = temp; /* op0 is allowed. */
2554 temp2 = op0;
2555 }
2556 else
2557 {
2558 temp1 = gen_reg_rtx (DImode);
2559 temp2 = gen_reg_rtx (DImode);
2560 }
2561
2562 emit_insn (gen_embmedany_sethi (temp1, op1));
2563 emit_insn (gen_embmedany_brsum (temp2, temp1));
2564 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2565 }
2566
2567 /* Text segment: sethi %uhi(symbol), %temp1
2568 sethi %hi(symbol), %temp2
2569 or %temp1, %ulo(symbol), %temp3
2570 sllx %temp3, 32, %temp4
2571 or %temp4, %temp2, %temp5
2572 or %temp5, %lo(symbol), %reg */
2573 else
2574 {
2575 if (temp)
2576 {
2577 /* It is possible that one of the registers we got for operands[2]
2578 might coincide with that of operands[0] (which is why we made
2579 it TImode). Pick the other one to use as our scratch. */
2580 if (rtx_equal_p (temp, op0))
2581 {
2582 gcc_assert (ti_temp);
2583 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2584 }
2585 temp1 = op0;
2586 temp2 = temp; /* op0 is _not_ allowed, see above. */
2587 temp3 = op0;
2588 temp4 = op0;
2589 temp5 = op0;
2590 }
2591 else
2592 {
2593 temp1 = gen_reg_rtx (DImode);
2594 temp2 = gen_reg_rtx (DImode);
2595 temp3 = gen_reg_rtx (DImode);
2596 temp4 = gen_reg_rtx (DImode);
2597 temp5 = gen_reg_rtx (DImode);
2598 }
2599
2600 emit_insn (gen_embmedany_textuhi (temp1, op1));
2601 emit_insn (gen_embmedany_texthi (temp2, op1));
2602 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2603 emit_insn (gen_rtx_SET (temp4,
2604 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2605 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2606 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2607 }
2608 break;
2609
2610 default:
2611 gcc_unreachable ();
2612 }
2613 }
2614
2615 /* These avoid problems when cross compiling. If we do not
2616 go through all this hair then the optimizer will see
2617 invalid REG_EQUAL notes or in some cases none at all. */
2618 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2619 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2620 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2621 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2622
2623 /* The optimizer is not to assume anything about exactly
2624 which bits are set for a HIGH; they are unspecified.
2625 Unfortunately this leads to many missed optimizations
2626 during CSE. We mask out the non-HIGH bits so that the
2627 result matches a plain movdi, to alleviate this problem. */
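/* For example, gen_safe_HIGH64 (dest, 0xdeadbeef) generates
(set dest (const_int 0xdeadbc00)), i.e. only the 22 bits a sethi would set. */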
2628 static rtx
2629 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2630 {
2631 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2632 }
2633
2634 static rtx
2635 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2636 {
2637 return gen_rtx_SET (dest, GEN_INT (val));
2638 }
2639
2640 static rtx
2641 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2642 {
2643 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2644 }
2645
2646 static rtx
2647 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2648 {
2649 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2650 }
2651
2652 /* Worker routines for 64-bit constant formation on arch64.
2653 A key goal in these emissions is to create as many
2654 temporary REGs as possible. This makes it possible for
2655 half-built constants to be reused when similar values
2656 are required later on.
2657 Without doing this, the optimizer cannot see such
2658 opportunities. */
2659
2660 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2661 unsigned HOST_WIDE_INT, int);
2662
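/* Load a constant whose upper 32 bits are all zeros (!IS_NEG) or all ones
(IS_NEG): a sethi of LOW_BITS (or of its complement when IS_NEG) followed
by an or (resp. an xor, or a one's complement) to fill in the low 10 bits. */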
2663 static void
2664 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2665 unsigned HOST_WIDE_INT low_bits, int is_neg)
2666 {
2667 unsigned HOST_WIDE_INT high_bits;
2668
2669 if (is_neg)
2670 high_bits = (~low_bits) & 0xffffffff;
2671 else
2672 high_bits = low_bits;
2673
2674 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2675 if (!is_neg)
2676 {
2677 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2678 }
2679 else
2680 {
2681 /* If we are XOR'ing with -1, then we should emit a one's complement
2682 instead. This way the combiner will notice logical operations
2683 such as ANDN later on and substitute. */
2684 if ((low_bits & 0x3ff) == 0x3ff)
2685 {
2686 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2687 }
2688 else
2689 {
2690 emit_insn (gen_rtx_SET (op0,
2691 gen_safe_XOR64 (temp,
2692 (-(HOST_WIDE_INT)0x400
2693 | (low_bits & 0x3ff)))));
2694 }
2695 }
2696 }
2697
2698 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2699 unsigned HOST_WIDE_INT, int);
2700
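/* Load HIGH_BITS (with sethi/or, or a single set when the upper 22 bits are
zero), shift the result left by SHIFT_COUNT, and finally or in
LOW_IMMEDIATE if it is nonzero. */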
2701 static void
2702 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2703 unsigned HOST_WIDE_INT high_bits,
2704 unsigned HOST_WIDE_INT low_immediate,
2705 int shift_count)
2706 {
2707 rtx temp2 = op0;
2708
2709 if ((high_bits & 0xfffffc00) != 0)
2710 {
2711 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2712 if ((high_bits & ~0xfffffc00) != 0)
2713 emit_insn (gen_rtx_SET (op0,
2714 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2715 else
2716 temp2 = temp;
2717 }
2718 else
2719 {
2720 emit_insn (gen_safe_SET64 (temp, high_bits));
2721 temp2 = temp;
2722 }
2723
2724 /* Now shift it up into place. */
2725 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2726 GEN_INT (shift_count))));
2727
2728 /* If there is a low immediate part piece, finish up by
2729 putting that in as well. */
2730 if (low_immediate != 0)
2731 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2732 }
2733
2734 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2735 unsigned HOST_WIDE_INT);
2736
2737 /* Full 64-bit constant decomposition. Even though this is the
2738 'worst' case, we still optimize a few things away. */
2739 static void
2740 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2741 unsigned HOST_WIDE_INT high_bits,
2742 unsigned HOST_WIDE_INT low_bits)
2743 {
2744 rtx sub_temp = op0;
2745
2746 if (can_create_pseudo_p ())
2747 sub_temp = gen_reg_rtx (DImode);
2748
2749 if ((high_bits & 0xfffffc00) != 0)
2750 {
2751 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2752 if ((high_bits & ~0xfffffc00) != 0)
2753 emit_insn (gen_rtx_SET (sub_temp,
2754 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2755 else
2756 sub_temp = temp;
2757 }
2758 else
2759 {
2760 emit_insn (gen_safe_SET64 (temp, high_bits));
2761 sub_temp = temp;
2762 }
2763
2764 if (can_create_pseudo_p ())
2765 {
2766 rtx temp2 = gen_reg_rtx (DImode);
2767 rtx temp3 = gen_reg_rtx (DImode);
2768 rtx temp4 = gen_reg_rtx (DImode);
2769
2770 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2771 GEN_INT (32))));
2772
2773 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2774 if ((low_bits & ~0xfffffc00) != 0)
2775 {
2776 emit_insn (gen_rtx_SET (temp3,
2777 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2778 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2779 }
2780 else
2781 {
2782 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2783 }
2784 }
2785 else
2786 {
2787 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2788 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2789 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2790 int to_shift = 12;
2791
2792 /* We are in the middle of reload, so this is really
2793 painful. However we do still make an attempt to
2794 avoid emitting truly stupid code. */
2795 if (low1 != const0_rtx)
2796 {
2797 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2798 GEN_INT (to_shift))));
2799 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2800 sub_temp = op0;
2801 to_shift = 12;
2802 }
2803 else
2804 {
2805 to_shift += 12;
2806 }
2807 if (low2 != const0_rtx)
2808 {
2809 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2810 GEN_INT (to_shift))));
2811 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2812 sub_temp = op0;
2813 to_shift = 8;
2814 }
2815 else
2816 {
2817 to_shift += 8;
2818 }
2819 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2820 GEN_INT (to_shift))));
2821 if (low3 != const0_rtx)
2822 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2823 /* phew... */
2824 }
2825 }
2826
2827 /* Analyze a 64-bit constant for certain properties. */
2828 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2829 unsigned HOST_WIDE_INT,
2830 int *, int *, int *);
2831
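/* Store in *HBSP and *LBSP the positions (0..63) of the highest and lowest
set bits of the constant whose upper word is HIGH_BITS and lower word is
LOW_BITS, and set *ABBASP to nonzero when every bit in between is also set.
For example, 0x3fc00 yields lowest 10, highest 17, all-between set. */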
2832 static void
2833 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2834 unsigned HOST_WIDE_INT low_bits,
2835 int *hbsp, int *lbsp, int *abbasp)
2836 {
2837 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2838 int i;
2839
2840 lowest_bit_set = highest_bit_set = -1;
2841 i = 0;
2842 do
2843 {
2844 if ((lowest_bit_set == -1)
2845 && ((low_bits >> i) & 1))
2846 lowest_bit_set = i;
2847 if ((highest_bit_set == -1)
2848 && ((high_bits >> (32 - i - 1)) & 1))
2849 highest_bit_set = (64 - i - 1);
2850 }
2851 while (++i < 32
2852 && ((highest_bit_set == -1)
2853 || (lowest_bit_set == -1)));
2854 if (i == 32)
2855 {
2856 i = 0;
2857 do
2858 {
2859 if ((lowest_bit_set == -1)
2860 && ((high_bits >> i) & 1))
2861 lowest_bit_set = i + 32;
2862 if ((highest_bit_set == -1)
2863 && ((low_bits >> (32 - i - 1)) & 1))
2864 highest_bit_set = 32 - i - 1;
2865 }
2866 while (++i < 32
2867 && ((highest_bit_set == -1)
2868 || (lowest_bit_set == -1)));
2869 }
2870 /* If there are no bits set this should have gone out
2871 as one instruction! */
2872 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2873 all_bits_between_are_set = 1;
2874 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2875 {
2876 if (i < 32)
2877 {
2878 if ((low_bits & (1 << i)) != 0)
2879 continue;
2880 }
2881 else
2882 {
2883 if ((high_bits & (1 << (i - 32))) != 0)
2884 continue;
2885 }
2886 all_bits_between_are_set = 0;
2887 break;
2888 }
2889 *hbsp = highest_bit_set;
2890 *lbsp = lowest_bit_set;
2891 *abbasp = all_bits_between_are_set;
2892 }
2893
2894 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2895
2896 static int
2897 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2898 unsigned HOST_WIDE_INT low_bits)
2899 {
2900 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2901
2902 if (high_bits == 0
2903 || high_bits == 0xffffffff)
2904 return 1;
2905
2906 analyze_64bit_constant (high_bits, low_bits,
2907 &highest_bit_set, &lowest_bit_set,
2908 &all_bits_between_are_set);
2909
2910 if ((highest_bit_set == 63
2911 || lowest_bit_set == 0)
2912 && all_bits_between_are_set != 0)
2913 return 1;
2914
2915 if ((highest_bit_set - lowest_bit_set) < 21)
2916 return 1;
2917
2918 return 0;
2919 }
2920
2921 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2922 unsigned HOST_WIDE_INT,
2923 int, int);
2924
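/* Return the constant's bits shifted down so that bit LOWEST_BIT_SET lands
at position SHIFT; this forms the 'focus' value that a sethi (SHIFT == 10)
or a simm13 (SHIFT == 0) can materialize before shifting it back into place. */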
2925 static unsigned HOST_WIDE_INT
2926 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2927 unsigned HOST_WIDE_INT low_bits,
2928 int lowest_bit_set, int shift)
2929 {
2930 HOST_WIDE_INT hi, lo;
2931
2932 if (lowest_bit_set < 32)
2933 {
2934 lo = (low_bits >> lowest_bit_set) << shift;
2935 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2936 }
2937 else
2938 {
2939 lo = 0;
2940 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2941 }
2942 gcc_assert (! (hi & lo));
2943 return (hi | lo);
2944 }
2945
2946 /* Here we are sure to be arch64 and this is an integer constant
2947 being loaded into a register. Emit the most efficient
2948 insn sequence possible. Detection of all the 1-insn cases
2949 has been done already. */
2950 static void
2951 sparc_emit_set_const64 (rtx op0, rtx op1)
2952 {
2953 unsigned HOST_WIDE_INT high_bits, low_bits;
2954 int lowest_bit_set, highest_bit_set;
2955 int all_bits_between_are_set;
2956 rtx temp = 0;
2957
2958 /* Sanity check that we know what we are working with. */
2959 gcc_assert (TARGET_ARCH64
2960 && (GET_CODE (op0) == SUBREG
2961 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2962
2963 if (! can_create_pseudo_p ())
2964 temp = op0;
2965
2966 if (GET_CODE (op1) != CONST_INT)
2967 {
2968 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2969 return;
2970 }
2971
2972 if (! temp)
2973 temp = gen_reg_rtx (DImode);
2974
2975 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2976 low_bits = (INTVAL (op1) & 0xffffffff);
2977
2978 /* low_bits bits 0 --> 31
2979 high_bits bits 32 --> 63 */
2980
2981 analyze_64bit_constant (high_bits, low_bits,
2982 &highest_bit_set, &lowest_bit_set,
2983 &all_bits_between_are_set);
2984
2985 /* First try for a 2-insn sequence. */
2986
2987 /* These situations are preferred because the optimizer can
2988 * do more things with them:
2989 * 1) mov -1, %reg
2990 * sllx %reg, shift, %reg
2991 * 2) mov -1, %reg
2992 * srlx %reg, shift, %reg
2993 * 3) mov some_small_const, %reg
2994 * sllx %reg, shift, %reg
2995 */
2996 if (((highest_bit_set == 63
2997 || lowest_bit_set == 0)
2998 && all_bits_between_are_set != 0)
2999 || ((highest_bit_set - lowest_bit_set) < 12))
3000 {
3001 HOST_WIDE_INT the_const = -1;
3002 int shift = lowest_bit_set;
3003
3004 if ((highest_bit_set != 63
3005 && lowest_bit_set != 0)
3006 || all_bits_between_are_set == 0)
3007 {
3008 the_const =
3009 create_simple_focus_bits (high_bits, low_bits,
3010 lowest_bit_set, 0);
3011 }
3012 else if (lowest_bit_set == 0)
3013 shift = -(63 - highest_bit_set);
3014
3015 gcc_assert (SPARC_SIMM13_P (the_const));
3016 gcc_assert (shift != 0);
3017
3018 emit_insn (gen_safe_SET64 (temp, the_const));
3019 if (shift > 0)
3020 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3021 GEN_INT (shift))));
3022 else if (shift < 0)
3023 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3024 GEN_INT (-shift))));
3025 return;
3026 }
3027
3028 /* Now a range of 22 or less bits set somewhere.
3029 * 1) sethi %hi(focus_bits), %reg
3030 * sllx %reg, shift, %reg
3031 * 2) sethi %hi(focus_bits), %reg
3032 * srlx %reg, shift, %reg
3033 */
3034 if ((highest_bit_set - lowest_bit_set) < 21)
3035 {
3036 unsigned HOST_WIDE_INT focus_bits =
3037 create_simple_focus_bits (high_bits, low_bits,
3038 lowest_bit_set, 10);
3039
3040 gcc_assert (SPARC_SETHI_P (focus_bits));
3041 gcc_assert (lowest_bit_set != 10);
3042
3043 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3044
3045 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3046 if (lowest_bit_set < 10)
3047 emit_insn (gen_rtx_SET (op0,
3048 gen_rtx_LSHIFTRT (DImode, temp,
3049 GEN_INT (10 - lowest_bit_set))));
3050 else if (lowest_bit_set > 10)
3051 emit_insn (gen_rtx_SET (op0,
3052 gen_rtx_ASHIFT (DImode, temp,
3053 GEN_INT (lowest_bit_set - 10))));
3054 return;
3055 }
3056
3057 /* 1) sethi %hi(low_bits), %reg
3058 * or %reg, %lo(low_bits), %reg
3059 * 2) sethi %hi(~low_bits), %reg
3060 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3061 */
3062 if (high_bits == 0
3063 || high_bits == 0xffffffff)
3064 {
3065 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3066 (high_bits == 0xffffffff));
3067 return;
3068 }
3069
3070 /* Now, try 3-insn sequences. */
3071
3072 /* 1) sethi %hi(high_bits), %reg
3073 * or %reg, %lo(high_bits), %reg
3074 * sllx %reg, 32, %reg
3075 */
3076 if (low_bits == 0)
3077 {
3078 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3079 return;
3080 }
3081
3082 /* We may be able to do something quick
3083 when the constant is negated, so try that. */
3084 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3085 (~low_bits) & 0xfffffc00))
3086 {
3087 /* NOTE: The trailing bits get XOR'd so we need the
3088 non-negated bits, not the negated ones. */
3089 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3090
3091 if ((((~high_bits) & 0xffffffff) == 0
3092 && ((~low_bits) & 0x80000000) == 0)
3093 || (((~high_bits) & 0xffffffff) == 0xffffffff
3094 && ((~low_bits) & 0x80000000) != 0))
3095 {
3096 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3097
3098 if ((SPARC_SETHI_P (fast_int)
3099 && (~high_bits & 0xffffffff) == 0)
3100 || SPARC_SIMM13_P (fast_int))
3101 emit_insn (gen_safe_SET64 (temp, fast_int));
3102 else
3103 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3104 }
3105 else
3106 {
3107 rtx negated_const;
3108 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3109 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3110 sparc_emit_set_const64 (temp, negated_const);
3111 }
3112
3113 /* If we are XOR'ing with -1, then we should emit a one's complement
3114 instead. This way the combiner will notice logical operations
3115 such as ANDN later on and substitute. */
3116 if (trailing_bits == 0x3ff)
3117 {
3118 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3119 }
3120 else
3121 {
3122 emit_insn (gen_rtx_SET (op0,
3123 gen_safe_XOR64 (temp,
3124 (-0x400 | trailing_bits))));
3125 }
3126 return;
3127 }
3128
3129 /* 1) sethi %hi(xxx), %reg
3130 * or %reg, %lo(xxx), %reg
3131 * sllx %reg, yyy, %reg
3132 *
3133 * ??? This is just a generalized version of the low_bits==0
3134 * thing above, FIXME...
3135 */
3136 if ((highest_bit_set - lowest_bit_set) < 32)
3137 {
3138 unsigned HOST_WIDE_INT focus_bits =
3139 create_simple_focus_bits (high_bits, low_bits,
3140 lowest_bit_set, 0);
3141
3142 /* We can't get here in this state. */
3143 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3144
3145 /* So what we know is that the set bits straddle the
3146 middle of the 64-bit word. */
3147 sparc_emit_set_const64_quick2 (op0, temp,
3148 focus_bits, 0,
3149 lowest_bit_set);
3150 return;
3151 }
3152
3153 /* 1) sethi %hi(high_bits), %reg
3154 * or %reg, %lo(high_bits), %reg
3155 * sllx %reg, 32, %reg
3156 * or %reg, low_bits, %reg
3157 */
3158 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3159 {
3160 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3161 return;
3162 }
3163
3164 /* The easiest way when all else fails, is full decomposition. */
3165 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3166 }
3167
3168 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3169
3170 static bool
3171 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3172 {
3173 *p1 = SPARC_ICC_REG;
3174 *p2 = SPARC_FCC_REG;
3175 return true;
3176 }
3177
3178 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3179
3180 static unsigned int
3181 sparc_min_arithmetic_precision (void)
3182 {
3183 return 32;
3184 }
3185
3186 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3187 return the mode to be used for the comparison. For floating-point,
3188 CCFP[E]mode is used. CCNZmode should be used when the first operand
3189 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3190 processing is needed. */
3191
3192 machine_mode
3193 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3194 {
3195 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3196 {
3197 switch (op)
3198 {
3199 case EQ:
3200 case NE:
3201 case UNORDERED:
3202 case ORDERED:
3203 case UNLT:
3204 case UNLE:
3205 case UNGT:
3206 case UNGE:
3207 case UNEQ:
3208 case LTGT:
3209 return CCFPmode;
3210
3211 case LT:
3212 case LE:
3213 case GT:
3214 case GE:
3215 return CCFPEmode;
3216
3217 default:
3218 gcc_unreachable ();
3219 }
3220 }
3221 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3222 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3223 && y == const0_rtx)
3224 {
3225 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3226 return CCXNZmode;
3227 else
3228 return CCNZmode;
3229 }
3230 else
3231 {
3232 /* This is for the cmp<mode>_sne pattern. */
3233 if (GET_CODE (x) == NOT && y == constm1_rtx)
3234 {
3235 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3236 return CCXCmode;
3237 else
3238 return CCCmode;
3239 }
3240
3241 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3242 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3243 {
3244 if (GET_CODE (y) == UNSPEC
3245 && (XINT (y, 1) == UNSPEC_ADDV
3246 || XINT (y, 1) == UNSPEC_SUBV
3247 || XINT (y, 1) == UNSPEC_NEGV))
3248 return CCVmode;
3249 else
3250 return CCCmode;
3251 }
3252
3253 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3254 return CCXmode;
3255 else
3256 return CCmode;
3257 }
3258 }
3259
3260 /* Emit the compare insn and return the CC reg for a CODE comparison
3261 with operands X and Y. */
3262
3263 static rtx
3264 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3265 {
3266 machine_mode mode;
3267 rtx cc_reg;
3268
3269 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3270 return x;
3271
3272 mode = SELECT_CC_MODE (code, x, y);
3273
3274 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3275 fcc regs (cse can't tell they're really call clobbered regs and will
3276 remove a duplicate comparison even if there is an intervening function
3277 call - it will then try to reload the cc reg via an int reg which is why
3278 we need the movcc patterns). It is possible to provide the movcc
3279 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3280 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3281 to tell cse that CCFPE mode registers (even pseudos) are call
3282 clobbered. */
3283
3284 /* ??? This is an experiment. Rather than making changes to cse which may
3285 or may not be easy/clean, we do our own cse. This is possible because
3286 we will generate hard registers. Cse knows they're call clobbered (it
3287 doesn't know the same thing about pseudos). If we guess wrong, no big
3288 deal, but if we win, great! */
3289
3290 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3291 #if 1 /* experiment */
3292 {
3293 int reg;
3294 /* We cycle through the registers to ensure they're all exercised. */
3295 static int next_fcc_reg = 0;
3296 /* Previous x,y for each fcc reg. */
3297 static rtx prev_args[4][2];
3298
3299 /* Scan prev_args for x,y. */
3300 for (reg = 0; reg < 4; reg++)
3301 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3302 break;
3303 if (reg == 4)
3304 {
3305 reg = next_fcc_reg;
3306 prev_args[reg][0] = x;
3307 prev_args[reg][1] = y;
3308 next_fcc_reg = (next_fcc_reg + 1) & 3;
3309 }
3310 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3311 }
3312 #else
3313 cc_reg = gen_reg_rtx (mode);
3314 #endif /* ! experiment */
3315 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3316 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3317 else
3318 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3319
3320 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3321 will only result in an unrecognizable insn so no point in asserting. */
3322 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3323
3324 return cc_reg;
3325 }
3326
3327
3328 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3329
3330 rtx
3331 gen_compare_reg (rtx cmp)
3332 {
3333 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3334 }
3335
3336 /* This function is used for v9 only.
3337 DEST is the target of the Scc insn.
3338 CODE is the code for an Scc's comparison.
3339 X and Y are the values we compare.
3340
3341 This function is needed to turn
3342
3343 (set (reg:SI 110)
3344 (gt (reg:CCX 100 %icc)
3345 (const_int 0)))
3346 into
3347 (set (reg:SI 110)
3348 (gt:DI (reg:CCX 100 %icc)
3349 (const_int 0)))
3350
3351 I.e., the instruction recognizer needs to see the mode of the comparison to
3352 find the right instruction. We could use "gt:DI" right in the
3353 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3354
3355 static int
3356 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3357 {
3358 if (! TARGET_ARCH64
3359 && (GET_MODE (x) == DImode
3360 || GET_MODE (dest) == DImode))
3361 return 0;
3362
3363 /* Try to use the movrCC insns. */
3364 if (TARGET_ARCH64
3365 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3366 && y == const0_rtx
3367 && v9_regcmp_p (compare_code))
3368 {
3369 rtx op0 = x;
3370 rtx temp;
3371
3372 /* Special case for op0 != 0. This can be done with one instruction if
3373 dest == x. */
3374
3375 if (compare_code == NE
3376 && GET_MODE (dest) == DImode
3377 && rtx_equal_p (op0, dest))
3378 {
3379 emit_insn (gen_rtx_SET (dest,
3380 gen_rtx_IF_THEN_ELSE (DImode,
3381 gen_rtx_fmt_ee (compare_code, DImode,
3382 op0, const0_rtx),
3383 const1_rtx,
3384 dest)));
3385 return 1;
3386 }
3387
3388 if (reg_overlap_mentioned_p (dest, op0))
3389 {
3390 /* Handle the case where dest == x.
3391 We "early clobber" the result. */
3392 op0 = gen_reg_rtx (GET_MODE (x));
3393 emit_move_insn (op0, x);
3394 }
3395
3396 emit_insn (gen_rtx_SET (dest, const0_rtx));
3397 if (GET_MODE (op0) != DImode)
3398 {
3399 temp = gen_reg_rtx (DImode);
3400 convert_move (temp, op0, 0);
3401 }
3402 else
3403 temp = op0;
3404 emit_insn (gen_rtx_SET (dest,
3405 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3406 gen_rtx_fmt_ee (compare_code, DImode,
3407 temp, const0_rtx),
3408 const1_rtx,
3409 dest)));
3410 return 1;
3411 }
3412 else
3413 {
3414 x = gen_compare_reg_1 (compare_code, x, y);
3415 y = const0_rtx;
3416
3417 emit_insn (gen_rtx_SET (dest, const0_rtx));
3418 emit_insn (gen_rtx_SET (dest,
3419 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3420 gen_rtx_fmt_ee (compare_code,
3421 GET_MODE (x), x, y),
3422 const1_rtx, dest)));
3423 return 1;
3424 }
3425 }
3426
3427
3428 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3429 without jumps using the addx/subx instructions. */
3430
3431 bool
3432 emit_scc_insn (rtx operands[])
3433 {
3434 rtx tem, x, y;
3435 enum rtx_code code;
3436 machine_mode mode;
3437
3438 /* The quad-word fp compare library routines all return nonzero to indicate
3439 true, which is different from the equivalent libgcc routines, so we must
3440 handle them specially here. */
3441 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3442 {
3443 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3444 GET_CODE (operands[1]));
3445 operands[2] = XEXP (operands[1], 0);
3446 operands[3] = XEXP (operands[1], 1);
3447 }
3448
3449 code = GET_CODE (operands[1]);
3450 x = operands[2];
3451 y = operands[3];
3452 mode = GET_MODE (x);
3453
3454 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3455 more applications). The exception to this is "reg != 0" which can
3456 be done in one instruction on v9 (so we do it). */
3457 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3458 {
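/* Reduce X CODE Y to (X ^ Y) CODE 0: the xor is zero exactly when the
operands are equal. */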
3459 if (y != const0_rtx)
3460 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3461
3462 rtx pat = gen_rtx_SET (operands[0],
3463 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3464 x, const0_rtx));
3465
3466 /* If we can use addx/subx or addxc, add a clobber for CC. */
3467 if (mode == SImode || (code == NE && TARGET_VIS3))
3468 {
3469 rtx clobber
3470 = gen_rtx_CLOBBER (VOIDmode,
3471 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3472 SPARC_ICC_REG));
3473 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3474 }
3475
3476 emit_insn (pat);
3477 return true;
3478 }
3479
3480 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3481 if (TARGET_ARCH64
3482 && mode == DImode
3483 && !((code == LTU || code == GTU) && TARGET_VIS3)
3484 && gen_v9_scc (operands[0], code, x, y))
3485 return true;
3486
3487 /* We can do LTU and GEU using the addx/subx instructions too. And
3488 for GTU/LEU, if both operands are registers, swap them and fall
3489 back to the easy case. */
3490 if (code == GTU || code == LEU)
3491 {
3492 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3493 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3494 {
3495 tem = x;
3496 x = y;
3497 y = tem;
3498 code = swap_condition (code);
3499 }
3500 }
3501
3502 if (code == LTU || code == GEU)
3503 {
3504 emit_insn (gen_rtx_SET (operands[0],
3505 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3506 gen_compare_reg_1 (code, x, y),
3507 const0_rtx)));
3508 return true;
3509 }
3510
3511 /* All the possibilities to use addx/subx-based sequences have been
3512 exhausted; try for a 3-instruction sequence using v9 conditional
3513 moves. */
3514 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3515 return true;
3516
3517 /* Nope, do branches. */
3518 return false;
3519 }
3520
3521 /* Emit a conditional jump insn for the v9 architecture using comparison code
3522 CODE and jump target LABEL.
3523 This function exists to take advantage of the v9 brxx insns. */
3524
3525 static void
3526 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3527 {
3528 emit_jump_insn (gen_rtx_SET (pc_rtx,
3529 gen_rtx_IF_THEN_ELSE (VOIDmode,
3530 gen_rtx_fmt_ee (code, GET_MODE (op0),
3531 op0, const0_rtx),
3532 gen_rtx_LABEL_REF (VOIDmode, label),
3533 pc_rtx)));
3534 }
3535
3536 /* Emit a conditional jump insn for the UA2011 architecture using
3537 comparison code CODE and jump target LABEL. This function exists
3538 to take advantage of the UA2011 Compare and Branch insns. */
3539
3540 static void
3541 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3542 {
3543 rtx if_then_else;
3544
3545 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3546 gen_rtx_fmt_ee (code, GET_MODE (op0),
3547 op0, op1),
3548 gen_rtx_LABEL_REF (VOIDmode, label),
3549 pc_rtx);
3550
3551 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3552 }
3553
3554 void
3555 emit_conditional_branch_insn (rtx operands[])
3556 {
3557 /* The quad-word fp compare library routines all return nonzero to indicate
3558 true, which is different from the equivalent libgcc routines, so we must
3559 handle them specially here. */
3560 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3561 {
3562 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3563 GET_CODE (operands[0]));
3564 operands[1] = XEXP (operands[0], 0);
3565 operands[2] = XEXP (operands[0], 1);
3566 }
3567
3568 /* If we can tell early on that the comparison is against a constant
3569 that won't fit in the 5-bit signed immediate field of a cbcond,
3570 use one of the other v9 conditional branch sequences. */
3571 if (TARGET_CBCOND
3572 && GET_CODE (operands[1]) == REG
3573 && (GET_MODE (operands[1]) == SImode
3574 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3575 && (GET_CODE (operands[2]) != CONST_INT
3576 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3577 {
3578 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3579 return;
3580 }
3581
3582 if (TARGET_ARCH64 && operands[2] == const0_rtx
3583 && GET_CODE (operands[1]) == REG
3584 && GET_MODE (operands[1]) == DImode)
3585 {
3586 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3587 return;
3588 }
3589
3590 operands[1] = gen_compare_reg (operands[0]);
3591 operands[2] = const0_rtx;
3592 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3593 operands[1], operands[2]);
3594 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3595 operands[3]));
3596 }
3597
3598
3599 /* Generate a DFmode part of a hard TFmode register.
3600 REG is the TFmode hard register, LOW is 1 for the
3601 low 64 bits of the register and 0 otherwise.
3602 */
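/* For example (SPARC is big-endian, so WORDS_BIG_ENDIAN is nonzero): for
   a TFmode value living in %f4, this returns %f4 for the high part
   (LOW == 0) and %f6 for the low part (LOW == 1).  */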
3603 rtx
3604 gen_df_reg (rtx reg, int low)
3605 {
3606 int regno = REGNO (reg);
3607
3608 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3609 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3610 return gen_rtx_REG (DFmode, regno);
3611 }
3612 \f
3613 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3614 Unlike normal calls, TFmode operands are passed by reference. It is
3615 assumed that no more than 3 operands are required. */
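/* For example, assuming the standard SPARC _Qp_* ABI, the quad add
   routine is declared roughly as

       void _Qp_add (long double *result, const long double *a,
                     const long double *b);

   i.e. the TFmode result and operands are all passed by address, while
   routines with a non-TFmode result (e.g. _Qp_qtoi) return it normally.  */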
3616
3617 static void
3618 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3619 {
3620 rtx ret_slot = NULL, arg[3], func_sym;
3621 int i;
3622
3623 /* We only expect to be called for conversions, unary, and binary ops. */
3624 gcc_assert (nargs == 2 || nargs == 3);
3625
3626 for (i = 0; i < nargs; ++i)
3627 {
3628 rtx this_arg = operands[i];
3629 rtx this_slot;
3630
3631 /* TFmode arguments and return values are passed by reference. */
3632 if (GET_MODE (this_arg) == TFmode)
3633 {
3634 int force_stack_temp;
3635
3636 force_stack_temp = 0;
3637 if (TARGET_BUGGY_QP_LIB && i == 0)
3638 force_stack_temp = 1;
3639
3640 if (GET_CODE (this_arg) == MEM
3641 && ! force_stack_temp)
3642 {
3643 tree expr = MEM_EXPR (this_arg);
3644 if (expr)
3645 mark_addressable (expr);
3646 this_arg = XEXP (this_arg, 0);
3647 }
3648 else if (CONSTANT_P (this_arg)
3649 && ! force_stack_temp)
3650 {
3651 this_slot = force_const_mem (TFmode, this_arg);
3652 this_arg = XEXP (this_slot, 0);
3653 }
3654 else
3655 {
3656 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3657
3658 /* Operand 0 is the return value. We'll copy it out later. */
3659 if (i > 0)
3660 emit_move_insn (this_slot, this_arg);
3661 else
3662 ret_slot = this_slot;
3663
3664 this_arg = XEXP (this_slot, 0);
3665 }
3666 }
3667
3668 arg[i] = this_arg;
3669 }
3670
3671 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3672
3673 if (GET_MODE (operands[0]) == TFmode)
3674 {
3675 if (nargs == 2)
3676 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3677 arg[0], GET_MODE (arg[0]),
3678 arg[1], GET_MODE (arg[1]));
3679 else
3680 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3681 arg[0], GET_MODE (arg[0]),
3682 arg[1], GET_MODE (arg[1]),
3683 arg[2], GET_MODE (arg[2]));
3684
3685 if (ret_slot)
3686 emit_move_insn (operands[0], ret_slot);
3687 }
3688 else
3689 {
3690 rtx ret;
3691
3692 gcc_assert (nargs == 2);
3693
3694 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3695 GET_MODE (operands[0]),
3696 arg[1], GET_MODE (arg[1]));
3697
3698 if (ret != operands[0])
3699 emit_move_insn (operands[0], ret);
3700 }
3701 }
3702
3703 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3704
3705 static void
3706 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3707 {
3708 const char *func;
3709
3710 switch (code)
3711 {
3712 case PLUS:
3713 func = "_Qp_add";
3714 break;
3715 case MINUS:
3716 func = "_Qp_sub";
3717 break;
3718 case MULT:
3719 func = "_Qp_mul";
3720 break;
3721 case DIV:
3722 func = "_Qp_div";
3723 break;
3724 default:
3725 gcc_unreachable ();
3726 }
3727
3728 emit_soft_tfmode_libcall (func, 3, operands);
3729 }
3730
3731 static void
3732 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3733 {
3734 const char *func;
3735
3736 gcc_assert (code == SQRT);
3737 func = "_Qp_sqrt";
3738
3739 emit_soft_tfmode_libcall (func, 2, operands);
3740 }
3741
3742 static void
3743 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3744 {
3745 const char *func;
3746
3747 switch (code)
3748 {
3749 case FLOAT_EXTEND:
3750 switch (GET_MODE (operands[1]))
3751 {
3752 case E_SFmode:
3753 func = "_Qp_stoq";
3754 break;
3755 case E_DFmode:
3756 func = "_Qp_dtoq";
3757 break;
3758 default:
3759 gcc_unreachable ();
3760 }
3761 break;
3762
3763 case FLOAT_TRUNCATE:
3764 switch (GET_MODE (operands[0]))
3765 {
3766 case E_SFmode:
3767 func = "_Qp_qtos";
3768 break;
3769 case E_DFmode:
3770 func = "_Qp_qtod";
3771 break;
3772 default:
3773 gcc_unreachable ();
3774 }
3775 break;
3776
3777 case FLOAT:
3778 switch (GET_MODE (operands[1]))
3779 {
3780 case E_SImode:
3781 func = "_Qp_itoq";
3782 if (TARGET_ARCH64)
3783 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3784 break;
3785 case E_DImode:
3786 func = "_Qp_xtoq";
3787 break;
3788 default:
3789 gcc_unreachable ();
3790 }
3791 break;
3792
3793 case UNSIGNED_FLOAT:
3794 switch (GET_MODE (operands[1]))
3795 {
3796 case E_SImode:
3797 func = "_Qp_uitoq";
3798 if (TARGET_ARCH64)
3799 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3800 break;
3801 case E_DImode:
3802 func = "_Qp_uxtoq";
3803 break;
3804 default:
3805 gcc_unreachable ();
3806 }
3807 break;
3808
3809 case FIX:
3810 switch (GET_MODE (operands[0]))
3811 {
3812 case E_SImode:
3813 func = "_Qp_qtoi";
3814 break;
3815 case E_DImode:
3816 func = "_Qp_qtox";
3817 break;
3818 default:
3819 gcc_unreachable ();
3820 }
3821 break;
3822
3823 case UNSIGNED_FIX:
3824 switch (GET_MODE (operands[0]))
3825 {
3826 case E_SImode:
3827 func = "_Qp_qtoui";
3828 break;
3829 case E_DImode:
3830 func = "_Qp_qtoux";
3831 break;
3832 default:
3833 gcc_unreachable ();
3834 }
3835 break;
3836
3837 default:
3838 gcc_unreachable ();
3839 }
3840
3841 emit_soft_tfmode_libcall (func, 2, operands);
3842 }
3843
3844 /* Expand a hard-float TFmode operation. All arguments must be in
3845 registers. */
3846
3847 static void
3848 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3849 {
3850 rtx op, dest;
3851
3852 if (GET_RTX_CLASS (code) == RTX_UNARY)
3853 {
3854 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3855 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3856 }
3857 else
3858 {
3859 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3860 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3861 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3862 operands[1], operands[2]);
3863 }
3864
3865 if (register_operand (operands[0], VOIDmode))
3866 dest = operands[0];
3867 else
3868 dest = gen_reg_rtx (GET_MODE (operands[0]));
3869
3870 emit_insn (gen_rtx_SET (dest, op));
3871
3872 if (dest != operands[0])
3873 emit_move_insn (operands[0], dest);
3874 }
3875
3876 void
3877 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3878 {
3879 if (TARGET_HARD_QUAD)
3880 emit_hard_tfmode_operation (code, operands);
3881 else
3882 emit_soft_tfmode_binop (code, operands);
3883 }
3884
3885 void
3886 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3887 {
3888 if (TARGET_HARD_QUAD)
3889 emit_hard_tfmode_operation (code, operands);
3890 else
3891 emit_soft_tfmode_unop (code, operands);
3892 }
3893
3894 void
3895 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3896 {
3897 if (TARGET_HARD_QUAD)
3898 emit_hard_tfmode_operation (code, operands);
3899 else
3900 emit_soft_tfmode_cvt (code, operands);
3901 }
3902 \f
3903 /* Return nonzero if a branch/jump/call instruction will be emitting
3904 a nop into its delay slot. */
3905
3906 int
3907 empty_delay_slot (rtx_insn *insn)
3908 {
3909 rtx seq;
3910
3911 /* If no previous instruction (should not happen), return true. */
3912 if (PREV_INSN (insn) == NULL)
3913 return 1;
3914
3915 seq = NEXT_INSN (PREV_INSN (insn));
3916 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3917 return 0;
3918
3919 return 1;
3920 }
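/* As an illustrative example (not generated by this function itself),
   a filled delay slot looks like

       call    foo
        mov    1, %o0           ! executed in the delay slot

   whereas an empty one must be padded with a nop.  */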
3921
3922 /* Return nonzero if we should emit a nop after a cbcond instruction.
3923 The cbcond instruction does not have a delay slot, however there is
3924 a severe performance penalty if a control transfer appears right
3925 after a cbcond. Therefore we emit a nop when we detect this
3926 situation. */
3927
3928 int
3929 emit_cbcond_nop (rtx_insn *insn)
3930 {
3931 rtx next = next_active_insn (insn);
3932
3933 if (!next)
3934 return 1;
3935
3936 if (NONJUMP_INSN_P (next)
3937 && GET_CODE (PATTERN (next)) == SEQUENCE)
3938 next = XVECEXP (PATTERN (next), 0, 0);
3939 else if (CALL_P (next)
3940 && GET_CODE (PATTERN (next)) == PARALLEL)
3941 {
3942 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3943
3944 if (GET_CODE (delay) == RETURN)
3945 {
3946 /* It's a sibling call. Do not emit the nop if we're going
3947 to emit something other than the jump itself as the first
3948 instruction of the sibcall sequence. */
3949 if (sparc_leaf_function_p || TARGET_FLAT)
3950 return 0;
3951 }
3952 }
3953
3954 if (NONJUMP_INSN_P (next))
3955 return 0;
3956
3957 return 1;
3958 }
3959
3960 /* Return nonzero if TRIAL can go into the call delay slot. */
3961
3962 int
3963 eligible_for_call_delay (rtx_insn *trial)
3964 {
3965 rtx pat;
3966
3967 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3968 return 0;
3969
3970 /* Binutils allows
3971 call __tls_get_addr, %tgd_call (foo)
3972 add %l7, %o0, %o0, %tgd_add (foo)
3973 while Sun as/ld does not. */
3974 if (TARGET_GNU_TLS || !TARGET_TLS)
3975 return 1;
3976
3977 pat = PATTERN (trial);
3978
3979 /* We must reject tgd_add{32|64}, i.e.
3980 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3981 and tldm_add{32|64}, i.e.
3982 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3983 for Sun as/ld. */
3984 if (GET_CODE (pat) == SET
3985 && GET_CODE (SET_SRC (pat)) == PLUS)
3986 {
3987 rtx unspec = XEXP (SET_SRC (pat), 1);
3988
3989 if (GET_CODE (unspec) == UNSPEC
3990 && (XINT (unspec, 1) == UNSPEC_TLSGD
3991 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3992 return 0;
3993 }
3994
3995 return 1;
3996 }
3997
3998 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3999 instruction. RETURN_P is true if the v9 variant 'return' is to be
4000 considered in the test too.
4001
4002 TRIAL must be a SET whose destination is a REG appropriate for the
4003 'restore' instruction or, if RETURN_P is true, for the 'return'
4004 instruction. */
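/* For instance (illustrative only), returning a small constant can be
   folded into the epilogue as

       ret
        restore %g0, 1, %o0     ! return 1 while popping the window

   instead of a separate move followed by a plain "ret; restore".  */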
4005
4006 static int
4007 eligible_for_restore_insn (rtx trial, bool return_p)
4008 {
4009 rtx pat = PATTERN (trial);
4010 rtx src = SET_SRC (pat);
4011 bool src_is_freg = false;
4012 rtx src_reg;
4013
4014 /* Since we now can do moves between float and integer registers when
4015 VIS3 is enabled, we have to catch this case. We can allow such
4016 moves when doing a 'return' however. */
4017 src_reg = src;
4018 if (GET_CODE (src_reg) == SUBREG)
4019 src_reg = SUBREG_REG (src_reg);
4020 if (GET_CODE (src_reg) == REG
4021 && SPARC_FP_REG_P (REGNO (src_reg)))
4022 src_is_freg = true;
4023
4024 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4025 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4026 && arith_operand (src, GET_MODE (src))
4027 && ! src_is_freg)
4028 {
4029 if (TARGET_ARCH64)
4030 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4031 else
4032 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4033 }
4034
4035 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4036 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4037 && arith_double_operand (src, GET_MODE (src))
4038 && ! src_is_freg)
4039 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4040
4041 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4042 else if (! TARGET_FPU && register_operand (src, SFmode))
4043 return 1;
4044
4045 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4046 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4047 return 1;
4048
4049 /* If we have the 'return' instruction, anything that does not use
4050 local or output registers and can go into a delay slot wins. */
4051 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4052 return 1;
4053
4054 /* The 'restore src1,src2,dest' pattern for SImode. */
4055 else if (GET_CODE (src) == PLUS
4056 && register_operand (XEXP (src, 0), SImode)
4057 && arith_operand (XEXP (src, 1), SImode))
4058 return 1;
4059
4060 /* The 'restore src1,src2,dest' pattern for DImode. */
4061 else if (GET_CODE (src) == PLUS
4062 && register_operand (XEXP (src, 0), DImode)
4063 && arith_double_operand (XEXP (src, 1), DImode))
4064 return 1;
4065
4066 /* The 'restore src1,%lo(src2),dest' pattern. */
4067 else if (GET_CODE (src) == LO_SUM
4068 && ! TARGET_CM_MEDMID
4069 && ((register_operand (XEXP (src, 0), SImode)
4070 && immediate_operand (XEXP (src, 1), SImode))
4071 || (TARGET_ARCH64
4072 && register_operand (XEXP (src, 0), DImode)
4073 && immediate_operand (XEXP (src, 1), DImode))))
4074 return 1;
4075
4076 /* The 'restore src,src,dest' pattern. */
4077 else if (GET_CODE (src) == ASHIFT
4078 && (register_operand (XEXP (src, 0), SImode)
4079 || register_operand (XEXP (src, 0), DImode))
4080 && XEXP (src, 1) == const1_rtx)
4081 return 1;
4082
4083 return 0;
4084 }
4085
4086 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4087
4088 int
4089 eligible_for_return_delay (rtx_insn *trial)
4090 {
4091 int regno;
4092 rtx pat;
4093
4094 /* If the function uses __builtin_eh_return, the eh_return machinery
4095 occupies the delay slot. */
4096 if (crtl->calls_eh_return)
4097 return 0;
4098
4099 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4100 return 0;
4101
4102 /* In the case of a leaf or flat function, anything can go into the slot. */
4103 if (sparc_leaf_function_p || TARGET_FLAT)
4104 return 1;
4105
4106 if (!NONJUMP_INSN_P (trial))
4107 return 0;
4108
4109 pat = PATTERN (trial);
4110 if (GET_CODE (pat) == PARALLEL)
4111 {
4112 int i;
4113
4114 if (! TARGET_V9)
4115 return 0;
4116 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4117 {
4118 rtx expr = XVECEXP (pat, 0, i);
4119 if (GET_CODE (expr) != SET)
4120 return 0;
4121 if (GET_CODE (SET_DEST (expr)) != REG)
4122 return 0;
4123 regno = REGNO (SET_DEST (expr));
4124 if (regno >= 8 && regno < 24)
4125 return 0;
4126 }
4127 return !epilogue_renumber (&pat, 1);
4128 }
4129
4130 if (GET_CODE (pat) != SET)
4131 return 0;
4132
4133 if (GET_CODE (SET_DEST (pat)) != REG)
4134 return 0;
4135
4136 regno = REGNO (SET_DEST (pat));
4137
4138 /* Otherwise, only operations which can be done in tandem with
4139 a `restore' or `return' insn can go into the delay slot. */
4140 if (regno >= 8 && regno < 24)
4141 return 0;
4142
4143 /* If this instruction sets up a floating-point register and we have a return
4144 instruction, it can probably go in. But restore will not work
4145 with FP_REGS. */
4146 if (! SPARC_INT_REG_P (regno))
4147 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4148
4149 return eligible_for_restore_insn (trial, true);
4150 }
4151
4152 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4153
4154 int
4155 eligible_for_sibcall_delay (rtx_insn *trial)
4156 {
4157 rtx pat;
4158
4159 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4160 return 0;
4161
4162 if (!NONJUMP_INSN_P (trial))
4163 return 0;
4164
4165 pat = PATTERN (trial);
4166
4167 if (sparc_leaf_function_p || TARGET_FLAT)
4168 {
4169 /* If the tail call is done using the call instruction,
4170 we have to restore %o7 in the delay slot. */
4171 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4172 return 0;
4173
4174 /* %g1 is used to build the function address */
4175 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4176 return 0;
4177
4178 return 1;
4179 }
4180
4181 if (GET_CODE (pat) != SET)
4182 return 0;
4183
4184 /* Otherwise, only operations which can be done in tandem with
4185 a `restore' insn can go into the delay slot. */
4186 if (GET_CODE (SET_DEST (pat)) != REG
4187 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4188 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4189 return 0;
4190
4191 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4192 in most cases. */
4193 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4194 return 0;
4195
4196 return eligible_for_restore_insn (trial, false);
4197 }
4198 \f
4199 /* Determine if it's legal to put X into the constant pool. This
4200 is not possible if X contains the address of a symbol that is
4201 not constant (TLS) or not known at final link time (PIC). */
4202
4203 static bool
4204 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4205 {
4206 switch (GET_CODE (x))
4207 {
4208 case CONST_INT:
4209 case CONST_WIDE_INT:
4210 case CONST_DOUBLE:
4211 case CONST_VECTOR:
4212 /* Accept all non-symbolic constants. */
4213 return false;
4214
4215 case LABEL_REF:
4216 /* Labels are OK iff we are non-PIC. */
4217 return flag_pic != 0;
4218
4219 case SYMBOL_REF:
4220 /* 'Naked' TLS symbol references are never OK,
4221 non-TLS symbols are OK iff we are non-PIC. */
4222 if (SYMBOL_REF_TLS_MODEL (x))
4223 return true;
4224 else
4225 return flag_pic != 0;
4226
4227 case CONST:
4228 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4229 case PLUS:
4230 case MINUS:
4231 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4232 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4233 case UNSPEC:
4234 return true;
4235 default:
4236 gcc_unreachable ();
4237 }
4238 }
4239 \f
4240 /* Global Offset Table support. */
4241 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4242 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4243
4244 /* Return the SYMBOL_REF for the Global Offset Table. */
4245
4246 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4247
4248 static rtx
4249 sparc_got (void)
4250 {
4251 if (!sparc_got_symbol)
4252 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4253
4254 return sparc_got_symbol;
4255 }
4256
4257 /* Ensure that we are not using patterns that are not OK with PIC. */
4258
4259 int
4260 check_pic (int i)
4261 {
4262 rtx op;
4263
4264 switch (flag_pic)
4265 {
4266 case 1:
4267 op = recog_data.operand[i];
4268 gcc_assert (GET_CODE (op) != SYMBOL_REF
4269 && (GET_CODE (op) != CONST
4270 || (GET_CODE (XEXP (op, 0)) == MINUS
4271 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4272 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4273 /* fallthrough */
4274 case 2:
4275 default:
4276 return 1;
4277 }
4278 }
4279
4280 /* Return true if X is an address which needs a temporary register when
4281 reloaded while generating PIC code. */
4282
4283 int
4284 pic_address_needs_scratch (rtx x)
4285 {
4286 /* An address which is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
4287 if (GET_CODE (x) == CONST
4288 && GET_CODE (XEXP (x, 0)) == PLUS
4289 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4290 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4291 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4292 return 1;
4293
4294 return 0;
4295 }
4296
4297 /* Determine if a given RTX is a valid constant. We already know this
4298 satisfies CONSTANT_P. */
4299
4300 static bool
4301 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4302 {
4303 switch (GET_CODE (x))
4304 {
4305 case CONST:
4306 case SYMBOL_REF:
4307 if (sparc_tls_referenced_p (x))
4308 return false;
4309 break;
4310
4311 case CONST_DOUBLE:
4312 /* Floating-point constants are generally not OK.
4313 The only exceptions are 0.0 and all-ones when VIS is enabled. */
4314 if (TARGET_VIS
4315 && SCALAR_FLOAT_MODE_P (mode)
4316 && (const_zero_operand (x, mode)
4317 || const_all_ones_operand (x, mode)))
4318 return true;
4319
4320 return false;
4321
4322 case CONST_VECTOR:
4323 /* Vector constants are generally not OK.
4324 The only exceptions are 0 and all-ones (-1) when VIS is enabled. */
4325 if (TARGET_VIS
4326 && (const_zero_operand (x, mode)
4327 || const_all_ones_operand (x, mode)))
4328 return true;
4329
4330 return false;
4331
4332 default:
4333 break;
4334 }
4335
4336 return true;
4337 }
4338
4339 /* Determine if a given RTX is a valid constant address. */
4340
4341 bool
4342 constant_address_p (rtx x)
4343 {
4344 switch (GET_CODE (x))
4345 {
4346 case LABEL_REF:
4347 case CONST_INT:
4348 case HIGH:
4349 return true;
4350
4351 case CONST:
4352 if (flag_pic && pic_address_needs_scratch (x))
4353 return false;
4354 return sparc_legitimate_constant_p (Pmode, x);
4355
4356 case SYMBOL_REF:
4357 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4358
4359 default:
4360 return false;
4361 }
4362 }
4363
4364 /* Nonzero if the constant value X is a legitimate general operand
4365 when generating PIC code. It is given that flag_pic is on and
4366 that X satisfies CONSTANT_P. */
4367
4368 bool
4369 legitimate_pic_operand_p (rtx x)
4370 {
4371 if (pic_address_needs_scratch (x))
4372 return false;
4373 if (sparc_tls_referenced_p (x))
4374 return false;
4375 return true;
4376 }
4377
4378 /* Return true if X is a representation of the PIC register. */
4379
4380 static bool
4381 sparc_pic_register_p (rtx x)
4382 {
4383 if (!REG_P (x) || !pic_offset_table_rtx)
4384 return false;
4385
4386 if (x == pic_offset_table_rtx)
4387 return true;
4388
4389 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4390 && (HARD_REGISTER_P (x) || lra_in_progress)
4391 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4392 return true;
4393
4394 return false;
4395 }
4396
4397 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4398 (CONST_INT_P (X) \
4399 && INTVAL (X) >= -0x1000 \
4400 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4401
4402 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4403 (CONST_INT_P (X) \
4404 && INTVAL (X) >= -0x1000 \
4405 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
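/* The tighter 0xc00 bound in RTX_OK_FOR_OLO10_P is meant to leave room
   for the %lo() part of the address (at most 0x3ff) so that the combined
   displacement still fits in the 13-bit signed immediate field.  This is
   a sketch of the rationale, not a formal statement of the constraint.  */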
4406
4407 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4408
4409 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4410 ordinarily. This changes a bit when generating PIC. */
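/* A few examples of addresses accepted below (assembler syntax,
   illustrative only):

       [%o0 + %o1]         REG + REG
       [%fp - 16]          REG + SMALLINT (13-bit signed immediate)
       [%g1 + %lo(sym)]    LO_SUM completing a sethi %hi() pair  */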
4411
4412 static bool
4413 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4414 {
4415 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4416
4417 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4418 rs1 = addr;
4419 else if (GET_CODE (addr) == PLUS)
4420 {
4421 rs1 = XEXP (addr, 0);
4422 rs2 = XEXP (addr, 1);
4423
4424 /* Canonicalize: REG comes first; if there are no REGs,
4425 LO_SUM comes first. */
4426 if (!REG_P (rs1)
4427 && GET_CODE (rs1) != SUBREG
4428 && (REG_P (rs2)
4429 || GET_CODE (rs2) == SUBREG
4430 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4431 {
4432 rs1 = XEXP (addr, 1);
4433 rs2 = XEXP (addr, 0);
4434 }
4435
4436 if ((flag_pic == 1
4437 && sparc_pic_register_p (rs1)
4438 && !REG_P (rs2)
4439 && GET_CODE (rs2) != SUBREG
4440 && GET_CODE (rs2) != LO_SUM
4441 && GET_CODE (rs2) != MEM
4442 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4443 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4444 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4445 || ((REG_P (rs1)
4446 || GET_CODE (rs1) == SUBREG)
4447 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4448 {
4449 imm1 = rs2;
4450 rs2 = NULL;
4451 }
4452 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4453 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4454 {
4455 /* We prohibit REG + REG for TFmode when there are no quad move insns
4456 and we consequently need to split. We do this because REG+REG
4457 is not an offsettable address. If we get the situation in reload
4458 where source and destination of a movtf pattern are both MEMs with
4459 REG+REG address, then only one of them gets converted to an
4460 offsettable address. */
4461 if (mode == TFmode
4462 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4463 return 0;
4464
4465 /* Likewise for TImode, but in all cases. */
4466 if (mode == TImode)
4467 return 0;
4468
4469 /* We prohibit REG + REG on ARCH32 if not optimizing for
4470 DFmode/DImode because then mem_min_alignment is likely to be zero
4471 after reload and the forced split would lack a matching splitter
4472 pattern. */
4473 if (TARGET_ARCH32 && !optimize
4474 && (mode == DFmode || mode == DImode))
4475 return 0;
4476 }
4477 else if (USE_AS_OFFSETABLE_LO10
4478 && GET_CODE (rs1) == LO_SUM
4479 && TARGET_ARCH64
4480 && ! TARGET_CM_MEDMID
4481 && RTX_OK_FOR_OLO10_P (rs2, mode))
4482 {
4483 rs2 = NULL;
4484 imm1 = XEXP (rs1, 1);
4485 rs1 = XEXP (rs1, 0);
4486 if (!CONSTANT_P (imm1)
4487 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4488 return 0;
4489 }
4490 }
4491 else if (GET_CODE (addr) == LO_SUM)
4492 {
4493 rs1 = XEXP (addr, 0);
4494 imm1 = XEXP (addr, 1);
4495
4496 if (!CONSTANT_P (imm1)
4497 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4498 return 0;
4499
4500 /* We can't allow TFmode in 32-bit mode, because an offset greater
4501 than the alignment (8) may cause the LO_SUM to overflow. */
4502 if (mode == TFmode && TARGET_ARCH32)
4503 return 0;
4504
4505 /* During reload, accept the HIGH+LO_SUM construct generated by
4506 sparc_legitimize_reload_address. */
4507 if (reload_in_progress
4508 && GET_CODE (rs1) == HIGH
4509 && XEXP (rs1, 0) == imm1)
4510 return 1;
4511 }
4512 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4513 return 1;
4514 else
4515 return 0;
4516
4517 if (GET_CODE (rs1) == SUBREG)
4518 rs1 = SUBREG_REG (rs1);
4519 if (!REG_P (rs1))
4520 return 0;
4521
4522 if (rs2)
4523 {
4524 if (GET_CODE (rs2) == SUBREG)
4525 rs2 = SUBREG_REG (rs2);
4526 if (!REG_P (rs2))
4527 return 0;
4528 }
4529
4530 if (strict)
4531 {
4532 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4533 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4534 return 0;
4535 }
4536 else
4537 {
4538 if ((! SPARC_INT_REG_P (REGNO (rs1))
4539 && REGNO (rs1) != FRAME_POINTER_REGNUM
4540 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4541 || (rs2
4542 && (! SPARC_INT_REG_P (REGNO (rs2))
4543 && REGNO (rs2) != FRAME_POINTER_REGNUM
4544 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4545 return 0;
4546 }
4547 return 1;
4548 }
4549
4550 /* Return the SYMBOL_REF for the tls_get_addr function. */
4551
4552 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4553
4554 static rtx
4555 sparc_tls_get_addr (void)
4556 {
4557 if (!sparc_tls_symbol)
4558 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4559
4560 return sparc_tls_symbol;
4561 }
4562
4563 /* Return the Global Offset Table to be used in TLS mode. */
4564
4565 static rtx
4566 sparc_tls_got (void)
4567 {
4568 /* In PIC mode, this is just the PIC offset table. */
4569 if (flag_pic)
4570 {
4571 crtl->uses_pic_offset_table = 1;
4572 return pic_offset_table_rtx;
4573 }
4574
4575 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4576 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4577 if (TARGET_SUN_TLS && TARGET_ARCH32)
4578 {
4579 load_got_register ();
4580 return global_offset_table_rtx;
4581 }
4582
4583 /* In all other cases, we load a new pseudo with the GOT symbol. */
4584 return copy_to_reg (sparc_got ());
4585 }
4586
4587 /* Return true if X contains a thread-local symbol. */
4588
4589 static bool
4590 sparc_tls_referenced_p (rtx x)
4591 {
4592 if (!TARGET_HAVE_TLS)
4593 return false;
4594
4595 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4596 x = XEXP (XEXP (x, 0), 0);
4597
4598 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4599 return true;
4600
4601 /* That's all we handle in sparc_legitimize_tls_address for now. */
4602 return false;
4603 }
4604
4605 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4606 this (thread-local) address. */
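/* As an illustration (relocation spellings are approximate and depend on
   the assembler), the local-exec model boils down to something like

       sethi  %tle_hix22(sym), %tmp
       xor    %tmp, %tle_lox10(sym), %tmp
       add    %g7, %tmp, %reg          ! %g7 is the thread pointer

   while the other models go through the GOT and/or __tls_get_addr.  */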
4607
4608 static rtx
4609 sparc_legitimize_tls_address (rtx addr)
4610 {
4611 rtx temp1, temp2, temp3, ret, o0, got;
4612 rtx_insn *insn;
4613
4614 gcc_assert (can_create_pseudo_p ());
4615
4616 if (GET_CODE (addr) == SYMBOL_REF)
4617 switch (SYMBOL_REF_TLS_MODEL (addr))
4618 {
4619 case TLS_MODEL_GLOBAL_DYNAMIC:
4620 start_sequence ();
4621 temp1 = gen_reg_rtx (SImode);
4622 temp2 = gen_reg_rtx (SImode);
4623 ret = gen_reg_rtx (Pmode);
4624 o0 = gen_rtx_REG (Pmode, 8);
4625 got = sparc_tls_got ();
4626 emit_insn (gen_tgd_hi22 (temp1, addr));
4627 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4628 if (TARGET_ARCH32)
4629 {
4630 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4631 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4632 addr, const1_rtx));
4633 }
4634 else
4635 {
4636 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4637 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4638 addr, const1_rtx));
4639 }
4640 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4641 insn = get_insns ();
4642 end_sequence ();
4643 emit_libcall_block (insn, ret, o0, addr);
4644 break;
4645
4646 case TLS_MODEL_LOCAL_DYNAMIC:
4647 start_sequence ();
4648 temp1 = gen_reg_rtx (SImode);
4649 temp2 = gen_reg_rtx (SImode);
4650 temp3 = gen_reg_rtx (Pmode);
4651 ret = gen_reg_rtx (Pmode);
4652 o0 = gen_rtx_REG (Pmode, 8);
4653 got = sparc_tls_got ();
4654 emit_insn (gen_tldm_hi22 (temp1));
4655 emit_insn (gen_tldm_lo10 (temp2, temp1));
4656 if (TARGET_ARCH32)
4657 {
4658 emit_insn (gen_tldm_add32 (o0, got, temp2));
4659 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4660 const1_rtx));
4661 }
4662 else
4663 {
4664 emit_insn (gen_tldm_add64 (o0, got, temp2));
4665 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4666 const1_rtx));
4667 }
4668 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4669 insn = get_insns ();
4670 end_sequence ();
4671 emit_libcall_block (insn, temp3, o0,
4672 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4673 UNSPEC_TLSLD_BASE));
4674 temp1 = gen_reg_rtx (SImode);
4675 temp2 = gen_reg_rtx (SImode);
4676 emit_insn (gen_tldo_hix22 (temp1, addr));
4677 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4678 if (TARGET_ARCH32)
4679 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4680 else
4681 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4682 break;
4683
4684 case TLS_MODEL_INITIAL_EXEC:
4685 temp1 = gen_reg_rtx (SImode);
4686 temp2 = gen_reg_rtx (SImode);
4687 temp3 = gen_reg_rtx (Pmode);
4688 got = sparc_tls_got ();
4689 emit_insn (gen_tie_hi22 (temp1, addr));
4690 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4691 if (TARGET_ARCH32)
4692 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4693 else
4694 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4695 if (TARGET_SUN_TLS)
4696 {
4697 ret = gen_reg_rtx (Pmode);
4698 if (TARGET_ARCH32)
4699 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4700 temp3, addr));
4701 else
4702 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4703 temp3, addr));
4704 }
4705 else
4706 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4707 break;
4708
4709 case TLS_MODEL_LOCAL_EXEC:
4710 temp1 = gen_reg_rtx (Pmode);
4711 temp2 = gen_reg_rtx (Pmode);
4712 if (TARGET_ARCH32)
4713 {
4714 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4715 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4716 }
4717 else
4718 {
4719 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4720 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4721 }
4722 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4723 break;
4724
4725 default:
4726 gcc_unreachable ();
4727 }
4728
4729 else if (GET_CODE (addr) == CONST)
4730 {
4731 rtx base, offset;
4732
4733 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4734
4735 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4736 offset = XEXP (XEXP (addr, 0), 1);
4737
4738 base = force_operand (base, NULL_RTX);
4739 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4740 offset = force_reg (Pmode, offset);
4741 ret = gen_rtx_PLUS (Pmode, base, offset);
4742 }
4743
4744 else
4745 gcc_unreachable (); /* for now ... */
4746
4747 return ret;
4748 }
4749
4750 /* Legitimize PIC addresses. If the address is already position-independent,
4751 we return ORIG. Newly generated position-independent addresses go into a
4752 reg. This is REG if nonzero, otherwise we allocate register(s) as
4753 necessary. */
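/* For illustration (conceptual -fPIC code; the exact relocations depend
   on the assembler and on gotdata support), a global symbol is loaded
   through the GOT roughly as

       sethi  %hi(sym), %tmp
       or     %tmp, %lo(sym), %tmp
       ld     [%l7 + %tmp], %reg      ! %l7 holds the GOT pointer

   while with -fpic the short form "ld [%l7 + sym], %reg" is used.  */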
4754
4755 static rtx
4756 sparc_legitimize_pic_address (rtx orig, rtx reg)
4757 {
4758 if (GET_CODE (orig) == SYMBOL_REF
4759 /* See the comment in sparc_expand_move. */
4760 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4761 {
4762 bool gotdata_op = false;
4763 rtx pic_ref, address;
4764 rtx_insn *insn;
4765
4766 if (!reg)
4767 {
4768 gcc_assert (can_create_pseudo_p ());
4769 reg = gen_reg_rtx (Pmode);
4770 }
4771
4772 if (flag_pic == 2)
4773 {
4774 /* If not during reload, allocate another temp reg here for loading
4775 in the address, so that these instructions can be optimized
4776 properly. */
4777 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4778
4779 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4780 won't get confused into thinking that these two instructions
4781 are loading in the true address of the symbol. If in the
4782 future a PIC rtx exists, that should be used instead. */
4783 if (TARGET_ARCH64)
4784 {
4785 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4786 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4787 }
4788 else
4789 {
4790 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4791 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4792 }
4793
4794 address = temp_reg;
4795 gotdata_op = true;
4796 }
4797 else
4798 address = orig;
4799
4800 crtl->uses_pic_offset_table = 1;
4801 if (gotdata_op)
4802 {
4803 if (TARGET_ARCH64)
4804 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4805 pic_offset_table_rtx,
4806 address, orig));
4807 else
4808 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4809 pic_offset_table_rtx,
4810 address, orig));
4811 }
4812 else
4813 {
4814 pic_ref
4815 = gen_const_mem (Pmode,
4816 gen_rtx_PLUS (Pmode,
4817 pic_offset_table_rtx, address));
4818 insn = emit_move_insn (reg, pic_ref);
4819 }
4820
4821 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4822 by the loop optimizer. */
4823 set_unique_reg_note (insn, REG_EQUAL, orig);
4824 return reg;
4825 }
4826 else if (GET_CODE (orig) == CONST)
4827 {
4828 rtx base, offset;
4829
4830 if (GET_CODE (XEXP (orig, 0)) == PLUS
4831 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4832 return orig;
4833
4834 if (!reg)
4835 {
4836 gcc_assert (can_create_pseudo_p ());
4837 reg = gen_reg_rtx (Pmode);
4838 }
4839
4840 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4841 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4842 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4843 base == reg ? NULL_RTX : reg);
4844
4845 if (GET_CODE (offset) == CONST_INT)
4846 {
4847 if (SMALL_INT (offset))
4848 return plus_constant (Pmode, base, INTVAL (offset));
4849 else if (can_create_pseudo_p ())
4850 offset = force_reg (Pmode, offset);
4851 else
4852 /* If we reach here, then something is seriously wrong. */
4853 gcc_unreachable ();
4854 }
4855 return gen_rtx_PLUS (Pmode, base, offset);
4856 }
4857 else if (GET_CODE (orig) == LABEL_REF)
4858 /* ??? We ought to be checking that the register is live instead, in case
4859 it is eliminated. */
4860 crtl->uses_pic_offset_table = 1;
4861
4862 return orig;
4863 }
4864
4865 /* Try machine-dependent ways of modifying an illegitimate address X
4866 to be legitimate. If we find one, return the new, valid address.
4867
4868 OLDX is the address as it was before break_out_memory_refs was called.
4869 In some cases it is useful to look at this to decide what needs to be done.
4870
4871 MODE is the mode of the operand pointed to by X.
4872
4873 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4874
4875 static rtx
4876 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4877 machine_mode mode)
4878 {
4879 rtx orig_x = x;
4880
4881 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4882 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4883 force_operand (XEXP (x, 0), NULL_RTX));
4884 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4885 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4886 force_operand (XEXP (x, 1), NULL_RTX));
4887 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4888 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4889 XEXP (x, 1));
4890 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4891 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4892 force_operand (XEXP (x, 1), NULL_RTX));
4893
4894 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4895 return x;
4896
4897 if (sparc_tls_referenced_p (x))
4898 x = sparc_legitimize_tls_address (x);
4899 else if (flag_pic)
4900 x = sparc_legitimize_pic_address (x, NULL_RTX);
4901 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4902 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4903 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4904 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4905 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4906 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4907 else if (GET_CODE (x) == SYMBOL_REF
4908 || GET_CODE (x) == CONST
4909 || GET_CODE (x) == LABEL_REF)
4910 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4911
4912 return x;
4913 }
4914
4915 /* Delegitimize an address that was legitimized by the above function. */
4916
4917 static rtx
4918 sparc_delegitimize_address (rtx x)
4919 {
4920 x = delegitimize_mem_from_attrs (x);
4921
4922 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4923 switch (XINT (XEXP (x, 1), 1))
4924 {
4925 case UNSPEC_MOVE_PIC:
4926 case UNSPEC_TLSLE:
4927 x = XVECEXP (XEXP (x, 1), 0, 0);
4928 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4929 break;
4930 default:
4931 break;
4932 }
4933
4934 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4935 if (GET_CODE (x) == MINUS
4936 && sparc_pic_register_p (XEXP (x, 0))
4937 && GET_CODE (XEXP (x, 1)) == LO_SUM
4938 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4939 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4940 {
4941 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4942 gcc_assert (GET_CODE (x) == LABEL_REF
4943 || (GET_CODE (x) == CONST
4944 && GET_CODE (XEXP (x, 0)) == PLUS
4945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4946 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
4947 }
4948
4949 return x;
4950 }
4951
4952 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4953 replace the input X, or the original X if no replacement is called for.
4954 The output parameter *WIN is 1 if the calling macro should goto WIN,
4955 0 if it should not.
4956
4957 For SPARC, we wish to handle addresses by splitting them into
4958 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4959 This cuts the number of extra insns by one.
4960
4961 Do nothing when generating PIC code and the address is a symbolic
4962 operand or requires a scratch register. */
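/* Sketch of the intended effect (illustrative): the symbolic address is
   split so that the memory reference keeps the LO_SUM part, e.g.

       sethi  %hi(sym), %g1
       ld     [%g1 + %lo(sym)], %o0

   rather than materializing the full address in a register first.  */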
4963
4964 rtx
4965 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4966 int opnum, int type,
4967 int ind_levels ATTRIBUTE_UNUSED, int *win)
4968 {
4969 /* Decompose SImode constants into HIGH+LO_SUM. */
4970 if (CONSTANT_P (x)
4971 && (mode != TFmode || TARGET_ARCH64)
4972 && GET_MODE (x) == SImode
4973 && GET_CODE (x) != LO_SUM
4974 && GET_CODE (x) != HIGH
4975 && sparc_cmodel <= CM_MEDLOW
4976 && !(flag_pic
4977 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4978 {
4979 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4980 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4981 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4982 opnum, (enum reload_type)type);
4983 *win = 1;
4984 return x;
4985 }
4986
4987 /* We have to recognize what we have already generated above. */
4988 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4989 {
4990 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4991 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4992 opnum, (enum reload_type)type);
4993 *win = 1;
4994 return x;
4995 }
4996
4997 *win = 0;
4998 return x;
4999 }
5000
5001 /* Return true if ADDR (a legitimate address expression)
5002 has an effect that depends on the machine mode it is used for.
5003
5004 In PIC mode,
5005
5006 (mem:HI [%l7+a])
5007
5008 is not equivalent to
5009
5010 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5011
5012 because [%l7+a+1] is interpreted as the address of (a+1). */
5013
5014
5015 static bool
5016 sparc_mode_dependent_address_p (const_rtx addr,
5017 addr_space_t as ATTRIBUTE_UNUSED)
5018 {
5019 if (GET_CODE (addr) == PLUS
5020 && sparc_pic_register_p (XEXP (addr, 0))
5021 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5022 return true;
5023
5024 return false;
5025 }
5026
5027 #ifdef HAVE_GAS_HIDDEN
5028 # define USE_HIDDEN_LINKONCE 1
5029 #else
5030 # define USE_HIDDEN_LINKONCE 0
5031 #endif
5032
5033 static void
5034 get_pc_thunk_name (char name[32], unsigned int regno)
5035 {
5036 const char *reg_name = reg_names[regno];
5037
5038 /* Skip the leading '%' as that cannot be used in a
5039 symbol name. */
5040 reg_name += 1;
5041
5042 if (USE_HIDDEN_LINKONCE)
5043 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
5044 else
5045 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
5046 }
5047
5048 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
5049
5050 static rtx
5051 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
5052 {
5053 int orig_flag_pic = flag_pic;
5054 rtx insn;
5055
5056 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
5057 flag_pic = 0;
5058 if (TARGET_ARCH64)
5059 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
5060 else
5061 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
5062 flag_pic = orig_flag_pic;
5063
5064 return insn;
5065 }
5066
5067 /* Emit code to load the GOT register. */
5068
5069 void
5070 load_got_register (void)
5071 {
5072 if (!global_offset_table_rtx)
5073 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
5074
5075 if (TARGET_VXWORKS_RTP)
5076 emit_insn (gen_vxworks_load_got ());
5077 else
5078 {
5079 /* The GOT symbol is subject to a PC-relative relocation so we need a
5080 helper function to add the PC value and thus get the final value. */
5081 if (!got_helper_rtx)
5082 {
5083 char name[32];
5084 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
5085 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5086 }
5087
5088 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
5089 got_helper_rtx));
5090 }
5091 }
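/* For reference (illustrative, non-VxWorks case), the sequence emitted by
   load_pcrel_sym conventionally looks like

       sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
       call   __sparc_get_pc_thunk.l7
        add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the return address in %o7 into %l7.  */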
5092
5093 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5094 address of the call target. */
5095
5096 void
5097 sparc_emit_call_insn (rtx pat, rtx addr)
5098 {
5099 rtx_insn *insn;
5100
5101 insn = emit_call_insn (pat);
5102
5103 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5104 if (TARGET_VXWORKS_RTP
5105 && flag_pic
5106 && GET_CODE (addr) == SYMBOL_REF
5107 && (SYMBOL_REF_DECL (addr)
5108 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5109 : !SYMBOL_REF_LOCAL_P (addr)))
5110 {
5111 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5112 crtl->uses_pic_offset_table = 1;
5113 }
5114 }
5115 \f
5116 /* Return 1 if RTX is a MEM which is known to be aligned to at
5117 least a DESIRED byte boundary. */
5118
5119 int
5120 mem_min_alignment (rtx mem, int desired)
5121 {
5122 rtx addr, base, offset;
5123
5124 /* If it's not a MEM we can't accept it. */
5125 if (GET_CODE (mem) != MEM)
5126 return 0;
5127
5128 /* Obviously... */
5129 if (!TARGET_UNALIGNED_DOUBLES
5130 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5131 return 1;
5132
5133 /* ??? The rest of the function predates MEM_ALIGN so
5134 there is probably a bit of redundancy. */
5135 addr = XEXP (mem, 0);
5136 base = offset = NULL_RTX;
5137 if (GET_CODE (addr) == PLUS)
5138 {
5139 if (GET_CODE (XEXP (addr, 0)) == REG)
5140 {
5141 base = XEXP (addr, 0);
5142
5143 /* What we are saying here is that if the base
5144 REG is properly aligned, the compiler will make
5145 sure any REG-based index built upon it is aligned
5146 as well. */
5147 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5148 offset = XEXP (addr, 1);
5149 else
5150 offset = const0_rtx;
5151 }
5152 }
5153 else if (GET_CODE (addr) == REG)
5154 {
5155 base = addr;
5156 offset = const0_rtx;
5157 }
5158
5159 if (base != NULL_RTX)
5160 {
5161 int regno = REGNO (base);
5162
5163 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5164 {
5165 /* Check if the compiler has recorded some information
5166 about the alignment of the base REG. If reload has
5167 completed, we already matched with proper alignments.
5168 If not running global_alloc, reload might give us
5169 an unaligned pointer to the local stack, though. */
5170 if (((cfun != 0
5171 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5172 || (optimize && reload_completed))
5173 && (INTVAL (offset) & (desired - 1)) == 0)
5174 return 1;
5175 }
5176 else
5177 {
5178 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5179 return 1;
5180 }
5181 }
5182 else if (! TARGET_UNALIGNED_DOUBLES
5183 || CONSTANT_P (addr)
5184 || GET_CODE (addr) == LO_SUM)
5185 {
5186 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5187 is true, in which case we can only assume that an access is aligned if
5188 it is to a constant address, or the address involves a LO_SUM. */
5189 return 1;
5190 }
5191
5192 /* An obviously unaligned address. */
5193 return 0;
5194 }
5195
5196 \f
5197 /* Vectors to keep interesting information about registers where it can easily
5198 be got. We used to use the actual mode value as the bit number, but there
5199 are more than 32 modes now. Instead we use two tables: one indexed by
5200 hard register number, and one indexed by mode. */
5201
5202 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5203 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5204 mapped into one sparc_mode_class mode. */
5205
5206 enum sparc_mode_class {
5207 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5208 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5209 CC_MODE, CCFP_MODE
5210 };
5211
5212 /* Modes for single-word and smaller quantities. */
5213 #define S_MODES \
5214 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5215
5216 /* Modes for double-word and smaller quantities. */
5217 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5218
5219 /* Modes for quad-word and smaller quantities. */
5220 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5221
5222 /* Modes for 8-word and smaller quantities. */
5223 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5224
5225 /* Modes for single-float quantities. */
5226 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5227
5228 /* Modes for double-float and smaller quantities. */
5229 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5230
5231 /* Modes for quad-float and smaller quantities. */
5232 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5233
5234 /* Modes for quad-float pairs and smaller quantities. */
5235 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5236
5237 /* Modes for double-float only quantities. */
5238 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5239
5240 /* Modes for quad-float and double-float only quantities. */
5241 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5242
5243 /* Modes for quad-float pairs and double-float only quantities. */
5244 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5245
5246 /* Modes for condition codes. */
5247 #define CC_MODES (1 << (int) CC_MODE)
5248 #define CCFP_MODES (1 << (int) CCFP_MODE)
5249
5250 /* Value is 1 if register/mode pair is acceptable on sparc.
5251
5252 The funny mixture of D and T modes is because integer operations
5253 do not specially operate on tetra quantities, so non-quad-aligned
5254 registers can hold quadword quantities (except %o4 and %i4 because
5255 they cross fixed registers).
5256
5257 ??? Note that, despite the settings, non-double-aligned parameter
5258 registers can hold double-word quantities in 32-bit mode. */
5259
5260 /* This points to either the 32-bit or the 64-bit version. */
5261 static const int *hard_regno_mode_classes;
5262
5263 static const int hard_32bit_mode_classes[] = {
5264 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5265 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5266 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5267 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5268
5269 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5270 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5271 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5272 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5273
5274 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5275 and none can hold SFmode/SImode values. */
5276 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5277 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5278 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5279 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5280
5281 /* %fcc[0123] */
5282 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5283
5284 /* %icc, %sfp, %gsr */
5285 CC_MODES, 0, D_MODES
5286 };
5287
5288 static const int hard_64bit_mode_classes[] = {
5289 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5290 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5291 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5292 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5293
5294 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5295 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5296 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5297 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5298
5299 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5300 and none can hold SFmode/SImode values. */
5301 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5302 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5303 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5304 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5305
5306 /* %fcc[0123] */
5307 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5308
5309 /* %icc, %sfp, %gsr */
5310 CC_MODES, 0, D_MODES
5311 };
5312
5313 static int sparc_mode_class [NUM_MACHINE_MODES];
5314
5315 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5316
5317 static void
5318 sparc_init_modes (void)
5319 {
5320 int i;
5321
5322 for (i = 0; i < NUM_MACHINE_MODES; i++)
5323 {
5324 machine_mode m = (machine_mode) i;
5325 unsigned int size = GET_MODE_SIZE (m);
5326
5327 switch (GET_MODE_CLASS (m))
5328 {
5329 case MODE_INT:
5330 case MODE_PARTIAL_INT:
5331 case MODE_COMPLEX_INT:
5332 if (size < 4)
5333 sparc_mode_class[i] = 1 << (int) H_MODE;
5334 else if (size == 4)
5335 sparc_mode_class[i] = 1 << (int) S_MODE;
5336 else if (size == 8)
5337 sparc_mode_class[i] = 1 << (int) D_MODE;
5338 else if (size == 16)
5339 sparc_mode_class[i] = 1 << (int) T_MODE;
5340 else if (size == 32)
5341 sparc_mode_class[i] = 1 << (int) O_MODE;
5342 else
5343 sparc_mode_class[i] = 0;
5344 break;
5345 case MODE_VECTOR_INT:
5346 if (size == 4)
5347 sparc_mode_class[i] = 1 << (int) SF_MODE;
5348 else if (size == 8)
5349 sparc_mode_class[i] = 1 << (int) DF_MODE;
5350 else
5351 sparc_mode_class[i] = 0;
5352 break;
5353 case MODE_FLOAT:
5354 case MODE_COMPLEX_FLOAT:
5355 if (size == 4)
5356 sparc_mode_class[i] = 1 << (int) SF_MODE;
5357 else if (size == 8)
5358 sparc_mode_class[i] = 1 << (int) DF_MODE;
5359 else if (size == 16)
5360 sparc_mode_class[i] = 1 << (int) TF_MODE;
5361 else if (size == 32)
5362 sparc_mode_class[i] = 1 << (int) OF_MODE;
5363 else
5364 sparc_mode_class[i] = 0;
5365 break;
5366 case MODE_CC:
5367 if (m == CCFPmode || m == CCFPEmode)
5368 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5369 else
5370 sparc_mode_class[i] = 1 << (int) CC_MODE;
5371 break;
5372 default:
5373 sparc_mode_class[i] = 0;
5374 break;
5375 }
5376 }
5377
5378 if (TARGET_ARCH64)
5379 hard_regno_mode_classes = hard_64bit_mode_classes;
5380 else
5381 hard_regno_mode_classes = hard_32bit_mode_classes;
5382
5383 /* Initialize the array used by REGNO_REG_CLASS. */
5384 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5385 {
5386 if (i < 16 && TARGET_V8PLUS)
5387 sparc_regno_reg_class[i] = I64_REGS;
5388 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5389 sparc_regno_reg_class[i] = GENERAL_REGS;
5390 else if (i < 64)
5391 sparc_regno_reg_class[i] = FP_REGS;
5392 else if (i < 96)
5393 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5394 else if (i < 100)
5395 sparc_regno_reg_class[i] = FPCC_REGS;
5396 else
5397 sparc_regno_reg_class[i] = NO_REGS;
5398 }
5399 }
5400 \f
5401 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5402
5403 static inline bool
5404 save_global_or_fp_reg_p (unsigned int regno,
5405 int leaf_function ATTRIBUTE_UNUSED)
5406 {
5407 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5408 }
5409
5410 /* Return whether the return address register (%i7) is needed. */
5411
5412 static inline bool
5413 return_addr_reg_needed_p (int leaf_function)
5414 {
5415 /* If it is live, for example because of __builtin_return_address (0). */
5416 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5417 return true;
5418
5419 /* Otherwise, it is needed as a save register if %o7 is clobbered. */
5420 if (!leaf_function
5421 /* Loading the GOT register clobbers %o7. */
5422 || crtl->uses_pic_offset_table
5423 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5424 return true;
5425
5426 return false;
5427 }
5428
5429 /* Return whether REGNO, a local or in register, must be saved/restored. */
5430
5431 static bool
5432 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5433 {
5434 /* General case: call-saved registers live at some point. */
5435 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5436 return true;
5437
5438 /* Frame pointer register (%fp) if needed. */
5439 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5440 return true;
5441
5442 /* Return address register (%i7) if needed. */
5443 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5444 return true;
5445
5446 /* GOT register (%l7) if needed. */
5447 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5448 return true;
5449
5450 /* If the function accesses prior frames, the frame pointer and the return
5451 address of the previous frame must be saved on the stack. */
5452 if (crtl->accesses_prior_frames
5453 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5454 return true;
5455
5456 return false;
5457 }
5458
5459 /* Compute the frame size required by the function. This function is called
5460 during the reload pass and also by sparc_expand_prologue. */
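/* As a rough sketch (the code below is authoritative), the result is

     SPARC_STACK_ALIGN (ROUND_UP (size, 8) + 4 * n_global_fp_regs
                        + ROUND_UP (args_size, 8)
                        + <register window save area>)

   unless nothing at all needs to be allocated, in which case it is 0.  */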
5461
5462 static HOST_WIDE_INT
5463 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5464 {
5465 HOST_WIDE_INT frame_size, apparent_frame_size;
5466 int args_size, n_global_fp_regs = 0;
5467 bool save_local_in_regs_p = false;
5468 unsigned int i;
5469
5470 /* If the function allocates dynamic stack space, the dynamic offset is
5471 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5472 if (leaf_function && !cfun->calls_alloca)
5473 args_size = 0;
5474 else
5475 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5476
5477 /* Calculate space needed for global registers. */
5478 if (TARGET_ARCH64)
5479 {
5480 for (i = 0; i < 8; i++)
5481 if (save_global_or_fp_reg_p (i, 0))
5482 n_global_fp_regs += 2;
5483 }
5484 else
5485 {
5486 for (i = 0; i < 8; i += 2)
5487 if (save_global_or_fp_reg_p (i, 0)
5488 || save_global_or_fp_reg_p (i + 1, 0))
5489 n_global_fp_regs += 2;
5490 }
5491
5492 /* In the flat window model, find out which local and in registers need to
5493 be saved. We don't reserve space in the current frame for them as they
5494 will be spilled into the register window save area of the caller's frame.
5495 However, as soon as we use this register window save area, we must create
5496 that of the current frame to make it the live one. */
5497 if (TARGET_FLAT)
5498 for (i = 16; i < 32; i++)
5499 if (save_local_or_in_reg_p (i, leaf_function))
5500 {
5501 save_local_in_regs_p = true;
5502 break;
5503 }
5504
5505 /* Calculate space needed for FP registers. */
5506 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5507 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5508 n_global_fp_regs += 2;
5509
5510 if (size == 0
5511 && n_global_fp_regs == 0
5512 && args_size == 0
5513 && !save_local_in_regs_p)
5514 frame_size = apparent_frame_size = 0;
5515 else
5516 {
5517 /* Start from the apparent frame size. */
5518 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5519
5520 /* We need to add the size of the outgoing argument area. */
5521 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5522
5523 /* And that of the register window save area. */
5524 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5525
5526 /* Finally, bump to the appropriate alignment. */
5527 frame_size = SPARC_STACK_ALIGN (frame_size);
5528 }
5529
5530 /* Set up values for use in prologue and epilogue. */
5531 sparc_frame_size = frame_size;
5532 sparc_apparent_frame_size = apparent_frame_size;
5533 sparc_n_global_fp_regs = n_global_fp_regs;
5534 sparc_save_local_in_regs_p = save_local_in_regs_p;
5535
5536 return frame_size;
5537 }
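
/* Editorial note, not part of the original source: a sample evaluation of the
   computation above.  With a 20-byte local frame (SIZE == 20), 24 bytes of
   outgoing arguments and two live call-saved FP register pairs
   (n_global_fp_regs == 4), apparent_frame_size is 24 + 16 == 40 and
   frame_size is 40 + 24 + FIRST_PARM_OFFSET, finally rounded up by
   SPARC_STACK_ALIGN; the leaf/alloca test at the top may reduce args_size
   to 0 and shrink the result accordingly.  */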
5538
5539 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5540
5541 int
5542 sparc_initial_elimination_offset (int to)
5543 {
5544 int offset;
5545
5546 if (to == STACK_POINTER_REGNUM)
5547 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5548 else
5549 offset = 0;
5550
5551 offset += SPARC_STACK_BIAS;
5552 return offset;
5553 }
5554
5555 /* Output any necessary .register pseudo-ops. */
5556
5557 void
5558 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5559 {
5560 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5561 int i;
5562
5563 if (TARGET_ARCH32)
5564 return;
5565
5566 /* Check if %g[2367] were used without
5567 .register being printed for them already. */
5568 for (i = 2; i < 8; i++)
5569 {
5570 if (df_regs_ever_live_p (i)
5571 && ! sparc_hard_reg_printed [i])
5572 {
5573 sparc_hard_reg_printed [i] = 1;
5574 /* %g7 is used as TLS base register, use #ignore
5575 for it instead of #scratch. */
5576 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5577 i == 7 ? "ignore" : "scratch");
5578 }
5579 if (i == 3) i = 5;
5580 }
5581 #endif
5582 }
5583
5584 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5585
5586 #if PROBE_INTERVAL > 4096
5587 #error Cannot use indexed addressing mode for stack probing
5588 #endif
5589
5590 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5591 inclusive. These are offsets from the current stack pointer.
5592
5593 Note that we don't use the REG+REG addressing mode for the probes because
5594 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5595 so the advantages of having a single code path win out here. */
5596
5597 static void
5598 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5599 {
5600 rtx g1 = gen_rtx_REG (Pmode, 1);
5601
5602 /* See if we have a constant small number of probes to generate. If so,
5603 that's the easy case. */
5604 if (size <= PROBE_INTERVAL)
5605 {
5606 emit_move_insn (g1, GEN_INT (first));
5607 emit_insn (gen_rtx_SET (g1,
5608 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5609 emit_stack_probe (plus_constant (Pmode, g1, -size));
5610 }
5611
5612 /* The run-time loop is made up of 9 insns in the generic case while the
5613 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5614 else if (size <= 4 * PROBE_INTERVAL)
5615 {
5616 HOST_WIDE_INT i;
5617
5618 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5619 emit_insn (gen_rtx_SET (g1,
5620 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5621 emit_stack_probe (g1);
5622
5623 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5624 it exceeds SIZE. If only two probes are needed, this will not
5625 generate any code. Then probe at FIRST + SIZE. */
5626 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5627 {
5628 emit_insn (gen_rtx_SET (g1,
5629 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5630 emit_stack_probe (g1);
5631 }
5632
5633 emit_stack_probe (plus_constant (Pmode, g1,
5634 (i - PROBE_INTERVAL) - size));
5635 }
5636
5637 /* Otherwise, do the same as above, but in a loop. Note that we must be
5638 extra careful with variables wrapping around because we might be at
5639 the very top (or the very bottom) of the address space and we have
5640 to be able to handle this case properly; in particular, we use an
5641 equality test for the loop condition. */
5642 else
5643 {
5644 HOST_WIDE_INT rounded_size;
5645 rtx g4 = gen_rtx_REG (Pmode, 4);
5646
5647 emit_move_insn (g1, GEN_INT (first));
5648
5649
5650 /* Step 1: round SIZE to the previous multiple of the interval. */
5651
5652 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5653 emit_move_insn (g4, GEN_INT (rounded_size));
5654
5655
5656 /* Step 2: compute initial and final value of the loop counter. */
5657
5658 /* TEST_ADDR = SP + FIRST. */
5659 emit_insn (gen_rtx_SET (g1,
5660 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5661
5662 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5663 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5664
5665
5666 /* Step 3: the loop
5667
5668 while (TEST_ADDR != LAST_ADDR)
5669 {
5670 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5671 probe at TEST_ADDR
5672 }
5673
5674 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5675 until it is equal to ROUNDED_SIZE. */
5676
5677 if (TARGET_ARCH64)
5678 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5679 else
5680 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5681
5682
5683 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5684 that SIZE is equal to ROUNDED_SIZE. */
5685
5686 if (size != rounded_size)
5687 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5688 }
5689
5690 /* Make sure nothing is scheduled before we are done. */
5691 emit_insn (gen_blockage ());
5692 }
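
/* Editorial note, not part of the original source: a worked example of the
   unrolled case above, assuming PROBE_INTERVAL == 4096.  For FIRST == 0 and
   SIZE == 12800 (3 * 4096 + 512), probes are emitted at %sp - 4096,
   %sp - 8192 and %sp - 12288, followed by a final probe at %sp - 12800, so
   no 4096-byte stretch of the newly allocated area is left unprobed.  */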
5693
5694 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5695 absolute addresses. */
5696
5697 const char *
5698 output_probe_stack_range (rtx reg1, rtx reg2)
5699 {
5700 static int labelno = 0;
5701 char loop_lab[32];
5702 rtx xops[2];
5703
5704 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5705
5706 /* Loop. */
5707 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5708
5709 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5710 xops[0] = reg1;
5711 xops[1] = GEN_INT (-PROBE_INTERVAL);
5712 output_asm_insn ("add\t%0, %1, %0", xops);
5713
5714 /* Test if TEST_ADDR == LAST_ADDR. */
5715 xops[1] = reg2;
5716 output_asm_insn ("cmp\t%0, %1", xops);
5717
5718 /* Probe at TEST_ADDR and branch. */
5719 if (TARGET_ARCH64)
5720 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5721 else
5722 fputs ("\tbne\t", asm_out_file);
5723 assemble_name_raw (asm_out_file, loop_lab);
5724 fputc ('\n', asm_out_file);
5725 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5726 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5727
5728 return "";
5729 }
5730
5731 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5732 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5733 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5734 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5735 the action to be performed if it returns false. Return the new offset. */
5736
5737 typedef bool (*sorr_pred_t) (unsigned int, int);
5738 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5739
5740 static int
5741 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5742 int offset, int leaf_function, sorr_pred_t save_p,
5743 sorr_act_t action_true, sorr_act_t action_false)
5744 {
5745 unsigned int i;
5746 rtx mem;
5747 rtx_insn *insn;
5748
5749 if (TARGET_ARCH64 && high <= 32)
5750 {
5751 int fp_offset = -1;
5752
5753 for (i = low; i < high; i++)
5754 {
5755 if (save_p (i, leaf_function))
5756 {
5757 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5758 base, offset));
5759 if (action_true == SORR_SAVE)
5760 {
5761 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5762 RTX_FRAME_RELATED_P (insn) = 1;
5763 }
5764 else /* action_true == SORR_RESTORE */
5765 {
5766 /* The frame pointer must be restored last since its old
5767 value may be used as base address for the frame. This
5768 is problematic in 64-bit mode only because of the lack
5769 of a double-word load instruction. */
5770 if (i == HARD_FRAME_POINTER_REGNUM)
5771 fp_offset = offset;
5772 else
5773 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5774 }
5775 offset += 8;
5776 }
5777 else if (action_false == SORR_ADVANCE)
5778 offset += 8;
5779 }
5780
5781 if (fp_offset >= 0)
5782 {
5783 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5784 emit_move_insn (hard_frame_pointer_rtx, mem);
5785 }
5786 }
5787 else
5788 {
5789 for (i = low; i < high; i += 2)
5790 {
5791 bool reg0 = save_p (i, leaf_function);
5792 bool reg1 = save_p (i + 1, leaf_function);
5793 machine_mode mode;
5794 int regno;
5795
5796 if (reg0 && reg1)
5797 {
5798 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5799 regno = i;
5800 }
5801 else if (reg0)
5802 {
5803 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5804 regno = i;
5805 }
5806 else if (reg1)
5807 {
5808 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5809 regno = i + 1;
5810 offset += 4;
5811 }
5812 else
5813 {
5814 if (action_false == SORR_ADVANCE)
5815 offset += 8;
5816 continue;
5817 }
5818
5819 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5820 if (action_true == SORR_SAVE)
5821 {
5822 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5823 RTX_FRAME_RELATED_P (insn) = 1;
5824 if (mode == DImode)
5825 {
5826 rtx set1, set2;
5827 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5828 offset));
5829 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5830 RTX_FRAME_RELATED_P (set1) = 1;
5831 mem
5832 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5833 offset + 4));
5834 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5835 RTX_FRAME_RELATED_P (set2) = 1;
5836 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5837 gen_rtx_PARALLEL (VOIDmode,
5838 gen_rtvec (2, set1, set2)));
5839 }
5840 }
5841 else /* action_true == SORR_RESTORE */
5842 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5843
5844 /* Bump and round down to double word
5845 in case we already bumped by 4. */
5846 offset = ROUND_DOWN (offset + 8, 8);
5847 }
5848 }
5849
5850 return offset;
5851 }
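
/* Editorial note, not part of the original source: in the non-ARCH64 path
   above, registers are examined in aligned pairs.  When both members of a
   pair need saving, a single DImode move (DFmode for the FP registers)
   covers them; when only the odd member is live, OFFSET is first bumped by 4
   so that the lone SImode/SFmode slot keeps its natural position, and the
   running offset is then advanced and rounded down to the next 8-byte
   boundary before the next pair is considered.  */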
5852
5853 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5854
5855 static rtx
5856 emit_adjust_base_to_offset (rtx base, int offset)
5857 {
5858 /* ??? This might be optimized a little as %g1 might already have a
5859 value close enough that a single add insn will do. */
5860 /* ??? Although, all of this is probably only a temporary fix because
5861 if %g1 can hold a function result, then sparc_expand_epilogue will
5862 lose (the result will be clobbered). */
5863 rtx new_base = gen_rtx_REG (Pmode, 1);
5864 emit_move_insn (new_base, GEN_INT (offset));
5865 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5866 return new_base;
5867 }
5868
5869 /* Emit code to save/restore call-saved global and FP registers. */
5870
5871 static void
5872 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5873 {
5874 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5875 {
5876 base = emit_adjust_base_to_offset (base, offset);
5877 offset = 0;
5878 }
5879
5880 offset
5881 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5882 save_global_or_fp_reg_p, action, SORR_NONE);
5883 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5884 save_global_or_fp_reg_p, action, SORR_NONE);
5885 }
5886
5887 /* Emit code to save/restore call-saved local and in registers. */
5888
5889 static void
5890 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5891 {
5892 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5893 {
5894 base = emit_adjust_base_to_offset (base, offset);
5895 offset = 0;
5896 }
5897
5898 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5899 save_local_or_in_reg_p, action, SORR_ADVANCE);
5900 }
5901
5902 /* Emit a window_save insn. */
5903
5904 static rtx_insn *
5905 emit_window_save (rtx increment)
5906 {
5907 rtx_insn *insn = emit_insn (gen_window_save (increment));
5908 RTX_FRAME_RELATED_P (insn) = 1;
5909
5910 /* The incoming return address (%o7) is saved in %i7. */
5911 add_reg_note (insn, REG_CFA_REGISTER,
5912 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5913 gen_rtx_REG (Pmode,
5914 INCOMING_RETURN_ADDR_REGNUM)));
5915
5916 /* The window save event. */
5917 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5918
5919 /* The CFA is %fp, the hard frame pointer. */
5920 add_reg_note (insn, REG_CFA_DEF_CFA,
5921 plus_constant (Pmode, hard_frame_pointer_rtx,
5922 INCOMING_FRAME_SP_OFFSET));
5923
5924 return insn;
5925 }
5926
5927 /* Generate an increment for the stack pointer. */
5928
5929 static rtx
5930 gen_stack_pointer_inc (rtx increment)
5931 {
5932 return gen_rtx_SET (stack_pointer_rtx,
5933 gen_rtx_PLUS (Pmode,
5934 stack_pointer_rtx,
5935 increment));
5936 }
5937
5938 /* Expand the function prologue. The prologue is responsible for reserving
5939 storage for the frame, saving the call-saved registers and loading the
5940 GOT register if needed. */
5941
5942 void
5943 sparc_expand_prologue (void)
5944 {
5945 HOST_WIDE_INT size;
5946 rtx_insn *insn;
5947
5948 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5949 on the final value of the flag means deferring the prologue/epilogue
5950 expansion until just before the second scheduling pass, which is too
5951 late to emit multiple epilogues or return insns.
5952
5953 Of course we are making the assumption that the value of the flag
5954 will not change between now and its final value. Of the three parts
5955 of the formula, only the last one can reasonably vary. Let's take a
5956 closer look, after assuming that the first two are set to true
5957 (otherwise the last value is effectively silenced).
5958
5959 If only_leaf_regs_used returns false, the global predicate will also
5960 be false so the actual frame size calculated below will be positive.
5961 As a consequence, the save_register_window insn will be emitted in
5962 the instruction stream; now this insn explicitly references %fp
5963 which is not a leaf register so only_leaf_regs_used will always
5964 return false subsequently.
5965
5966 If only_leaf_regs_used returns true, we hope that the subsequent
5967 optimization passes won't cause non-leaf registers to pop up. For
5968 example, the regrename pass has special provisions to not rename to
5969 non-leaf registers in a leaf function. */
5970 sparc_leaf_function_p
5971 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5972
5973 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5974
5975 if (flag_stack_usage_info)
5976 current_function_static_stack_size = size;
5977
5978 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5979 || flag_stack_clash_protection)
5980 {
5981 if (crtl->is_leaf && !cfun->calls_alloca)
5982 {
5983 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5984 sparc_emit_probe_stack_range (get_stack_check_protect (),
5985 size - get_stack_check_protect ());
5986 }
5987 else if (size > 0)
5988 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5989 }
5990
5991 if (size == 0)
5992 ; /* do nothing. */
5993 else if (sparc_leaf_function_p)
5994 {
5995 rtx size_int_rtx = GEN_INT (-size);
5996
5997 if (size <= 4096)
5998 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5999 else if (size <= 8192)
6000 {
6001 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6002 RTX_FRAME_RELATED_P (insn) = 1;
6003
6004 /* %sp is still the CFA register. */
6005 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6006 }
6007 else
6008 {
6009 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6010 emit_move_insn (size_rtx, size_int_rtx);
6011 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6012 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6013 gen_stack_pointer_inc (size_int_rtx));
6014 }
6015
6016 RTX_FRAME_RELATED_P (insn) = 1;
6017 }
6018 else
6019 {
6020 rtx size_int_rtx = GEN_INT (-size);
6021
6022 if (size <= 4096)
6023 emit_window_save (size_int_rtx);
6024 else if (size <= 8192)
6025 {
6026 emit_window_save (GEN_INT (-4096));
6027
6028 /* %sp is not the CFA register anymore. */
6029 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6030
6031 /* Make sure no %fp-based store is issued until after the frame is
6032 established. The offset between the frame pointer and the stack
6033 pointer is calculated relative to the value of the stack pointer
6034 at the end of the function prologue, and moving instructions that
6035 access the stack via the frame pointer between the instructions
6036 that decrement the stack pointer could result in accessing the
6037 register window save area, which is volatile. */
6038 emit_insn (gen_frame_blockage ());
6039 }
6040 else
6041 {
6042 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6043 emit_move_insn (size_rtx, size_int_rtx);
6044 emit_window_save (size_rtx);
6045 }
6046 }
6047
6048 if (sparc_leaf_function_p)
6049 {
6050 sparc_frame_base_reg = stack_pointer_rtx;
6051 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6052 }
6053 else
6054 {
6055 sparc_frame_base_reg = hard_frame_pointer_rtx;
6056 sparc_frame_base_offset = SPARC_STACK_BIAS;
6057 }
6058
6059 if (sparc_n_global_fp_regs > 0)
6060 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6061 sparc_frame_base_offset
6062 - sparc_apparent_frame_size,
6063 SORR_SAVE);
6064
6065 /* Advertise that the data calculated just above are now valid. */
6066 sparc_prologue_data_valid_p = true;
6067 }
6068
6069 /* Expand the function prologue. The prologue is responsible for reserving
6070 storage for the frame, saving the call-saved registers and loading the
6071 GOT register if needed. */
6072
6073 void
6074 sparc_flat_expand_prologue (void)
6075 {
6076 HOST_WIDE_INT size;
6077 rtx_insn *insn;
6078
6079 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6080
6081 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6082
6083 if (flag_stack_usage_info)
6084 current_function_static_stack_size = size;
6085
6086 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6087 || flag_stack_clash_protection)
6088 {
6089 if (crtl->is_leaf && !cfun->calls_alloca)
6090 {
6091 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6092 sparc_emit_probe_stack_range (get_stack_check_protect (),
6093 size - get_stack_check_protect ());
6094 }
6095 else if (size > 0)
6096 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6097 }
6098
6099 if (sparc_save_local_in_regs_p)
6100 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6101 SORR_SAVE);
6102
6103 if (size == 0)
6104 ; /* do nothing. */
6105 else
6106 {
6107 rtx size_int_rtx, size_rtx;
6108
6109 size_rtx = size_int_rtx = GEN_INT (-size);
6110
6111 /* We establish the frame (i.e. decrement the stack pointer) first, even
6112 if we use a frame pointer, because we cannot clobber any call-saved
6113 registers, including the frame pointer, if we haven't created a new
6114 register save area, for the sake of compatibility with the ABI. */
6115 if (size <= 4096)
6116 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6117 else if (size <= 8192 && !frame_pointer_needed)
6118 {
6119 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6120 RTX_FRAME_RELATED_P (insn) = 1;
6121 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6122 }
6123 else
6124 {
6125 size_rtx = gen_rtx_REG (Pmode, 1);
6126 emit_move_insn (size_rtx, size_int_rtx);
6127 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6128 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6129 gen_stack_pointer_inc (size_int_rtx));
6130 }
6131 RTX_FRAME_RELATED_P (insn) = 1;
6132
6133 /* Ensure nothing is scheduled until after the frame is established. */
6134 emit_insn (gen_blockage ());
6135
6136 if (frame_pointer_needed)
6137 {
6138 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6139 gen_rtx_MINUS (Pmode,
6140 stack_pointer_rtx,
6141 size_rtx)));
6142 RTX_FRAME_RELATED_P (insn) = 1;
6143
6144 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6145 gen_rtx_SET (hard_frame_pointer_rtx,
6146 plus_constant (Pmode, stack_pointer_rtx,
6147 size)));
6148 }
6149
6150 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6151 {
6152 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6153 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6154
6155 insn = emit_move_insn (i7, o7);
6156 RTX_FRAME_RELATED_P (insn) = 1;
6157
6158 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6159
6160 /* Prevent this instruction from ever being considered dead,
6161 even if this function has no epilogue. */
6162 emit_use (i7);
6163 }
6164 }
6165
6166 if (frame_pointer_needed)
6167 {
6168 sparc_frame_base_reg = hard_frame_pointer_rtx;
6169 sparc_frame_base_offset = SPARC_STACK_BIAS;
6170 }
6171 else
6172 {
6173 sparc_frame_base_reg = stack_pointer_rtx;
6174 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6175 }
6176
6177 if (sparc_n_global_fp_regs > 0)
6178 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6179 sparc_frame_base_offset
6180 - sparc_apparent_frame_size,
6181 SORR_SAVE);
6182
6183 /* Advertise that the data calculated just above are now valid. */
6184 sparc_prologue_data_valid_p = true;
6185 }
6186
6187 /* This function generates the assembly code for function entry, which boils
6188 down to emitting the necessary .register directives. */
6189
6190 static void
6191 sparc_asm_function_prologue (FILE *file)
6192 {
6193 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6194 if (!TARGET_FLAT)
6195 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6196
6197 sparc_output_scratch_registers (file);
6198 }
6199
6200 /* Expand the function epilogue, either normal or part of a sibcall.
6201 We emit all the instructions except the return or the call. */
6202
6203 void
6204 sparc_expand_epilogue (bool for_eh)
6205 {
6206 HOST_WIDE_INT size = sparc_frame_size;
6207
6208 if (cfun->calls_alloca)
6209 emit_insn (gen_frame_blockage ());
6210
6211 if (sparc_n_global_fp_regs > 0)
6212 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6213 sparc_frame_base_offset
6214 - sparc_apparent_frame_size,
6215 SORR_RESTORE);
6216
6217 if (size == 0 || for_eh)
6218 ; /* do nothing. */
6219 else if (sparc_leaf_function_p)
6220 {
6221 if (size <= 4096)
6222 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6223 else if (size <= 8192)
6224 {
6225 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6226 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6227 }
6228 else
6229 {
6230 rtx reg = gen_rtx_REG (Pmode, 1);
6231 emit_move_insn (reg, GEN_INT (size));
6232 emit_insn (gen_stack_pointer_inc (reg));
6233 }
6234 }
6235 }
6236
6237 /* Expand the function epilogue, either normal or part of a sibcall.
6238 We emit all the instructions except the return or the call. */
6239
6240 void
6241 sparc_flat_expand_epilogue (bool for_eh)
6242 {
6243 HOST_WIDE_INT size = sparc_frame_size;
6244
6245 if (sparc_n_global_fp_regs > 0)
6246 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6247 sparc_frame_base_offset
6248 - sparc_apparent_frame_size,
6249 SORR_RESTORE);
6250
6251 /* If we have a frame pointer, we'll need both to restore it before the
6252 frame is destroyed and to use its current value in destroying the frame.
6253 Since we don't have an atomic way to do that in the flat window model,
6254 we save the current value into a temporary register (%g1). */
6255 if (frame_pointer_needed && !for_eh)
6256 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6257
6258 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6259 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6260 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6261
6262 if (sparc_save_local_in_regs_p)
6263 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6264 sparc_frame_base_offset,
6265 SORR_RESTORE);
6266
6267 if (size == 0 || for_eh)
6268 ; /* do nothing. */
6269 else if (frame_pointer_needed)
6270 {
6271 /* Make sure the frame is destroyed after everything else is done. */
6272 emit_insn (gen_blockage ());
6273
6274 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6275 }
6276 else
6277 {
6278 /* Likewise. */
6279 emit_insn (gen_blockage ());
6280
6281 if (size <= 4096)
6282 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6283 else if (size <= 8192)
6284 {
6285 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6286 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6287 }
6288 else
6289 {
6290 rtx reg = gen_rtx_REG (Pmode, 1);
6291 emit_move_insn (reg, GEN_INT (size));
6292 emit_insn (gen_stack_pointer_inc (reg));
6293 }
6294 }
6295 }
6296
6297 /* Return true if it is appropriate to emit `return' instructions in the
6298 body of a function. */
6299
6300 bool
6301 sparc_can_use_return_insn_p (void)
6302 {
6303 return sparc_prologue_data_valid_p
6304 && sparc_n_global_fp_regs == 0
6305 && TARGET_FLAT
6306 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6307 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6308 }
6309
6310 /* This function generates the assembly code for function exit. */
6311
6312 static void
6313 sparc_asm_function_epilogue (FILE *file)
6314 {
6315 /* If the last two instructions of a function are "call foo; dslot;"
6316 the return address might point to the first instruction in the next
6317 function and we have to output a dummy nop for the sake of sane
6318 backtraces in such cases. This is pointless for sibling calls since
6319 the return address is explicitly adjusted. */
6320
6321 rtx_insn *insn = get_last_insn ();
6322
6323 rtx last_real_insn = prev_real_insn (insn);
6324 if (last_real_insn
6325 && NONJUMP_INSN_P (last_real_insn)
6326 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6327 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6328
6329 if (last_real_insn
6330 && CALL_P (last_real_insn)
6331 && !SIBLING_CALL_P (last_real_insn))
6332 fputs("\tnop\n", file);
6333
6334 sparc_output_deferred_case_vectors ();
6335 }
6336
6337 /* Output a 'restore' instruction. */
6338
6339 static void
6340 output_restore (rtx pat)
6341 {
6342 rtx operands[3];
6343
6344 if (! pat)
6345 {
6346 fputs ("\t restore\n", asm_out_file);
6347 return;
6348 }
6349
6350 gcc_assert (GET_CODE (pat) == SET);
6351
6352 operands[0] = SET_DEST (pat);
6353 pat = SET_SRC (pat);
6354
6355 switch (GET_CODE (pat))
6356 {
6357 case PLUS:
6358 operands[1] = XEXP (pat, 0);
6359 operands[2] = XEXP (pat, 1);
6360 output_asm_insn (" restore %r1, %2, %Y0", operands);
6361 break;
6362 case LO_SUM:
6363 operands[1] = XEXP (pat, 0);
6364 operands[2] = XEXP (pat, 1);
6365 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6366 break;
6367 case ASHIFT:
6368 operands[1] = XEXP (pat, 0);
6369 gcc_assert (XEXP (pat, 1) == const1_rtx);
6370 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6371 break;
6372 default:
6373 operands[1] = pat;
6374 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6375 break;
6376 }
6377 }
6378
6379 /* Output a return. */
6380
6381 const char *
6382 output_return (rtx_insn *insn)
6383 {
6384 if (crtl->calls_eh_return)
6385 {
6386 /* If the function uses __builtin_eh_return, the eh_return
6387 machinery occupies the delay slot. */
6388 gcc_assert (!final_sequence);
6389
6390 if (flag_delayed_branch)
6391 {
6392 if (!TARGET_FLAT && TARGET_V9)
6393 fputs ("\treturn\t%i7+8\n", asm_out_file);
6394 else
6395 {
6396 if (!TARGET_FLAT)
6397 fputs ("\trestore\n", asm_out_file);
6398
6399 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6400 }
6401
6402 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6403 }
6404 else
6405 {
6406 if (!TARGET_FLAT)
6407 fputs ("\trestore\n", asm_out_file);
6408
6409 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6410 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6411 }
6412 }
6413 else if (sparc_leaf_function_p || TARGET_FLAT)
6414 {
6415 /* This is a leaf or flat function so we don't have to bother restoring
6416 the register window, which frees us from dealing with the convoluted
6417 semantics of restore/return. We simply output the jump to the
6418 return address and the insn in the delay slot (if any). */
6419
6420 return "jmp\t%%o7+%)%#";
6421 }
6422 else
6423 {
6424 /* This is a regular function so we have to restore the register window.
6425 We may have a pending insn for the delay slot, which will be either
6426 combined with the 'restore' instruction or put in the delay slot of
6427 the 'return' instruction. */
6428
6429 if (final_sequence)
6430 {
6431 rtx_insn *delay;
6432 rtx pat;
6433
6434 delay = NEXT_INSN (insn);
6435 gcc_assert (delay);
6436
6437 pat = PATTERN (delay);
6438
6439 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6440 {
6441 epilogue_renumber (&pat, 0);
6442 return "return\t%%i7+%)%#";
6443 }
6444 else
6445 {
6446 output_asm_insn ("jmp\t%%i7+%)", NULL);
6447
6448 /* We're going to output the insn in the delay slot manually.
6449 Make sure to output its source location first. */
6450 PATTERN (delay) = gen_blockage ();
6451 INSN_CODE (delay) = -1;
6452 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6453 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6454
6455 output_restore (pat);
6456 }
6457 }
6458 else
6459 {
6460 /* The delay slot is empty. */
6461 if (TARGET_V9)
6462 return "return\t%%i7+%)\n\t nop";
6463 else if (flag_delayed_branch)
6464 return "jmp\t%%i7+%)\n\t restore";
6465 else
6466 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6467 }
6468 }
6469
6470 return "";
6471 }
6472
6473 /* Output a sibling call. */
6474
6475 const char *
6476 output_sibcall (rtx_insn *insn, rtx call_operand)
6477 {
6478 rtx operands[1];
6479
6480 gcc_assert (flag_delayed_branch);
6481
6482 operands[0] = call_operand;
6483
6484 if (sparc_leaf_function_p || TARGET_FLAT)
6485 {
6486 /* This is a leaf or flat function so we don't have to bother restoring
6487 the register window. We simply output the jump to the function and
6488 the insn in the delay slot (if any). */
6489
6490 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6491
6492 if (final_sequence)
6493 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6494 operands);
6495 else
6496 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6497 it into a branch if possible. */
6498 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6499 operands);
6500 }
6501 else
6502 {
6503 /* This is a regular function so we have to restore the register window.
6504 We may have a pending insn for the delay slot, which will be combined
6505 with the 'restore' instruction. */
6506
6507 output_asm_insn ("call\t%a0, 0", operands);
6508
6509 if (final_sequence)
6510 {
6511 rtx_insn *delay;
6512 rtx pat;
6513
6514 delay = NEXT_INSN (insn);
6515 gcc_assert (delay);
6516
6517 pat = PATTERN (delay);
6518
6519 /* We're going to output the insn in the delay slot manually.
6520 Make sure to output its source location first. */
6521 PATTERN (delay) = gen_blockage ();
6522 INSN_CODE (delay) = -1;
6523 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6524 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6525
6526 output_restore (pat);
6527 }
6528 else
6529 output_restore (NULL_RTX);
6530 }
6531
6532 return "";
6533 }
6534 \f
6535 /* Functions for handling argument passing.
6536
6537 For 32-bit, the first 6 args are normally in registers and the rest are
6538 pushed. Any arg that starts within the first 6 words is at least
6539 partially passed in a register unless its data type forbids it.
6540
6541 For 64-bit, the argument registers are laid out as an array of 16 elements
6542 and arguments are added sequentially. The first 6 int args and up to the
6543 first 16 fp args (depending on size) are passed in regs.
6544
6545 Slot Stack Integral Float Float in structure Double Long Double
6546 ---- ----- -------- ----- ------------------ ------ -----------
6547 15 [SP+248] %f31 %f30,%f31 %d30
6548 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6549 13 [SP+232] %f27 %f26,%f27 %d26
6550 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6551 11 [SP+216] %f23 %f22,%f23 %d22
6552 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6553 9 [SP+200] %f19 %f18,%f19 %d18
6554 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6555 7 [SP+184] %f15 %f14,%f15 %d14
6556 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6557 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6558 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6559 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6560 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6561 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6562 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6563
6564 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6565
6566 Integral arguments are always passed as 64-bit quantities appropriately
6567 extended.
6568
6569 Passing of floating point values is handled as follows.
6570 If a prototype is in scope:
6571 If the value is in a named argument (i.e. not a stdarg function or a
6572 value not part of the `...') then the value is passed in the appropriate
6573 fp reg.
6574 If the value is part of the `...' and is passed in one of the first 6
6575 slots then the value is passed in the appropriate int reg.
6576 If the value is part of the `...' and is not passed in one of the first 6
6577 slots then the value is passed in memory.
6578 If a prototype is not in scope:
6579 If the value is one of the first 6 arguments the value is passed in the
6580 appropriate integer reg and the appropriate fp reg.
6581 If the value is not one of the first 6 arguments the value is passed in
6582 the appropriate fp reg and in memory.
6583
6584
6585 Summary of the calling conventions implemented by GCC on the SPARC:
6586
6587 32-bit ABI:
6588 size argument return value
6589
6590 small integer <4 int. reg. int. reg.
6591 word 4 int. reg. int. reg.
6592 double word 8 int. reg. int. reg.
6593
6594 _Complex small integer <8 int. reg. int. reg.
6595 _Complex word 8 int. reg. int. reg.
6596 _Complex double word 16 memory int. reg.
6597
6598 vector integer <=8 int. reg. FP reg.
6599 vector integer >8 memory memory
6600
6601 float 4 int. reg. FP reg.
6602 double 8 int. reg. FP reg.
6603 long double 16 memory memory
6604
6605 _Complex float 8 memory FP reg.
6606 _Complex double 16 memory FP reg.
6607 _Complex long double 32 memory FP reg.
6608
6609 vector float any memory memory
6610
6611 aggregate any memory memory
6612
6613
6614
6615 64-bit ABI:
6616 size argument return value
6617
6618 small integer <8 int. reg. int. reg.
6619 word 8 int. reg. int. reg.
6620 double word 16 int. reg. int. reg.
6621
6622 _Complex small integer <16 int. reg. int. reg.
6623 _Complex word 16 int. reg. int. reg.
6624 _Complex double word 32 memory int. reg.
6625
6626 vector integer <=16 FP reg. FP reg.
6627 vector integer 16<s<=32 memory FP reg.
6628 vector integer >32 memory memory
6629
6630 float 4 FP reg. FP reg.
6631 double 8 FP reg. FP reg.
6632 long double 16 FP reg. FP reg.
6633
6634 _Complex float 8 FP reg. FP reg.
6635 _Complex double 16 FP reg. FP reg.
6636 _Complex long double 32 memory FP reg.
6637
6638 vector float <=16 FP reg. FP reg.
6639 vector float 16<s<=32 memory FP reg.
6640 vector float >32 memory memory
6641
6642 aggregate <=16 reg. reg.
6643 aggregate 16<s<=32 memory reg.
6644 aggregate >32 memory memory
6645
6646
6647
6648 Note #1: complex floating-point types follow the extended SPARC ABIs as
6649 implemented by the Sun compiler.
6650
6651 Note #2: integral vector types follow the scalar floating-point types
6652 conventions to match what is implemented by the Sun VIS SDK.
6653
6654 Note #3: floating-point vector types follow the aggregate types
6655 conventions. */
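
/* Editorial illustration, not part of the original source: reading the 64-bit
   table above for a prototyped call f (int a, double b, double c), A occupies
   slot 0 and is passed appropriately extended in %o0, while B and C occupy
   slots 1 and 2 and are passed in %d2 and %d4; the stack slots [SP+136] and
   [SP+144] are still reserved for them even though the values travel in
   registers.  */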
6656
6657
6658 /* Maximum number of int regs for args. */
6659 #define SPARC_INT_ARG_MAX 6
6660 /* Maximum number of fp regs for args. */
6661 #define SPARC_FP_ARG_MAX 16
6662 /* Number of words (partially) occupied for a given size in units. */
6663 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6664
6665 /* Handle the INIT_CUMULATIVE_ARGS macro.
6666 Initialize a variable CUM of type CUMULATIVE_ARGS
6667 for a call to a function whose data type is FNTYPE.
6668 For a library call, FNTYPE is 0. */
6669
6670 void
6671 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6672 {
6673 cum->words = 0;
6674 cum->prototype_p = fntype && prototype_p (fntype);
6675 cum->libcall_p = !fntype;
6676 }
6677
6678 /* Handle promotion of pointer and integer arguments. */
6679
6680 static machine_mode
6681 sparc_promote_function_mode (const_tree type, machine_mode mode,
6682 int *punsignedp, const_tree, int)
6683 {
6684 if (type && POINTER_TYPE_P (type))
6685 {
6686 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6687 return Pmode;
6688 }
6689
6690 /* Integral arguments are passed as full words, as per the ABI. */
6691 if (GET_MODE_CLASS (mode) == MODE_INT
6692 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6693 return word_mode;
6694
6695 return mode;
6696 }
6697
6698 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6699
6700 static bool
6701 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6702 {
6703 return TARGET_ARCH64 ? true : false;
6704 }
6705
6706 /* Traverse the record TYPE recursively and call FUNC on its fields.
6707 NAMED is true if this is for a named parameter. DATA is passed
6708 to FUNC for each field. OFFSET is the starting position and
6709 PACKED is true if we are inside a packed record. */
6710
6711 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6712 static void
6713 traverse_record_type (const_tree type, bool named, T *data,
6714 HOST_WIDE_INT offset = 0, bool packed = false)
6715 {
6716 /* The ABI obviously doesn't specify how packed structures are passed.
6717 These are passed in integer regs if possible, otherwise memory. */
6718 if (!packed)
6719 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6720 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6721 {
6722 packed = true;
6723 break;
6724 }
6725
6726 /* Walk the real fields, but skip those with no size or a zero size.
6727 ??? Fields with variable offset are handled as having zero offset. */
6728 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6729 if (TREE_CODE (field) == FIELD_DECL)
6730 {
6731 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6732 continue;
6733
6734 HOST_WIDE_INT bitpos = offset;
6735 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6736 bitpos += int_bit_position (field);
6737
6738 tree field_type = TREE_TYPE (field);
6739 if (TREE_CODE (field_type) == RECORD_TYPE)
6740 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6741 packed);
6742 else
6743 {
6744 const bool fp_type
6745 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6746 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6747 data);
6748 }
6749 }
6750 }
6751
6752 /* Handle recursive register classifying for structure layout. */
6753
6754 typedef struct
6755 {
6756 bool fp_regs; /* true if the field is eligible for FP registers. */
6757 bool fp_regs_in_first_word; /* true if such a field is in the first word. */
6758 } classify_data_t;
6759
6760 /* A subroutine of function_arg_slotno. Classify the field. */
6761
6762 inline void
6763 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6764 classify_data_t *data)
6765 {
6766 if (fp)
6767 {
6768 data->fp_regs = true;
6769 if (bitpos < BITS_PER_WORD)
6770 data->fp_regs_in_first_word = true;
6771 }
6772 }
6773
6774 /* Compute the slot number to pass an argument in.
6775 Return the slot number or -1 if passing on the stack.
6776
6777 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6778 the preceding args and about the function being called.
6779 MODE is the argument's machine mode.
6780 TYPE is the data type of the argument (as a tree).
6781 This is null for libcalls where that information may
6782 not be available.
6783 NAMED is nonzero if this argument is a named parameter
6784 (otherwise it is an extra parameter matching an ellipsis).
6785 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6786 *PREGNO records the register number to use if scalar type.
6787 *PPADDING records the amount of padding needed in words. */
6788
6789 static int
6790 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6791 const_tree type, bool named, bool incoming,
6792 int *pregno, int *ppadding)
6793 {
6794 int regbase = (incoming
6795 ? SPARC_INCOMING_INT_ARG_FIRST
6796 : SPARC_OUTGOING_INT_ARG_FIRST);
6797 int slotno = cum->words;
6798 enum mode_class mclass;
6799 int regno;
6800
6801 *ppadding = 0;
6802
6803 if (type && TREE_ADDRESSABLE (type))
6804 return -1;
6805
6806 if (TARGET_ARCH32
6807 && mode == BLKmode
6808 && type
6809 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6810 return -1;
6811
6812 /* For SPARC64, objects requiring 16-byte alignment get it. */
6813 if (TARGET_ARCH64
6814 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6815 && (slotno & 1) != 0)
6816 slotno++, *ppadding = 1;
6817
6818 mclass = GET_MODE_CLASS (mode);
6819 if (type && TREE_CODE (type) == VECTOR_TYPE)
6820 {
6821 /* Vector types deserve special treatment because they are
6822 polymorphic wrt their mode, depending upon whether VIS
6823 instructions are enabled. */
6824 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6825 {
6826 /* The SPARC port defines no floating-point vector modes. */
6827 gcc_assert (mode == BLKmode);
6828 }
6829 else
6830 {
6831 /* Integral vector types should either have a vector
6832 mode or an integral mode, because we are guaranteed
6833 by pass_by_reference that their size is not greater
6834 than 16 bytes and TImode is 16-byte wide. */
6835 gcc_assert (mode != BLKmode);
6836
6837 /* Vector integers are handled like floats according to
6838 the Sun VIS SDK. */
6839 mclass = MODE_FLOAT;
6840 }
6841 }
6842
6843 switch (mclass)
6844 {
6845 case MODE_FLOAT:
6846 case MODE_COMPLEX_FLOAT:
6847 case MODE_VECTOR_INT:
6848 if (TARGET_ARCH64 && TARGET_FPU && named)
6849 {
6850 /* If all arg slots are filled, then must pass on stack. */
6851 if (slotno >= SPARC_FP_ARG_MAX)
6852 return -1;
6853
6854 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6855 /* Arguments filling only a single FP register are
6856 right-justified in the outer double FP register. */
6857 if (GET_MODE_SIZE (mode) <= 4)
6858 regno++;
6859 break;
6860 }
6861 /* fallthrough */
6862
6863 case MODE_INT:
6864 case MODE_COMPLEX_INT:
6865 /* If all arg slots are filled, then must pass on stack. */
6866 if (slotno >= SPARC_INT_ARG_MAX)
6867 return -1;
6868
6869 regno = regbase + slotno;
6870 break;
6871
6872 case MODE_RANDOM:
6873 if (mode == VOIDmode)
6874 /* MODE is VOIDmode when generating the actual call. */
6875 return -1;
6876
6877 gcc_assert (mode == BLKmode);
6878
6879 if (TARGET_ARCH32
6880 || !type
6881 || (TREE_CODE (type) != RECORD_TYPE
6882 && TREE_CODE (type) != VECTOR_TYPE))
6883 {
6884 /* If all arg slots are filled, then must pass on stack. */
6885 if (slotno >= SPARC_INT_ARG_MAX)
6886 return -1;
6887
6888 regno = regbase + slotno;
6889 }
6890 else /* TARGET_ARCH64 && type */
6891 {
6892 /* If all arg slots are filled, then must pass on stack. */
6893 if (slotno >= SPARC_FP_ARG_MAX)
6894 return -1;
6895
6896 if (TREE_CODE (type) == RECORD_TYPE)
6897 {
6898 classify_data_t data = { false, false };
6899 traverse_record_type<classify_data_t, classify_registers>
6900 (type, named, &data);
6901
6902 if (data.fp_regs)
6903 {
6904 /* If all FP slots are filled except for the last one and
6905 there is no FP field in the first word, then must pass
6906 on stack. */
6907 if (slotno >= SPARC_FP_ARG_MAX - 1
6908 && !data.fp_regs_in_first_word)
6909 return -1;
6910 }
6911 else
6912 {
6913 /* If all int slots are filled, then must pass on stack. */
6914 if (slotno >= SPARC_INT_ARG_MAX)
6915 return -1;
6916 }
6917 }
6918
6919 /* PREGNO isn't set since both int and FP regs can be used. */
6920 return slotno;
6921 }
6922 break;
6923
6924 default :
6925 gcc_unreachable ();
6926 }
6927
6928 *pregno = regno;
6929 return slotno;
6930 }
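
/* Editorial illustration, not part of the original source: on TARGET_ARCH64
   with TARGET_FPU, a named double passed as the third argument has
   slotno == 2, so the code above computes regno == SPARC_FP_ARG_FIRST + 4;
   assuming SPARC_FP_ARG_FIRST designates %f0, that is the %d4 pair shown in
   the table further up.  A named float in the same slot has regno bumped by
   one, landing right-justified in %f5, again as the table shows.  */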
6931
6932 /* Handle recursive register counting/assigning for structure layout. */
6933
6934 typedef struct
6935 {
6936 int slotno; /* slot number of the argument. */
6937 int regbase; /* regno of the base register. */
6938 int intoffset; /* offset of the first pending integer field. */
6939 int nregs; /* number of words passed in registers. */
6940 bool stack; /* true if part of the argument is on the stack. */
6941 rtx ret; /* return expression being built. */
6942 } assign_data_t;
6943
6944 /* A subroutine of function_arg_record_value. Compute the number of integer
6945 registers to be assigned between DATA->intoffset and BITPOS. Return
6946 true if at least one integer register is assigned or false otherwise. */
6947
6948 static bool
6949 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6950 {
6951 if (data->intoffset < 0)
6952 return false;
6953
6954 const int intoffset = data->intoffset;
6955 data->intoffset = -1;
6956
6957 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6958 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6959 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6960 int nregs = (endbit - startbit) / BITS_PER_WORD;
6961
6962 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6963 {
6964 nregs = SPARC_INT_ARG_MAX - this_slotno;
6965
6966 /* We need to pass this field (partly) on the stack. */
6967 data->stack = 1;
6968 }
6969
6970 if (nregs <= 0)
6971 return false;
6972
6973 *pnregs = nregs;
6974 return true;
6975 }
6976
6977 /* A subroutine of function_arg_record_value. Compute the number and the mode
6978 of the FP registers to be assigned for FIELD. Return true if at least one
6979 FP register is assigned or false otherwise. */
6980
6981 static bool
6982 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6983 assign_data_t *data,
6984 int *pnregs, machine_mode *pmode)
6985 {
6986 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6987 machine_mode mode = DECL_MODE (field);
6988 int nregs, nslots;
6989
6990 /* Slots are counted as words while regs are counted as having the size of
6991 the (inner) mode. */
6992 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6993 {
6994 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6995 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6996 }
6997 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6998 {
6999 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7000 nregs = 2;
7001 }
7002 else
7003 nregs = 1;
7004
7005 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7006
7007 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7008 {
7009 nslots = SPARC_FP_ARG_MAX - this_slotno;
7010 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7011
7012 /* We need to pass this field (partly) on the stack. */
7013 data->stack = 1;
7014
7015 if (nregs <= 0)
7016 return false;
7017 }
7018
7019 *pnregs = nregs;
7020 *pmode = mode;
7021 return true;
7022 }
7023
7024 /* A subroutine of function_arg_record_value. Count the number of registers
7025 to be assigned for FIELD and between DATA->intoffset and BITPOS. */
7026
7027 inline void
7028 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7029 assign_data_t *data)
7030 {
7031 if (fp)
7032 {
7033 int nregs;
7034 machine_mode mode;
7035
7036 if (compute_int_layout (bitpos, data, &nregs))
7037 data->nregs += nregs;
7038
7039 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7040 data->nregs += nregs;
7041 }
7042 else
7043 {
7044 if (data->intoffset < 0)
7045 data->intoffset = bitpos;
7046 }
7047 }
7048
7049 /* A subroutine of function_arg_record_value. Assign the bits of the
7050 structure between DATA->intoffset and BITPOS to integer registers. */
7051
7052 static void
7053 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
7054 {
7055 int intoffset = data->intoffset;
7056 machine_mode mode;
7057 int nregs;
7058
7059 if (!compute_int_layout (bitpos, data, &nregs))
7060 return;
7061
7062 /* If this is the trailing part of a word, only load that much into
7063 the register. Otherwise load the whole register. Note that in
7064 the latter case we may pick up unwanted bits. It's not a problem
7065 at the moment but we may wish to revisit it. */
7066 if (intoffset % BITS_PER_WORD != 0)
7067 mode = smallest_int_mode_for_size (BITS_PER_WORD
7068 - intoffset % BITS_PER_WORD);
7069 else
7070 mode = word_mode;
7071
7072 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7073 unsigned int regno = data->regbase + this_slotno;
7074 intoffset /= BITS_PER_UNIT;
7075
7076 do
7077 {
7078 rtx reg = gen_rtx_REG (mode, regno);
7079 XVECEXP (data->ret, 0, data->stack + data->nregs)
7080 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7081 data->nregs += 1;
7082 mode = word_mode;
7083 regno += 1;
7084 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7085 }
7086 while (--nregs > 0);
7087 }
7088
7089 /* A subroutine of function_arg_record_value. Assign FIELD at position
7090 BITPOS to FP registers. */
7091
7092 static void
7093 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
7094 assign_data_t *data)
7095 {
7096 int nregs;
7097 machine_mode mode;
7098
7099 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7100 return;
7101
7102 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7103 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7104 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7105 regno++;
7106 int pos = bitpos / BITS_PER_UNIT;
7107
7108 do
7109 {
7110 rtx reg = gen_rtx_REG (mode, regno);
7111 XVECEXP (data->ret, 0, data->stack + data->nregs)
7112 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7113 data->nregs += 1;
7114 regno += GET_MODE_SIZE (mode) / 4;
7115 pos += GET_MODE_SIZE (mode);
7116 }
7117 while (--nregs > 0);
7118 }
7119
7120 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7121 the structure between DATA->intoffset and BITPOS to registers. */
7122
7123 inline void
7124 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7125 assign_data_t *data)
7126 {
7127 if (fp)
7128 {
7129 assign_int_registers (bitpos, data);
7130
7131 assign_fp_registers (field, bitpos, data);
7132 }
7133 else
7134 {
7135 if (data->intoffset < 0)
7136 data->intoffset = bitpos;
7137 }
7138 }
7139
7140 /* Used by function_arg and sparc_function_value_1 to implement the complex
7141 conventions of the 64-bit ABI for passing and returning structures.
7142 Return an expression valid as a return value for the FUNCTION_ARG
7143 and TARGET_FUNCTION_VALUE.
7144
7145 TYPE is the data type of the argument (as a tree).
7146 This is null for libcalls where that information may
7147 not be available.
7148 MODE is the argument's machine mode.
7149 SLOTNO is the index number of the argument's slot in the parameter array.
7150 NAMED is true if this argument is a named parameter
7151 (otherwise it is an extra parameter matching an ellipsis).
7152 REGBASE is the regno of the base register for the parameter array. */
7153
7154 static rtx
7155 function_arg_record_value (const_tree type, machine_mode mode,
7156 int slotno, bool named, int regbase)
7157 {
7158 HOST_WIDE_INT typesize = int_size_in_bytes (type);
7159 assign_data_t data;
7160 int nregs;
7161
7162 data.slotno = slotno;
7163 data.regbase = regbase;
7164
7165 /* Count how many registers we need. */
7166 data.nregs = 0;
7167 data.intoffset = 0;
7168 data.stack = false;
7169 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7170
7171 /* Take into account pending integer fields. */
7172 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
7173 data.nregs += nregs;
7174
7175 /* Allocate the vector and handle some annoying special cases. */
7176 nregs = data.nregs;
7177
7178 if (nregs == 0)
7179 {
7180 /* ??? Empty structure has no value? Duh? */
7181 if (typesize <= 0)
7182 {
7183 /* Though there's nothing really to store, return a word register
7184 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7185 leads to breakage due to the fact that there are zero bytes to
7186 load. */
7187 return gen_rtx_REG (mode, regbase);
7188 }
7189
7190 /* ??? C++ has structures with no fields, and yet a size. Give up
7191 for now and pass everything back in integer registers. */
7192 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7193 if (nregs + slotno > SPARC_INT_ARG_MAX)
7194 nregs = SPARC_INT_ARG_MAX - slotno;
7195 }
7196
7197 gcc_assert (nregs > 0);
7198
7199 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7200
7201 /* If at least one field must be passed on the stack, generate
7202 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7203 also be passed on the stack. We can't do much better because the
7204 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7205 of structures for which the fields passed exclusively in registers
7206 are not at the beginning of the structure. */
7207 if (data.stack)
7208 XVECEXP (data.ret, 0, 0)
7209 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7210
7211 /* Assign the registers. */
7212 data.nregs = 0;
7213 data.intoffset = 0;
7214 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7215
7216 /* Assign pending integer fields. */
7217 assign_int_registers (typesize * BITS_PER_UNIT, &data);
7218
7219 gcc_assert (data.nregs == nregs);
7220
7221 return data.ret;
7222 }
7223
7224 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7225 of the 64-bit ABI for passing and returning unions.
7226 Return an expression valid as a return value for the FUNCTION_ARG
7227 and TARGET_FUNCTION_VALUE.
7228
7229 SIZE is the size in bytes of the union.
7230 MODE is the argument's machine mode. SLOTNO is the argument's slot number.
7231 REGNO is the hard register the union will be passed in. */
7232
7233 static rtx
7234 function_arg_union_value (int size, machine_mode mode, int slotno,
7235 int regno)
7236 {
7237 int nwords = CEIL_NWORDS (size), i;
7238 rtx regs;
7239
7240 /* See comment in previous function for empty structures. */
7241 if (nwords == 0)
7242 return gen_rtx_REG (mode, regno);
7243
7244 if (slotno == SPARC_INT_ARG_MAX - 1)
7245 nwords = 1;
7246
7247 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7248
7249 for (i = 0; i < nwords; i++)
7250 {
7251 /* Unions are passed left-justified. */
7252 XVECEXP (regs, 0, i)
7253 = gen_rtx_EXPR_LIST (VOIDmode,
7254 gen_rtx_REG (word_mode, regno),
7255 GEN_INT (UNITS_PER_WORD * i));
7256 regno++;
7257 }
7258
7259 return regs;
7260 }
7261
7262 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7263 for passing and returning BLKmode vectors.
7264 Return an expression valid as a return value for the FUNCTION_ARG
7265 and TARGET_FUNCTION_VALUE.
7266
7267 SIZE is the size in bytes of the vector.
7268 REGNO is the FP hard register the vector will be passed in. */
7269
7270 static rtx
7271 function_arg_vector_value (int size, int regno)
7272 {
7273 const int nregs = MAX (1, size / 8);
7274 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7275
7276 if (size < 8)
7277 XVECEXP (regs, 0, 0)
7278 = gen_rtx_EXPR_LIST (VOIDmode,
7279 gen_rtx_REG (SImode, regno),
7280 const0_rtx);
7281 else
7282 for (int i = 0; i < nregs; i++)
7283 XVECEXP (regs, 0, i)
7284 = gen_rtx_EXPR_LIST (VOIDmode,
7285 gen_rtx_REG (DImode, regno + 2*i),
7286 GEN_INT (i*8));
7287
7288 return regs;
7289 }
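
/* Editorial illustration, not part of the original source: for a 16-byte
   BLKmode vector passed starting at FP register REGNO, the loop above builds
   (parallel [(expr_list (reg:DI REGNO) 0) (expr_list (reg:DI REGNO+2) 8)]),
   i.e. two double FP registers covering bytes 0-7 and 8-15; a vector smaller
   than 8 bytes is instead described by a single SImode register at offset 0.  */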
7290
7291 /* Determine where to put an argument to a function.
7292 Value is zero to push the argument on the stack,
7293 or a hard register in which to store the argument.
7294
7295 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7296 the preceding args and about the function being called.
7297 MODE is the argument's machine mode.
7298 TYPE is the data type of the argument (as a tree).
7299 This is null for libcalls where that information may
7300 not be available.
7301 NAMED is true if this argument is a named parameter
7302 (otherwise it is an extra parameter matching an ellipsis).
7303 INCOMING is false for TARGET_FUNCTION_ARG, true for
7304 TARGET_FUNCTION_INCOMING_ARG. */
7305
7306 static rtx
7307 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7308 const_tree type, bool named, bool incoming)
7309 {
7310 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7311
7312 int regbase = (incoming
7313 ? SPARC_INCOMING_INT_ARG_FIRST
7314 : SPARC_OUTGOING_INT_ARG_FIRST);
7315 int slotno, regno, padding;
7316 enum mode_class mclass = GET_MODE_CLASS (mode);
7317
7318 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7319 &regno, &padding);
7320 if (slotno == -1)
7321 return 0;
7322
7323 /* Vector types deserve special treatment because they are polymorphic wrt
7324 their mode, depending upon whether VIS instructions are enabled. */
7325 if (type && TREE_CODE (type) == VECTOR_TYPE)
7326 {
7327 HOST_WIDE_INT size = int_size_in_bytes (type);
7328 gcc_assert ((TARGET_ARCH32 && size <= 8)
7329 || (TARGET_ARCH64 && size <= 16));
7330
7331 if (mode == BLKmode)
7332 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7333
7334 mclass = MODE_FLOAT;
7335 }
7336
7337 if (TARGET_ARCH32)
7338 return gen_rtx_REG (mode, regno);
7339
7340 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7341 and are promoted to registers if possible. */
7342 if (type && TREE_CODE (type) == RECORD_TYPE)
7343 {
7344 HOST_WIDE_INT size = int_size_in_bytes (type);
7345 gcc_assert (size <= 16);
7346
7347 return function_arg_record_value (type, mode, slotno, named, regbase);
7348 }
7349
7350 /* Unions up to 16 bytes in size are passed in integer registers. */
7351 else if (type && TREE_CODE (type) == UNION_TYPE)
7352 {
7353 HOST_WIDE_INT size = int_size_in_bytes (type);
7354 gcc_assert (size <= 16);
7355
7356 return function_arg_union_value (size, mode, slotno, regno);
7357 }
7358
7359 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7360 but also have the slot allocated for them.
7361 If no prototype is in scope, fp values in register slots get passed
7362 in two places: either fp regs and int regs, or fp regs and memory. */
7363 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7364 && SPARC_FP_REG_P (regno))
7365 {
7366 rtx reg = gen_rtx_REG (mode, regno);
7367 if (cum->prototype_p || cum->libcall_p)
7368 return reg;
7369 else
7370 {
7371 rtx v0, v1;
7372
7373 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7374 {
7375 int intreg;
7376
7377 /* On incoming, we don't need to know that the value
7378 is passed in %f0 and %i0, and it confuses other parts
7379 causing needless spillage even in the simplest cases. */
7380 if (incoming)
7381 return reg;
7382
7383 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7384 + (regno - SPARC_FP_ARG_FIRST) / 2);
7385
7386 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7387 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7388 const0_rtx);
7389 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7390 }
7391 else
7392 {
7393 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7394 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7395 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7396 }
7397 }
7398 }
7399
7400 /* All other aggregate types are passed in an integer register in a mode
7401 corresponding to the size of the type. */
7402 else if (type && AGGREGATE_TYPE_P (type))
7403 {
7404 HOST_WIDE_INT size = int_size_in_bytes (type);
7405 gcc_assert (size <= 16);
7406
7407 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7408 }
7409
7410 return gen_rtx_REG (mode, regno);
7411 }
7412
7413 /* Handle the TARGET_FUNCTION_ARG target hook. */
7414
7415 static rtx
7416 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7417 const_tree type, bool named)
7418 {
7419 return sparc_function_arg_1 (cum, mode, type, named, false);
7420 }
7421
7422 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7423
7424 static rtx
7425 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7426 const_tree type, bool named)
7427 {
7428 return sparc_function_arg_1 (cum, mode, type, named, true);
7429 }
7430
7431 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7432
7433 static unsigned int
7434 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7435 {
7436 return ((TARGET_ARCH64
7437 && (GET_MODE_ALIGNMENT (mode) == 128
7438 || (type && TYPE_ALIGN (type) == 128)))
7439 ? 128
7440 : PARM_BOUNDARY);
7441 }
7442
7443 /* For an arg passed partly in registers and partly in memory,
7444 this is the number of bytes of registers used.
7445 For args passed entirely in registers or entirely in memory, zero.
7446
7447 Any arg that starts in the first 6 regs but won't entirely fit in them
7448 needs partial registers on v8. On v9, structures with integer
7449 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7450 values that begin in the last fp reg [where "last fp reg" varies with the
7451 mode] will be split between that reg and memory. */
7452
7453 static int
7454 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7455 tree type, bool named)
7456 {
7457 int slotno, regno, padding;
7458
7459 /* We pass false for incoming here; it doesn't matter. */
7460 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7461 false, &regno, &padding);
7462
7463 if (slotno == -1)
7464 return 0;
7465
7466 if (TARGET_ARCH32)
7467 {
7468 if ((slotno + (mode == BLKmode
7469 ? CEIL_NWORDS (int_size_in_bytes (type))
7470 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7471 > SPARC_INT_ARG_MAX)
7472 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7473 }
7474 else
7475 {
7476 /* We are guaranteed by pass_by_reference that the size of the
7477 argument is not greater than 16 bytes, so we only need to return
7478 one word if the argument is partially passed in registers. */
7479
7480 if (type && AGGREGATE_TYPE_P (type))
7481 {
7482 int size = int_size_in_bytes (type);
7483
7484 if (size > UNITS_PER_WORD
7485 && (slotno == SPARC_INT_ARG_MAX - 1
7486 || slotno == SPARC_FP_ARG_MAX - 1))
7487 return UNITS_PER_WORD;
7488 }
7489 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7490 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7491 && ! (TARGET_FPU && named)))
7492 {
7493 /* The complex types are passed as packed types. */
7494 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7495 && slotno == SPARC_INT_ARG_MAX - 1)
7496 return UNITS_PER_WORD;
7497 }
7498 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7499 {
7500 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7501 > SPARC_FP_ARG_MAX)
7502 return UNITS_PER_WORD;
7503 }
7504 }
7505
7506 return 0;
7507 }
7508
7509 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7510 Specify whether to pass the argument by reference. */
7511
7512 static bool
7513 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7514 machine_mode mode, const_tree type,
7515 bool named ATTRIBUTE_UNUSED)
7516 {
7517 if (TARGET_ARCH32)
7518 /* Original SPARC 32-bit ABI says that structures and unions,
7519 and quad-precision floats are passed by reference.
7520 All base types are passed in registers.
7521
7522 Extended ABI (as implemented by the Sun compiler) says that all
7523 complex floats are passed by reference. Pass complex integers
7524 in registers up to 8 bytes. More generally, enforce the 2-word
7525 cap for passing arguments in registers.
7526
7527 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7528 integers are passed like floats of the same size, that is in
7529 registers up to 8 bytes. Pass all vector floats by reference
7530 like structures and unions. */
7531 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7532 || mode == SCmode
7533 /* Catch CDImode, TFmode, DCmode and TCmode. */
7534 || GET_MODE_SIZE (mode) > 8
7535 || (type
7536 && TREE_CODE (type) == VECTOR_TYPE
7537 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7538 else
7539 /* Original SPARC 64-bit ABI says that structures and unions
7540 up to 16 bytes in size are passed in registers, as well as
7541 all other base types.
7542
7543 Extended ABI (as implemented by the Sun compiler) says that
7544 complex floats are passed in registers up to 16 bytes. Pass
7545 all complex integers in registers up to 16 bytes. More generally,
7546 enforce the 2-word cap for passing arguments in registers.
7547
7548 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7549 integers are passed like floats of the same size, that is in
7550 registers (up to 16 bytes). Pass all vector floats like structures
7551 and unions. */
7552 return ((type
7553 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7554 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7555 /* Catch CTImode and TCmode. */
7556 || GET_MODE_SIZE (mode) > 16);
7557 }
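
/* As a reading aid only (these declarations are not part of the ABI text
   above), a few examples of the rules implemented by the function above:

       struct s { int a, b; };   8 bytes: by reference under the 32-bit ABI,
                                 in registers under the 64-bit ABI
       long double ld;           TFmode, 16 bytes: by reference under the
                                 32-bit ABI, in registers under the 64-bit ABI
       _Complex float cf;        SCmode: by reference under the 32-bit ABI,
                                 in registers under the 64-bit ABI
       long long ll;             8 bytes: in registers under both ABIs  */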
7558
7559 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7560 Update the data in CUM to advance over an argument
7561 of mode MODE and data type TYPE.
7562 TYPE is null for libcalls where that information may not be available. */
7563
7564 static void
7565 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7566 const_tree type, bool named)
7567 {
7568 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7569 int regno, padding;
7570
7571 /* We pass false for incoming here; it doesn't matter. */
7572 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7573
7574 /* If argument requires leading padding, add it. */
7575 cum->words += padding;
7576
7577 if (TARGET_ARCH32)
7578 cum->words += (mode == BLKmode
7579 ? CEIL_NWORDS (int_size_in_bytes (type))
7580 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7581 else
7582 {
7583 if (type && AGGREGATE_TYPE_P (type))
7584 {
7585 int size = int_size_in_bytes (type);
7586
7587 if (size <= 8)
7588 ++cum->words;
7589 else if (size <= 16)
7590 cum->words += 2;
7591 else /* passed by reference */
7592 ++cum->words;
7593 }
7594 else
7595 cum->words += (mode == BLKmode
7596 ? CEIL_NWORDS (int_size_in_bytes (type))
7597 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7598 }
7599 }
7600
7601 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7602 are always stored left-justified in their argument slot. */
7603
7604 static pad_direction
7605 sparc_function_arg_padding (machine_mode mode, const_tree type)
7606 {
7607 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7608 return PAD_UPWARD;
7609
7610 /* Fall back to the default. */
7611 return default_function_arg_padding (mode, type);
7612 }
7613
7614 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7615 Specify whether to return the return value in memory. */
7616
7617 static bool
7618 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7619 {
7620 if (TARGET_ARCH32)
7621 /* Original SPARC 32-bit ABI says that structures and unions,
7622 and quad-precision floats are returned in memory. All other
7623 base types are returned in registers.
7624
7625 Extended ABI (as implemented by the Sun compiler) says that
7626 all complex floats are returned in registers (8 FP registers
7627 at most for '_Complex long double'). Return all complex integers
7628 in registers (4 at most for '_Complex long long').
7629
7630 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7631 integers are returned like floats of the same size, that is in
7632 registers up to 8 bytes and in memory otherwise. Return all
7633 vector floats in memory like structures and unions; note that
7634 they always have BLKmode like the latter. */
7635 return (TYPE_MODE (type) == BLKmode
7636 || TYPE_MODE (type) == TFmode
7637 || (TREE_CODE (type) == VECTOR_TYPE
7638 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7639 else
7640 /* Original SPARC 64-bit ABI says that structures and unions
7641 up to 32 bytes in size are returned in registers, as well as
7642 all other base types.
7643
7644 Extended ABI (as implemented by the Sun compiler) says that all
7645 complex floats are returned in registers (8 FP registers at most
7646 for '_Complex long double'). Return all complex integers in
7647 registers (4 at most for '_Complex TItype').
7648
7649 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7650 integers are returned like floats of the same size, that is in
7651 registers. Return all vector floats like structures and unions;
7652 note that they always have BLKmode like the latter. */
7653 return (TYPE_MODE (type) == BLKmode
7654 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7655 }
7656
7657 /* Handle the TARGET_STRUCT_VALUE target hook.
7658 Return where to find the structure return value address. */
7659
7660 static rtx
7661 sparc_struct_value_rtx (tree fndecl, int incoming)
7662 {
7663 if (TARGET_ARCH64)
7664 return 0;
7665 else
7666 {
7667 rtx mem;
7668
7669 if (incoming)
7670 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7671 STRUCT_VALUE_OFFSET));
7672 else
7673 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7674 STRUCT_VALUE_OFFSET));
7675
7676 /* Only follow the SPARC ABI for fixed-size structure returns.
7677 Variable-size structure returns are handled per the normal
7678 procedures in GCC. This is enabled by -mstd-struct-return. */
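      /* A rough sketch of the convention being checked here (the caller-side
         code is emitted elsewhere): a 32-bit psABI caller expecting a
         structure return emits roughly

             call   f
              nop
             unimp  <size>          ! struct size in the low 12 bits

         and a callee that has actually stored the return object skips the
         unimp word by returning to %i7+12 instead of %i7+8.  */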
7679 if (incoming == 2
7680 && sparc_std_struct_return
7681 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7682 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7683 {
7684 /* We must check and adjust the return address, as it is optional
7685 whether the return object is really provided. */
7686 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7687 rtx scratch = gen_reg_rtx (SImode);
7688 rtx_code_label *endlab = gen_label_rtx ();
7689
7690 /* Calculate the return object size. */
7691 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7692 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7693 /* Construct a temporary return value. */
7694 rtx temp_val
7695 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7696
7697 /* Implement SPARC 32-bit psABI callee return struct checking:
7698
7699 Fetch the instruction where we will return to and see if
7700 it's an unimp instruction (the most significant 10 bits
7701 will be zero). */
7702 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7703 plus_constant (Pmode,
7704 ret_reg, 8)));
7705 /* Assume the size is valid and pre-adjust. */
7706 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7707 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7708 0, endlab);
7709 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7710 /* Write the address of the memory pointed to by temp_val into
7711 the memory pointed to by mem. */
7712 emit_move_insn (mem, XEXP (temp_val, 0));
7713 emit_label (endlab);
7714 }
7715
7716 return mem;
7717 }
7718 }
7719
7720 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7721 For v9, function return values are subject to the same rules as arguments,
7722 except that up to 32 bytes may be returned in registers. */
7723
7724 static rtx
7725 sparc_function_value_1 (const_tree type, machine_mode mode,
7726 bool outgoing)
7727 {
7728 /* Beware that the two values are swapped here wrt function_arg. */
7729 int regbase = (outgoing
7730 ? SPARC_INCOMING_INT_ARG_FIRST
7731 : SPARC_OUTGOING_INT_ARG_FIRST);
7732 enum mode_class mclass = GET_MODE_CLASS (mode);
7733 int regno;
7734
7735 /* Vector types deserve special treatment because they are polymorphic wrt
7736 their mode, depending upon whether VIS instructions are enabled. */
7737 if (type && TREE_CODE (type) == VECTOR_TYPE)
7738 {
7739 HOST_WIDE_INT size = int_size_in_bytes (type);
7740 gcc_assert ((TARGET_ARCH32 && size <= 8)
7741 || (TARGET_ARCH64 && size <= 32));
7742
7743 if (mode == BLKmode)
7744 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7745
7746 mclass = MODE_FLOAT;
7747 }
7748
7749 if (TARGET_ARCH64 && type)
7750 {
7751 /* Structures up to 32 bytes in size are returned in registers. */
7752 if (TREE_CODE (type) == RECORD_TYPE)
7753 {
7754 HOST_WIDE_INT size = int_size_in_bytes (type);
7755 gcc_assert (size <= 32);
7756
7757 return function_arg_record_value (type, mode, 0, 1, regbase);
7758 }
7759
7760 /* Unions up to 32 bytes in size are returned in integer registers. */
7761 else if (TREE_CODE (type) == UNION_TYPE)
7762 {
7763 HOST_WIDE_INT size = int_size_in_bytes (type);
7764 gcc_assert (size <= 32);
7765
7766 return function_arg_union_value (size, mode, 0, regbase);
7767 }
7768
7769 /* Objects that require it are returned in FP registers. */
7770 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7771 ;
7772
7773 /* All other aggregate types are returned in an integer register in a
7774 mode corresponding to the size of the type. */
7775 else if (AGGREGATE_TYPE_P (type))
7776 {
7777 /* All other aggregate types are passed in an integer register
7778 in a mode corresponding to the size of the type. */
7779 HOST_WIDE_INT size = int_size_in_bytes (type);
7780 gcc_assert (size <= 32);
7781
7782 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7783
7784 /* ??? We probably should have made the same ABI change in
7785 3.4.0 as the one we made for unions. The latter was
7786 required by the SCD though, while the former is not
7787 specified, so we favored compatibility and efficiency.
7788
7789 Now we're stuck for aggregates larger than 16 bytes,
7790 because OImode vanished in the meantime. Let's not
7791 try to be unduly clever, and simply follow the ABI
7792 for unions in that case. */
7793 if (mode == BLKmode)
7794 return function_arg_union_value (size, mode, 0, regbase);
7795 else
7796 mclass = MODE_INT;
7797 }
7798
7799 /* We should only have pointer and integer types at this point. This
7800 must match sparc_promote_function_mode. */
7801 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7802 mode = word_mode;
7803 }
7804
7805 /* We should only have pointer and integer types at this point, except with
7806 -freg-struct-return. This must match sparc_promote_function_mode. */
7807 else if (TARGET_ARCH32
7808 && !(type && AGGREGATE_TYPE_P (type))
7809 && mclass == MODE_INT
7810 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7811 mode = word_mode;
7812
7813 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7814 regno = SPARC_FP_ARG_FIRST;
7815 else
7816 regno = regbase;
7817
7818 return gen_rtx_REG (mode, regno);
7819 }
7820
7821 /* Handle TARGET_FUNCTION_VALUE.
7822 On the SPARC, the value is found in the first "output" register, but the
7823 called function leaves it in the first "input" register. */
7824
7825 static rtx
7826 sparc_function_value (const_tree valtype,
7827 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7828 bool outgoing)
7829 {
7830 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7831 }
7832
7833 /* Handle TARGET_LIBCALL_VALUE. */
7834
7835 static rtx
7836 sparc_libcall_value (machine_mode mode,
7837 const_rtx fun ATTRIBUTE_UNUSED)
7838 {
7839 return sparc_function_value_1 (NULL_TREE, mode, false);
7840 }
7841
7842 /* Handle TARGET_FUNCTION_VALUE_REGNO_P.
7843 On the SPARC, the first "output" reg is used for integer values, and the
7844 first floating point register is used for floating point values. */
7845
7846 static bool
7847 sparc_function_value_regno_p (const unsigned int regno)
7848 {
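  /* Hard register 8 is %o0 and hard register 32 is %f0.  */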
7849 return (regno == 8 || (TARGET_FPU && regno == 32));
7850 }
7851
7852 /* Do what is necessary for `va_start'. We look at the current function
7853 to determine if stdarg or varargs is used and return the address of
7854 the first unnamed parameter. */
7855
7856 static rtx
7857 sparc_builtin_saveregs (void)
7858 {
7859 int first_reg = crtl->args.info.words;
7860 rtx address;
7861 int regno;
7862
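  /* Copy the remaining unnamed incoming integer argument registers into the
     stack slots reserved for them above the frame pointer, so that va_arg
     can find every argument in memory.  */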
7863 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7864 emit_move_insn (gen_rtx_MEM (word_mode,
7865 gen_rtx_PLUS (Pmode,
7866 frame_pointer_rtx,
7867 GEN_INT (FIRST_PARM_OFFSET (0)
7868 + (UNITS_PER_WORD
7869 * regno)))),
7870 gen_rtx_REG (word_mode,
7871 SPARC_INCOMING_INT_ARG_FIRST + regno));
7872
7873 address = gen_rtx_PLUS (Pmode,
7874 frame_pointer_rtx,
7875 GEN_INT (FIRST_PARM_OFFSET (0)
7876 + UNITS_PER_WORD * first_reg));
7877
7878 return address;
7879 }
7880
7881 /* Implement `va_start' for stdarg. */
7882
7883 static void
7884 sparc_va_start (tree valist, rtx nextarg)
7885 {
7886 nextarg = expand_builtin_saveregs ();
7887 std_expand_builtin_va_start (valist, nextarg);
7888 }
7889
7890 /* Implement `va_arg' for stdarg. */
7891
7892 static tree
7893 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7894 gimple_seq *post_p)
7895 {
7896 HOST_WIDE_INT size, rsize, align;
7897 tree addr, incr;
7898 bool indirect;
7899 tree ptrtype = build_pointer_type (type);
7900
7901 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7902 {
7903 indirect = true;
7904 size = rsize = UNITS_PER_WORD;
7905 align = 0;
7906 }
7907 else
7908 {
7909 indirect = false;
7910 size = int_size_in_bytes (type);
7911 rsize = ROUND_UP (size, UNITS_PER_WORD);
7912 align = 0;
7913
7914 if (TARGET_ARCH64)
7915 {
7916 /* For SPARC64, objects requiring 16-byte alignment get it. */
7917 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7918 align = 2 * UNITS_PER_WORD;
7919
7920 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7921 are left-justified in their slots. */
7922 if (AGGREGATE_TYPE_P (type))
7923 {
7924 if (size == 0)
7925 size = rsize = UNITS_PER_WORD;
7926 else
7927 size = rsize;
7928 }
7929 }
7930 }
7931
7932 incr = valist;
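  /* If extra alignment is required, round the argument pointer up to it:
     incr = (incr + align - 1) & -align.  */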
7933 if (align)
7934 {
7935 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7936 incr = fold_convert (sizetype, incr);
7937 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7938 size_int (-align));
7939 incr = fold_convert (ptr_type_node, incr);
7940 }
7941
7942 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7943 addr = incr;
7944
7945 if (BYTES_BIG_ENDIAN && size < rsize)
7946 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7947
7948 if (indirect)
7949 {
7950 addr = fold_convert (build_pointer_type (ptrtype), addr);
7951 addr = build_va_arg_indirect_ref (addr);
7952 }
7953
7954 /* If the address isn't aligned properly for the type, we need a temporary.
7955 FIXME: This is inefficient; usually we can do this in registers. */
7956 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7957 {
7958 tree tmp = create_tmp_var (type, "va_arg_tmp");
7959 tree dest_addr = build_fold_addr_expr (tmp);
7960 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7961 3, dest_addr, addr, size_int (rsize));
7962 TREE_ADDRESSABLE (tmp) = 1;
7963 gimplify_and_add (copy, pre_p);
7964 addr = dest_addr;
7965 }
7966
7967 else
7968 addr = fold_convert (ptrtype, addr);
7969
7970 incr = fold_build_pointer_plus_hwi (incr, rsize);
7971 gimplify_assign (valist, incr, post_p);
7972
7973 return build_va_arg_indirect_ref (addr);
7974 }
7975 \f
7976 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7977 Specify whether the vector mode is supported by the hardware. */
7978
7979 static bool
7980 sparc_vector_mode_supported_p (machine_mode mode)
7981 {
7982 return TARGET_VIS && VECTOR_MODE_P (mode);
7983 }
7984 \f
7985 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7986
7987 static machine_mode
7988 sparc_preferred_simd_mode (scalar_mode mode)
7989 {
7990 if (TARGET_VIS)
7991 switch (mode)
7992 {
7993 case E_SImode:
7994 return V2SImode;
7995 case E_HImode:
7996 return V4HImode;
7997 case E_QImode:
7998 return V8QImode;
7999
8000 default:;
8001 }
8002
8003 return word_mode;
8004 }
8005 \f
8006 /* Return the string to output an unconditional branch to LABEL, which is
8007 the operand number of the label.
8008
8009 DEST is the destination insn (i.e. the label), INSN is the source. */
8010
8011 const char *
8012 output_ubranch (rtx dest, rtx_insn *insn)
8013 {
8014 static char string[64];
8015 bool v9_form = false;
8016 int delta;
8017 char *p;
8018
8019 /* Even if we are trying to use cbcond for this, evaluate
8020 whether we can use V9 branches as our backup plan. */
8021
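  /* If insn addresses are not available yet, assume the branch is distant.  */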
8022 delta = 5000000;
8023 if (INSN_ADDRESSES_SET_P ())
8024 delta = (INSN_ADDRESSES (INSN_UID (dest))
8025 - INSN_ADDRESSES (INSN_UID (insn)));
8026
8027 /* Leave some instructions for "slop". */
8028 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8029 v9_form = true;
8030
8031 if (TARGET_CBCOND)
8032 {
8033 bool emit_nop = emit_cbcond_nop (insn);
8034 bool far = false;
8035 const char *rval;
8036
8037 if (delta < -500 || delta > 500)
8038 far = true;
8039
8040 if (far)
8041 {
8042 if (v9_form)
8043 rval = "ba,a,pt\t%%xcc, %l0";
8044 else
8045 rval = "b,a\t%l0";
8046 }
8047 else
8048 {
8049 if (emit_nop)
8050 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8051 else
8052 rval = "cwbe\t%%g0, %%g0, %l0";
8053 }
8054 return rval;
8055 }
8056
8057 if (v9_form)
8058 strcpy (string, "ba%*,pt\t%%xcc, ");
8059 else
8060 strcpy (string, "b%*\t");
8061
8062 p = strchr (string, '\0');
8063 *p++ = '%';
8064 *p++ = 'l';
8065 *p++ = '0';
8066 *p++ = '%';
8067 *p++ = '(';
8068 *p = '\0';
8069
8070 return string;
8071 }
8072
8073 /* Return the string to output a conditional branch to LABEL, which is
8074 the operand number of the label. OP is the conditional expression.
8075 XEXP (OP, 0) is assumed to be a condition code register (integer or
8076 floating point) and its mode specifies what kind of comparison we made.
8077
8078 DEST is the destination insn (i.e. the label), INSN is the source.
8079
8080 REVERSED is nonzero if we should reverse the sense of the comparison.
8081
8082 ANNUL is nonzero if we should generate an annulling branch. */
8083
8084 const char *
8085 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8086 rtx_insn *insn)
8087 {
8088 static char string[64];
8089 enum rtx_code code = GET_CODE (op);
8090 rtx cc_reg = XEXP (op, 0);
8091 machine_mode mode = GET_MODE (cc_reg);
8092 const char *labelno, *branch;
8093 int spaces = 8, far;
8094 char *p;
8095
8096 /* v9 branches are limited to +-1MB. If it is too far away,
8097 change
8098
8099 bne,pt %xcc, .LC30
8100
8101 to
8102
8103 be,pn %xcc, .+12
8104 nop
8105 ba .LC30
8106
8107 and
8108
8109 fbne,a,pn %fcc2, .LC29
8110
8111 to
8112
8113 fbe,pt %fcc2, .+16
8114 nop
8115 ba .LC29 */
8116
8117 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8118 if (reversed ^ far)
8119 {
8120 /* Reversal of FP compares requires care -- an ordered compare
8121 becomes an unordered compare and vice versa. */
8122 if (mode == CCFPmode || mode == CCFPEmode)
8123 code = reverse_condition_maybe_unordered (code);
8124 else
8125 code = reverse_condition (code);
8126 }
8127
8128 /* Start by writing the branch condition. */
8129 if (mode == CCFPmode || mode == CCFPEmode)
8130 {
8131 switch (code)
8132 {
8133 case NE:
8134 branch = "fbne";
8135 break;
8136 case EQ:
8137 branch = "fbe";
8138 break;
8139 case GE:
8140 branch = "fbge";
8141 break;
8142 case GT:
8143 branch = "fbg";
8144 break;
8145 case LE:
8146 branch = "fble";
8147 break;
8148 case LT:
8149 branch = "fbl";
8150 break;
8151 case UNORDERED:
8152 branch = "fbu";
8153 break;
8154 case ORDERED:
8155 branch = "fbo";
8156 break;
8157 case UNGT:
8158 branch = "fbug";
8159 break;
8160 case UNLT:
8161 branch = "fbul";
8162 break;
8163 case UNEQ:
8164 branch = "fbue";
8165 break;
8166 case UNGE:
8167 branch = "fbuge";
8168 break;
8169 case UNLE:
8170 branch = "fbule";
8171 break;
8172 case LTGT:
8173 branch = "fblg";
8174 break;
8175 default:
8176 gcc_unreachable ();
8177 }
8178
8179 /* ??? !v9: FP branches cannot be preceded by another floating point
8180 insn. Because there is currently no concept of pre-delay slots,
8181 we can fix this only by always emitting a nop before a floating
8182 point branch. */
8183
8184 string[0] = '\0';
8185 if (! TARGET_V9)
8186 strcpy (string, "nop\n\t");
8187 strcat (string, branch);
8188 }
8189 else
8190 {
8191 switch (code)
8192 {
8193 case NE:
8194 if (mode == CCVmode || mode == CCXVmode)
8195 branch = "bvs";
8196 else
8197 branch = "bne";
8198 break;
8199 case EQ:
8200 if (mode == CCVmode || mode == CCXVmode)
8201 branch = "bvc";
8202 else
8203 branch = "be";
8204 break;
8205 case GE:
8206 if (mode == CCNZmode || mode == CCXNZmode)
8207 branch = "bpos";
8208 else
8209 branch = "bge";
8210 break;
8211 case GT:
8212 branch = "bg";
8213 break;
8214 case LE:
8215 branch = "ble";
8216 break;
8217 case LT:
8218 if (mode == CCNZmode || mode == CCXNZmode)
8219 branch = "bneg";
8220 else
8221 branch = "bl";
8222 break;
8223 case GEU:
8224 branch = "bgeu";
8225 break;
8226 case GTU:
8227 branch = "bgu";
8228 break;
8229 case LEU:
8230 branch = "bleu";
8231 break;
8232 case LTU:
8233 branch = "blu";
8234 break;
8235 default:
8236 gcc_unreachable ();
8237 }
8238 strcpy (string, branch);
8239 }
8240 spaces -= strlen (branch);
8241 p = strchr (string, '\0');
8242
8243 /* Now add the annulling, the label, and a possible nop. */
8244 if (annul && ! far)
8245 {
8246 strcpy (p, ",a");
8247 p += 2;
8248 spaces -= 2;
8249 }
8250
8251 if (TARGET_V9)
8252 {
8253 rtx note;
8254 int v8 = 0;
8255
8256 if (! far && insn && INSN_ADDRESSES_SET_P ())
8257 {
8258 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8259 - INSN_ADDRESSES (INSN_UID (insn)));
8260 /* Leave some instructions for "slop". */
8261 if (delta < -260000 || delta >= 260000)
8262 v8 = 1;
8263 }
8264
8265 switch (mode)
8266 {
8267 case E_CCmode:
8268 case E_CCNZmode:
8269 case E_CCCmode:
8270 case E_CCVmode:
8271 labelno = "%%icc, ";
8272 if (v8)
8273 labelno = "";
8274 break;
8275 case E_CCXmode:
8276 case E_CCXNZmode:
8277 case E_CCXCmode:
8278 case E_CCXVmode:
8279 labelno = "%%xcc, ";
8280 gcc_assert (!v8);
8281 break;
8282 case E_CCFPmode:
8283 case E_CCFPEmode:
8284 {
8285 static char v9_fcc_labelno[] = "%%fccX, ";
8286 /* Set the char indicating the number of the fcc reg to use. */
8287 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8288 labelno = v9_fcc_labelno;
8289 if (v8)
8290 {
8291 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8292 labelno = "";
8293 }
8294 }
8295 break;
8296 default:
8297 gcc_unreachable ();
8298 }
8299
8300 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8301 {
8302 strcpy (p,
8303 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8304 >= profile_probability::even ()) ^ far)
8305 ? ",pt" : ",pn");
8306 p += 3;
8307 spaces -= 3;
8308 }
8309 }
8310 else
8311 labelno = "";
8312
8313 if (spaces > 0)
8314 *p++ = '\t';
8315 else
8316 *p++ = ' ';
8317 strcpy (p, labelno);
8318 p = strchr (p, '\0');
8319 if (far)
8320 {
8321 strcpy (p, ".+12\n\t nop\n\tb\t");
8322 /* Skip the next insn if requested or
8323 if we know that it will be a nop. */
8324 if (annul || ! final_sequence)
8325 p[3] = '6';
8326 p += 14;
8327 }
8328 *p++ = '%';
8329 *p++ = 'l';
8330 *p++ = label + '0';
8331 *p++ = '%';
8332 *p++ = '#';
8333 *p = '\0';
8334
8335 return string;
8336 }
8337
8338 /* Emit a library call comparison between floating point X and Y.
8339 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8340 Return the new operator to be used in the comparison sequence.
8341
8342 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8343 values as arguments instead of the TFmode registers themselves;
8344 that's why we cannot call emit_float_lib_cmp. */
8345
8346 rtx
8347 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8348 {
8349 const char *qpfunc;
8350 rtx slot0, slot1, result, tem, tem2, libfunc;
8351 machine_mode mode;
8352 enum rtx_code new_comparison;
8353
8354 switch (comparison)
8355 {
8356 case EQ:
8357 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8358 break;
8359
8360 case NE:
8361 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8362 break;
8363
8364 case GT:
8365 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8366 break;
8367
8368 case GE:
8369 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8370 break;
8371
8372 case LT:
8373 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8374 break;
8375
8376 case LE:
8377 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8378 break;
8379
8380 case ORDERED:
8381 case UNORDERED:
8382 case UNGT:
8383 case UNLT:
8384 case UNEQ:
8385 case UNGE:
8386 case UNLE:
8387 case LTGT:
8388 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8389 break;
8390
8391 default:
8392 gcc_unreachable ();
8393 }
8394
8395 if (TARGET_ARCH64)
8396 {
8397 if (MEM_P (x))
8398 {
8399 tree expr = MEM_EXPR (x);
8400 if (expr)
8401 mark_addressable (expr);
8402 slot0 = x;
8403 }
8404 else
8405 {
8406 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8407 emit_move_insn (slot0, x);
8408 }
8409
8410 if (MEM_P (y))
8411 {
8412 tree expr = MEM_EXPR (y);
8413 if (expr)
8414 mark_addressable (expr);
8415 slot1 = y;
8416 }
8417 else
8418 {
8419 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8420 emit_move_insn (slot1, y);
8421 }
8422
8423 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8424 emit_library_call (libfunc, LCT_NORMAL,
8425 DImode,
8426 XEXP (slot0, 0), Pmode,
8427 XEXP (slot1, 0), Pmode);
8428 mode = DImode;
8429 }
8430 else
8431 {
8432 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8433 emit_library_call (libfunc, LCT_NORMAL,
8434 SImode,
8435 x, TFmode, y, TFmode);
8436 mode = SImode;
8437 }
8438
8439
8440 /* Immediately move the result of the libcall into a pseudo
8441 register so reload doesn't clobber the value if it needs
8442 the return register for a spill reg. */
8443 result = gen_reg_rtx (mode);
8444 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8445
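  /* The comparison routines return 0 for equal, 1 for less, 2 for greater
     and 3 for unordered; the cases below map that encoding back to an rtx
     comparison against the result register.  */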
8446 switch (comparison)
8447 {
8448 default:
8449 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8450 case ORDERED:
8451 case UNORDERED:
8452 new_comparison = (comparison == UNORDERED ? EQ : NE);
8453 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8454 case UNGT:
8455 case UNGE:
8456 new_comparison = (comparison == UNGT ? GT : NE);
8457 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8458 case UNLE:
8459 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8460 case UNLT:
8461 tem = gen_reg_rtx (mode);
8462 if (TARGET_ARCH32)
8463 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8464 else
8465 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8466 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8467 case UNEQ:
8468 case LTGT:
8469 tem = gen_reg_rtx (mode);
8470 if (TARGET_ARCH32)
8471 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8472 else
8473 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8474 tem2 = gen_reg_rtx (mode);
8475 if (TARGET_ARCH32)
8476 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8477 else
8478 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8479 new_comparison = (comparison == UNEQ ? EQ : NE);
8480 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8481 }
8482
8483 gcc_unreachable ();
8484 }
8485
8486 /* Generate an unsigned DImode to FP conversion. This is the same code
8487 optabs would emit if we didn't have TFmode patterns. */
8488
8489 void
8490 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8491 {
8492 rtx i0, i1, f0, in, out;
8493
8494 out = operands[0];
8495 in = force_reg (DImode, operands[1]);
8496 rtx_code_label *neglab = gen_label_rtx ();
8497 rtx_code_label *donelab = gen_label_rtx ();
8498 i0 = gen_reg_rtx (DImode);
8499 i1 = gen_reg_rtx (DImode);
8500 f0 = gen_reg_rtx (mode);
8501
8502 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8503
8504 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8505 emit_jump_insn (gen_jump (donelab));
8506 emit_barrier ();
8507
8508 emit_label (neglab);
8509
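  /* The value has its high bit set: halve it, folding the low bit back in
     to keep the rounding correct, convert, then double the result.  */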
8510 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8511 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8512 emit_insn (gen_iordi3 (i0, i0, i1));
8513 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8514 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8515
8516 emit_label (donelab);
8517 }
8518
8519 /* Generate an FP to unsigned DImode conversion. This is the same code
8520 optabs would emit if we didn't have TFmode patterns. */
8521
8522 void
8523 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8524 {
8525 rtx i0, i1, f0, in, out, limit;
8526
8527 out = operands[0];
8528 in = force_reg (mode, operands[1]);
8529 rtx_code_label *neglab = gen_label_rtx ();
8530 rtx_code_label *donelab = gen_label_rtx ();
8531 i0 = gen_reg_rtx (DImode);
8532 i1 = gen_reg_rtx (DImode);
8533 limit = gen_reg_rtx (mode);
8534 f0 = gen_reg_rtx (mode);
8535
8536 emit_move_insn (limit,
8537 const_double_from_real_value (
8538 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8539 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8540
8541 emit_insn (gen_rtx_SET (out,
8542 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8543 emit_jump_insn (gen_jump (donelab));
8544 emit_barrier ();
8545
8546 emit_label (neglab);
8547
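  /* The value is >= 2^63: subtract 2^63, convert, then set the sign bit of
     the integer result by XORing in 1 << 63.  */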
8548 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8549 emit_insn (gen_rtx_SET (i0,
8550 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8551 emit_insn (gen_movdi (i1, const1_rtx));
8552 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8553 emit_insn (gen_xordi3 (out, i0, i1));
8554
8555 emit_label (donelab);
8556 }
8557
8558 /* Return the string to output a compare and branch instruction to DEST.
8559 DEST is the destination insn (i.e. the label), INSN is the source,
8560 and OP is the conditional expression. */
8561
8562 const char *
8563 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8564 {
8565 machine_mode mode = GET_MODE (XEXP (op, 0));
8566 enum rtx_code code = GET_CODE (op);
8567 const char *cond_str, *tmpl;
8568 int far, emit_nop, len;
8569 static char string[64];
8570 char size_char;
8571
8572 /* Compare and Branch is limited to +-2KB. If it is too far away,
8573 change
8574
8575 cxbne X, Y, .LC30
8576
8577 to
8578
8579 cxbe X, Y, .+16
8580 nop
8581 ba,pt xcc, .LC30
8582 nop */
8583
8584 len = get_attr_length (insn);
8585
8586 far = len == 4;
8587 emit_nop = len == 2;
8588
8589 if (far)
8590 code = reverse_condition (code);
8591
8592 size_char = ((mode == SImode) ? 'w' : 'x');
8593
8594 switch (code)
8595 {
8596 case NE:
8597 cond_str = "ne";
8598 break;
8599
8600 case EQ:
8601 cond_str = "e";
8602 break;
8603
8604 case GE:
8605 cond_str = "ge";
8606 break;
8607
8608 case GT:
8609 cond_str = "g";
8610 break;
8611
8612 case LE:
8613 cond_str = "le";
8614 break;
8615
8616 case LT:
8617 cond_str = "l";
8618 break;
8619
8620 case GEU:
8621 cond_str = "cc";
8622 break;
8623
8624 case GTU:
8625 cond_str = "gu";
8626 break;
8627
8628 case LEU:
8629 cond_str = "leu";
8630 break;
8631
8632 case LTU:
8633 cond_str = "cs";
8634 break;
8635
8636 default:
8637 gcc_unreachable ();
8638 }
8639
8640 if (far)
8641 {
8642 int veryfar = 1, delta;
8643
8644 if (INSN_ADDRESSES_SET_P ())
8645 {
8646 delta = (INSN_ADDRESSES (INSN_UID (dest))
8647 - INSN_ADDRESSES (INSN_UID (insn)));
8648 /* Leave some instructions for "slop". */
8649 if (delta >= -260000 && delta < 260000)
8650 veryfar = 0;
8651 }
8652
8653 if (veryfar)
8654 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8655 else
8656 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8657 }
8658 else
8659 {
8660 if (emit_nop)
8661 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8662 else
8663 tmpl = "c%cb%s\t%%1, %%2, %%3";
8664 }
8665
8666 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8667
8668 return string;
8669 }
8670
8671 /* Return the string to output a conditional branch to LABEL, testing
8672 register REG. LABEL is the operand number of the label; REG is the
8673 operand number of the reg. OP is the conditional expression. The mode
8674 of REG says what kind of comparison we made.
8675
8676 DEST is the destination insn (i.e. the label), INSN is the source.
8677
8678 REVERSED is nonzero if we should reverse the sense of the comparison.
8679
8680 ANNUL is nonzero if we should generate an annulling branch. */
8681
8682 const char *
8683 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8684 int annul, rtx_insn *insn)
8685 {
8686 static char string[64];
8687 enum rtx_code code = GET_CODE (op);
8688 machine_mode mode = GET_MODE (XEXP (op, 0));
8689 rtx note;
8690 int far;
8691 char *p;
8692
8693 /* Branches on a register are limited to +-128KB. If it is too far away,
8694 change
8695
8696 brnz,pt %g1, .LC30
8697
8698 to
8699
8700 brz,pn %g1, .+12
8701 nop
8702 ba,pt %xcc, .LC30
8703
8704 and
8705
8706 brgez,a,pn %o1, .LC29
8707
8708 to
8709
8710 brlz,pt %o1, .+16
8711 nop
8712 ba,pt %xcc, .LC29 */
8713
8714 far = get_attr_length (insn) >= 3;
8715
8716 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8717 if (reversed ^ far)
8718 code = reverse_condition (code);
8719
8720 /* Only 64-bit versions of these instructions exist. */
8721 gcc_assert (mode == DImode);
8722
8723 /* Start by writing the branch condition. */
8724
8725 switch (code)
8726 {
8727 case NE:
8728 strcpy (string, "brnz");
8729 break;
8730
8731 case EQ:
8732 strcpy (string, "brz");
8733 break;
8734
8735 case GE:
8736 strcpy (string, "brgez");
8737 break;
8738
8739 case LT:
8740 strcpy (string, "brlz");
8741 break;
8742
8743 case LE:
8744 strcpy (string, "brlez");
8745 break;
8746
8747 case GT:
8748 strcpy (string, "brgz");
8749 break;
8750
8751 default:
8752 gcc_unreachable ();
8753 }
8754
8755 p = strchr (string, '\0');
8756
8757 /* Now add the annulling, reg, label, and nop. */
8758 if (annul && ! far)
8759 {
8760 strcpy (p, ",a");
8761 p += 2;
8762 }
8763
8764 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8765 {
8766 strcpy (p,
8767 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8768 >= profile_probability::even ()) ^ far)
8769 ? ",pt" : ",pn");
8770 p += 3;
8771 }
8772
8773 *p = p < string + 8 ? '\t' : ' ';
8774 p++;
8775 *p++ = '%';
8776 *p++ = '0' + reg;
8777 *p++ = ',';
8778 *p++ = ' ';
8779 if (far)
8780 {
8781 int veryfar = 1, delta;
8782
8783 if (INSN_ADDRESSES_SET_P ())
8784 {
8785 delta = (INSN_ADDRESSES (INSN_UID (dest))
8786 - INSN_ADDRESSES (INSN_UID (insn)));
8787 /* Leave some instructions for "slop". */
8788 if (delta >= -260000 && delta < 260000)
8789 veryfar = 0;
8790 }
8791
8792 strcpy (p, ".+12\n\t nop\n\t");
8793 /* Skip the next insn if requested or
8794 if we know that it will be a nop. */
8795 if (annul || ! final_sequence)
8796 p[3] = '6';
8797 p += 12;
8798 if (veryfar)
8799 {
8800 strcpy (p, "b\t");
8801 p += 2;
8802 }
8803 else
8804 {
8805 strcpy (p, "ba,pt\t%%xcc, ");
8806 p += 13;
8807 }
8808 }
8809 *p++ = '%';
8810 *p++ = 'l';
8811 *p++ = '0' + label;
8812 *p++ = '%';
8813 *p++ = '#';
8814 *p = '\0';
8815
8816 return string;
8817 }
8818
8819 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8820 Such instructions cannot be used in the delay slot of a return insn on v9.
8821 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8822 */
8823
8824 static int
8825 epilogue_renumber (register rtx *where, int test)
8826 {
8827 register const char *fmt;
8828 register int i;
8829 register enum rtx_code code;
8830
8831 if (*where == 0)
8832 return 0;
8833
8834 code = GET_CODE (*where);
8835
8836 switch (code)
8837 {
8838 case REG:
8839 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8840 return 1;
8841 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8842 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8843 /* fallthrough */
8844 case SCRATCH:
8845 case CC0:
8846 case PC:
8847 case CONST_INT:
8848 case CONST_WIDE_INT:
8849 case CONST_DOUBLE:
8850 return 0;
8851
8852 /* Do not replace the frame pointer with the stack pointer because
8853 it can cause the delayed instruction to load below the stack.
8854 This occurs when instructions like:
8855
8856 (set (reg/i:SI 24 %i0)
8857 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8858 (const_int -20 [0xffffffec])) 0))
8859
8860 are in the return delay slot. */
8861 case PLUS:
8862 if (GET_CODE (XEXP (*where, 0)) == REG
8863 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8864 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8865 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8866 return 1;
8867 break;
8868
8869 case MEM:
8870 if (SPARC_STACK_BIAS
8871 && GET_CODE (XEXP (*where, 0)) == REG
8872 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8873 return 1;
8874 break;
8875
8876 default:
8877 break;
8878 }
8879
8880 fmt = GET_RTX_FORMAT (code);
8881
8882 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8883 {
8884 if (fmt[i] == 'E')
8885 {
8886 register int j;
8887 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8888 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8889 return 1;
8890 }
8891 else if (fmt[i] == 'e'
8892 && epilogue_renumber (&(XEXP (*where, i)), test))
8893 return 1;
8894 }
8895 return 0;
8896 }
8897 \f
8898 /* Leaf functions and non-leaf functions have different needs. */
8899
8900 static const int
8901 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8902
8903 static const int
8904 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8905
8906 static const int *const reg_alloc_orders[] = {
8907 reg_leaf_alloc_order,
8908 reg_nonleaf_alloc_order};
8909
8910 void
8911 order_regs_for_local_alloc (void)
8912 {
8913 static int last_order_nonleaf = 1;
8914
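  /* Hard register 15 is %o7, which holds the return address of a call; if it
     has ever been live, the current function is not a leaf.  */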
8915 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8916 {
8917 last_order_nonleaf = !last_order_nonleaf;
8918 memcpy ((char *) reg_alloc_order,
8919 (const char *) reg_alloc_orders[last_order_nonleaf],
8920 FIRST_PSEUDO_REGISTER * sizeof (int));
8921 }
8922 }
8923 \f
8924 /* Return 1 if REG and MEM are legitimate enough to allow the various
8925 MEM<-->REG splits to be run. */
8926
8927 int
8928 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8929 {
8930 /* Punt if we are here by mistake. */
8931 gcc_assert (reload_completed);
8932
8933 /* We must have an offsettable memory reference. */
8934 if (!offsettable_memref_p (mem))
8935 return 0;
8936
8937 /* If we have legitimate args for ldd/std, we do not want
8938 the split to happen. */
8939 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8940 return 0;
8941
8942 /* Success. */
8943 return 1;
8944 }
8945
8946 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8947
8948 void
8949 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8950 {
8951 rtx high_part = gen_highpart (mode, dest);
8952 rtx low_part = gen_lowpart (mode, dest);
8953 rtx word0 = adjust_address (src, mode, 0);
8954 rtx word1 = adjust_address (src, mode, 4);
8955
8956 if (reg_overlap_mentioned_p (high_part, word1))
8957 {
8958 emit_move_insn_1 (low_part, word1);
8959 emit_move_insn_1 (high_part, word0);
8960 }
8961 else
8962 {
8963 emit_move_insn_1 (high_part, word0);
8964 emit_move_insn_1 (low_part, word1);
8965 }
8966 }
8967
8968 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8969
8970 void
8971 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8972 {
8973 rtx word0 = adjust_address (dest, mode, 0);
8974 rtx word1 = adjust_address (dest, mode, 4);
8975 rtx high_part = gen_highpart (mode, src);
8976 rtx low_part = gen_lowpart (mode, src);
8977
8978 emit_move_insn_1 (word0, high_part);
8979 emit_move_insn_1 (word1, low_part);
8980 }
8981
8982 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8983
8984 int
8985 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8986 {
8987 /* Punt if we are here by mistake. */
8988 gcc_assert (reload_completed);
8989
8990 if (GET_CODE (reg1) == SUBREG)
8991 reg1 = SUBREG_REG (reg1);
8992 if (GET_CODE (reg1) != REG)
8993 return 0;
8994 const int regno1 = REGNO (reg1);
8995
8996 if (GET_CODE (reg2) == SUBREG)
8997 reg2 = SUBREG_REG (reg2);
8998 if (GET_CODE (reg2) != REG)
8999 return 0;
9000 const int regno2 = REGNO (reg2);
9001
9002 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9003 return 1;
9004
9005 if (TARGET_VIS3)
9006 {
9007 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9008 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9009 return 1;
9010 }
9011
9012 return 0;
9013 }
9014
9015 /* Split a REG <--> REG move into a pair of moves in MODE. */
9016
9017 void
9018 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9019 {
9020 rtx dest1 = gen_highpart (mode, dest);
9021 rtx dest2 = gen_lowpart (mode, dest);
9022 rtx src1 = gen_highpart (mode, src);
9023 rtx src2 = gen_lowpart (mode, src);
9024
9025 /* Now emit using the real source and destination we found, swapping
9026 the order if we detect overlap. */
9027 if (reg_overlap_mentioned_p (dest1, src2))
9028 {
9029 emit_move_insn_1 (dest2, src2);
9030 emit_move_insn_1 (dest1, src1);
9031 }
9032 else
9033 {
9034 emit_move_insn_1 (dest1, src1);
9035 emit_move_insn_1 (dest2, src2);
9036 }
9037 }
9038
9039 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9040 This makes them candidates for using ldd and std insns.
9041
9042 Note reg1 and reg2 *must* be hard registers. */
9043
9044 int
9045 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9046 {
9047 /* We might have been passed a SUBREG. */
9048 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9049 return 0;
9050
9051 if (REGNO (reg1) % 2 != 0)
9052 return 0;
9053
9054 /* Integer ldd is deprecated in SPARC V9. */
9055 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9056 return 0;
9057
9058 return (REGNO (reg1) == REGNO (reg2) - 1);
9059 }
9060
9061 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9062 an ldd or std insn.
9063
9064 This can only happen when addr1 and addr2, the addresses in mem1
9065 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9066 addr1 must also be aligned on a 64-bit boundary.
9067
9068 Also, if dependent_reg_rtx is not null, it should not be used to
9069 compute the address for mem1, i.e. we cannot optimize a sequence
9070 like:
9071 ld [%o0], %o0
9072 ld [%o0 + 4], %o1
9073 to
9074 ldd [%o0], %o0
9075 nor:
9076 ld [%g3 + 4], %g3
9077 ld [%g3], %g2
9078 to
9079 ldd [%g3], %g2
9080
9081 But, note that the transformation from:
9082 ld [%g2 + 4], %g3
9083 ld [%g2], %g2
9084 to
9085 ldd [%g2], %g2
9086 is perfectly fine. Thus, the peephole2 patterns always pass us
9087 the destination register of the first load, never the second one.
9088
9089 For stores we don't have a similar problem, so dependent_reg_rtx is
9090 NULL_RTX. */
9091
9092 int
9093 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9094 {
9095 rtx addr1, addr2;
9096 unsigned int reg1;
9097 HOST_WIDE_INT offset1;
9098
9099 /* The mems cannot be volatile. */
9100 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9101 return 0;
9102
9103 /* MEM1 should be aligned on a 64-bit boundary. */
9104 if (MEM_ALIGN (mem1) < 64)
9105 return 0;
9106
9107 addr1 = XEXP (mem1, 0);
9108 addr2 = XEXP (mem2, 0);
9109
9110 /* Extract a register number and offset (if used) from the first addr. */
9111 if (GET_CODE (addr1) == PLUS)
9112 {
9113 /* If not a REG, return zero. */
9114 if (GET_CODE (XEXP (addr1, 0)) != REG)
9115 return 0;
9116 else
9117 {
9118 reg1 = REGNO (XEXP (addr1, 0));
9119 /* The offset must be constant! */
9120 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9121 return 0;
9122 offset1 = INTVAL (XEXP (addr1, 1));
9123 }
9124 }
9125 else if (GET_CODE (addr1) != REG)
9126 return 0;
9127 else
9128 {
9129 reg1 = REGNO (addr1);
9130 /* This was a simple (mem (reg)) expression. Offset is 0. */
9131 offset1 = 0;
9132 }
9133
9134 /* Make sure the second address is of the form (plus (reg) (const_int)). */
9135 if (GET_CODE (addr2) != PLUS)
9136 return 0;
9137
9138 if (GET_CODE (XEXP (addr2, 0)) != REG
9139 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9140 return 0;
9141
9142 if (reg1 != REGNO (XEXP (addr2, 0)))
9143 return 0;
9144
9145 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9146 return 0;
9147
9148 /* The first offset must be evenly divisible by 8 to ensure the
9149 address is 64-bit aligned. */
9150 if (offset1 % 8 != 0)
9151 return 0;
9152
9153 /* The offset for the second addr must be 4 more than the first addr. */
9154 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9155 return 0;
9156
9157 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9158 instructions. */
9159 return 1;
9160 }
9161
9162 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9163
9164 rtx
9165 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9166 {
9167 rtx x = widen_memory_access (mem1, mode, 0);
9168 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9169 return x;
9170 }
9171
9172 /* Return 1 if reg is a pseudo, or is the first register in
9173 a hard register pair. This makes it suitable for use in
9174 ldd and std insns. */
9175
9176 int
9177 register_ok_for_ldd (rtx reg)
9178 {
9179 /* We might have been passed a SUBREG. */
9180 if (!REG_P (reg))
9181 return 0;
9182
9183 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9184 return (REGNO (reg) % 2 == 0);
9185
9186 return 1;
9187 }
9188
9189 /* Return 1 if OP, a MEM, has an address which is known to be
9190 aligned to an 8-byte boundary. */
9191
9192 int
9193 memory_ok_for_ldd (rtx op)
9194 {
9195 /* In 64-bit mode, we assume that the address is word-aligned. */
9196 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9197 return 0;
9198
9199 if (! can_create_pseudo_p ()
9200 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9201 return 0;
9202
9203 return 1;
9204 }
9205 \f
9206 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9207
9208 static bool
9209 sparc_print_operand_punct_valid_p (unsigned char code)
9210 {
9211 if (code == '#'
9212 || code == '*'
9213 || code == '('
9214 || code == ')'
9215 || code == '_'
9216 || code == '&')
9217 return true;
9218
9219 return false;
9220 }
9221
9222 /* Implement TARGET_PRINT_OPERAND.
9223 Print operand X (an rtx) in assembler syntax to file FILE.
9224 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9225 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9226
9227 static void
9228 sparc_print_operand (FILE *file, rtx x, int code)
9229 {
9230 const char *s;
9231
9232 switch (code)
9233 {
9234 case '#':
9235 /* Output an insn in a delay slot. */
9236 if (final_sequence)
9237 sparc_indent_opcode = 1;
9238 else
9239 fputs ("\n\t nop", file);
9240 return;
9241 case '*':
9242 /* Output an annul flag if there's nothing for the delay slot and we
9243 are optimizing. This is always used with '(' below.
9244 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9245 this is a dbx bug. So, we only do this when optimizing.
9246 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9247 Always emit a nop in case the next instruction is a branch. */
9248 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9249 fputs (",a", file);
9250 return;
9251 case '(':
9252 /* Output a 'nop' if there's nothing for the delay slot and we are
9253 not optimizing. This is always used with '*' above. */
9254 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9255 fputs ("\n\t nop", file);
9256 else if (final_sequence)
9257 sparc_indent_opcode = 1;
9258 return;
9259 case ')':
9260 /* Output the right displacement from the saved PC on function return.
9261 The caller may have placed an "unimp" insn immediately after the call
9262 so we have to account for it. This insn is used in the 32-bit ABI
9263 when calling a function that returns a non zero-sized structure. The
9264 64-bit ABI doesn't have it. Be careful to have this test be the same
9265 as that for the call. The exception is when sparc_std_struct_return
9266 is enabled, the psABI is followed exactly and the adjustment is made
9267 by the code in sparc_struct_value_rtx. The call emitted is the same
9268 when sparc_std_struct_return is enabled. */
9269 if (!TARGET_ARCH64
9270 && cfun->returns_struct
9271 && !sparc_std_struct_return
9272 && DECL_SIZE (DECL_RESULT (current_function_decl))
9273 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9274 == INTEGER_CST
9275 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9276 fputs ("12", file);
9277 else
9278 fputc ('8', file);
9279 return;
9280 case '_':
9281 /* Output the Embedded Medium/Anywhere code model base register. */
9282 fputs (EMBMEDANY_BASE_REG, file);
9283 return;
9284 case '&':
9285 /* Print some local dynamic TLS name. */
9286 if (const char *name = get_some_local_dynamic_name ())
9287 assemble_name (file, name);
9288 else
9289 output_operand_lossage ("'%%&' used without any "
9290 "local dynamic TLS references");
9291 return;
9292
9293 case 'Y':
9294 /* Adjust the operand to take into account a RESTORE operation. */
9295 if (GET_CODE (x) == CONST_INT)
9296 break;
9297 else if (GET_CODE (x) != REG)
9298 output_operand_lossage ("invalid %%Y operand");
9299 else if (REGNO (x) < 8)
9300 fputs (reg_names[REGNO (x)], file);
9301 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9302 fputs (reg_names[REGNO (x)-16], file);
9303 else
9304 output_operand_lossage ("invalid %%Y operand");
9305 return;
9306 case 'L':
9307 /* Print out the low order register name of a register pair. */
9308 if (WORDS_BIG_ENDIAN)
9309 fputs (reg_names[REGNO (x)+1], file);
9310 else
9311 fputs (reg_names[REGNO (x)], file);
9312 return;
9313 case 'H':
9314 /* Print out the high order register name of a register pair. */
9315 if (WORDS_BIG_ENDIAN)
9316 fputs (reg_names[REGNO (x)], file);
9317 else
9318 fputs (reg_names[REGNO (x)+1], file);
9319 return;
9320 case 'R':
9321 /* Print out the second register name of a register pair or quad.
9322 I.e., R (%o0) => %o1. */
9323 fputs (reg_names[REGNO (x)+1], file);
9324 return;
9325 case 'S':
9326 /* Print out the third register name of a register quad.
9327 I.e., S (%o0) => %o2. */
9328 fputs (reg_names[REGNO (x)+2], file);
9329 return;
9330 case 'T':
9331 /* Print out the fourth register name of a register quad.
9332 I.e., T (%o0) => %o3. */
9333 fputs (reg_names[REGNO (x)+3], file);
9334 return;
9335 case 'x':
9336 /* Print a condition code register. */
9337 if (REGNO (x) == SPARC_ICC_REG)
9338 {
9339 switch (GET_MODE (x))
9340 {
9341 case E_CCmode:
9342 case E_CCNZmode:
9343 case E_CCCmode:
9344 case E_CCVmode:
9345 s = "%icc";
9346 break;
9347 case E_CCXmode:
9348 case E_CCXNZmode:
9349 case E_CCXCmode:
9350 case E_CCXVmode:
9351 s = "%xcc";
9352 break;
9353 default:
9354 gcc_unreachable ();
9355 }
9356 fputs (s, file);
9357 }
9358 else
9359 /* %fccN register */
9360 fputs (reg_names[REGNO (x)], file);
9361 return;
9362 case 'm':
9363 /* Print the operand's address only. */
9364 output_address (GET_MODE (x), XEXP (x, 0));
9365 return;
9366 case 'r':
9367 /* In this case we need a register. Use %g0 if the
9368 operand is const0_rtx. */
9369 if (x == const0_rtx
9370 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9371 {
9372 fputs ("%g0", file);
9373 return;
9374 }
9375 else
9376 break;
9377
9378 case 'A':
9379 switch (GET_CODE (x))
9380 {
9381 case IOR:
9382 s = "or";
9383 break;
9384 case AND:
9385 s = "and";
9386 break;
9387 case XOR:
9388 s = "xor";
9389 break;
9390 default:
9391 output_operand_lossage ("invalid %%A operand");
9392 s = "";
9393 break;
9394 }
9395 fputs (s, file);
9396 return;
9397
9398 case 'B':
9399 switch (GET_CODE (x))
9400 {
9401 case IOR:
9402 s = "orn";
9403 break;
9404 case AND:
9405 s = "andn";
9406 break;
9407 case XOR:
9408 s = "xnor";
9409 break;
9410 default:
9411 output_operand_lossage ("invalid %%B operand");
9412 s = "";
9413 break;
9414 }
9415 fputs (s, file);
9416 return;
9417
9418 /* This is used by the conditional move instructions. */
9419 case 'C':
9420 {
9421 machine_mode mode = GET_MODE (XEXP (x, 0));
9422 switch (GET_CODE (x))
9423 {
9424 case NE:
9425 if (mode == CCVmode || mode == CCXVmode)
9426 s = "vs";
9427 else
9428 s = "ne";
9429 break;
9430 case EQ:
9431 if (mode == CCVmode || mode == CCXVmode)
9432 s = "vc";
9433 else
9434 s = "e";
9435 break;
9436 case GE:
9437 if (mode == CCNZmode || mode == CCXNZmode)
9438 s = "pos";
9439 else
9440 s = "ge";
9441 break;
9442 case GT:
9443 s = "g";
9444 break;
9445 case LE:
9446 s = "le";
9447 break;
9448 case LT:
9449 if (mode == CCNZmode || mode == CCXNZmode)
9450 s = "neg";
9451 else
9452 s = "l";
9453 break;
9454 case GEU:
9455 s = "geu";
9456 break;
9457 case GTU:
9458 s = "gu";
9459 break;
9460 case LEU:
9461 s = "leu";
9462 break;
9463 case LTU:
9464 s = "lu";
9465 break;
9466 case LTGT:
9467 s = "lg";
9468 break;
9469 case UNORDERED:
9470 s = "u";
9471 break;
9472 case ORDERED:
9473 s = "o";
9474 break;
9475 case UNLT:
9476 s = "ul";
9477 break;
9478 case UNLE:
9479 s = "ule";
9480 break;
9481 case UNGT:
9482 s = "ug";
9483 break;
9484 case UNGE:
9485 s = "uge";
9486 break;
9487 case UNEQ:
9488 s = "ue";
9489 break;
9490 default:
9491 output_operand_lossage ("invalid %%C operand");
9492 s = "";
9493 break;
9494 }
9495 fputs (s, file);
9496 return;
9497 }
9498
9499 /* These are used by the movr instruction pattern. */
9500 case 'D':
9501 {
9502 switch (GET_CODE (x))
9503 {
9504 case NE:
9505 s = "ne";
9506 break;
9507 case EQ:
9508 s = "e";
9509 break;
9510 case GE:
9511 s = "gez";
9512 break;
9513 case LT:
9514 s = "lz";
9515 break;
9516 case LE:
9517 s = "lez";
9518 break;
9519 case GT:
9520 s = "gz";
9521 break;
9522 default:
9523 output_operand_lossage ("invalid %%D operand");
9524 s = "";
9525 break;
9526 }
9527 fputs (s, file);
9528 return;
9529 }
9530
9531 case 'b':
9532 {
9533 /* Print a sign-extended character. */
9534 int i = trunc_int_for_mode (INTVAL (x), QImode);
9535 fprintf (file, "%d", i);
9536 return;
9537 }
9538
9539 case 'f':
9540 /* Operand must be a MEM; write its address. */
9541 if (GET_CODE (x) != MEM)
9542 output_operand_lossage ("invalid %%f operand");
9543 output_address (GET_MODE (x), XEXP (x, 0));
9544 return;
9545
9546 case 's':
9547 {
9548 /* Print a sign-extended 32-bit value. */
9549 HOST_WIDE_INT i;
9550 if (GET_CODE (x) == CONST_INT)
9551 i = INTVAL (x);
9552 else
9553 {
9554 output_operand_lossage ("invalid %%s operand");
9555 return;
9556 }
9557 i = trunc_int_for_mode (i, SImode);
9558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9559 return;
9560 }
9561
9562 case 0:
9563 /* Do nothing special. */
9564 break;
9565
9566 default:
9567 /* Undocumented flag. */
9568 output_operand_lossage ("invalid operand output code");
9569 }
9570
9571 if (GET_CODE (x) == REG)
9572 fputs (reg_names[REGNO (x)], file);
9573 else if (GET_CODE (x) == MEM)
9574 {
9575 fputc ('[', file);
9576 /* Poor Sun assembler doesn't understand absolute addressing. */
9577 if (CONSTANT_P (XEXP (x, 0)))
9578 fputs ("%g0+", file);
9579 output_address (GET_MODE (x), XEXP (x, 0));
9580 fputc (']', file);
9581 }
9582 else if (GET_CODE (x) == HIGH)
9583 {
9584 fputs ("%hi(", file);
9585 output_addr_const (file, XEXP (x, 0));
9586 fputc (')', file);
9587 }
9588 else if (GET_CODE (x) == LO_SUM)
9589 {
9590 sparc_print_operand (file, XEXP (x, 0), 0);
9591 if (TARGET_CM_MEDMID)
9592 fputs ("+%l44(", file);
9593 else
9594 fputs ("+%lo(", file);
9595 output_addr_const (file, XEXP (x, 1));
9596 fputc (')', file);
9597 }
9598 else if (GET_CODE (x) == CONST_DOUBLE)
9599 output_operand_lossage ("floating-point constant not a valid immediate operand");
9600 else
9601 output_addr_const (file, x);
9602 }
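/* A small worked example (illustrative only; SPARC uses big-endian word
   order): for a DImode operand held in the register pair starting at %o0,
   "%H0" prints "%o0" (the high word), "%L0" prints "%o1" (the low word)
   and "%R0" likewise prints "%o1"; with a const0_rtx operand, "%r0"
   prints "%g0" rather than an immediate zero.  */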
9603
9604 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9605
9606 static void
9607 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9608 {
9609 register rtx base, index = 0;
9610 int offset = 0;
9611 register rtx addr = x;
9612
9613 if (REG_P (addr))
9614 fputs (reg_names[REGNO (addr)], file);
9615 else if (GET_CODE (addr) == PLUS)
9616 {
9617 if (CONST_INT_P (XEXP (addr, 0)))
9618 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9619 else if (CONST_INT_P (XEXP (addr, 1)))
9620 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9621 else
9622 base = XEXP (addr, 0), index = XEXP (addr, 1);
9623 if (GET_CODE (base) == LO_SUM)
9624 {
9625 gcc_assert (USE_AS_OFFSETABLE_LO10
9626 && TARGET_ARCH64
9627 && ! TARGET_CM_MEDMID);
9628 output_operand (XEXP (base, 0), 0);
9629 fputs ("+%lo(", file);
9630 output_address (VOIDmode, XEXP (base, 1));
9631 fprintf (file, ")+%d", offset);
9632 }
9633 else
9634 {
9635 fputs (reg_names[REGNO (base)], file);
9636 if (index == 0)
9637 fprintf (file, "%+d", offset);
9638 else if (REG_P (index))
9639 fprintf (file, "+%s", reg_names[REGNO (index)]);
9640 else if (GET_CODE (index) == SYMBOL_REF
9641 || GET_CODE (index) == LABEL_REF
9642 || GET_CODE (index) == CONST)
9643 fputc ('+', file), output_addr_const (file, index);
9644 else gcc_unreachable ();
9645 }
9646 }
9647 else if (GET_CODE (addr) == MINUS
9648 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9649 {
9650 output_addr_const (file, XEXP (addr, 0));
9651 fputs ("-(", file);
9652 output_addr_const (file, XEXP (addr, 1));
9653 fputs ("-.)", file);
9654 }
9655 else if (GET_CODE (addr) == LO_SUM)
9656 {
9657 output_operand (XEXP (addr, 0), 0);
9658 if (TARGET_CM_MEDMID)
9659 fputs ("+%l44(", file);
9660 else
9661 fputs ("+%lo(", file);
9662 output_address (VOIDmode, XEXP (addr, 1));
9663 fputc (')', file);
9664 }
9665 else if (flag_pic
9666 && GET_CODE (addr) == CONST
9667 && GET_CODE (XEXP (addr, 0)) == MINUS
9668 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9669 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9670 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9671 {
9672 addr = XEXP (addr, 0);
9673 output_addr_const (file, XEXP (addr, 0));
9674 /* Group the args of the second CONST in parenthesis. */
9675 fputs ("-(", file);
9676 /* Skip past the second CONST--it does nothing for us. */
9677 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9678 /* Close the parenthesis. */
9679 fputc (')', file);
9680 }
9681 else
9682 {
9683 output_addr_const (file, addr);
9684 }
9685 }
9686 \f
9687 /* Target hook for assembling integer objects. The sparc version has
9688 special handling for aligned DI-mode objects. */
9689
9690 static bool
9691 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9692 {
9693 /* ??? We only output .xword's for symbols and only then in environments
9694 where the assembler can handle them. */
9695 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9696 {
9697 if (TARGET_V9)
9698 {
9699 assemble_integer_with_op ("\t.xword\t", x);
9700 return true;
9701 }
9702 else
9703 {
9704 assemble_aligned_integer (4, const0_rtx);
9705 assemble_aligned_integer (4, x);
9706 return true;
9707 }
9708 }
9709 return default_assemble_integer (x, size, aligned_p);
9710 }
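/* For instance (a sketch of the resulting assembly): an aligned 8-byte
   reference to `sym' comes out as

        .xword  sym

   on TARGET_V9, and as a zero high word followed by the 32-bit address,
   typically

        .word   0
        .word   sym

   on assemblers without 64-bit support.  */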
9711 \f
9712 /* Return the value of a code used in the .proc pseudo-op that says
9713 what kind of result this function returns. For non-C types, we pick
9714 the closest C type. */
9715
9716 #ifndef SHORT_TYPE_SIZE
9717 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9718 #endif
9719
9720 #ifndef INT_TYPE_SIZE
9721 #define INT_TYPE_SIZE BITS_PER_WORD
9722 #endif
9723
9724 #ifndef LONG_TYPE_SIZE
9725 #define LONG_TYPE_SIZE BITS_PER_WORD
9726 #endif
9727
9728 #ifndef LONG_LONG_TYPE_SIZE
9729 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9730 #endif
9731
9732 #ifndef FLOAT_TYPE_SIZE
9733 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9734 #endif
9735
9736 #ifndef DOUBLE_TYPE_SIZE
9737 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9738 #endif
9739
9740 #ifndef LONG_DOUBLE_TYPE_SIZE
9741 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9742 #endif
9743
9744 unsigned long
9745 sparc_type_code (register tree type)
9746 {
9747 register unsigned long qualifiers = 0;
9748 register unsigned shift;
9749
9750 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9751 setting more, since some assemblers will give an error for this. Also,
9752 we must be careful to avoid shifts of 32 bits or more to avoid getting
9753 unpredictable results. */
9754
9755 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9756 {
9757 switch (TREE_CODE (type))
9758 {
9759 case ERROR_MARK:
9760 return qualifiers;
9761
9762 case ARRAY_TYPE:
9763 qualifiers |= (3 << shift);
9764 break;
9765
9766 case FUNCTION_TYPE:
9767 case METHOD_TYPE:
9768 qualifiers |= (2 << shift);
9769 break;
9770
9771 case POINTER_TYPE:
9772 case REFERENCE_TYPE:
9773 case OFFSET_TYPE:
9774 qualifiers |= (1 << shift);
9775 break;
9776
9777 case RECORD_TYPE:
9778 return (qualifiers | 8);
9779
9780 case UNION_TYPE:
9781 case QUAL_UNION_TYPE:
9782 return (qualifiers | 9);
9783
9784 case ENUMERAL_TYPE:
9785 return (qualifiers | 10);
9786
9787 case VOID_TYPE:
9788 return (qualifiers | 16);
9789
9790 case INTEGER_TYPE:
9791 /* If this is a range type, consider it to be the underlying
9792 type. */
9793 if (TREE_TYPE (type) != 0)
9794 break;
9795
9796 /* Carefully distinguish all the standard types of C,
9797 without messing up if the language is not C. We do this by
9798 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9799 look at both the names and the above fields, but that's redundant.
9800 Any type whose size is between two C types will be considered
9801 to be the wider of the two types. Also, we do not have a
9802 special code to use for "long long", so anything wider than
9803 long is treated the same. Note that we can't distinguish
9804 between "int" and "long" in this code if they are the same
9805 size, but that's fine, since neither can the assembler. */
9806
9807 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9808 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9809
9810 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9811 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9812
9813 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9814 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9815
9816 else
9817 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9818
9819 case REAL_TYPE:
9820 /* If this is a range type, consider it to be the underlying
9821 type. */
9822 if (TREE_TYPE (type) != 0)
9823 break;
9824
9825 /* Carefully distinguish all the standard types of C,
9826 without messing up if the language is not C. */
9827
9828 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9829 return (qualifiers | 6);
9830
9831 else
9832 return (qualifiers | 7);
9833
9834 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9835 /* ??? We need to distinguish between double and float complex types,
9836 but I don't know how yet because I can't reach this code from
9837 existing front-ends. */
9838 return (qualifiers | 7); /* Who knows? */
9839
9840 case VECTOR_TYPE:
9841 case BOOLEAN_TYPE: /* Boolean truth value type. */
9842 case LANG_TYPE:
9843 case NULLPTR_TYPE:
9844 return qualifiers;
9845
9846 default:
9847 gcc_unreachable (); /* Not a type! */
9848 }
9849 }
9850
9851 return qualifiers;
9852 }
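/* A worked example (informal): for the C type "int *", the first loop
   iteration records POINTER_TYPE as (1 << 6) = 0x40, the next iteration
   reaches the INTEGER_TYPE for int and returns 0x40 | 4 = 0x44.  Each
   additional level of pointer/array/function nesting consumes two more
   bits, up to the 30-bit limit noted above.  */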
9853 \f
9854 /* Nested function support. */
9855
9856 /* Emit RTL insns to initialize the variable parts of a trampoline.
9857 FNADDR is an RTX for the address of the function's pure code.
9858 CXT is an RTX for the static chain value for the function.
9859
9860 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9861 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9862 (to store insns). This is a bit excessive. Perhaps a different
9863 mechanism would be better here.
9864
9865 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9866
9867 static void
9868 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9869 {
9870 /* SPARC 32-bit trampoline:
9871
9872 sethi %hi(fn), %g1
9873 sethi %hi(static), %g2
9874 jmp %g1+%lo(fn)
9875 or %g2, %lo(static), %g2
9876
9877 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9878 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9879 */
9880
9881 emit_move_insn
9882 (adjust_address (m_tramp, SImode, 0),
9883 expand_binop (SImode, ior_optab,
9884 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9885 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9886 NULL_RTX, 1, OPTAB_DIRECT));
9887
9888 emit_move_insn
9889 (adjust_address (m_tramp, SImode, 4),
9890 expand_binop (SImode, ior_optab,
9891 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9892 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9893 NULL_RTX, 1, OPTAB_DIRECT));
9894
9895 emit_move_insn
9896 (adjust_address (m_tramp, SImode, 8),
9897 expand_binop (SImode, ior_optab,
9898 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9899 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9900 NULL_RTX, 1, OPTAB_DIRECT));
9901
9902 emit_move_insn
9903 (adjust_address (m_tramp, SImode, 12),
9904 expand_binop (SImode, ior_optab,
9905 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9906 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9907 NULL_RTX, 1, OPTAB_DIRECT));
9908
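  /* The four words stored above match the template in the comment
     line-for-line (a reading aid, not new behaviour): 0x03000000 and
     0x05000000 are "sethi %hi(...), %g1" and "sethi %hi(...), %g2",
     0x81c06000 is "jmp %g1+%lo(fn)" and 0x8410a000 is
     "or %g2, %lo(static), %g2", each IOR'ed with the matching address
     bits produced by the shifts and ands.  */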
9909 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9910 aligned on a 16 byte boundary so one flush clears it all. */
9911 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9912 if (sparc_cpu != PROCESSOR_ULTRASPARC
9913 && sparc_cpu != PROCESSOR_ULTRASPARC3
9914 && sparc_cpu != PROCESSOR_NIAGARA
9915 && sparc_cpu != PROCESSOR_NIAGARA2
9916 && sparc_cpu != PROCESSOR_NIAGARA3
9917 && sparc_cpu != PROCESSOR_NIAGARA4
9918 && sparc_cpu != PROCESSOR_NIAGARA7
9919 && sparc_cpu != PROCESSOR_M8)
9920 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9921
9922 /* Call __enable_execute_stack after writing onto the stack to make sure
9923 the stack address is accessible. */
9924 #ifdef HAVE_ENABLE_EXECUTE_STACK
9925 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9926 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9927 #endif
9928
9929 }
9930
9931 /* The 64-bit version is simpler because it makes more sense to load the
9932 values as "immediate" data out of the trampoline. It's also easier since
9933 we can read the PC without clobbering a register. */
9934
9935 static void
9936 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9937 {
9938 /* SPARC 64-bit trampoline:
9939
9940 rd %pc, %g1
9941 ldx [%g1+24], %g5
9942 jmp %g5
9943 ldx [%g1+16], %g5
9944 +16 bytes data
9945 */
9946
9947 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9948 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9949 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9950 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9951 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9952 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9953 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9954 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9955 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9956 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9957 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9958
9959 if (sparc_cpu != PROCESSOR_ULTRASPARC
9960 && sparc_cpu != PROCESSOR_ULTRASPARC3
9961 && sparc_cpu != PROCESSOR_NIAGARA
9962 && sparc_cpu != PROCESSOR_NIAGARA2
9963 && sparc_cpu != PROCESSOR_NIAGARA3
9964 && sparc_cpu != PROCESSOR_NIAGARA4
9965 && sparc_cpu != PROCESSOR_NIAGARA7
9966 && sparc_cpu != PROCESSOR_M8)
9967 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9968
9969 /* Call __enable_execute_stack after writing onto the stack to make sure
9970 the stack address is accessible. */
9971 #ifdef HAVE_ENABLE_EXECUTE_STACK
9972 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9973 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9974 #endif
9975 }
9976
9977 /* Worker for TARGET_TRAMPOLINE_INIT. */
9978
9979 static void
9980 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9981 {
9982 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9983 cxt = force_reg (Pmode, cxt);
9984 if (TARGET_ARCH64)
9985 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9986 else
9987 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9988 }
9989 \f
9990 /* Adjust the cost of a scheduling dependency. Return the new cost of
9991 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9992
9993 static int
9994 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9995 int cost)
9996 {
9997 enum attr_type insn_type;
9998
9999 if (recog_memoized (insn) < 0)
10000 return cost;
10001
10002 insn_type = get_attr_type (insn);
10003
10004 if (dep_type == 0)
10005 {
10006 /* Data dependency; DEP_INSN writes a register that INSN reads some
10007 cycles later. */
10008
10009 /* If a load, then the dependence must be on the memory address;
10010 add an extra "cycle". Note that the cost could be two cycles
10011 if the reg was written late in an instruction group; we cannot tell
10012 here. */
10013 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10014 return cost + 3;
10015
10016 /* Get the delay only if the address of the store is the dependence. */
10017 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10018 {
10019 rtx pat = PATTERN (insn);
10020 rtx dep_pat = PATTERN (dep_insn);
10021
10022 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10023 return cost; /* This should not happen! */
10024
10025 /* The dependency between the two instructions was on the data that
10026 is being stored. Assume that this implies that the address of the
10027 store is not dependent. */
10028 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10029 return cost;
10030
10031 return cost + 3; /* An approximation. */
10032 }
10033
10034 /* A shift instruction cannot receive its data from an instruction
10035 in the same cycle; add a one cycle penalty. */
10036 if (insn_type == TYPE_SHIFT)
10037 return cost + 3; /* Split before cascade into shift. */
10038 }
10039 else
10040 {
10041 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10042 INSN writes some cycles later. */
10043
10044 /* These are only significant for the FPU; writing an FP reg before
10045 the FPU has finished with it stalls the processor. */
10046
10047 /* Reusing an integer register causes no problems. */
10048 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10049 return 0;
10050 }
10051
10052 return cost;
10053 }
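/* Informal illustration of the above: if DEP_INSN sets %o2 and INSN is
   "ld [%o2+8], %o3", the dependence can only be through the address, so
   the load cost is bumped; if INSN is "st %o3, [%fp-16]" and DEP_INSN
   computed %o3, the dependence is on the stored data and the cost is
   left unchanged.  */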
10054
10055 static int
10056 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10057 int cost)
10058 {
10059 enum attr_type insn_type, dep_type;
10060 rtx pat = PATTERN (insn);
10061 rtx dep_pat = PATTERN (dep_insn);
10062
10063 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10064 return cost;
10065
10066 insn_type = get_attr_type (insn);
10067 dep_type = get_attr_type (dep_insn);
10068
10069 switch (dtype)
10070 {
10071 case 0:
10072 /* Data dependency; DEP_INSN writes a register that INSN reads some
10073 cycles later. */
10074
10075 switch (insn_type)
10076 {
10077 case TYPE_STORE:
10078 case TYPE_FPSTORE:
10079 /* Get the delay iff the address of the store is the dependence. */
10080 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10081 return cost;
10082
10083 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10084 return cost;
10085 return cost + 3;
10086
10087 case TYPE_LOAD:
10088 case TYPE_SLOAD:
10089 case TYPE_FPLOAD:
10090 /* If a load, then the dependence must be on the memory address. If
10091 the addresses aren't equal, then it might be a false dependency. */
10092 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10093 {
10094 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10095 || GET_CODE (SET_DEST (dep_pat)) != MEM
10096 || GET_CODE (SET_SRC (pat)) != MEM
10097 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10098 XEXP (SET_SRC (pat), 0)))
10099 return cost + 2;
10100
10101 return cost + 8;
10102 }
10103 break;
10104
10105 case TYPE_BRANCH:
10106 /* Compare-to-branch latency is 0; there is no benefit from
10107 separating compare and branch. */
10108 if (dep_type == TYPE_COMPARE)
10109 return 0;
10110 /* Floating-point compare-to-branch latency is less than
10111 compare-to-conditional-move latency. */
10112 if (dep_type == TYPE_FPCMP)
10113 return cost - 1;
10114 break;
10115 default:
10116 break;
10117 }
10118 break;
10119
10120 case REG_DEP_ANTI:
10121 /* Anti-dependencies only penalize the FPU. */
10122 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10123 return 0;
10124 break;
10125
10126 default:
10127 break;
10128 }
10129
10130 return cost;
10131 }
10132
10133 static int
10134 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10135 unsigned int)
10136 {
10137 switch (sparc_cpu)
10138 {
10139 case PROCESSOR_SUPERSPARC:
10140 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10141 break;
10142 case PROCESSOR_HYPERSPARC:
10143 case PROCESSOR_SPARCLITE86X:
10144 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10145 break;
10146 default:
10147 break;
10148 }
10149 return cost;
10150 }
10151
10152 static void
10153 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10154 int sched_verbose ATTRIBUTE_UNUSED,
10155 int max_ready ATTRIBUTE_UNUSED)
10156 {}
10157
10158 static int
10159 sparc_use_sched_lookahead (void)
10160 {
10161 if (sparc_cpu == PROCESSOR_NIAGARA
10162 || sparc_cpu == PROCESSOR_NIAGARA2
10163 || sparc_cpu == PROCESSOR_NIAGARA3)
10164 return 0;
10165 if (sparc_cpu == PROCESSOR_NIAGARA4
10166 || sparc_cpu == PROCESSOR_NIAGARA7
10167 || sparc_cpu == PROCESSOR_M8)
10168 return 2;
10169 if (sparc_cpu == PROCESSOR_ULTRASPARC
10170 || sparc_cpu == PROCESSOR_ULTRASPARC3)
10171 return 4;
10172 if ((1 << sparc_cpu) &
10173 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
10174 (1 << PROCESSOR_SPARCLITE86X)))
10175 return 3;
10176 return 0;
10177 }
10178
10179 static int
10180 sparc_issue_rate (void)
10181 {
10182 switch (sparc_cpu)
10183 {
10184 case PROCESSOR_NIAGARA:
10185 case PROCESSOR_NIAGARA2:
10186 case PROCESSOR_NIAGARA3:
10187 default:
10188 return 1;
10189 case PROCESSOR_NIAGARA4:
10190 case PROCESSOR_NIAGARA7:
10191 case PROCESSOR_V9:
10192 /* Assume V9 processors are capable of at least dual-issue. */
10193 return 2;
10194 case PROCESSOR_SUPERSPARC:
10195 return 3;
10196 case PROCESSOR_HYPERSPARC:
10197 case PROCESSOR_SPARCLITE86X:
10198 return 2;
10199 case PROCESSOR_ULTRASPARC:
10200 case PROCESSOR_ULTRASPARC3:
10201 case PROCESSOR_M8:
10202 return 4;
10203 }
10204 }
10205
10206 static int
10207 set_extends (rtx_insn *insn)
10208 {
10209 register rtx pat = PATTERN (insn);
10210
10211 switch (GET_CODE (SET_SRC (pat)))
10212 {
10213 /* Load and some shift instructions zero extend. */
10214 case MEM:
10215 case ZERO_EXTEND:
10216 /* sethi clears the high bits. */
10217 case HIGH:
10218 /* LO_SUM is used with sethi; sethi clears the high
10219 bits and the values used with lo_sum are positive. */
10220 case LO_SUM:
10221 /* Store flag stores 0 or 1. */
10222 case LT: case LTU:
10223 case GT: case GTU:
10224 case LE: case LEU:
10225 case GE: case GEU:
10226 case EQ:
10227 case NE:
10228 return 1;
10229 case AND:
10230 {
10231 rtx op0 = XEXP (SET_SRC (pat), 0);
10232 rtx op1 = XEXP (SET_SRC (pat), 1);
10233 if (GET_CODE (op1) == CONST_INT)
10234 return INTVAL (op1) >= 0;
10235 if (GET_CODE (op0) != REG)
10236 return 0;
10237 if (sparc_check_64 (op0, insn) == 1)
10238 return 1;
10239 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10240 }
10241 case IOR:
10242 case XOR:
10243 {
10244 rtx op0 = XEXP (SET_SRC (pat), 0);
10245 rtx op1 = XEXP (SET_SRC (pat), 1);
10246 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10247 return 0;
10248 if (GET_CODE (op1) == CONST_INT)
10249 return INTVAL (op1) >= 0;
10250 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10251 }
10252 case LSHIFTRT:
10253 return GET_MODE (SET_SRC (pat)) == SImode;
10254 /* Positive integers leave the high bits zero. */
10255 case CONST_INT:
10256 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10257 case ASHIFTRT:
10258 case SIGN_EXTEND:
10259 return - (GET_MODE (SET_SRC (pat)) == SImode);
10260 case REG:
10261 return sparc_check_64 (SET_SRC (pat), insn);
10262 default:
10263 return 0;
10264 }
10265 }
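/* For example (informal): after "sethi %hi(sym), %o0" (HIGH) or
   "and %o0, 255, %o1" (AND with a non-negative constant) the upper 32
   bits of the destination are known to be zero, so set_extends returns 1;
   after an SImode "sra %o0, 3, %o1" (ASHIFTRT) the value is known to be
   sign-extended instead, and -1 is returned.  */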
10266
10267 /* We _ought_ to have only one kind per function, but... */
10268 static GTY(()) rtx sparc_addr_diff_list;
10269 static GTY(()) rtx sparc_addr_list;
10270
10271 void
10272 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10273 {
10274 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10275 if (diff)
10276 sparc_addr_diff_list
10277 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10278 else
10279 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10280 }
10281
10282 static void
10283 sparc_output_addr_vec (rtx vec)
10284 {
10285 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10286 int idx, vlen = XVECLEN (body, 0);
10287
10288 #ifdef ASM_OUTPUT_ADDR_VEC_START
10289 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10290 #endif
10291
10292 #ifdef ASM_OUTPUT_CASE_LABEL
10293 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10294 NEXT_INSN (lab));
10295 #else
10296 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10297 #endif
10298
10299 for (idx = 0; idx < vlen; idx++)
10300 {
10301 ASM_OUTPUT_ADDR_VEC_ELT
10302 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10303 }
10304
10305 #ifdef ASM_OUTPUT_ADDR_VEC_END
10306 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10307 #endif
10308 }
10309
10310 static void
10311 sparc_output_addr_diff_vec (rtx vec)
10312 {
10313 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10314 rtx base = XEXP (XEXP (body, 0), 0);
10315 int idx, vlen = XVECLEN (body, 1);
10316
10317 #ifdef ASM_OUTPUT_ADDR_VEC_START
10318 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10319 #endif
10320
10321 #ifdef ASM_OUTPUT_CASE_LABEL
10322 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10323 NEXT_INSN (lab));
10324 #else
10325 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10326 #endif
10327
10328 for (idx = 0; idx < vlen; idx++)
10329 {
10330 ASM_OUTPUT_ADDR_DIFF_ELT
10331 (asm_out_file,
10332 body,
10333 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10334 CODE_LABEL_NUMBER (base));
10335 }
10336
10337 #ifdef ASM_OUTPUT_ADDR_VEC_END
10338 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10339 #endif
10340 }
10341
10342 static void
10343 sparc_output_deferred_case_vectors (void)
10344 {
10345 rtx t;
10346 int align;
10347
10348 if (sparc_addr_list == NULL_RTX
10349 && sparc_addr_diff_list == NULL_RTX)
10350 return;
10351
10352 /* Align to cache line in the function's code section. */
10353 switch_to_section (current_function_section ());
10354
10355 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10356 if (align > 0)
10357 ASM_OUTPUT_ALIGN (asm_out_file, align);
10358
10359 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10360 sparc_output_addr_vec (XEXP (t, 0));
10361 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10362 sparc_output_addr_diff_vec (XEXP (t, 0));
10363
10364 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10365 }
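/* Roughly what gets emitted (illustrative; the exact directives depend on
   the target headers): an absolute vector is a label followed by one
   entry per case target, e.g.

   .L34:
        .word   .L27
        .word   .L29

   (".xword" for 64-bit absolute vectors), while a difference vector uses
   entries of the form ".word .L27-.L34" relative to the base label.  */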
10366
10367 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10368 unknown. Return 1 if the high bits are zero, -1 if the register is
10369 sign extended. */
10370 int
10371 sparc_check_64 (rtx x, rtx_insn *insn)
10372 {
10373 /* If a register is set only once it is safe to ignore insns this
10374 code does not know how to handle. The loop will either recognize
10375 the single set and return the correct value or fail to recognize
10376 it and return 0. */
10377 int set_once = 0;
10378 rtx y = x;
10379
10380 gcc_assert (GET_CODE (x) == REG);
10381
10382 if (GET_MODE (x) == DImode)
10383 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10384
10385 if (flag_expensive_optimizations
10386 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10387 set_once = 1;
10388
10389 if (insn == 0)
10390 {
10391 if (set_once)
10392 insn = get_last_insn_anywhere ();
10393 else
10394 return 0;
10395 }
10396
10397 while ((insn = PREV_INSN (insn)))
10398 {
10399 switch (GET_CODE (insn))
10400 {
10401 case JUMP_INSN:
10402 case NOTE:
10403 break;
10404 case CODE_LABEL:
10405 case CALL_INSN:
10406 default:
10407 if (! set_once)
10408 return 0;
10409 break;
10410 case INSN:
10411 {
10412 rtx pat = PATTERN (insn);
10413 if (GET_CODE (pat) != SET)
10414 return 0;
10415 if (rtx_equal_p (x, SET_DEST (pat)))
10416 return set_extends (insn);
10417 if (y && rtx_equal_p (y, SET_DEST (pat)))
10418 return set_extends (insn);
10419 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10420 return 0;
10421 }
10422 }
10423 }
10424 return 0;
10425 }
10426
10427 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10428 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10429
10430 const char *
10431 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10432 {
10433 static char asm_code[60];
10434
10435 /* The scratch register is only required when the destination
10436 register is not a 64-bit global or out register. */
10437 if (which_alternative != 2)
10438 operands[3] = operands[0];
10439
10440 /* We can only shift by constants <= 63. */
10441 if (GET_CODE (operands[2]) == CONST_INT)
10442 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10443
10444 if (GET_CODE (operands[1]) == CONST_INT)
10445 {
10446 output_asm_insn ("mov\t%1, %3", operands);
10447 }
10448 else
10449 {
10450 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10451 if (sparc_check_64 (operands[1], insn) <= 0)
10452 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10453 output_asm_insn ("or\t%L1, %3, %3", operands);
10454 }
10455
10456 strcpy (asm_code, opcode);
10457
10458 if (which_alternative != 2)
10459 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10460 else
10461 return
10462 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10463 }
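/* An illustrative expansion (not literal compiler output): shifting the
   64-bit value held in the pair %i0/%i1 left by 13 into the pair %o4/%o5
   with this routine could emit

        sllx   %i0, 32, %o4
        srl    %i1, 0, %i1
        or     %i1, %o4, %o4
        sllx   %o4, 13, %o5
        srlx   %o5, 32, %o4

   i.e. the halves are merged into one 64-bit register, shifted there, and
   the result is split back into the destination pair.  */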
10464 \f
10465 /* Output rtl to increment the profiler label LABELNO
10466 for profiling a function entry. */
10467
10468 void
10469 sparc_profile_hook (int labelno)
10470 {
10471 char buf[32];
10472 rtx lab, fun;
10473
10474 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10475 if (NO_PROFILE_COUNTERS)
10476 {
10477 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10478 }
10479 else
10480 {
10481 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10482 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10483 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10484 }
10485 }
10486 \f
10487 #ifdef TARGET_SOLARIS
10488 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10489
10490 static void
10491 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10492 tree decl ATTRIBUTE_UNUSED)
10493 {
10494 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10495 {
10496 solaris_elf_asm_comdat_section (name, flags, decl);
10497 return;
10498 }
10499
10500 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10501
10502 if (!(flags & SECTION_DEBUG))
10503 fputs (",#alloc", asm_out_file);
10504 #if HAVE_GAS_SECTION_EXCLUDE
10505 if (flags & SECTION_EXCLUDE)
10506 fputs (",#exclude", asm_out_file);
10507 #endif
10508 if (flags & SECTION_WRITE)
10509 fputs (",#write", asm_out_file);
10510 if (flags & SECTION_TLS)
10511 fputs (",#tls", asm_out_file);
10512 if (flags & SECTION_CODE)
10513 fputs (",#execinstr", asm_out_file);
10514
10515 if (flags & SECTION_NOTYPE)
10516 ;
10517 else if (flags & SECTION_BSS)
10518 fputs (",#nobits", asm_out_file);
10519 else
10520 fputs (",#progbits", asm_out_file);
10521
10522 fputc ('\n', asm_out_file);
10523 }
10524 #endif /* TARGET_SOLARIS */
10525
10526 /* We do not allow indirect calls to be optimized into sibling calls.
10527
10528 We cannot use sibling calls when delayed branches are disabled
10529 because they will likely require the call delay slot to be filled.
10530
10531 Also, on SPARC 32-bit we cannot emit a sibling call when the
10532 current function returns a structure. This is because the "unimp
10533 after call" convention would cause the callee to return to the
10534 wrong place. The generic code already disallows cases where the
10535 function being called returns a structure.
10536
10537 It may seem strange how this last case could occur. Usually there
10538 is code after the call which jumps to epilogue code which dumps the
10539 return value into the struct return area. That ought to invalidate
10540 the sibling call right? Well, in the C++ case we can end up passing
10541 the pointer to the struct return area to a constructor (which returns
10542 void) and then nothing else happens. Such a sibling call would look
10543 valid without the added check here.
10544
10545 VxWorks PIC PLT entries require the global pointer to be initialized
10546 on entry. We therefore can't emit sibling calls to them. */
10547 static bool
10548 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10549 {
10550 return (decl
10551 && flag_delayed_branch
10552 && (TARGET_ARCH64 || ! cfun->returns_struct)
10553 && !(TARGET_VXWORKS_RTP
10554 && flag_pic
10555 && !targetm.binds_local_p (decl)));
10556 }
10557 \f
10558 /* libfunc renaming. */
10559
10560 static void
10561 sparc_init_libfuncs (void)
10562 {
10563 if (TARGET_ARCH32)
10564 {
10565 /* Use the subroutines that Sun's library provides for integer
10566 multiply and divide. The `*' prevents an underscore from
10567 being prepended by the compiler. .umul is a little faster
10568 than .mul. */
10569 set_optab_libfunc (smul_optab, SImode, "*.umul");
10570 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10571 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10572 set_optab_libfunc (smod_optab, SImode, "*.rem");
10573 set_optab_libfunc (umod_optab, SImode, "*.urem");
10574
10575 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10576 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10577 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10578 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10579 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10580 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10581
10582 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10583 is because with soft-float, the SFmode and DFmode sqrt
10584 instructions will be absent, and the compiler will notice and
10585 try to use the TFmode sqrt instruction for calls to the
10586 builtin function sqrt, but this fails. */
10587 if (TARGET_FPU)
10588 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10589
10590 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10591 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10592 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10593 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10594 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10595 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10596
10597 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10598 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10599 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10600 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10601
10602 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10603 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10604 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10605 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10606
10607 if (DITF_CONVERSION_LIBFUNCS)
10608 {
10609 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10610 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10611 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10612 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10613 }
10614
10615 if (SUN_CONVERSION_LIBFUNCS)
10616 {
10617 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10618 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10619 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10620 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10621 }
10622 }
10623 if (TARGET_ARCH64)
10624 {
10625 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10626 do not exist in the library. Make sure the compiler does not
10627 emit calls to them by accident. (It should always use the
10628 hardware instructions.) */
10629 set_optab_libfunc (smul_optab, SImode, 0);
10630 set_optab_libfunc (sdiv_optab, SImode, 0);
10631 set_optab_libfunc (udiv_optab, SImode, 0);
10632 set_optab_libfunc (smod_optab, SImode, 0);
10633 set_optab_libfunc (umod_optab, SImode, 0);
10634
10635 if (SUN_INTEGER_MULTIPLY_64)
10636 {
10637 set_optab_libfunc (smul_optab, DImode, "__mul64");
10638 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10639 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10640 set_optab_libfunc (smod_optab, DImode, "__rem64");
10641 set_optab_libfunc (umod_optab, DImode, "__urem64");
10642 }
10643
10644 if (SUN_CONVERSION_LIBFUNCS)
10645 {
10646 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10647 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10648 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10649 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10650 }
10651 }
10652 }
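/* As a concrete illustration (sketch): with -m32, a TFmode multiply such
   as "long double c = a * b;" becomes a call to _Q_mul, and a conversion
   from long double to int becomes a call to _Q_qtoi, per the tables
   registered above; with -m64 the SImode multiply/divide entries are
   cleared so that the hardware instructions are always used.  */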
10653 \f
10654 /* SPARC builtins. */
10655 enum sparc_builtins
10656 {
10657 /* FPU builtins. */
10658 SPARC_BUILTIN_LDFSR,
10659 SPARC_BUILTIN_STFSR,
10660
10661 /* VIS 1.0 builtins. */
10662 SPARC_BUILTIN_FPACK16,
10663 SPARC_BUILTIN_FPACK32,
10664 SPARC_BUILTIN_FPACKFIX,
10665 SPARC_BUILTIN_FEXPAND,
10666 SPARC_BUILTIN_FPMERGE,
10667 SPARC_BUILTIN_FMUL8X16,
10668 SPARC_BUILTIN_FMUL8X16AU,
10669 SPARC_BUILTIN_FMUL8X16AL,
10670 SPARC_BUILTIN_FMUL8SUX16,
10671 SPARC_BUILTIN_FMUL8ULX16,
10672 SPARC_BUILTIN_FMULD8SUX16,
10673 SPARC_BUILTIN_FMULD8ULX16,
10674 SPARC_BUILTIN_FALIGNDATAV4HI,
10675 SPARC_BUILTIN_FALIGNDATAV8QI,
10676 SPARC_BUILTIN_FALIGNDATAV2SI,
10677 SPARC_BUILTIN_FALIGNDATADI,
10678 SPARC_BUILTIN_WRGSR,
10679 SPARC_BUILTIN_RDGSR,
10680 SPARC_BUILTIN_ALIGNADDR,
10681 SPARC_BUILTIN_ALIGNADDRL,
10682 SPARC_BUILTIN_PDIST,
10683 SPARC_BUILTIN_EDGE8,
10684 SPARC_BUILTIN_EDGE8L,
10685 SPARC_BUILTIN_EDGE16,
10686 SPARC_BUILTIN_EDGE16L,
10687 SPARC_BUILTIN_EDGE32,
10688 SPARC_BUILTIN_EDGE32L,
10689 SPARC_BUILTIN_FCMPLE16,
10690 SPARC_BUILTIN_FCMPLE32,
10691 SPARC_BUILTIN_FCMPNE16,
10692 SPARC_BUILTIN_FCMPNE32,
10693 SPARC_BUILTIN_FCMPGT16,
10694 SPARC_BUILTIN_FCMPGT32,
10695 SPARC_BUILTIN_FCMPEQ16,
10696 SPARC_BUILTIN_FCMPEQ32,
10697 SPARC_BUILTIN_FPADD16,
10698 SPARC_BUILTIN_FPADD16S,
10699 SPARC_BUILTIN_FPADD32,
10700 SPARC_BUILTIN_FPADD32S,
10701 SPARC_BUILTIN_FPSUB16,
10702 SPARC_BUILTIN_FPSUB16S,
10703 SPARC_BUILTIN_FPSUB32,
10704 SPARC_BUILTIN_FPSUB32S,
10705 SPARC_BUILTIN_ARRAY8,
10706 SPARC_BUILTIN_ARRAY16,
10707 SPARC_BUILTIN_ARRAY32,
10708
10709 /* VIS 2.0 builtins. */
10710 SPARC_BUILTIN_EDGE8N,
10711 SPARC_BUILTIN_EDGE8LN,
10712 SPARC_BUILTIN_EDGE16N,
10713 SPARC_BUILTIN_EDGE16LN,
10714 SPARC_BUILTIN_EDGE32N,
10715 SPARC_BUILTIN_EDGE32LN,
10716 SPARC_BUILTIN_BMASK,
10717 SPARC_BUILTIN_BSHUFFLEV4HI,
10718 SPARC_BUILTIN_BSHUFFLEV8QI,
10719 SPARC_BUILTIN_BSHUFFLEV2SI,
10720 SPARC_BUILTIN_BSHUFFLEDI,
10721
10722 /* VIS 3.0 builtins. */
10723 SPARC_BUILTIN_CMASK8,
10724 SPARC_BUILTIN_CMASK16,
10725 SPARC_BUILTIN_CMASK32,
10726 SPARC_BUILTIN_FCHKSM16,
10727 SPARC_BUILTIN_FSLL16,
10728 SPARC_BUILTIN_FSLAS16,
10729 SPARC_BUILTIN_FSRL16,
10730 SPARC_BUILTIN_FSRA16,
10731 SPARC_BUILTIN_FSLL32,
10732 SPARC_BUILTIN_FSLAS32,
10733 SPARC_BUILTIN_FSRL32,
10734 SPARC_BUILTIN_FSRA32,
10735 SPARC_BUILTIN_PDISTN,
10736 SPARC_BUILTIN_FMEAN16,
10737 SPARC_BUILTIN_FPADD64,
10738 SPARC_BUILTIN_FPSUB64,
10739 SPARC_BUILTIN_FPADDS16,
10740 SPARC_BUILTIN_FPADDS16S,
10741 SPARC_BUILTIN_FPSUBS16,
10742 SPARC_BUILTIN_FPSUBS16S,
10743 SPARC_BUILTIN_FPADDS32,
10744 SPARC_BUILTIN_FPADDS32S,
10745 SPARC_BUILTIN_FPSUBS32,
10746 SPARC_BUILTIN_FPSUBS32S,
10747 SPARC_BUILTIN_FUCMPLE8,
10748 SPARC_BUILTIN_FUCMPNE8,
10749 SPARC_BUILTIN_FUCMPGT8,
10750 SPARC_BUILTIN_FUCMPEQ8,
10751 SPARC_BUILTIN_FHADDS,
10752 SPARC_BUILTIN_FHADDD,
10753 SPARC_BUILTIN_FHSUBS,
10754 SPARC_BUILTIN_FHSUBD,
10755 SPARC_BUILTIN_FNHADDS,
10756 SPARC_BUILTIN_FNHADDD,
10757 SPARC_BUILTIN_UMULXHI,
10758 SPARC_BUILTIN_XMULX,
10759 SPARC_BUILTIN_XMULXHI,
10760
10761 /* VIS 4.0 builtins. */
10762 SPARC_BUILTIN_FPADD8,
10763 SPARC_BUILTIN_FPADDS8,
10764 SPARC_BUILTIN_FPADDUS8,
10765 SPARC_BUILTIN_FPADDUS16,
10766 SPARC_BUILTIN_FPCMPLE8,
10767 SPARC_BUILTIN_FPCMPGT8,
10768 SPARC_BUILTIN_FPCMPULE16,
10769 SPARC_BUILTIN_FPCMPUGT16,
10770 SPARC_BUILTIN_FPCMPULE32,
10771 SPARC_BUILTIN_FPCMPUGT32,
10772 SPARC_BUILTIN_FPMAX8,
10773 SPARC_BUILTIN_FPMAX16,
10774 SPARC_BUILTIN_FPMAX32,
10775 SPARC_BUILTIN_FPMAXU8,
10776 SPARC_BUILTIN_FPMAXU16,
10777 SPARC_BUILTIN_FPMAXU32,
10778 SPARC_BUILTIN_FPMIN8,
10779 SPARC_BUILTIN_FPMIN16,
10780 SPARC_BUILTIN_FPMIN32,
10781 SPARC_BUILTIN_FPMINU8,
10782 SPARC_BUILTIN_FPMINU16,
10783 SPARC_BUILTIN_FPMINU32,
10784 SPARC_BUILTIN_FPSUB8,
10785 SPARC_BUILTIN_FPSUBS8,
10786 SPARC_BUILTIN_FPSUBUS8,
10787 SPARC_BUILTIN_FPSUBUS16,
10788
10789 /* VIS 4.0B builtins. */
10790
10791 /* Note that all the DICTUNPACK* entries should be kept
10792 contiguous. */
10793 SPARC_BUILTIN_FIRST_DICTUNPACK,
10794 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10795 SPARC_BUILTIN_DICTUNPACK16,
10796 SPARC_BUILTIN_DICTUNPACK32,
10797 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10798
10799 /* Note that all the FPCMP*SHL entries should be kept
10800 contiguous. */
10801 SPARC_BUILTIN_FIRST_FPCMPSHL,
10802 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10803 SPARC_BUILTIN_FPCMPGT8SHL,
10804 SPARC_BUILTIN_FPCMPEQ8SHL,
10805 SPARC_BUILTIN_FPCMPNE8SHL,
10806 SPARC_BUILTIN_FPCMPLE16SHL,
10807 SPARC_BUILTIN_FPCMPGT16SHL,
10808 SPARC_BUILTIN_FPCMPEQ16SHL,
10809 SPARC_BUILTIN_FPCMPNE16SHL,
10810 SPARC_BUILTIN_FPCMPLE32SHL,
10811 SPARC_BUILTIN_FPCMPGT32SHL,
10812 SPARC_BUILTIN_FPCMPEQ32SHL,
10813 SPARC_BUILTIN_FPCMPNE32SHL,
10814 SPARC_BUILTIN_FPCMPULE8SHL,
10815 SPARC_BUILTIN_FPCMPUGT8SHL,
10816 SPARC_BUILTIN_FPCMPULE16SHL,
10817 SPARC_BUILTIN_FPCMPUGT16SHL,
10818 SPARC_BUILTIN_FPCMPULE32SHL,
10819 SPARC_BUILTIN_FPCMPUGT32SHL,
10820 SPARC_BUILTIN_FPCMPDE8SHL,
10821 SPARC_BUILTIN_FPCMPDE16SHL,
10822 SPARC_BUILTIN_FPCMPDE32SHL,
10823 SPARC_BUILTIN_FPCMPUR8SHL,
10824 SPARC_BUILTIN_FPCMPUR16SHL,
10825 SPARC_BUILTIN_FPCMPUR32SHL,
10826 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10827
10828 SPARC_BUILTIN_MAX
10829 };
10830
10831 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10832 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10833
10834 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10835 The instruction should require a constant operand of some sort. The
10836 function prints an error if OPVAL is not valid. */
10837
10838 static int
10839 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10840 {
10841 if (GET_CODE (opval) != CONST_INT)
10842 {
10843 error ("%qs expects a constant argument", insn_data[icode].name);
10844 return false;
10845 }
10846
10847 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10848 {
10849 error ("constant argument out of range for %qs", insn_data[icode].name);
10850 return false;
10851 }
10852 return true;
10853 }
10854
10855 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10856 function decl or NULL_TREE if the builtin was not added. */
10857
10858 static tree
10859 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10860 tree type)
10861 {
10862 tree t
10863 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10864
10865 if (t)
10866 {
10867 sparc_builtins[code] = t;
10868 sparc_builtins_icode[code] = icode;
10869 }
10870
10871 return t;
10872 }
10873
10874 /* Likewise, but also marks the function as "const". */
10875
10876 static tree
10877 def_builtin_const (const char *name, enum insn_code icode,
10878 enum sparc_builtins code, tree type)
10879 {
10880 tree t = def_builtin (name, icode, code, type);
10881
10882 if (t)
10883 TREE_READONLY (t) = 1;
10884
10885 return t;
10886 }
10887
10888 /* Implement the TARGET_INIT_BUILTINS target hook.
10889 Create builtin functions for special SPARC instructions. */
10890
10891 static void
10892 sparc_init_builtins (void)
10893 {
10894 if (TARGET_FPU)
10895 sparc_fpu_init_builtins ();
10896
10897 if (TARGET_VIS)
10898 sparc_vis_init_builtins ();
10899 }
10900
10901 /* Create builtin functions for FPU instructions. */
10902
10903 static void
10904 sparc_fpu_init_builtins (void)
10905 {
10906 tree ftype
10907 = build_function_type_list (void_type_node,
10908 build_pointer_type (unsigned_type_node), 0);
10909 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10910 SPARC_BUILTIN_LDFSR, ftype);
10911 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10912 SPARC_BUILTIN_STFSR, ftype);
10913 }
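/* Example use from user code (a sketch):

       unsigned int fsr;
       __builtin_store_fsr (&fsr);    -- copies %fsr into fsr
       __builtin_load_fsr (&fsr);     -- copies fsr back into %fsr

   Both builtins take a pointer to unsigned int, as declared above.  */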
10914
10915 /* Create builtin functions for VIS instructions. */
10916
10917 static void
10918 sparc_vis_init_builtins (void)
10919 {
10920 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10921 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10922 tree v4hi = build_vector_type (intHI_type_node, 4);
10923 tree v2hi = build_vector_type (intHI_type_node, 2);
10924 tree v2si = build_vector_type (intSI_type_node, 2);
10925 tree v1si = build_vector_type (intSI_type_node, 1);
10926
10927 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10928 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10929 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10930 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10931 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10932 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10933 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10934 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10935 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10936 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10937 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10938 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10939 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10940 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10941 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10942 v8qi, v8qi,
10943 intDI_type_node, 0);
10944 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10945 v8qi, v8qi, 0);
10946 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10947 v8qi, v8qi, 0);
10948 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10949 intSI_type_node, 0);
10950 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10951 intSI_type_node, 0);
10952 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10953 intDI_type_node, 0);
10954 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10955 intDI_type_node,
10956 intDI_type_node, 0);
10957 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10958 intSI_type_node,
10959 intSI_type_node, 0);
10960 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10961 ptr_type_node,
10962 intSI_type_node, 0);
10963 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10964 ptr_type_node,
10965 intDI_type_node, 0);
10966 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10967 ptr_type_node,
10968 ptr_type_node, 0);
10969 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10970 ptr_type_node,
10971 ptr_type_node, 0);
10972 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10973 v4hi, v4hi, 0);
10974 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10975 v2si, v2si, 0);
10976 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10977 v4hi, v4hi, 0);
10978 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10979 v2si, v2si, 0);
10980 tree void_ftype_di = build_function_type_list (void_type_node,
10981 intDI_type_node, 0);
10982 tree di_ftype_void = build_function_type_list (intDI_type_node,
10983 void_type_node, 0);
10984 tree void_ftype_si = build_function_type_list (void_type_node,
10985 intSI_type_node, 0);
10986 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10987 float_type_node,
10988 float_type_node, 0);
10989 tree df_ftype_df_df = build_function_type_list (double_type_node,
10990 double_type_node,
10991 double_type_node, 0);
10992
10993 /* Packing and expanding vectors. */
10994 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10995 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10996 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10997 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10998 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10999 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11000 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11001 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11002 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11003 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11004
11005 /* Multiplications. */
11006 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11007 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11008 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11009 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11010 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11011 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11012 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11013 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11014 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11015 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11016 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11017 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11018 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11019 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11020
11021 /* Data aligning. */
11022 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11023 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11024 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11025 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11026 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11027 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11028 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11029 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11030
11031 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11032 SPARC_BUILTIN_WRGSR, void_ftype_di);
11033 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11034 SPARC_BUILTIN_RDGSR, di_ftype_void);
11035
11036 if (TARGET_ARCH64)
11037 {
11038 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11039 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11040 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11041 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11042 }
11043 else
11044 {
11045 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11046 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11047 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11048 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11049 }
11050
11051 /* Pixel distance. */
11052 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11053 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11054
11055 /* Edge handling. */
11056 if (TARGET_ARCH64)
11057 {
11058 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11059 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11060 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11061 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11062 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11063 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11064 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11065 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11066 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11067 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11068 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11069 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11070 }
11071 else
11072 {
11073 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11074 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11075 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11076 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11077 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11078 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11079 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11080 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11081 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11082 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11083 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11084 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11085 }
11086
11087 /* Pixel compare. */
11088 if (TARGET_ARCH64)
11089 {
11090 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11091 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11092 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11093 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11094 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11095 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11096 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11097 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11098 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11099 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11100 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11101 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11102 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11103 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11104 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11105 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11106 }
11107 else
11108 {
11109 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11110 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11111 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11112 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11113 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11114 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11115 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11116 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11117 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11118 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11119 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11120 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11121 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11122 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11123 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11124 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11125 }
11126
11127 /* Addition and subtraction. */
11128 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11129 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11130 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11131 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11132 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11133 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11134 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11135 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11136 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11137 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11138 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11139 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11140 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11141 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11142 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11143 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11144
11145 /* Three-dimensional array addressing. */
11146 if (TARGET_ARCH64)
11147 {
11148 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11149 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11150 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11151 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11152 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11153 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11154 }
11155 else
11156 {
11157 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11158 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11159 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11160 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11161 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11162 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11163 }
11164
11165 if (TARGET_VIS2)
11166 {
11167 /* Edge handling. */
11168 if (TARGET_ARCH64)
11169 {
11170 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11171 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11172 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11173 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11174 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11175 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11176 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11177 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11178 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11179 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11180 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11181 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11182 }
11183 else
11184 {
11185 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11186 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11187 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11188 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11189 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11190 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11191 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11192 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11193 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11194 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11195 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11196 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11197 }
11198
11199 /* Byte mask and shuffle. */
11200 if (TARGET_ARCH64)
11201 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11202 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11203 else
11204 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11205 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11206 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11207 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11208 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11209 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11210 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11211 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11212 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11213 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11214 }
11215
11216 if (TARGET_VIS3)
11217 {
11218 if (TARGET_ARCH64)
11219 {
11220 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11221 SPARC_BUILTIN_CMASK8, void_ftype_di);
11222 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11223 SPARC_BUILTIN_CMASK16, void_ftype_di);
11224 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11225 SPARC_BUILTIN_CMASK32, void_ftype_di);
11226 }
11227 else
11228 {
11229 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11230 SPARC_BUILTIN_CMASK8, void_ftype_si);
11231 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11232 SPARC_BUILTIN_CMASK16, void_ftype_si);
11233 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11234 SPARC_BUILTIN_CMASK32, void_ftype_si);
11235 }
11236
11237 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11238 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11239
11240 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11241 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11242 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11243 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11244 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11245 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11246 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11247 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11248 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11249 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11250 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11251 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11252 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11253 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11254 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11255 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11256
11257 if (TARGET_ARCH64)
11258 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11259 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11260 else
11261 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11262 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11263
11264 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11265 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11266 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11267 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11268 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11269 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11270
11271 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11272 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11273 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11274 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11275 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11276 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11277 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11278 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11279 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11280 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11281 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11282 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11283 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11284 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11285 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11286 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11287
11288 if (TARGET_ARCH64)
11289 {
11290 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11291 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11292 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11293 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11294 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11295 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11296 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11297 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11298 }
11299 else
11300 {
11301 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11302 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11303 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11304 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11305 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11306 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11307 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11308 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11309 }
11310
11311 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11312 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11313 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11314 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11315 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11316 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11317 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11318 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11319 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11320 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11321 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11322 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11323
11324 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11325 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11326 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11327 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11328 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11329 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11330 }
11331
11332 if (TARGET_VIS4)
11333 {
11334 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11335 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11336 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11337 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11338 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11339 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11340 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11341 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11342
11343
11344 if (TARGET_ARCH64)
11345 {
11346 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11347 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11348 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11349 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11350 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11351 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11352 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11353 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11354 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11355 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11356 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11357 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11358 }
11359 else
11360 {
11361 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11362 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11363 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11364 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11365 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11366 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11367 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11368 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11369 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11370 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11371 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11372 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11373 }
11374
11375 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11376 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11377 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11378 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11379 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11380 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11381 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11382 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11383 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11384 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11385 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11386 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11387 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11388 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11389 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11390 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11391 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11392 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11393 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11394 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11395 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11396 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11397 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11398 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11399 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11400 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11401 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11402 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11403 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11404 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11405 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11406 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11407 }
11408
11409 if (TARGET_VIS4B)
11410 {
11411 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11412 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11413 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11414 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11415 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11416 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11417
11418 if (TARGET_ARCH64)
11419 {
11420 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11421 v8qi, v8qi,
11422 intSI_type_node, 0);
11423 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11424 v4hi, v4hi,
11425 intSI_type_node, 0);
11426 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11427 v2si, v2si,
11428 intSI_type_node, 0);
11429
11430 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11431 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11432 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11433 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11434 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11435 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11436 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11437 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11438
11439 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11440 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11441 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11442 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11443 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11444 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11445 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11446 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11447
11448 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11449 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11450 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11451 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11452 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11453 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11454 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11455 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11456
11457
11458 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11459 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11460 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11461 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11462
11463 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11464 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11465 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11466 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11467
11468 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11469 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11470 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11471 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11472
11473 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11474 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11475 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11476 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11477 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11478 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11479
11480 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11481 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11482 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11483 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11484 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11485 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11486
11487 }
11488 else
11489 {
11490 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11491 v8qi, v8qi,
11492 intSI_type_node, 0);
11493 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11494 v4hi, v4hi,
11495 intSI_type_node, 0);
11496 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11497 v2si, v2si,
11498 intSI_type_node, 0);
11499
11500 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11501 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11502 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11503 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11504 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11505 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11506 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11507 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11508
11509 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11510 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11511 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11512 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11513 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11514 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11515 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11516 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11517
11518 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11519 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11520 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11521 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11522 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11523 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11524 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11525 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11526
11527
11528 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11529 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11530 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11531 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11532
11533 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11534 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11535 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11536 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11537
11538 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11539 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11540 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11541 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11542
11543 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11544 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11545 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11546 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11547 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11548 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11549
11550 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11551 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11552 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11553 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11554 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11555 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11556 }
11557 }
11558 }
11559
11560 /* Implement TARGET_BUILTIN_DECL hook. */
11561
11562 static tree
11563 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11564 {
11565 if (code >= SPARC_BUILTIN_MAX)
11566 return error_mark_node;
11567
11568 return sparc_builtins[code];
11569 }
11570
11571 /* Implement TARGET_EXPAND_BUILTIN hook. */
11572
11573 static rtx
11574 sparc_expand_builtin (tree exp, rtx target,
11575 rtx subtarget ATTRIBUTE_UNUSED,
11576 machine_mode tmode ATTRIBUTE_UNUSED,
11577 int ignore ATTRIBUTE_UNUSED)
11578 {
11579 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11580 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11581 enum insn_code icode = sparc_builtins_icode[code];
11582 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11583 call_expr_arg_iterator iter;
11584 int arg_count = 0;
11585 rtx pat, op[4];
11586 tree arg;
11587
11588 if (nonvoid)
11589 {
11590 machine_mode tmode = insn_data[icode].operand[0].mode;
11591 if (!target
11592 || GET_MODE (target) != tmode
11593 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11594 op[0] = gen_reg_rtx (tmode);
11595 else
11596 op[0] = target;
11597 }
11598 else
11599 op[0] = NULL_RTX;
11600
11601 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11602 {
11603 const struct insn_operand_data *insn_op;
11604 int idx;
11605
11606 if (arg == error_mark_node)
11607 return NULL_RTX;
11608
11609 arg_count++;
11610 idx = arg_count - !nonvoid;
11611 insn_op = &insn_data[icode].operand[idx];
11612 op[arg_count] = expand_normal (arg);
11613
11614 /* Some of the builtins require constant arguments. We check
11615 for this here. */
11616 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11617 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11618 && arg_count == 3)
11619 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11620 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11621 && arg_count == 2))
11622 {
11623 if (!check_constant_argument (icode, idx, op[arg_count]))
11624 return const0_rtx;
11625 }
11626
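 /* The FSR load/store patterns expect a SImode memory operand, so wrap
 the address in a MEM here, forcing it into a register first if it is
 not already a valid address. */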
11627 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11628 {
11629 if (!address_operand (op[arg_count], SImode))
11630 {
11631 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11632 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11633 }
11634 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11635 }
11636
11637 else if (insn_op->mode == V1DImode
11638 && GET_MODE (op[arg_count]) == DImode)
11639 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11640
11641 else if (insn_op->mode == V1SImode
11642 && GET_MODE (op[arg_count]) == SImode)
11643 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11644
11645 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11646 insn_op->mode))
11647 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11648 }
11649
11650 switch (arg_count)
11651 {
11652 case 0:
11653 pat = GEN_FCN (icode) (op[0]);
11654 break;
11655 case 1:
11656 if (nonvoid)
11657 pat = GEN_FCN (icode) (op[0], op[1]);
11658 else
11659 pat = GEN_FCN (icode) (op[1]);
11660 break;
11661 case 2:
11662 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11663 break;
11664 case 3:
11665 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11666 break;
11667 default:
11668 gcc_unreachable ();
11669 }
11670
11671 if (!pat)
11672 return NULL_RTX;
11673
11674 emit_insn (pat);
11675
11676 return (nonvoid ? op[0] : const0_rtx);
11677 }
11678
11679 /* Return the upper 16 bits of the 8x16 multiplication. */
11680
11681 static int
11682 sparc_vis_mul8x16 (int e8, int e16)
11683 {
11684 return (e8 * e16 + 128) / 256;
11685 }
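 /* The +128 term rounds to nearest: e.g. e8 = 2, e16 = 200 gives
 (2*200 + 128) / 256 = 2, whereas plain truncation of 400/256 would give 1. */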
11686
11687 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11688 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11689
11690 static void
11691 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11692 tree inner_type, tree cst0, tree cst1)
11693 {
11694 unsigned i, num = VECTOR_CST_NELTS (cst0);
11695 int scale;
11696
11697 switch (fncode)
11698 {
11699 case SPARC_BUILTIN_FMUL8X16:
11700 for (i = 0; i < num; ++i)
11701 {
11702 int val
11703 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11704 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11705 n_elts->quick_push (build_int_cst (inner_type, val));
11706 }
11707 break;
11708
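 /* For the AU and AL variants every product uses a single common scale,
 taken from the first (AU) or second (AL) element of CST1 below. */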
11709 case SPARC_BUILTIN_FMUL8X16AU:
11710 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11711
11712 for (i = 0; i < num; ++i)
11713 {
11714 int val
11715 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11716 scale);
11717 n_elts->quick_push (build_int_cst (inner_type, val));
11718 }
11719 break;
11720
11721 case SPARC_BUILTIN_FMUL8X16AL:
11722 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11723
11724 for (i = 0; i < num; ++i)
11725 {
11726 int val
11727 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11728 scale);
11729 n_elts->quick_push (build_int_cst (inner_type, val));
11730 }
11731 break;
11732
11733 default:
11734 gcc_unreachable ();
11735 }
11736 }
11737
11738 /* Implement TARGET_FOLD_BUILTIN hook.
11739
11740 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11741 result of the function call is ignored. NULL_TREE is returned if the
11742 function could not be folded. */
11743
11744 static tree
11745 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11746 tree *args, bool ignore)
11747 {
11748 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11749 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11750 tree arg0, arg1, arg2;
11751
11752 if (ignore)
11753 switch (code)
11754 {
11755 case SPARC_BUILTIN_LDFSR:
11756 case SPARC_BUILTIN_STFSR:
11757 case SPARC_BUILTIN_ALIGNADDR:
11758 case SPARC_BUILTIN_WRGSR:
11759 case SPARC_BUILTIN_BMASK:
11760 case SPARC_BUILTIN_CMASK8:
11761 case SPARC_BUILTIN_CMASK16:
11762 case SPARC_BUILTIN_CMASK32:
11763 break;
11764
11765 default:
11766 return build_zero_cst (rtype);
11767 }
11768
11769 switch (code)
11770 {
11771 case SPARC_BUILTIN_FEXPAND:
11772 arg0 = args[0];
11773 STRIP_NOPS (arg0);
11774
11775 if (TREE_CODE (arg0) == VECTOR_CST)
11776 {
11777 tree inner_type = TREE_TYPE (rtype);
11778 unsigned i;
11779
11780 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11781 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11782 {
11783 unsigned HOST_WIDE_INT val
11784 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11785 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11786 }
11787 return n_elts.build ();
11788 }
11789 break;
11790
11791 case SPARC_BUILTIN_FMUL8X16:
11792 case SPARC_BUILTIN_FMUL8X16AU:
11793 case SPARC_BUILTIN_FMUL8X16AL:
11794 arg0 = args[0];
11795 arg1 = args[1];
11796 STRIP_NOPS (arg0);
11797 STRIP_NOPS (arg1);
11798
11799 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11800 {
11801 tree inner_type = TREE_TYPE (rtype);
11802 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11803 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11804 return n_elts.build ();
11805 }
11806 break;
11807
11808 case SPARC_BUILTIN_FPMERGE:
11809 arg0 = args[0];
11810 arg1 = args[1];
11811 STRIP_NOPS (arg0);
11812 STRIP_NOPS (arg1);
11813
11814 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11815 {
11816 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11817 unsigned i;
11818 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11819 {
11820 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11821 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11822 }
11823
11824 return n_elts.build ();
11825 }
11826 break;
11827
11828 case SPARC_BUILTIN_PDIST:
11829 case SPARC_BUILTIN_PDISTN:
11830 arg0 = args[0];
11831 arg1 = args[1];
11832 STRIP_NOPS (arg0);
11833 STRIP_NOPS (arg1);
11834 if (code == SPARC_BUILTIN_PDIST)
11835 {
11836 arg2 = args[2];
11837 STRIP_NOPS (arg2);
11838 }
11839 else
11840 arg2 = integer_zero_node;
11841
11842 if (TREE_CODE (arg0) == VECTOR_CST
11843 && TREE_CODE (arg1) == VECTOR_CST
11844 && TREE_CODE (arg2) == INTEGER_CST)
11845 {
11846 bool overflow = false;
11847 widest_int result = wi::to_widest (arg2);
11848 widest_int tmp;
11849 unsigned i;
11850
11851 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11852 {
11853 tree e0 = VECTOR_CST_ELT (arg0, i);
11854 tree e1 = VECTOR_CST_ELT (arg1, i);
11855
11856 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11857
11858 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11859 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11860 if (wi::neg_p (tmp))
11861 tmp = wi::neg (tmp, &neg2_ovf);
11862 else
11863 neg2_ovf = wi::OVF_NONE;
11864 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11865 overflow |= ((neg1_ovf != wi::OVF_NONE)
11866 | (neg2_ovf != wi::OVF_NONE)
11867 | (add1_ovf != wi::OVF_NONE)
11868 | (add2_ovf != wi::OVF_NONE));
11869 }
11870
11871 gcc_assert (!overflow);
11872
11873 return wide_int_to_tree (rtype, result);
11874 }
11875
11876 default:
11877 break;
11878 }
11879
11880 return NULL_TREE;
11881 }
11882 \f
11883 /* ??? This duplicates information provided to the compiler by the
11884 ??? scheduler description. Some day, teach genautomata to output
11885 ??? the latencies and then CSE will just use that. */
11886
11887 static bool
11888 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11889 int opno ATTRIBUTE_UNUSED,
11890 int *total, bool speed ATTRIBUTE_UNUSED)
11891 {
11892 int code = GET_CODE (x);
11893 bool float_mode_p = FLOAT_MODE_P (mode);
11894
11895 switch (code)
11896 {
11897 case CONST_INT:
11898 if (SMALL_INT (x))
11899 *total = 0;
11900 else
11901 *total = 2;
11902 return true;
11903
11904 case CONST_WIDE_INT:
11905 *total = 0;
11906 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11907 *total += 2;
11908 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11909 *total += 2;
11910 return true;
11911
11912 case HIGH:
11913 *total = 2;
11914 return true;
11915
11916 case CONST:
11917 case LABEL_REF:
11918 case SYMBOL_REF:
11919 *total = 4;
11920 return true;
11921
11922 case CONST_DOUBLE:
11923 *total = 8;
11924 return true;
11925
11926 case MEM:
11927 /* If outer-code was a sign or zero extension, a cost
11928 of COSTS_N_INSNS (1) was already added in. This is
11929 why we are subtracting it back out. */
11930 if (outer_code == ZERO_EXTEND)
11931 {
11932 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11933 }
11934 else if (outer_code == SIGN_EXTEND)
11935 {
11936 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11937 }
11938 else if (float_mode_p)
11939 {
11940 *total = sparc_costs->float_load;
11941 }
11942 else
11943 {
11944 *total = sparc_costs->int_load;
11945 }
11946
11947 return true;
11948
11949 case PLUS:
11950 case MINUS:
11951 if (float_mode_p)
11952 *total = sparc_costs->float_plusminus;
11953 else
11954 *total = COSTS_N_INSNS (1);
11955 return false;
11956
11957 case FMA:
11958 {
11959 rtx sub;
11960
11961 gcc_assert (float_mode_p);
11962 *total = sparc_costs->float_mul;
11963
11964 sub = XEXP (x, 0);
11965 if (GET_CODE (sub) == NEG)
11966 sub = XEXP (sub, 0);
11967 *total += rtx_cost (sub, mode, FMA, 0, speed);
11968
11969 sub = XEXP (x, 2);
11970 if (GET_CODE (sub) == NEG)
11971 sub = XEXP (sub, 0);
11972 *total += rtx_cost (sub, mode, FMA, 2, speed);
11973 return true;
11974 }
11975
11976 case MULT:
11977 if (float_mode_p)
11978 *total = sparc_costs->float_mul;
11979 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11980 *total = COSTS_N_INSNS (25);
11981 else
11982 {
11983 int bit_cost;
11984
11985 bit_cost = 0;
11986 if (sparc_costs->int_mul_bit_factor)
11987 {
11988 int nbits;
11989
11990 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11991 {
11992 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
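 /* Count the set bits in the constant multiplier by repeatedly
 clearing the lowest set bit (Kernighan's method). */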
11993 for (nbits = 0; value != 0; value &= value - 1)
11994 nbits++;
11995 }
11996 else
11997 nbits = 7;
11998
11999 if (nbits < 3)
12000 nbits = 3;
12001 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12002 bit_cost = COSTS_N_INSNS (bit_cost);
12003 }
12004
12005 if (mode == DImode || !TARGET_HARD_MUL)
12006 *total = sparc_costs->int_mulX + bit_cost;
12007 else
12008 *total = sparc_costs->int_mul + bit_cost;
12009 }
12010 return false;
12011
12012 case ASHIFT:
12013 case ASHIFTRT:
12014 case LSHIFTRT:
12015 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12016 return false;
12017
12018 case DIV:
12019 case UDIV:
12020 case MOD:
12021 case UMOD:
12022 if (float_mode_p)
12023 {
12024 if (mode == DFmode)
12025 *total = sparc_costs->float_div_df;
12026 else
12027 *total = sparc_costs->float_div_sf;
12028 }
12029 else
12030 {
12031 if (mode == DImode)
12032 *total = sparc_costs->int_divX;
12033 else
12034 *total = sparc_costs->int_div;
12035 }
12036 return false;
12037
12038 case NEG:
12039 if (! float_mode_p)
12040 {
12041 *total = COSTS_N_INSNS (1);
12042 return false;
12043 }
12044 /* FALLTHRU */
12045
12046 case ABS:
12047 case FLOAT:
12048 case UNSIGNED_FLOAT:
12049 case FIX:
12050 case UNSIGNED_FIX:
12051 case FLOAT_EXTEND:
12052 case FLOAT_TRUNCATE:
12053 *total = sparc_costs->float_move;
12054 return false;
12055
12056 case SQRT:
12057 if (mode == DFmode)
12058 *total = sparc_costs->float_sqrt_df;
12059 else
12060 *total = sparc_costs->float_sqrt_sf;
12061 return false;
12062
12063 case COMPARE:
12064 if (float_mode_p)
12065 *total = sparc_costs->float_cmp;
12066 else
12067 *total = COSTS_N_INSNS (1);
12068 return false;
12069
12070 case IF_THEN_ELSE:
12071 if (float_mode_p)
12072 *total = sparc_costs->float_cmove;
12073 else
12074 *total = sparc_costs->int_cmove;
12075 return false;
12076
12077 case IOR:
12078 /* Handle the NAND vector patterns. */
12079 if (sparc_vector_mode_supported_p (mode)
12080 && GET_CODE (XEXP (x, 0)) == NOT
12081 && GET_CODE (XEXP (x, 1)) == NOT)
12082 {
12083 *total = COSTS_N_INSNS (1);
12084 return true;
12085 }
12086 else
12087 return false;
12088
12089 default:
12090 return false;
12091 }
12092 }
12093
12094 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12095
12096 static inline bool
12097 general_or_i64_p (reg_class_t rclass)
12098 {
12099 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12100 }
12101
12102 /* Implement TARGET_REGISTER_MOVE_COST. */
12103
12104 static int
12105 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12106 reg_class_t from, reg_class_t to)
12107 {
12108 bool need_memory = false;
12109
12110 /* This helps postreload CSE to eliminate redundant comparisons. */
12111 if (from == NO_REGS || to == NO_REGS)
12112 return 100;
12113
12114 if (from == FPCC_REGS || to == FPCC_REGS)
12115 need_memory = true;
12116 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12117 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12118 {
12119 if (TARGET_VIS3)
12120 {
12121 int size = GET_MODE_SIZE (mode);
12122 if (size == 8 || size == 4)
12123 {
12124 if (! TARGET_ARCH32 || size == 4)
12125 return 4;
12126 else
12127 return 6;
12128 }
12129 }
12130 need_memory = true;
12131 }
12132
12133 if (need_memory)
12134 {
12135 if (sparc_cpu == PROCESSOR_ULTRASPARC
12136 || sparc_cpu == PROCESSOR_ULTRASPARC3
12137 || sparc_cpu == PROCESSOR_NIAGARA
12138 || sparc_cpu == PROCESSOR_NIAGARA2
12139 || sparc_cpu == PROCESSOR_NIAGARA3
12140 || sparc_cpu == PROCESSOR_NIAGARA4
12141 || sparc_cpu == PROCESSOR_NIAGARA7
12142 || sparc_cpu == PROCESSOR_M8)
12143 return 12;
12144
12145 return 6;
12146 }
12147
12148 return 2;
12149 }
12150
12151 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12152 This is achieved by means of a manual dynamic stack space allocation in
12153 the current frame. We make the assumption that SEQ doesn't contain any
12154 function calls, with the possible exception of calls to the GOT helper. */
12155
12156 static void
12157 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12158 {
12159 /* We must preserve the lowest 16 words for the register save area. */
12160 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12161 /* We really need only 2 words of fresh stack space. */
12162 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12163
12164 rtx slot
12165 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12166 SPARC_STACK_BIAS + offset));
12167
12168 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12169 emit_insn (gen_rtx_SET (slot, reg));
12170 if (reg2)
12171 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12172 reg2));
12173 emit_insn (seq);
12174 if (reg2)
12175 emit_insn (gen_rtx_SET (reg2,
12176 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12177 emit_insn (gen_rtx_SET (reg, slot));
12178 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12179 }
12180
12181 /* Output the assembler code for a thunk function. THUNK_DECL is the
12182 declaration for the thunk function itself, FUNCTION is the decl for
12183 the target function. DELTA is an immediate constant offset to be
12184 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12185 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12186
12187 static void
12188 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12189 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12190 tree function)
12191 {
12192 rtx this_rtx, funexp;
12193 rtx_insn *insn;
12194 unsigned int int_arg_first;
12195
12196 reload_completed = 1;
12197 epilogue_completed = 1;
12198
12199 emit_note (NOTE_INSN_PROLOGUE_END);
12200
12201 if (TARGET_FLAT)
12202 {
12203 sparc_leaf_function_p = 1;
12204
12205 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12206 }
12207 else if (flag_delayed_branch)
12208 {
12209 /* We will emit a regular sibcall below, so we need to instruct
12210 output_sibcall that we are in a leaf function. */
12211 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12212
12213 /* This will cause final.c to invoke leaf_renumber_regs so we
12214 must behave as if we were in a not-yet-leafified function. */
12215 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12216 }
12217 else
12218 {
12219 /* We will emit the sibcall manually below, so we will need to
12220 manually spill non-leaf registers. */
12221 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12222
12223 /* We really are in a leaf function. */
12224 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12225 }
12226
12227 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12228 returns a structure, the structure return pointer is there instead. */
12229 if (TARGET_ARCH64
12230 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12231 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12232 else
12233 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12234
12235 /* Add DELTA. When possible use a plain add, otherwise load it into
12236 a register first. */
12237 if (delta)
12238 {
12239 rtx delta_rtx = GEN_INT (delta);
12240
12241 if (! SPARC_SIMM13_P (delta))
12242 {
12243 rtx scratch = gen_rtx_REG (Pmode, 1);
12244 emit_move_insn (scratch, delta_rtx);
12245 delta_rtx = scratch;
12246 }
12247
12248 /* THIS_RTX += DELTA. */
12249 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12250 }
12251
12252 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12253 if (vcall_offset)
12254 {
12255 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12256 rtx scratch = gen_rtx_REG (Pmode, 1);
12257
12258 gcc_assert (vcall_offset < 0);
12259
12260 /* SCRATCH = *THIS_RTX. */
12261 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12262
12263 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12264 may not have any available scratch register at this point. */
12265 if (SPARC_SIMM13_P (vcall_offset))
12266 ;
12267 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12268 else if (! fixed_regs[5]
12269 /* The below sequence is made up of at least 2 insns,
12270 while the default method may need only one. */
12271 && vcall_offset < -8192)
12272 {
12273 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12274 emit_move_insn (scratch2, vcall_offset_rtx);
12275 vcall_offset_rtx = scratch2;
12276 }
12277 else
12278 {
12279 rtx increment = GEN_INT (-4096);
12280
12281 /* VCALL_OFFSET is a negative number whose typical range can be
12282 estimated as -32768..0 in 32-bit mode. In almost all cases
12283 it is therefore cheaper to emit multiple add insns than
12284 spilling and loading the constant into a register (at least
12285 6 insns). */
12286 while (! SPARC_SIMM13_P (vcall_offset))
12287 {
12288 emit_insn (gen_add2_insn (scratch, increment));
12289 vcall_offset += 4096;
12290 }
12291 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12292 }
12293
12294 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12295 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12296 gen_rtx_PLUS (Pmode,
12297 scratch,
12298 vcall_offset_rtx)));
12299
12300 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12301 emit_insn (gen_add2_insn (this_rtx, scratch));
12302 }
12303
12304 /* Generate a tail call to the target function. */
12305 if (! TREE_USED (function))
12306 {
12307 assemble_external (function);
12308 TREE_USED (function) = 1;
12309 }
12310 funexp = XEXP (DECL_RTL (function), 0);
12311
12312 if (flag_delayed_branch)
12313 {
12314 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12315 insn = emit_call_insn (gen_sibcall (funexp));
12316 SIBLING_CALL_P (insn) = 1;
12317 }
12318 else
12319 {
12320 /* The hoops we have to jump through in order to generate a sibcall
12321 without using delay slots... */
12322 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12323
12324 if (flag_pic)
12325 {
12326 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12327 start_sequence ();
12328 load_got_register (); /* clobbers %o7 */
12329 if (!TARGET_VXWORKS_RTP)
12330 pic_offset_table_rtx = global_offset_table_rtx;
12331 scratch = sparc_legitimize_pic_address (funexp, scratch);
12332 seq = get_insns ();
12333 end_sequence ();
12334 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12335 }
12336 else if (TARGET_ARCH32)
12337 {
12338 emit_insn (gen_rtx_SET (scratch,
12339 gen_rtx_HIGH (SImode, funexp)));
12340 emit_insn (gen_rtx_SET (scratch,
12341 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12342 }
12343 else /* TARGET_ARCH64 */
12344 {
12345 switch (sparc_cmodel)
12346 {
12347 case CM_MEDLOW:
12348 case CM_MEDMID:
12349 /* The destination can serve as a temporary. */
12350 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12351 break;
12352
12353 case CM_MEDANY:
12354 case CM_EMBMEDANY:
12355 /* The destination cannot serve as a temporary. */
12356 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12357 start_sequence ();
12358 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12359 seq = get_insns ();
12360 end_sequence ();
12361 emit_and_preserve (seq, spill_reg, 0);
12362 break;
12363
12364 default:
12365 gcc_unreachable ();
12366 }
12367 }
12368
12369 emit_jump_insn (gen_indirect_jump (scratch));
12370 }
12371
12372 emit_barrier ();
12373
12374 /* Run just enough of rest_of_compilation to get the insns emitted.
12375 There's not really enough bulk here to make other passes such as
12376 instruction scheduling worthwhile. Note that use_thunk calls
12377 assemble_start_function and assemble_end_function. */
12378 insn = get_insns ();
12379 shorten_branches (insn);
12380 final_start_function (insn, file, 1);
12381 final (insn, file, 1);
12382 final_end_function ();
12383
12384 reload_completed = 0;
12385 epilogue_completed = 0;
12386 }
12387
12388 /* Return true if sparc_output_mi_thunk would be able to output the
12389 assembler code for the thunk function specified by the arguments
12390 it is passed, and false otherwise. */
12391 static bool
12392 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12393 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12394 HOST_WIDE_INT vcall_offset,
12395 const_tree function ATTRIBUTE_UNUSED)
12396 {
12397 /* Bound the loop used in the default method above. */
12398 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12399 }
12400
12401 /* How to allocate a 'struct machine_function'. */
12402
12403 static struct machine_function *
12404 sparc_init_machine_status (void)
12405 {
12406 return ggc_cleared_alloc<machine_function> ();
12407 }
12408
12409 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12410 We need to emit DTP-relative relocations. */
12411
12412 static void
12413 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12414 {
12415 switch (size)
12416 {
12417 case 4:
12418 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12419 break;
12420 case 8:
12421 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12422 break;
12423 default:
12424 gcc_unreachable ();
12425 }
12426 output_addr_const (file, x);
12427 fputs (")", file);
12428 }
12429
12430 /* Do whatever processing is required at the end of a file. */
12431
12432 static void
12433 sparc_file_end (void)
12434 {
12435 /* If we need to emit the special GOT helper function, do so now. */
12436 if (got_helper_rtx)
12437 {
12438 const char *name = XSTR (got_helper_rtx, 0);
12439 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12440 #ifdef DWARF2_UNWIND_INFO
12441 bool do_cfi;
12442 #endif
12443
12444 if (USE_HIDDEN_LINKONCE)
12445 {
12446 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12447 get_identifier (name),
12448 build_function_type_list (void_type_node,
12449 NULL_TREE));
12450 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12451 NULL_TREE, void_type_node);
12452 TREE_PUBLIC (decl) = 1;
12453 TREE_STATIC (decl) = 1;
12454 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12455 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12456 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12457 resolve_unique_section (decl, 0, flag_function_sections);
12458 allocate_struct_function (decl, true);
12459 cfun->is_thunk = 1;
12460 current_function_decl = decl;
12461 init_varasm_status ();
12462 assemble_start_function (decl, name);
12463 }
12464 else
12465 {
12466 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12467 switch_to_section (text_section);
12468 if (align > 0)
12469 ASM_OUTPUT_ALIGN (asm_out_file, align);
12470 ASM_OUTPUT_LABEL (asm_out_file, name);
12471 }
12472
12473 #ifdef DWARF2_UNWIND_INFO
12474 do_cfi = dwarf2out_do_cfi_asm ();
12475 if (do_cfi)
12476 fprintf (asm_out_file, "\t.cfi_startproc\n");
12477 #endif
12478 if (flag_delayed_branch)
12479 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12480 reg_name, reg_name);
12481 else
12482 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12483 reg_name, reg_name);
12484 #ifdef DWARF2_UNWIND_INFO
12485 if (do_cfi)
12486 fprintf (asm_out_file, "\t.cfi_endproc\n");
12487 #endif
12488 }
12489
12490 if (NEED_INDICATE_EXEC_STACK)
12491 file_end_indicate_exec_stack ();
12492
12493 #ifdef TARGET_SOLARIS
12494 solaris_file_end ();
12495 #endif
12496 }
12497
12498 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12499 /* Implement TARGET_MANGLE_TYPE. */
12500
12501 static const char *
12502 sparc_mangle_type (const_tree type)
12503 {
12504 if (TARGET_ARCH32
12505 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12506 && TARGET_LONG_DOUBLE_128)
12507 return "g";
12508
12509 /* For all other types, use normal C++ mangling. */
12510 return NULL;
12511 }
12512 #endif
12513
12514 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12515 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12516 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
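 /* For example, the compare-and-swap expander below calls this with
 LOAD_STORE == 3 and BEFORE_AFTER == 1 for the barrier needed before an
 operation that both loads and stores, and with BEFORE_AFTER == 2 for
 the barrier needed after it. */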
12517
12518 void
12519 sparc_emit_membar_for_model (enum memmodel model,
12520 int load_store, int before_after)
12521 {
12522 /* Bits for the MEMBAR mmask field. */
12523 const int LoadLoad = 1;
12524 const int StoreLoad = 2;
12525 const int LoadStore = 4;
12526 const int StoreStore = 8;
12527
12528 int mm = 0, implied = 0;
12529
12530 switch (sparc_memory_model)
12531 {
12532 case SMM_SC:
12533 /* Sequential Consistency. All memory transactions are immediately
12534 visible in sequential execution order. No barriers needed. */
12535 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12536 break;
12537
12538 case SMM_TSO:
12539 /* Total Store Ordering: all memory transactions with store semantics
12540 are followed by an implied StoreStore. */
12541 implied |= StoreStore;
12542
12543 /* If we're not looking for a raw barrier (before+after), then atomic
12544 operations get the benefit of being both load and store. */
12545 if (load_store == 3 && before_after == 1)
12546 implied |= StoreLoad;
12547 /* FALLTHRU */
12548
12549 case SMM_PSO:
12550 /* Partial Store Ordering: all memory transactions with load semantics
12551 are followed by an implied LoadLoad | LoadStore. */
12552 implied |= LoadLoad | LoadStore;
12553
12554 /* If we're not looking for a raw barrier (before+after), then atomic
12555 operations get the benefit of being both load and store. */
12556 if (load_store == 3 && before_after == 2)
12557 implied |= StoreLoad | StoreStore;
12558 /* FALLTHRU */
12559
12560 case SMM_RMO:
12561 /* Relaxed Memory Ordering: no implicit bits. */
12562 break;
12563
12564 default:
12565 gcc_unreachable ();
12566 }
12567
12568 if (before_after & 1)
12569 {
12570 if (is_mm_release (model) || is_mm_acq_rel (model)
12571 || is_mm_seq_cst (model))
12572 {
12573 if (load_store & 1)
12574 mm |= LoadLoad | StoreLoad;
12575 if (load_store & 2)
12576 mm |= LoadStore | StoreStore;
12577 }
12578 }
12579 if (before_after & 2)
12580 {
12581 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12582 || is_mm_seq_cst (model))
12583 {
12584 if (load_store & 1)
12585 mm |= LoadLoad | LoadStore;
12586 if (load_store & 2)
12587 mm |= StoreLoad | StoreStore;
12588 }
12589 }
12590
12591 /* Remove the bits implied by the system memory model. */
12592 mm &= ~implied;
12593
12594 /* For raw barriers (before+after), always emit a barrier.
12595 This will become a compile-time barrier if needed. */
12596 if (mm || before_after == 3)
12597 emit_insn (gen_membar (GEN_INT (mm)));
12598 }
12599
12600 /* Expand code to perform an 8 or 16-bit compare and swap by doing a 32-bit
12601 compare and swap on the word containing the byte or half-word. */
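 /* Sketch of the scheme used below: round the address down to the
 enclosing 32-bit word, build a shifted mask isolating the byte or
 half-word, and loop on a 32-bit compare-and-swap, re-merging the bytes
 outside the mask whenever they are observed to have changed. */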
12602
12603 static void
12604 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12605 rtx oldval, rtx newval)
12606 {
12607 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12608 rtx addr = gen_reg_rtx (Pmode);
12609 rtx off = gen_reg_rtx (SImode);
12610 rtx oldv = gen_reg_rtx (SImode);
12611 rtx newv = gen_reg_rtx (SImode);
12612 rtx oldvalue = gen_reg_rtx (SImode);
12613 rtx newvalue = gen_reg_rtx (SImode);
12614 rtx res = gen_reg_rtx (SImode);
12615 rtx resv = gen_reg_rtx (SImode);
12616 rtx memsi, val, mask, cc;
12617
12618 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12619
12620 if (Pmode != SImode)
12621 addr1 = gen_lowpart (SImode, addr1);
12622 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12623
12624 memsi = gen_rtx_MEM (SImode, addr);
12625 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12626 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12627
12628 val = copy_to_reg (memsi);
12629
12630 emit_insn (gen_rtx_SET (off,
12631 gen_rtx_XOR (SImode, off,
12632 GEN_INT (GET_MODE (mem) == QImode
12633 ? 3 : 2))));
12634
12635 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12636
12637 if (GET_MODE (mem) == QImode)
12638 mask = force_reg (SImode, GEN_INT (0xff));
12639 else
12640 mask = force_reg (SImode, GEN_INT (0xffff));
12641
12642 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12643
12644 emit_insn (gen_rtx_SET (val,
12645 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12646 val)));
12647
12648 oldval = gen_lowpart (SImode, oldval);
12649 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12650
12651 newval = gen_lowpart_common (SImode, newval);
12652 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12653
12654 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12655
12656 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12657
12658 rtx_code_label *end_label = gen_label_rtx ();
12659 rtx_code_label *loop_label = gen_label_rtx ();
12660 emit_label (loop_label);
12661
12662 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12663
12664 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12665
12666 emit_move_insn (bool_result, const1_rtx);
12667
12668 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12669
12670 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12671
12672 emit_insn (gen_rtx_SET (resv,
12673 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12674 res)));
12675
12676 emit_move_insn (bool_result, const0_rtx);
12677
12678 cc = gen_compare_reg_1 (NE, resv, val);
12679 emit_insn (gen_rtx_SET (val, resv));
12680
12681 /* Use cbranchcc4 to separate the compare and branch! */
12682 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12683 cc, const0_rtx, loop_label));
12684
12685 emit_label (end_label);
12686
12687 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12688
12689 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12690
12691 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12692 }
12693
12694 /* Expand code to perform a compare-and-swap. */
12695
12696 void
12697 sparc_expand_compare_and_swap (rtx operands[])
12698 {
12699 rtx bval, retval, mem, oldval, newval;
12700 machine_mode mode;
12701 enum memmodel model;
12702
12703 bval = operands[0];
12704 retval = operands[1];
12705 mem = operands[2];
12706 oldval = operands[3];
12707 newval = operands[4];
12708 model = (enum memmodel) INTVAL (operands[6]);
12709 mode = GET_MODE (mem);
12710
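/* Request the barriers implied by MODEL around the operation: LOAD_STORE
   == 3 marks the CAS as both a load and a store, while BEFORE_AFTER 1 and
   2 select the fences emitted before and after it respectively.  */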
12711 sparc_emit_membar_for_model (model, 3, 1);
12712
12713 if (reg_overlap_mentioned_p (retval, oldval))
12714 oldval = copy_to_reg (oldval);
12715
12716 if (mode == QImode || mode == HImode)
12717 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12718 else
12719 {
12720 rtx (*gen) (rtx, rtx, rtx, rtx);
12721 rtx x;
12722
12723 if (mode == SImode)
12724 gen = gen_atomic_compare_and_swapsi_1;
12725 else
12726 gen = gen_atomic_compare_and_swapdi_1;
12727 emit_insn (gen (retval, mem, oldval, newval));
12728
12729 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12730 if (x != bval)
12731 convert_move (bval, x, 1);
12732 }
12733
12734 sparc_emit_membar_for_model (model, 3, 2);
12735 }
12736
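/* Expand the permutation selector SEL for vector mode VMODE into the packed
   byte-index form consumed by the VIS2 BMASK instruction and emit that
   instruction; BMASK leaves the computed mask in %gsr, where a subsequent
   BSHUFFLE reads it.  */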
12737 void
12738 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12739 {
12740 rtx t_1, t_2, t_3;
12741
12742 sel = gen_lowpart (DImode, sel);
12743 switch (vmode)
12744 {
12745 case E_V2SImode:
12746 /* inp = xxxxxxxAxxxxxxxB */
12747 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12748 NULL_RTX, 1, OPTAB_DIRECT);
12749 /* t_1 = ....xxxxxxxAxxx. */
12750 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12751 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12752 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12753 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12754 /* sel = .......B */
12755 /* t_1 = ...A.... */
12756 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12757 /* sel = ...A...B */
12758 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12759 /* sel = AAAABBBB * 4 */
12760 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12761 /* sel = { A*4, A*4+1, A*4+2, ... } */
12762 break;
12763
12764 case E_V4HImode:
12765 /* inp = xxxAxxxBxxxCxxxD */
12766 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12767 NULL_RTX, 1, OPTAB_DIRECT);
12768 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12769 NULL_RTX, 1, OPTAB_DIRECT);
12770 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12771 NULL_RTX, 1, OPTAB_DIRECT);
12772 /* t_1 = ..xxxAxxxBxxxCxx */
12773 /* t_2 = ....xxxAxxxBxxxC */
12774 /* t_3 = ......xxxAxxxBxx */
12775 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12776 GEN_INT (0x07),
12777 NULL_RTX, 1, OPTAB_DIRECT);
12778 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12779 GEN_INT (0x0700),
12780 NULL_RTX, 1, OPTAB_DIRECT);
12781 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12782 GEN_INT (0x070000),
12783 NULL_RTX, 1, OPTAB_DIRECT);
12784 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12785 GEN_INT (0x07000000),
12786 NULL_RTX, 1, OPTAB_DIRECT);
12787 /* sel = .......D */
12788 /* t_1 = .....C.. */
12789 /* t_2 = ...B.... */
12790 /* t_3 = .A...... */
12791 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12792 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12793 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12794 /* sel = .A.B.C.D */
12795 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12796 /* sel = AABBCCDD * 2 */
12797 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12798 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12799 break;
12800
12801 case E_V8QImode:
12802 /* input = xAxBxCxDxExFxGxH */
12803 sel = expand_simple_binop (DImode, AND, sel,
12804 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12805 | 0x0f0f0f0f),
12806 NULL_RTX, 1, OPTAB_DIRECT);
12807 /* sel = .A.B.C.D.E.F.G.H */
12808 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12809 NULL_RTX, 1, OPTAB_DIRECT);
12810 /* t_1 = ..A.B.C.D.E.F.G. */
12811 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12812 NULL_RTX, 1, OPTAB_DIRECT);
12813 /* sel = .AABBCCDDEEFFGGH */
12814 sel = expand_simple_binop (DImode, AND, sel,
12815 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12816 | 0xff00ff),
12817 NULL_RTX, 1, OPTAB_DIRECT);
12818 /* sel = ..AB..CD..EF..GH */
12819 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12820 NULL_RTX, 1, OPTAB_DIRECT);
12821 /* t_1 = ....AB..CD..EF.. */
12822 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12823 NULL_RTX, 1, OPTAB_DIRECT);
12824 /* sel = ..ABABCDCDEFEFGH */
12825 sel = expand_simple_binop (DImode, AND, sel,
12826 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12827 NULL_RTX, 1, OPTAB_DIRECT);
12828 /* sel = ....ABCD....EFGH */
12829 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12830 NULL_RTX, 1, OPTAB_DIRECT);
12831 /* t_1 = ........ABCD.... */
12832 sel = gen_lowpart (SImode, sel);
12833 t_1 = gen_lowpart (SImode, t_1);
12834 break;
12835
12836 default:
12837 gcc_unreachable ();
12838 }
12839
12840 /* Always perform the final addition/merge within the bmask insn. */
12841 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12842 }
12843
12844 /* Implement TARGET_VEC_PERM_CONST. */
12845
12846 static bool
12847 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12848 rtx op1, const vec_perm_indices &sel)
12849 {
12850 if (!TARGET_VIS2)
12851 return false;
12852
12853 /* All permutes are supported. */
12854 if (!target)
12855 return true;
12856
12857 /* Force target-independent code to convert constant permutations on other
12858 modes down to V8QI. Rely on this to avoid the complexity of the byte
12859 order of the permutation. */
12860 if (vmode != V8QImode)
12861 return false;
12862
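/* Pack the eight byte indices of the permutation into nibbles, index 0 in
   the most significant nibble, and feed the result to BMASK with a zero
   addend so that %gsr receives exactly this selector for the BSHUFFLE
   below.  */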
12863 unsigned int i, mask;
12864 for (i = mask = 0; i < 8; ++i)
12865 mask |= (sel[i] & 0xf) << (28 - i*4);
12866 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12867
12868 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12869 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12870 return true;
12871 }
12872
12873 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12874
12875 static bool
12876 sparc_frame_pointer_required (void)
12877 {
12878 /* If the stack pointer is dynamically modified in the function, it cannot
12879 serve as the frame pointer. */
12880 if (cfun->calls_alloca)
12881 return true;
12882
12883 /* If the function receives nonlocal gotos, it needs to save the frame
12884 pointer in the nonlocal_goto_save_area object. */
12885 if (cfun->has_nonlocal_label)
12886 return true;
12887
12888 /* In flat mode, that's it. */
12889 if (TARGET_FLAT)
12890 return false;
12891
12892 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12893 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12894 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12895 }
12896
12897 /* The way this is structured, we can't eliminate SFP in favor of SP
12898 if the frame pointer is required: we want to use the SFP->HFP elimination
12899 in that case. But the test in update_eliminables doesn't know we are
12900 assuming below that we only do the former elimination. */
12901
12902 static bool
12903 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12904 {
12905 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12906 }
12907
12908 /* Return the hard frame pointer directly to bypass the stack bias. */
12909
12910 static rtx
12911 sparc_builtin_setjmp_frame_value (void)
12912 {
12913 return hard_frame_pointer_rtx;
12914 }
12915
12916 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12917 they won't be allocated. */
12918
12919 static void
12920 sparc_conditional_register_usage (void)
12921 {
12922 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12923 {
12924 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12925 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12926 }
12927 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12928 then honor it.  */
12929 if (TARGET_ARCH32 && fixed_regs[5])
12930 fixed_regs[5] = 1;
12931 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12932 fixed_regs[5] = 0;
12933 if (! TARGET_V9)
12934 {
12935 int regno;
12936 for (regno = SPARC_FIRST_V9_FP_REG;
12937 regno <= SPARC_LAST_V9_FP_REG;
12938 regno++)
12939 fixed_regs[regno] = 1;
12940 /* %fcc0 is used by v8 and v9. */
12941 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12942 regno <= SPARC_LAST_V9_FCC_REG;
12943 regno++)
12944 fixed_regs[regno] = 1;
12945 }
12946 if (! TARGET_FPU)
12947 {
12948 int regno;
12949 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12950 fixed_regs[regno] = 1;
12951 }
12952 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12953 then honor it.  Likewise with g3 and g4.  */
12954 if (fixed_regs[2] == 2)
12955 fixed_regs[2] = ! TARGET_APP_REGS;
12956 if (fixed_regs[3] == 2)
12957 fixed_regs[3] = ! TARGET_APP_REGS;
12958 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12959 fixed_regs[4] = ! TARGET_APP_REGS;
12960 else if (TARGET_CM_EMBMEDANY)
12961 fixed_regs[4] = 1;
12962 else if (fixed_regs[4] == 2)
12963 fixed_regs[4] = 0;
12964 if (TARGET_FLAT)
12965 {
12966 int regno;
12967 /* Disable leaf functions. */
12968 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12969 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12970 leaf_reg_remap [regno] = regno;
12971 }
12972 if (TARGET_VIS)
12973 global_regs[SPARC_GSR_REG] = 1;
12974 }
12975
12976 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
12977
12978 static bool
12979 sparc_use_pseudo_pic_reg (void)
12980 {
12981 return !TARGET_VXWORKS_RTP && flag_pic;
12982 }
12983
12984 /* Implement TARGET_INIT_PIC_REG. */
12985
12986 static void
12987 sparc_init_pic_reg (void)
12988 {
12989 edge entry_edge;
12990 rtx_insn *seq;
12991
12992 if (!crtl->uses_pic_offset_table)
12993 return;
12994
12995 start_sequence ();
12996 load_got_register ();
12997 if (!TARGET_VXWORKS_RTP)
12998 emit_move_insn (pic_offset_table_rtx, global_offset_table_rtx);
12999 seq = get_insns ();
13000 end_sequence ();
13001
13002 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13003 insert_insn_on_edge (seq, entry_edge);
13004 commit_one_edge_insertion (entry_edge);
13005 }
13006
13007 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13008
13009 - We can't load constants into FP registers.
13010 - We can't load FP constants into integer registers when soft-float,
13011 because there is no soft-float pattern with a r/F constraint.
13012 - We can't load FP constants into integer registers for TFmode unless
13013 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13014 - Try to reload integer constants (symbolic or otherwise) back into
13015 registers directly, rather than having them dumped to memory. */
13016
13017 static reg_class_t
13018 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13019 {
13020 machine_mode mode = GET_MODE (x);
13021 if (CONSTANT_P (x))
13022 {
13023 if (FP_REG_CLASS_P (rclass)
13024 || rclass == GENERAL_OR_FP_REGS
13025 || rclass == GENERAL_OR_EXTRA_FP_REGS
13026 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13027 || (mode == TFmode && ! const_zero_operand (x, mode)))
13028 return NO_REGS;
13029
13030 if (GET_MODE_CLASS (mode) == MODE_INT)
13031 return GENERAL_REGS;
13032
13033 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13034 {
13035 if (! FP_REG_CLASS_P (rclass)
13036 || !(const_zero_operand (x, mode)
13037 || const_all_ones_operand (x, mode)))
13038 return NO_REGS;
13039 }
13040 }
13041
13042 if (TARGET_VIS3
13043 && ! TARGET_ARCH64
13044 && (rclass == EXTRA_FP_REGS
13045 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13046 {
13047 int regno = true_regnum (x);
13048
13049 if (SPARC_INT_REG_P (regno))
13050 return (rclass == EXTRA_FP_REGS
13051 ? FP_REGS : GENERAL_OR_FP_REGS);
13052 }
13053
13054 return rclass;
13055 }
13056
13057 /* Return true if we use LRA instead of reload pass. */
13058
13059 static bool
13060 sparc_lra_p (void)
13061 {
13062 return TARGET_LRA;
13063 }
13064
13065 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13066 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
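/* In V8+ mode the 64-bit operands and result live in 32-bit register pairs
   (%H is the high word, %L the low word).  The sequences below first build
   full 64-bit values in scratch registers with sllx/or, zero-extending a
   low word with srl when sparc_check_64 cannot prove its upper half is
   already clear, then issue the multiply and split the 64-bit product back
   into a register pair with srlx (plus a mov when the product was computed
   in a scratch register).  */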
13067
13068 const char *
13069 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13070 {
13071 char mulstr[32];
13072
13073 gcc_assert (! TARGET_ARCH64);
13074
13075 if (sparc_check_64 (operands[1], insn) <= 0)
13076 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13077 if (which_alternative == 1)
13078 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13079 if (GET_CODE (operands[2]) == CONST_INT)
13080 {
13081 if (which_alternative == 1)
13082 {
13083 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13084 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13085 output_asm_insn (mulstr, operands);
13086 return "srlx\t%L0, 32, %H0";
13087 }
13088 else
13089 {
13090 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13091 output_asm_insn ("or\t%L1, %3, %3", operands);
13092 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13093 output_asm_insn (mulstr, operands);
13094 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13095 return "mov\t%3, %L0";
13096 }
13097 }
13098 else if (rtx_equal_p (operands[1], operands[2]))
13099 {
13100 if (which_alternative == 1)
13101 {
13102 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13103 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13104 output_asm_insn (mulstr, operands);
13105 return "srlx\t%L0, 32, %H0";
13106 }
13107 else
13108 {
13109 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13110 output_asm_insn ("or\t%L1, %3, %3", operands);
13111 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13112 output_asm_insn (mulstr, operands);
13113 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13114 return "mov\t%3, %L0";
13115 }
13116 }
13117 if (sparc_check_64 (operands[2], insn) <= 0)
13118 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13119 if (which_alternative == 1)
13120 {
13121 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13122 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13123 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13124 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13125 output_asm_insn (mulstr, operands);
13126 return "srlx\t%L0, 32, %H0";
13127 }
13128 else
13129 {
13130 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13131 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13132 output_asm_insn ("or\t%L1, %3, %3", operands);
13133 output_asm_insn ("or\t%L2, %4, %4", operands);
13134 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13135 output_asm_insn (mulstr, operands);
13136 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13137 return "mov\t%3, %L0";
13138 }
13139 }
13140
13141 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13142 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13143 and INNER_MODE are the modes describing TARGET. */
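/* ELT is copied into the low 32 bits of T1, and the BMASK constants below
   make every destination field select those bytes from the T1:T1
   concatenation: 0x45674567 repeats bytes 4-7 (the whole SImode element),
   0x67676767 repeats the halfword at bytes 6-7, and 0x77777777 repeats
   byte 7.  */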
13144
13145 static void
13146 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13147 machine_mode inner_mode)
13148 {
13149 rtx t1, final_insn, sel;
13150 int bmask;
13151
13152 t1 = gen_reg_rtx (mode);
13153
13154 elt = convert_modes (SImode, inner_mode, elt, true);
13155 emit_move_insn (gen_lowpart (SImode, t1), elt);
13156
13157 switch (mode)
13158 {
13159 case E_V2SImode:
13160 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13161 bmask = 0x45674567;
13162 break;
13163 case E_V4HImode:
13164 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13165 bmask = 0x67676767;
13166 break;
13167 case E_V8QImode:
13168 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13169 bmask = 0x77777777;
13170 break;
13171 default:
13172 gcc_unreachable ();
13173 }
13174
13175 sel = force_reg (SImode, GEN_INT (bmask));
13176 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13177 emit_insn (final_insn);
13178 }
13179
13180 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13181 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
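/* Each FPMERGE below interleaves the bytes of its two V4QI operands into a
   V8QI result, so merging a value with itself doubles the number of
   adjacent copies of ELT in the low half of the result: 1 copy in T1
   becomes 2 in T2, 4 in T3, and finally 8 in TARGET.  */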
13182
13183 static void
13184 vector_init_fpmerge (rtx target, rtx elt)
13185 {
13186 rtx t1, t2, t2_low, t3, t3_low;
13187
13188 t1 = gen_reg_rtx (V4QImode);
13189 elt = convert_modes (SImode, QImode, elt, true);
13190 emit_move_insn (gen_lowpart (SImode, t1), elt);
13191
13192 t2 = gen_reg_rtx (V8QImode);
13193 t2_low = gen_lowpart (V4QImode, t2);
13194 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13195
13196 t3 = gen_reg_rtx (V8QImode);
13197 t3_low = gen_lowpart (V4QImode, t3);
13198 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13199
13200 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13201 }
13202
13203 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13204 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
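/* ALIGNADDR with addends 6 and 0 sets the %gsr alignment offset to 6, so
   each FALIGNDATA below extracts 8 bytes starting at byte 6 of the 16-byte
   T1:TARGET concatenation.  ELT sits in the last halfword of T1, so every
   iteration shifts one more copy of ELT into the front of TARGET; four
   iterations fill all four V4HI fields.  */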
13205
13206 static void
13207 vector_init_faligndata (rtx target, rtx elt)
13208 {
13209 rtx t1 = gen_reg_rtx (V4HImode);
13210 int i;
13211
13212 elt = convert_modes (SImode, HImode, elt, true);
13213 emit_move_insn (gen_lowpart (SImode, t1), elt);
13214
13215 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13216 force_reg (SImode, GEN_INT (6)),
13217 const0_rtx));
13218
13219 for (i = 0; i < 4; i++)
13220 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13221 }
13222
13223 /* Emit code to initialize TARGET to values for individual fields VALS. */
13224
13225 void
13226 sparc_expand_vector_init (rtx target, rtx vals)
13227 {
13228 const machine_mode mode = GET_MODE (target);
13229 const machine_mode inner_mode = GET_MODE_INNER (mode);
13230 const int n_elts = GET_MODE_NUNITS (mode);
13231 int i, n_var = 0;
13232 bool all_same = true;
13233 rtx mem;
13234
13235 for (i = 0; i < n_elts; i++)
13236 {
13237 rtx x = XVECEXP (vals, 0, i);
13238 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13239 n_var++;
13240
13241 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13242 all_same = false;
13243 }
13244
13245 if (n_var == 0)
13246 {
13247 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13248 return;
13249 }
13250
13251 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13252 {
13253 if (GET_MODE_SIZE (inner_mode) == 4)
13254 {
13255 emit_move_insn (gen_lowpart (SImode, target),
13256 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13257 return;
13258 }
13259 else if (GET_MODE_SIZE (inner_mode) == 8)
13260 {
13261 emit_move_insn (gen_lowpart (DImode, target),
13262 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13263 return;
13264 }
13265 }
13266 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13267 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13268 {
13269 emit_move_insn (gen_highpart (word_mode, target),
13270 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13271 emit_move_insn (gen_lowpart (word_mode, target),
13272 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13273 return;
13274 }
13275
13276 if (all_same && GET_MODE_SIZE (mode) == 8)
13277 {
13278 if (TARGET_VIS2)
13279 {
13280 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13281 return;
13282 }
13283 if (mode == V8QImode)
13284 {
13285 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13286 return;
13287 }
13288 if (mode == V4HImode)
13289 {
13290 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13291 return;
13292 }
13293 }
13294
13295 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13296 for (i = 0; i < n_elts; i++)
13297 emit_move_insn (adjust_address_nv (mem, inner_mode,
13298 i * GET_MODE_SIZE (inner_mode)),
13299 XVECEXP (vals, 0, i));
13300 emit_move_insn (target, mem);
13301 }
13302
13303 /* Implement TARGET_SECONDARY_RELOAD. */
13304
13305 static reg_class_t
13306 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13307 machine_mode mode, secondary_reload_info *sri)
13308 {
13309 enum reg_class rclass = (enum reg_class) rclass_i;
13310
13311 sri->icode = CODE_FOR_nothing;
13312 sri->extra_cost = 0;
13313
13314 /* We need a temporary when loading/storing a HImode/QImode value
13315 between memory and the FPU registers. This can happen when combine puts
13316 a paradoxical subreg in a float/fix conversion insn. */
13317 if (FP_REG_CLASS_P (rclass)
13318 && (mode == HImode || mode == QImode)
13319 && (GET_CODE (x) == MEM
13320 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13321 && true_regnum (x) == -1)))
13322 return GENERAL_REGS;
13323
13324 /* On 32-bit we need a temporary when loading/storing a DFmode value
13325 between unaligned memory and the upper FPU registers. */
13326 if (TARGET_ARCH32
13327 && rclass == EXTRA_FP_REGS
13328 && mode == DFmode
13329 && GET_CODE (x) == MEM
13330 && ! mem_min_alignment (x, 8))
13331 return FP_REGS;
13332
13333 if (((TARGET_CM_MEDANY
13334 && symbolic_operand (x, mode))
13335 || (TARGET_CM_EMBMEDANY
13336 && text_segment_operand (x, mode)))
13337 && ! flag_pic)
13338 {
13339 if (in_p)
13340 sri->icode = direct_optab_handler (reload_in_optab, mode);
13341 else
13342 sri->icode = direct_optab_handler (reload_out_optab, mode);
13343 return NO_REGS;
13344 }
13345
13346 if (TARGET_VIS3 && TARGET_ARCH32)
13347 {
13348 int regno = true_regnum (x);
13349
13350 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13351 to move 8-byte values in 4-byte pieces. This only works via
13352 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13353 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13354 an FP_REGS intermediate move. */
13355 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13356 || ((general_or_i64_p (rclass)
13357 || rclass == GENERAL_OR_FP_REGS)
13358 && SPARC_FP_REG_P (regno)))
13359 {
13360 sri->extra_cost = 2;
13361 return FP_REGS;
13362 }
13363 }
13364
13365 return NO_REGS;
13366 }
13367
13368 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13369
13370 On SPARC, without VIS3 it is not possible to directly move data
13371 between GENERAL_REGS and FP_REGS.  */
13372
13373 static bool
13374 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13375 reg_class_t class2)
13376 {
13377 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13378 && (! TARGET_VIS3
13379 || GET_MODE_SIZE (mode) > 8
13380 || GET_MODE_SIZE (mode) < 4));
13381 }
13382
13383 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13384
13385 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13386 because the movsi and movsf patterns don't handle r/f moves.
13387 For v8 we copy the default definition. */
13388
13389 static machine_mode
13390 sparc_secondary_memory_needed_mode (machine_mode mode)
13391 {
13392 if (TARGET_ARCH64)
13393 {
13394 if (GET_MODE_BITSIZE (mode) < 32)
13395 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13396 return mode;
13397 }
13398 else
13399 {
13400 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13401 return mode_for_size (BITS_PER_WORD,
13402 GET_MODE_CLASS (mode), 0).require ();
13403 return mode;
13404 }
13405 }
13406
13407 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13408 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
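/* The move is emitted as a single conditional assignment
   dst = (cond) ? operands[2] : dst, so OPERANDS[3] is first copied into
   the destination, or, when OPERANDS[2] already is the destination, the
   two arms are swapped and the condition reversed.  */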
13409
13410 bool
13411 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13412 {
13413 enum rtx_code rc = GET_CODE (operands[1]);
13414 machine_mode cmp_mode;
13415 rtx cc_reg, dst, cmp;
13416
13417 cmp = operands[1];
13418 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13419 return false;
13420
13421 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13422 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13423
13424 cmp_mode = GET_MODE (XEXP (cmp, 0));
13425 rc = GET_CODE (cmp);
13426
13427 dst = operands[0];
13428 if (! rtx_equal_p (operands[2], dst)
13429 && ! rtx_equal_p (operands[3], dst))
13430 {
13431 if (reg_overlap_mentioned_p (dst, cmp))
13432 dst = gen_reg_rtx (mode);
13433
13434 emit_move_insn (dst, operands[3]);
13435 }
13436 else if (operands[2] == dst)
13437 {
13438 operands[2] = operands[3];
13439
13440 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13441 rc = reverse_condition_maybe_unordered (rc);
13442 else
13443 rc = reverse_condition (rc);
13444 }
13445
13446 if (XEXP (cmp, 1) == const0_rtx
13447 && GET_CODE (XEXP (cmp, 0)) == REG
13448 && cmp_mode == DImode
13449 && v9_regcmp_p (rc))
13450 cc_reg = XEXP (cmp, 0);
13451 else
13452 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13453
13454 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13455
13456 emit_insn (gen_rtx_SET (dst,
13457 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13458
13459 if (dst != operands[0])
13460 emit_move_insn (operands[0], dst);
13461
13462 return true;
13463 }
13464
13465 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13466 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13467 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13468 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13469 code to be used for the condition mask. */
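/* The comparison result is computed into MASK by the FCODE unspec, copied
   into %gsr by the CCODE unspec, and the final BSHUFFLE then picks each
   element of the result from OPERANDS[1] or OPERANDS[2] according to that
   mask.  */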
13470
13471 void
13472 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13473 {
13474 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13475 enum rtx_code code = GET_CODE (operands[3]);
13476
13477 mask = gen_reg_rtx (Pmode);
13478 cop0 = operands[4];
13479 cop1 = operands[5];
13480 if (code == LT || code == GE)
13481 {
13482 rtx t;
13483
13484 code = swap_condition (code);
13485 t = cop0; cop0 = cop1; cop1 = t;
13486 }
13487
13488 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13489
13490 fcmp = gen_rtx_UNSPEC (Pmode,
13491 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13492 fcode);
13493
13494 cmask = gen_rtx_UNSPEC (DImode,
13495 gen_rtvec (2, mask, gsr),
13496 ccode);
13497
13498 bshuf = gen_rtx_UNSPEC (mode,
13499 gen_rtvec (3, operands[1], operands[2], gsr),
13500 UNSPEC_BSHUFFLE);
13501
13502 emit_insn (gen_rtx_SET (mask, fcmp));
13503 emit_insn (gen_rtx_SET (gsr, cmask));
13504
13505 emit_insn (gen_rtx_SET (operands[0], bshuf));
13506 }
13507
13508 /* On SPARC, any mode that naturally allocates into the float
13509 registers should return 4 here. */
13510
13511 unsigned int
13512 sparc_regmode_natural_size (machine_mode mode)
13513 {
13514 int size = UNITS_PER_WORD;
13515
13516 if (TARGET_ARCH64)
13517 {
13518 enum mode_class mclass = GET_MODE_CLASS (mode);
13519
13520 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13521 size = 4;
13522 }
13523
13524 return size;
13525 }
13526
13527 /* Implement TARGET_HARD_REGNO_NREGS.
13528
13529 On SPARC, ordinary registers hold 32 bits worth; this means both
13530 integer and floating point registers. On v9, integer regs hold 64
13531 bits worth; floating point regs hold 32 bits worth (this includes the
13532 new fp regs as even the odd ones are included in the hard register
13533 count). */
13534
13535 static unsigned int
13536 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13537 {
13538 if (regno == SPARC_GSR_REG)
13539 return 1;
13540 if (TARGET_ARCH64)
13541 {
13542 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13543 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13544 return CEIL (GET_MODE_SIZE (mode), 4);
13545 }
13546 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13547 }
13548
13549 /* Implement TARGET_HARD_REGNO_MODE_OK.
13550
13551 ??? Because of the funny way we pass parameters we should allow certain
13552 ??? types of float/complex values to be in integer registers during
13553 ??? RTL generation. This only matters on arch32. */
13554
13555 static bool
13556 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13557 {
13558 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13559 }
13560
13561 /* Implement TARGET_MODES_TIEABLE_P.
13562
13563 For V9 we have to deal with the fact that only the lower 32 floating
13564 point registers are 32-bit addressable. */
13565
13566 static bool
13567 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13568 {
13569 enum mode_class mclass1, mclass2;
13570 unsigned short size1, size2;
13571
13572 if (mode1 == mode2)
13573 return true;
13574
13575 mclass1 = GET_MODE_CLASS (mode1);
13576 mclass2 = GET_MODE_CLASS (mode2);
13577 if (mclass1 != mclass2)
13578 return false;
13579
13580 if (! TARGET_V9)
13581 return true;
13582
13583 /* Classes are the same and we are V9 so we have to deal with upper
13584 vs. lower floating point registers. If one of the modes is a
13585 4-byte mode, and the other is not, we have to mark them as not
13586 tieable because only the lower 32 floating point registers are
13587 addressable 32-bits at a time.
13588
13589 We can't just test explicitly for SFmode, otherwise we won't
13590 cover the vector mode cases properly. */
13591
13592 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13593 return true;
13594
13595 size1 = GET_MODE_SIZE (mode1);
13596 size2 = GET_MODE_SIZE (mode2);
13597 if ((size1 > 4 && size2 == 4)
13598 || (size2 > 4 && size1 == 4))
13599 return false;
13600
13601 return true;
13602 }
13603
13604 /* Implement TARGET_CSTORE_MODE. */
13605
13606 static scalar_int_mode
13607 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13608 {
13609 return (TARGET_ARCH64 ? DImode : SImode);
13610 }
13611
13612 /* Return the compound expression made of T1 and T2. */
13613
13614 static inline tree
13615 compound_expr (tree t1, tree t2)
13616 {
13617 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13618 }
13619
13620 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13621
13622 static void
13623 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13624 {
13625 if (!TARGET_FPU)
13626 return;
13627
13628 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13629 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13630
13631 /* We generate the equivalent of feholdexcept (&fenv_var):
13632
13633 unsigned int fenv_var;
13634 __builtin_store_fsr (&fenv_var);
13635
13636 unsigned int tmp1_var;
13637 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13638
13639 __builtin_load_fsr (&tmp1_var); */
13640
13641 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13642 TREE_ADDRESSABLE (fenv_var) = 1;
13643 tree fenv_addr = build_fold_addr_expr (fenv_var);
13644 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13645 tree hold_stfsr
13646 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13647 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13648
13649 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13650 TREE_ADDRESSABLE (tmp1_var) = 1;
13651 tree masked_fenv_var
13652 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13653 build_int_cst (unsigned_type_node,
13654 ~(accrued_exception_mask | trap_enable_mask)));
13655 tree hold_mask
13656 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13657 NULL_TREE, NULL_TREE);
13658
13659 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13660 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13661 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13662
13663 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13664
13665 /* We reload the value of tmp1_var to clear the exceptions:
13666
13667 __builtin_load_fsr (&tmp1_var); */
13668
13669 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13670
13671 /* We generate the equivalent of feupdateenv (&fenv_var):
13672
13673 unsigned int tmp2_var;
13674 __builtin_store_fsr (&tmp2_var);
13675
13676 __builtin_load_fsr (&fenv_var);
13677
13678 if (SPARC_LOW_FE_EXCEPT_VALUES)
13679 tmp2_var >>= 5;
13680 __atomic_feraiseexcept ((int) tmp2_var); */
13681
13682 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13683 TREE_ADDRESSABLE (tmp2_var) = 1;
13684 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13685 tree update_stfsr
13686 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13687 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13688
13689 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13690
13691 tree atomic_feraiseexcept
13692 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13693 tree update_call
13694 = build_call_expr (atomic_feraiseexcept, 1,
13695 fold_convert (integer_type_node, tmp2_var));
13696
13697 if (SPARC_LOW_FE_EXCEPT_VALUES)
13698 {
13699 tree shifted_tmp2_var
13700 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13701 build_int_cst (unsigned_type_node, 5));
13702 tree update_shift
13703 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13704 update_call = compound_expr (update_shift, update_call);
13705 }
13706
13707 *update
13708 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13709 }
13710
13711 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13712
13713 SImode loads to floating-point registers are not zero-extended.
13714 The definition for LOAD_EXTEND_OP specifies that integer loads
13715 narrower than BITS_PER_WORD will be zero-extended. As a result,
13716 we inhibit changes from SImode unless they are to a mode that is
13717 identical in size.
13718
13719 Likewise for SFmode, since word-mode paradoxical subregs are
13720 problematic on big-endian architectures. */
13721
13722 static bool
13723 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13724 reg_class_t rclass)
13725 {
13726 if (TARGET_ARCH64
13727 && GET_MODE_SIZE (from) == 4
13728 && GET_MODE_SIZE (to) != 4)
13729 return !reg_classes_intersect_p (rclass, FP_REGS);
13730 return true;
13731 }
13732
13733 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13734
13735 static HOST_WIDE_INT
13736 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13737 {
13738 if (TREE_CODE (exp) == STRING_CST)
13739 return MAX (align, FASTEST_ALIGNMENT);
13740 return align;
13741 }
13742
13743 #include "gt-sparc.h"