1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "debug.h"
52 #include "common/common-target.h"
53 #include "gimplify.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "params.h"
57 #include "tree-pass.h"
58 #include "context.h"
59 #include "builtins.h"
60
61 /* This file should be included last. */
62 #include "target-def.h"
63
64 /* Processor costs */
65
66 struct processor_costs {
67 /* Integer load */
68 const int int_load;
69
70 /* Integer signed load */
71 const int int_sload;
72
73 /* Integer zeroed load */
74 const int int_zload;
75
76 /* Float load */
77 const int float_load;
78
79 /* fmov, fneg, fabs */
80 const int float_move;
81
82 /* fadd, fsub */
83 const int float_plusminus;
84
85 /* fcmp */
86 const int float_cmp;
87
88 /* fmov, fmovr */
89 const int float_cmove;
90
91 /* fmul */
92 const int float_mul;
93
94 /* fdivs */
95 const int float_div_sf;
96
97 /* fdivd */
98 const int float_div_df;
99
100 /* fsqrts */
101 const int float_sqrt_sf;
102
103 /* fsqrtd */
104 const int float_sqrt_df;
105
106 /* umul/smul */
107 const int int_mul;
108
109 /* mulX */
110 const int int_mulX;
111
112 /* integer multiply cost for each bit set past the most
113 significant 3, so the formula for multiply cost becomes:
114
115 if (rs1 < 0)
116 highest_bit = highest_clear_bit(rs1);
117 else
118 highest_bit = highest_set_bit(rs1);
119 if (highest_bit < 3)
120 highest_bit = 3;
121 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
122
123 A value of zero indicates that the multiply cost is fixed,
124 and not variable. */
125 const int int_mul_bit_factor;
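/* Worked example (illustrative only, applying the formula above to the
   ultrasparc_costs entry below, where int_mul is COSTS_N_INSNS (4) and
   int_mul_bit_factor is 2): if the highest set bit of rs1 is bit 11, the
   estimated cost is COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. the base
   multiply cost plus 4 extra cost units.  A bit factor of 0, as used by
   most entries, keeps the cost at the flat int_mul/int_mulX value.  */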
126
127 /* udiv/sdiv */
128 const int int_div;
129
130 /* divX */
131 const int int_divX;
132
133 /* movcc, movr */
134 const int int_cmove;
135
136 /* penalty for shifts, due to scheduling rules etc. */
137 const int shift_penalty;
138 };
139
140 static const
141 struct processor_costs cypress_costs = {
142 COSTS_N_INSNS (2), /* int load */
143 COSTS_N_INSNS (2), /* int signed load */
144 COSTS_N_INSNS (2), /* int zeroed load */
145 COSTS_N_INSNS (2), /* float load */
146 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
147 COSTS_N_INSNS (5), /* fadd, fsub */
148 COSTS_N_INSNS (1), /* fcmp */
149 COSTS_N_INSNS (1), /* fmov, fmovr */
150 COSTS_N_INSNS (7), /* fmul */
151 COSTS_N_INSNS (37), /* fdivs */
152 COSTS_N_INSNS (37), /* fdivd */
153 COSTS_N_INSNS (63), /* fsqrts */
154 COSTS_N_INSNS (63), /* fsqrtd */
155 COSTS_N_INSNS (1), /* imul */
156 COSTS_N_INSNS (1), /* imulX */
157 0, /* imul bit factor */
158 COSTS_N_INSNS (1), /* idiv */
159 COSTS_N_INSNS (1), /* idivX */
160 COSTS_N_INSNS (1), /* movcc/movr */
161 0, /* shift penalty */
162 };
163
164 static const
165 struct processor_costs supersparc_costs = {
166 COSTS_N_INSNS (1), /* int load */
167 COSTS_N_INSNS (1), /* int signed load */
168 COSTS_N_INSNS (1), /* int zeroed load */
169 COSTS_N_INSNS (0), /* float load */
170 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
171 COSTS_N_INSNS (3), /* fadd, fsub */
172 COSTS_N_INSNS (3), /* fcmp */
173 COSTS_N_INSNS (1), /* fmov, fmovr */
174 COSTS_N_INSNS (3), /* fmul */
175 COSTS_N_INSNS (6), /* fdivs */
176 COSTS_N_INSNS (9), /* fdivd */
177 COSTS_N_INSNS (12), /* fsqrts */
178 COSTS_N_INSNS (12), /* fsqrtd */
179 COSTS_N_INSNS (4), /* imul */
180 COSTS_N_INSNS (4), /* imulX */
181 0, /* imul bit factor */
182 COSTS_N_INSNS (4), /* idiv */
183 COSTS_N_INSNS (4), /* idivX */
184 COSTS_N_INSNS (1), /* movcc/movr */
185 1, /* shift penalty */
186 };
187
188 static const
189 struct processor_costs hypersparc_costs = {
190 COSTS_N_INSNS (1), /* int load */
191 COSTS_N_INSNS (1), /* int signed load */
192 COSTS_N_INSNS (1), /* int zeroed load */
193 COSTS_N_INSNS (1), /* float load */
194 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
195 COSTS_N_INSNS (1), /* fadd, fsub */
196 COSTS_N_INSNS (1), /* fcmp */
197 COSTS_N_INSNS (1), /* fmov, fmovr */
198 COSTS_N_INSNS (1), /* fmul */
199 COSTS_N_INSNS (8), /* fdivs */
200 COSTS_N_INSNS (12), /* fdivd */
201 COSTS_N_INSNS (17), /* fsqrts */
202 COSTS_N_INSNS (17), /* fsqrtd */
203 COSTS_N_INSNS (17), /* imul */
204 COSTS_N_INSNS (17), /* imulX */
205 0, /* imul bit factor */
206 COSTS_N_INSNS (17), /* idiv */
207 COSTS_N_INSNS (17), /* idivX */
208 COSTS_N_INSNS (1), /* movcc/movr */
209 0, /* shift penalty */
210 };
211
212 static const
213 struct processor_costs leon_costs = {
214 COSTS_N_INSNS (1), /* int load */
215 COSTS_N_INSNS (1), /* int signed load */
216 COSTS_N_INSNS (1), /* int zeroed load */
217 COSTS_N_INSNS (1), /* float load */
218 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
219 COSTS_N_INSNS (1), /* fadd, fsub */
220 COSTS_N_INSNS (1), /* fcmp */
221 COSTS_N_INSNS (1), /* fmov, fmovr */
222 COSTS_N_INSNS (1), /* fmul */
223 COSTS_N_INSNS (15), /* fdivs */
224 COSTS_N_INSNS (15), /* fdivd */
225 COSTS_N_INSNS (23), /* fsqrts */
226 COSTS_N_INSNS (23), /* fsqrtd */
227 COSTS_N_INSNS (5), /* imul */
228 COSTS_N_INSNS (5), /* imulX */
229 0, /* imul bit factor */
230 COSTS_N_INSNS (5), /* idiv */
231 COSTS_N_INSNS (5), /* idivX */
232 COSTS_N_INSNS (1), /* movcc/movr */
233 0, /* shift penalty */
234 };
235
236 static const
237 struct processor_costs leon3_costs = {
238 COSTS_N_INSNS (1), /* int load */
239 COSTS_N_INSNS (1), /* int signed load */
240 COSTS_N_INSNS (1), /* int zeroed load */
241 COSTS_N_INSNS (1), /* float load */
242 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
243 COSTS_N_INSNS (1), /* fadd, fsub */
244 COSTS_N_INSNS (1), /* fcmp */
245 COSTS_N_INSNS (1), /* fmov, fmovr */
246 COSTS_N_INSNS (1), /* fmul */
247 COSTS_N_INSNS (14), /* fdivs */
248 COSTS_N_INSNS (15), /* fdivd */
249 COSTS_N_INSNS (22), /* fsqrts */
250 COSTS_N_INSNS (23), /* fsqrtd */
251 COSTS_N_INSNS (5), /* imul */
252 COSTS_N_INSNS (5), /* imulX */
253 0, /* imul bit factor */
254 COSTS_N_INSNS (35), /* idiv */
255 COSTS_N_INSNS (35), /* idivX */
256 COSTS_N_INSNS (1), /* movcc/movr */
257 0, /* shift penalty */
258 };
259
260 static const
261 struct processor_costs sparclet_costs = {
262 COSTS_N_INSNS (3), /* int load */
263 COSTS_N_INSNS (3), /* int signed load */
264 COSTS_N_INSNS (1), /* int zeroed load */
265 COSTS_N_INSNS (1), /* float load */
266 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
267 COSTS_N_INSNS (1), /* fadd, fsub */
268 COSTS_N_INSNS (1), /* fcmp */
269 COSTS_N_INSNS (1), /* fmov, fmovr */
270 COSTS_N_INSNS (1), /* fmul */
271 COSTS_N_INSNS (1), /* fdivs */
272 COSTS_N_INSNS (1), /* fdivd */
273 COSTS_N_INSNS (1), /* fsqrts */
274 COSTS_N_INSNS (1), /* fsqrtd */
275 COSTS_N_INSNS (5), /* imul */
276 COSTS_N_INSNS (5), /* imulX */
277 0, /* imul bit factor */
278 COSTS_N_INSNS (5), /* idiv */
279 COSTS_N_INSNS (5), /* idivX */
280 COSTS_N_INSNS (1), /* movcc/movr */
281 0, /* shift penalty */
282 };
283
284 static const
285 struct processor_costs ultrasparc_costs = {
286 COSTS_N_INSNS (2), /* int load */
287 COSTS_N_INSNS (3), /* int signed load */
288 COSTS_N_INSNS (2), /* int zeroed load */
289 COSTS_N_INSNS (2), /* float load */
290 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
291 COSTS_N_INSNS (4), /* fadd, fsub */
292 COSTS_N_INSNS (1), /* fcmp */
293 COSTS_N_INSNS (2), /* fmov, fmovr */
294 COSTS_N_INSNS (4), /* fmul */
295 COSTS_N_INSNS (13), /* fdivs */
296 COSTS_N_INSNS (23), /* fdivd */
297 COSTS_N_INSNS (13), /* fsqrts */
298 COSTS_N_INSNS (23), /* fsqrtd */
299 COSTS_N_INSNS (4), /* imul */
300 COSTS_N_INSNS (4), /* imulX */
301 2, /* imul bit factor */
302 COSTS_N_INSNS (37), /* idiv */
303 COSTS_N_INSNS (68), /* idivX */
304 COSTS_N_INSNS (2), /* movcc/movr */
305 2, /* shift penalty */
306 };
307
308 static const
309 struct processor_costs ultrasparc3_costs = {
310 COSTS_N_INSNS (2), /* int load */
311 COSTS_N_INSNS (3), /* int signed load */
312 COSTS_N_INSNS (3), /* int zeroed load */
313 COSTS_N_INSNS (2), /* float load */
314 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
315 COSTS_N_INSNS (4), /* fadd, fsub */
316 COSTS_N_INSNS (5), /* fcmp */
317 COSTS_N_INSNS (3), /* fmov, fmovr */
318 COSTS_N_INSNS (4), /* fmul */
319 COSTS_N_INSNS (17), /* fdivs */
320 COSTS_N_INSNS (20), /* fdivd */
321 COSTS_N_INSNS (20), /* fsqrts */
322 COSTS_N_INSNS (29), /* fsqrtd */
323 COSTS_N_INSNS (6), /* imul */
324 COSTS_N_INSNS (6), /* imulX */
325 0, /* imul bit factor */
326 COSTS_N_INSNS (40), /* idiv */
327 COSTS_N_INSNS (71), /* idivX */
328 COSTS_N_INSNS (2), /* movcc/movr */
329 0, /* shift penalty */
330 };
331
332 static const
333 struct processor_costs niagara_costs = {
334 COSTS_N_INSNS (3), /* int load */
335 COSTS_N_INSNS (3), /* int signed load */
336 COSTS_N_INSNS (3), /* int zeroed load */
337 COSTS_N_INSNS (9), /* float load */
338 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
339 COSTS_N_INSNS (8), /* fadd, fsub */
340 COSTS_N_INSNS (26), /* fcmp */
341 COSTS_N_INSNS (8), /* fmov, fmovr */
342 COSTS_N_INSNS (29), /* fmul */
343 COSTS_N_INSNS (54), /* fdivs */
344 COSTS_N_INSNS (83), /* fdivd */
345 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
346 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
347 COSTS_N_INSNS (11), /* imul */
348 COSTS_N_INSNS (11), /* imulX */
349 0, /* imul bit factor */
350 COSTS_N_INSNS (72), /* idiv */
351 COSTS_N_INSNS (72), /* idivX */
352 COSTS_N_INSNS (1), /* movcc/movr */
353 0, /* shift penalty */
354 };
355
356 static const
357 struct processor_costs niagara2_costs = {
358 COSTS_N_INSNS (3), /* int load */
359 COSTS_N_INSNS (3), /* int signed load */
360 COSTS_N_INSNS (3), /* int zeroed load */
361 COSTS_N_INSNS (3), /* float load */
362 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
363 COSTS_N_INSNS (6), /* fadd, fsub */
364 COSTS_N_INSNS (6), /* fcmp */
365 COSTS_N_INSNS (6), /* fmov, fmovr */
366 COSTS_N_INSNS (6), /* fmul */
367 COSTS_N_INSNS (19), /* fdivs */
368 COSTS_N_INSNS (33), /* fdivd */
369 COSTS_N_INSNS (19), /* fsqrts */
370 COSTS_N_INSNS (33), /* fsqrtd */
371 COSTS_N_INSNS (5), /* imul */
372 COSTS_N_INSNS (5), /* imulX */
373 0, /* imul bit factor */
374 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
376 COSTS_N_INSNS (1), /* movcc/movr */
377 0, /* shift penalty */
378 };
379
380 static const
381 struct processor_costs niagara3_costs = {
382 COSTS_N_INSNS (3), /* int load */
383 COSTS_N_INSNS (3), /* int signed load */
384 COSTS_N_INSNS (3), /* int zeroed load */
385 COSTS_N_INSNS (3), /* float load */
386 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
387 COSTS_N_INSNS (9), /* fadd, fsub */
388 COSTS_N_INSNS (9), /* fcmp */
389 COSTS_N_INSNS (9), /* fmov, fmovr */
390 COSTS_N_INSNS (9), /* fmul */
391 COSTS_N_INSNS (23), /* fdivs */
392 COSTS_N_INSNS (37), /* fdivd */
393 COSTS_N_INSNS (23), /* fsqrts */
394 COSTS_N_INSNS (37), /* fsqrtd */
395 COSTS_N_INSNS (9), /* imul */
396 COSTS_N_INSNS (9), /* imulX */
397 0, /* imul bit factor */
398 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
399 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
400 COSTS_N_INSNS (1), /* movcc/movr */
401 0, /* shift penalty */
402 };
403
404 static const
405 struct processor_costs niagara4_costs = {
406 COSTS_N_INSNS (5), /* int load */
407 COSTS_N_INSNS (5), /* int signed load */
408 COSTS_N_INSNS (5), /* int zeroed load */
409 COSTS_N_INSNS (5), /* float load */
410 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
411 COSTS_N_INSNS (11), /* fadd, fsub */
412 COSTS_N_INSNS (11), /* fcmp */
413 COSTS_N_INSNS (11), /* fmov, fmovr */
414 COSTS_N_INSNS (11), /* fmul */
415 COSTS_N_INSNS (24), /* fdivs */
416 COSTS_N_INSNS (37), /* fdivd */
417 COSTS_N_INSNS (24), /* fsqrts */
418 COSTS_N_INSNS (37), /* fsqrtd */
419 COSTS_N_INSNS (12), /* imul */
420 COSTS_N_INSNS (12), /* imulX */
421 0, /* imul bit factor */
422 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
423 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
424 COSTS_N_INSNS (1), /* movcc/movr */
425 0, /* shift penalty */
426 };
427
428 static const
429 struct processor_costs niagara7_costs = {
430 COSTS_N_INSNS (5), /* int load */
431 COSTS_N_INSNS (5), /* int signed load */
432 COSTS_N_INSNS (5), /* int zeroed load */
433 COSTS_N_INSNS (5), /* float load */
434 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
435 COSTS_N_INSNS (11), /* fadd, fsub */
436 COSTS_N_INSNS (11), /* fcmp */
437 COSTS_N_INSNS (11), /* fmov, fmovr */
438 COSTS_N_INSNS (11), /* fmul */
439 COSTS_N_INSNS (24), /* fdivs */
440 COSTS_N_INSNS (37), /* fdivd */
441 COSTS_N_INSNS (24), /* fsqrts */
442 COSTS_N_INSNS (37), /* fsqrtd */
443 COSTS_N_INSNS (12), /* imul */
444 COSTS_N_INSNS (12), /* imulX */
445 0, /* imul bit factor */
446 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
447 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
448 COSTS_N_INSNS (1), /* movcc/movr */
449 0, /* shift penalty */
450 };
451
452 static const
453 struct processor_costs m8_costs = {
454 COSTS_N_INSNS (3), /* int load */
455 COSTS_N_INSNS (3), /* int signed load */
456 COSTS_N_INSNS (3), /* int zeroed load */
457 COSTS_N_INSNS (3), /* float load */
458 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
459 COSTS_N_INSNS (9), /* fadd, fsub */
460 COSTS_N_INSNS (9), /* fcmp */
461 COSTS_N_INSNS (9), /* fmov, fmovr */
462 COSTS_N_INSNS (9), /* fmul */
463 COSTS_N_INSNS (26), /* fdivs */
464 COSTS_N_INSNS (30), /* fdivd */
465 COSTS_N_INSNS (33), /* fsqrts */
466 COSTS_N_INSNS (41), /* fsqrtd */
467 COSTS_N_INSNS (12), /* imul */
468 COSTS_N_INSNS (10), /* imulX */
469 0, /* imul bit factor */
470 COSTS_N_INSNS (57), /* udiv/sdiv */
471 COSTS_N_INSNS (30), /* udivx/sdivx */
472 COSTS_N_INSNS (1), /* movcc/movr */
473 0, /* shift penalty */
474 };
475
476 static const struct processor_costs *sparc_costs = &cypress_costs;
477
478 #ifdef HAVE_AS_RELAX_OPTION
479 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
480 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
481 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
482 somebody branches between the sethi and jmp. */
483 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
484 #else
485 #define LEAF_SIBCALL_SLOT_RESERVED_P \
486 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
487 #endif
488
489 /* Vector to say how input registers are mapped to output registers.
490 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
491 eliminate it. You must use -fomit-frame-pointer to get that. */
492 char leaf_reg_remap[] =
493 { 0, 1, 2, 3, 4, 5, 6, 7,
494 -1, -1, -1, -1, -1, -1, 14, -1,
495 -1, -1, -1, -1, -1, -1, -1, -1,
496 8, 9, 10, 11, 12, 13, -1, 15,
497
498 32, 33, 34, 35, 36, 37, 38, 39,
499 40, 41, 42, 43, 44, 45, 46, 47,
500 48, 49, 50, 51, 52, 53, 54, 55,
501 56, 57, 58, 59, 60, 61, 62, 63,
502 64, 65, 66, 67, 68, 69, 70, 71,
503 72, 73, 74, 75, 76, 77, 78, 79,
504 80, 81, 82, 83, 84, 85, 86, 87,
505 88, 89, 90, 91, 92, 93, 94, 95,
506 96, 97, 98, 99, 100, 101, 102};
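/* For example, in a leaf function the incoming argument register %i0
   (hard register 24) is rewritten as %o0 (hard register 8), %i5 (29) as
   %o5 (13) and %i7 (31) as %o7 (15), while entries of -1 (e.g. the local
   registers 16-23 and the frame pointer %i6, hard register 30) are
   registers this table does not remap.  */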
507
508 /* Vector, indexed by hard register number, which contains 1
509 for a register that is allowable in a candidate for leaf
510 function treatment. */
511 char sparc_leaf_regs[] =
512 { 1, 1, 1, 1, 1, 1, 1, 1,
513 0, 0, 0, 0, 0, 0, 1, 0,
514 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 1, 1, 1, 1, 1, 0, 1,
516 1, 1, 1, 1, 1, 1, 1, 1,
517 1, 1, 1, 1, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1};
525
526 struct GTY(()) machine_function
527 {
528 /* Size of the frame of the function. */
529 HOST_WIDE_INT frame_size;
530
531 /* Size of the frame of the function minus the register window save area
532 and the outgoing argument area. */
533 HOST_WIDE_INT apparent_frame_size;
534
535 /* Register we pretend the frame pointer is allocated to. Normally, this
536 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
537 record "offset" separately as it may be too big for (reg + disp). */
538 rtx frame_base_reg;
539 HOST_WIDE_INT frame_base_offset;
540
541 /* Number of global or FP registers to be saved (as 4-byte quantities). */
542 int n_global_fp_regs;
543
544 /* True if the current function is leaf and uses only leaf regs,
545 so that the SPARC leaf function optimization can be applied.
546 Private version of crtl->uses_only_leaf_regs, see
547 sparc_expand_prologue for the rationale. */
548 int leaf_function_p;
549
550 /* True if the prologue saves local or in registers. */
551 bool save_local_in_regs_p;
552
553 /* True if the data calculated by sparc_expand_prologue are valid. */
554 bool prologue_data_valid_p;
555 };
556
557 #define sparc_frame_size cfun->machine->frame_size
558 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
559 #define sparc_frame_base_reg cfun->machine->frame_base_reg
560 #define sparc_frame_base_offset cfun->machine->frame_base_offset
561 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
562 #define sparc_leaf_function_p cfun->machine->leaf_function_p
563 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
564 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
565
566 /* 1 if the next opcode is to be specially indented. */
567 int sparc_indent_opcode = 0;
568
569 static void sparc_option_override (void);
570 static void sparc_init_modes (void);
571 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
572 const_tree, bool, bool, int *, int *);
573
574 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
575 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
576
577 static void sparc_emit_set_const32 (rtx, rtx);
578 static void sparc_emit_set_const64 (rtx, rtx);
579 static void sparc_output_addr_vec (rtx);
580 static void sparc_output_addr_diff_vec (rtx);
581 static void sparc_output_deferred_case_vectors (void);
582 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
583 static bool sparc_legitimate_constant_p (machine_mode, rtx);
584 static rtx sparc_builtin_saveregs (void);
585 static int epilogue_renumber (rtx *, int);
586 static bool sparc_assemble_integer (rtx, unsigned int, int);
587 static int set_extends (rtx_insn *);
588 static void sparc_asm_function_prologue (FILE *);
589 static void sparc_asm_function_epilogue (FILE *);
590 #ifdef TARGET_SOLARIS
591 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
592 tree) ATTRIBUTE_UNUSED;
593 #endif
594 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
595 static int sparc_issue_rate (void);
596 static void sparc_sched_init (FILE *, int, int);
597 static int sparc_use_sched_lookahead (void);
598
599 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
600 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
601 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
602 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
603 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
604
605 static bool sparc_function_ok_for_sibcall (tree, tree);
606 static void sparc_init_libfuncs (void);
607 static void sparc_init_builtins (void);
608 static void sparc_fpu_init_builtins (void);
609 static void sparc_vis_init_builtins (void);
610 static tree sparc_builtin_decl (unsigned, bool);
611 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
612 static tree sparc_fold_builtin (tree, int, tree *, bool);
613 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
614 HOST_WIDE_INT, tree);
615 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
616 HOST_WIDE_INT, const_tree);
617 static struct machine_function * sparc_init_machine_status (void);
618 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
619 static rtx sparc_tls_get_addr (void);
620 static rtx sparc_tls_got (void);
621 static int sparc_register_move_cost (machine_mode,
622 reg_class_t, reg_class_t);
623 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
624 static rtx sparc_function_value (const_tree, const_tree, bool);
625 static rtx sparc_libcall_value (machine_mode, const_rtx);
626 static bool sparc_function_value_regno_p (const unsigned int);
627 static rtx sparc_struct_value_rtx (tree, int);
628 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
629 int *, const_tree, int);
630 static bool sparc_return_in_memory (const_tree, const_tree);
631 static bool sparc_strict_argument_naming (cumulative_args_t);
632 static void sparc_va_start (tree, rtx);
633 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
634 static bool sparc_vector_mode_supported_p (machine_mode);
635 static bool sparc_tls_referenced_p (rtx);
636 static rtx sparc_legitimize_tls_address (rtx);
637 static rtx sparc_legitimize_pic_address (rtx, rtx);
638 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
639 static rtx sparc_delegitimize_address (rtx);
640 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
641 static bool sparc_pass_by_reference (cumulative_args_t,
642 machine_mode, const_tree, bool);
643 static void sparc_function_arg_advance (cumulative_args_t,
644 machine_mode, const_tree, bool);
645 static rtx sparc_function_arg_1 (cumulative_args_t,
646 machine_mode, const_tree, bool, bool);
647 static rtx sparc_function_arg (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_incoming_arg (cumulative_args_t,
650 machine_mode, const_tree, bool);
651 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
652 static unsigned int sparc_function_arg_boundary (machine_mode,
653 const_tree);
654 static int sparc_arg_partial_bytes (cumulative_args_t,
655 machine_mode, tree, bool);
656 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
657 static void sparc_file_end (void);
658 static bool sparc_frame_pointer_required (void);
659 static bool sparc_can_eliminate (const int, const int);
660 static rtx sparc_builtin_setjmp_frame_value (void);
661 static void sparc_conditional_register_usage (void);
662 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
663 static const char *sparc_mangle_type (const_tree);
664 #endif
665 static void sparc_trampoline_init (rtx, tree, rtx);
666 static machine_mode sparc_preferred_simd_mode (scalar_mode);
667 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
668 static bool sparc_lra_p (void);
669 static bool sparc_print_operand_punct_valid_p (unsigned char);
670 static void sparc_print_operand (FILE *, rtx, int);
671 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
672 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
673 machine_mode,
674 secondary_reload_info *);
675 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
676 reg_class_t);
677 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
678 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
679 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
680 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
681 static unsigned int sparc_min_arithmetic_precision (void);
682 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
683 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
684 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
685 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
686 reg_class_t);
687 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
688 \f
689 #ifdef SUBTARGET_ATTRIBUTE_TABLE
690 /* Table of valid machine attributes. */
691 static const struct attribute_spec sparc_attribute_table[] =
692 {
693 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
694 do_diagnostic } */
695 SUBTARGET_ATTRIBUTE_TABLE,
696 { NULL, 0, 0, false, false, false, NULL, false }
697 };
698 #endif
699 \f
700 /* Option handling. */
701
702 /* Parsed value. */
703 enum cmodel sparc_cmodel;
704
705 char sparc_hard_reg_printed[8];
706
707 /* Initialize the GCC target structure. */
708
709 /* The default is to use .half rather than .short for aligned HI objects. */
710 #undef TARGET_ASM_ALIGNED_HI_OP
711 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
712
713 #undef TARGET_ASM_UNALIGNED_HI_OP
714 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
715 #undef TARGET_ASM_UNALIGNED_SI_OP
716 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
717 #undef TARGET_ASM_UNALIGNED_DI_OP
718 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
719
720 /* The target hook has to handle DI-mode values. */
721 #undef TARGET_ASM_INTEGER
722 #define TARGET_ASM_INTEGER sparc_assemble_integer
723
724 #undef TARGET_ASM_FUNCTION_PROLOGUE
725 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
726 #undef TARGET_ASM_FUNCTION_EPILOGUE
727 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
728
729 #undef TARGET_SCHED_ADJUST_COST
730 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
731 #undef TARGET_SCHED_ISSUE_RATE
732 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
733 #undef TARGET_SCHED_INIT
734 #define TARGET_SCHED_INIT sparc_sched_init
735 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
736 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
737
738 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
739 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
740
741 #undef TARGET_INIT_LIBFUNCS
742 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
743
744 #undef TARGET_LEGITIMIZE_ADDRESS
745 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
746 #undef TARGET_DELEGITIMIZE_ADDRESS
747 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
748 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
749 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
750
751 #undef TARGET_INIT_BUILTINS
752 #define TARGET_INIT_BUILTINS sparc_init_builtins
753 #undef TARGET_BUILTIN_DECL
754 #define TARGET_BUILTIN_DECL sparc_builtin_decl
755 #undef TARGET_EXPAND_BUILTIN
756 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
757 #undef TARGET_FOLD_BUILTIN
758 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
759
760 #if TARGET_TLS
761 #undef TARGET_HAVE_TLS
762 #define TARGET_HAVE_TLS true
763 #endif
764
765 #undef TARGET_CANNOT_FORCE_CONST_MEM
766 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
767
768 #undef TARGET_ASM_OUTPUT_MI_THUNK
769 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
770 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
771 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
772
773 #undef TARGET_RTX_COSTS
774 #define TARGET_RTX_COSTS sparc_rtx_costs
775 #undef TARGET_ADDRESS_COST
776 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
777 #undef TARGET_REGISTER_MOVE_COST
778 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
779
780 #undef TARGET_PROMOTE_FUNCTION_MODE
781 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
782
783 #undef TARGET_FUNCTION_VALUE
784 #define TARGET_FUNCTION_VALUE sparc_function_value
785 #undef TARGET_LIBCALL_VALUE
786 #define TARGET_LIBCALL_VALUE sparc_libcall_value
787 #undef TARGET_FUNCTION_VALUE_REGNO_P
788 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
789
790 #undef TARGET_STRUCT_VALUE_RTX
791 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
792 #undef TARGET_RETURN_IN_MEMORY
793 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
794 #undef TARGET_MUST_PASS_IN_STACK
795 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
796 #undef TARGET_PASS_BY_REFERENCE
797 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
798 #undef TARGET_ARG_PARTIAL_BYTES
799 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
800 #undef TARGET_FUNCTION_ARG_ADVANCE
801 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
802 #undef TARGET_FUNCTION_ARG
803 #define TARGET_FUNCTION_ARG sparc_function_arg
804 #undef TARGET_FUNCTION_INCOMING_ARG
805 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
806 #undef TARGET_FUNCTION_ARG_PADDING
807 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
808 #undef TARGET_FUNCTION_ARG_BOUNDARY
809 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
810
811 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
812 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
813 #undef TARGET_STRICT_ARGUMENT_NAMING
814 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
815
816 #undef TARGET_EXPAND_BUILTIN_VA_START
817 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
818 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
819 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
820
821 #undef TARGET_VECTOR_MODE_SUPPORTED_P
822 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
823
824 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
825 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
826
827 #ifdef SUBTARGET_INSERT_ATTRIBUTES
828 #undef TARGET_INSERT_ATTRIBUTES
829 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
830 #endif
831
832 #ifdef SUBTARGET_ATTRIBUTE_TABLE
833 #undef TARGET_ATTRIBUTE_TABLE
834 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
835 #endif
836
837 #undef TARGET_OPTION_OVERRIDE
838 #define TARGET_OPTION_OVERRIDE sparc_option_override
839
840 #ifdef TARGET_THREAD_SSP_OFFSET
841 #undef TARGET_STACK_PROTECT_GUARD
842 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
843 #endif
844
845 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
846 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
847 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
848 #endif
849
850 #undef TARGET_ASM_FILE_END
851 #define TARGET_ASM_FILE_END sparc_file_end
852
853 #undef TARGET_FRAME_POINTER_REQUIRED
854 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
855
856 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
857 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
858
859 #undef TARGET_CAN_ELIMINATE
860 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
861
862 #undef TARGET_PREFERRED_RELOAD_CLASS
863 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
864
865 #undef TARGET_SECONDARY_RELOAD
866 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
867 #undef TARGET_SECONDARY_MEMORY_NEEDED
868 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
869 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
870 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
871
872 #undef TARGET_CONDITIONAL_REGISTER_USAGE
873 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
874
875 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
876 #undef TARGET_MANGLE_TYPE
877 #define TARGET_MANGLE_TYPE sparc_mangle_type
878 #endif
879
880 #undef TARGET_LRA_P
881 #define TARGET_LRA_P sparc_lra_p
882
883 #undef TARGET_LEGITIMATE_ADDRESS_P
884 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
885
886 #undef TARGET_LEGITIMATE_CONSTANT_P
887 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
888
889 #undef TARGET_TRAMPOLINE_INIT
890 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
891
892 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
893 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
894 #undef TARGET_PRINT_OPERAND
895 #define TARGET_PRINT_OPERAND sparc_print_operand
896 #undef TARGET_PRINT_OPERAND_ADDRESS
897 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
898
899 /* The value stored by LDSTUB. */
900 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
901 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
902
903 #undef TARGET_CSTORE_MODE
904 #define TARGET_CSTORE_MODE sparc_cstore_mode
905
906 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
907 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
908
909 #undef TARGET_FIXED_CONDITION_CODE_REGS
910 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
911
912 #undef TARGET_MIN_ARITHMETIC_PRECISION
913 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
914
915 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
916 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
917
918 #undef TARGET_HARD_REGNO_NREGS
919 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
920 #undef TARGET_HARD_REGNO_MODE_OK
921 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
922
923 #undef TARGET_MODES_TIEABLE_P
924 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
925
926 #undef TARGET_CAN_CHANGE_MODE_CLASS
927 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
928
929 #undef TARGET_CONSTANT_ALIGNMENT
930 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
931
932 struct gcc_target targetm = TARGET_INITIALIZER;
933
934 /* Return the memory reference contained in X if any, zero otherwise. */
935
936 static rtx
937 mem_ref (rtx x)
938 {
939 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
940 x = XEXP (x, 0);
941
942 if (MEM_P (x))
943 return x;
944
945 return NULL_RTX;
946 }
947
948 /* True if INSN is a floating-point instruction. */
949
950 static bool
951 fpop_insn_p (rtx_insn *insn)
952 {
953 if (GET_CODE (PATTERN (insn)) != SET)
954 return false;
955
956 switch (get_attr_type (insn))
957 {
958 case TYPE_FPMOVE:
959 case TYPE_FPCMOVE:
960 case TYPE_FP:
961 case TYPE_FPCMP:
962 case TYPE_FPMUL:
963 case TYPE_FPDIVS:
964 case TYPE_FPSQRTS:
965 case TYPE_FPDIVD:
966 case TYPE_FPSQRTD:
967 return true;
968 default:
969 return false;
970 }
971 }
972
973 /* True if INSN is an atomic instruction. */
974
975 static bool
976 atomic_insn_for_leon3_p (rtx_insn *insn)
977 {
978 switch (INSN_CODE (insn))
979 {
980 case CODE_FOR_swapsi:
981 case CODE_FOR_ldstub:
982 case CODE_FOR_atomic_compare_and_swap_leon3_1:
983 return true;
984 default:
985 return false;
986 }
987 }
988
989 /* We use a machine specific pass to enable workarounds for errata.
990
991 We need to have the (essentially) final form of the insn stream in order
992 to properly detect the various hazards. Therefore, this machine specific
993 pass runs as late as possible. */
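/* A minimal sketch, using the usual register_pass mechanism, of how a
   late-running machine pass like this one is typically inserted after
   delayed-branch scheduling; the registration site and the "dbr"
   reference pass shown here are assumptions, not a copy of the actual
   registration code:

     opt_pass *errata_pass = make_pass_work_around_errata (g);
     struct register_pass_info insert_pass_work_around_errata =
       {
         errata_pass,            -- pass object created below
         "dbr",                  -- run after delayed-branch scheduling
         1,                      -- instance number of the reference pass
         PASS_POS_INSERT_AFTER   -- position relative to it
       };
     register_pass (&insert_pass_work_around_errata);  */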
994
995 /* True if INSN is an md pattern or asm statement. */
996 #define USEFUL_INSN_P(INSN) \
997 (NONDEBUG_INSN_P (INSN) \
998 && GET_CODE (PATTERN (INSN)) != USE \
999 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1000
1001 static unsigned int
1002 sparc_do_work_around_errata (void)
1003 {
1004 rtx_insn *insn, *next;
1005
1006 /* Force all instructions to be split into their final form. */
1007 split_all_insns_noflow ();
1008
1009 /* Now look for specific patterns in the insn stream. */
1010 for (insn = get_insns (); insn; insn = next)
1011 {
1012 bool insert_nop = false;
1013 rtx set;
1014 rtx_insn *jump;
1015 rtx_sequence *seq;
1016
1017 /* Look into the instruction in a delay slot. */
1018 if (NONJUMP_INSN_P (insn)
1019 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1020 {
1021 jump = seq->insn (0);
1022 insn = seq->insn (1);
1023 }
1024 else if (JUMP_P (insn))
1025 jump = insn;
1026 else
1027 jump = NULL;
1028
1029 /* Place a NOP at the branch target of an integer branch if it is
1030 a floating-point operation or a floating-point branch. */
1031 if (sparc_fix_gr712rc
1032 && jump != NULL_RTX
1033 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1034 {
1035 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1036 if (target
1037 && (fpop_insn_p (target)
1038 || ((JUMP_P (target)
1039 && get_attr_branch_type (target) == BRANCH_TYPE_FCC))))
1040 emit_insn_before (gen_nop (), target);
1041 }
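	  /* Illustrative instance of the hazard handled above (register
	     names are hypothetical): an integer (icc) branch whose target
	     is a floating-point operation,

		 be	.L1
		 ...
	     .L1:
		 faddd	%f0, %f2, %f4

	     gets a NOP emitted immediately before the faddd.  */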
1042
1043 /* Insert a NOP between a load instruction and an atomic instruction.
1044 Also insert a NOP at the branch target if there is a load in the delay
1045 slot and an atomic instruction at the branch target. */
1046 if (sparc_fix_ut700
1047 && NONJUMP_INSN_P (insn)
1048 && (set = single_set (insn)) != NULL_RTX
1049 && MEM_P (SET_SRC (set))
1050 && REG_P (SET_DEST (set)))
1051 {
1052 if (jump)
1053 {
1054 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1055 if (target
1056 && atomic_insn_for_leon3_p (target))
1057 emit_insn_before (gen_nop (), target);
1058 }
1059
1060 next = next_active_insn (insn);
1061 if (!next)
1062 break;
1063
1064 if (atomic_insn_for_leon3_p (next))
1065 insert_nop = true;
1066 }
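	  /* Illustrative instance of the case handled above (hypothetical
	     registers): a load followed directly by one of the LEON3 atomic
	     instructions,

		 ld	[%o0], %g1
		 ldstub	[%o2], %g2

	     gets a NOP inserted between the two instructions; likewise a NOP
	     is placed at the branch target when the load sits in a delay slot
	     and the target is an atomic instruction.  */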
1067
1068 /* Look for either of these two sequences:
1069
1070 Sequence A:
1071 1. store of word size or less (e.g. st / stb / sth / stf)
1072 2. any single instruction that is not a load or store
1073 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1074
1075 Sequence B:
1076 1. store of double word size (e.g. std / stdf)
1077 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
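      /* An illustrative instance of sequence A (hypothetical operands):

	     st   %g1, [%o0]          ! 1. single-word store
	     add  %o1, 1, %o1         ! 2. non-memory instruction
	     st   %o1, [%o0 + 4]      ! 3. second store

	 the workaround breaks such a pattern by emitting a NOP right after
	 the leading store, i.e. before the second instruction.  */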
1078 if (sparc_fix_b2bst
1079 && NONJUMP_INSN_P (insn)
1080 && (set = single_set (insn)) != NULL_RTX
1081 && MEM_P (SET_DEST (set)))
1082 {
1083 /* Sequence B begins with a double-word store. */
1084 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1085 rtx_insn *after;
1086 int i;
1087
1088 next = next_active_insn (insn);
1089 if (!next)
1090 break;
1091
1092 for (after = next, i = 0; i < 2; i++)
1093 {
1094 /* Skip empty assembly statements. */
1095 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1096 || (USEFUL_INSN_P (after)
1097 && (asm_noperands (PATTERN (after)) >= 0)
1098 && !strcmp (decode_asm_operands (PATTERN (after),
1099 NULL, NULL, NULL,
1100 NULL, NULL), "")))
1101 after = next_active_insn (after);
1102 if (!after)
1103 break;
1104
1105 /* If the insn is a branch, then it cannot be problematic. */
1106 if (!NONJUMP_INSN_P (after)
1107 || GET_CODE (PATTERN (after)) == SEQUENCE)
1108 break;
1109
1110 /* Sequence B is only two instructions long. */
1111 if (seq_b)
1112 {
1113 /* Add NOP if followed by a store. */
1114 if ((set = single_set (after)) != NULL_RTX
1115 && MEM_P (SET_DEST (set)))
1116 insert_nop = true;
1117
1118 /* Otherwise it is ok. */
1119 break;
1120 }
1121
1122 /* If the second instruction is a load or a store,
1123 then the sequence cannot be problematic. */
1124 if (i == 0)
1125 {
1126 if (((set = single_set (after)) != NULL_RTX)
1127 && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
1128 break;
1129
1130 after = next_active_insn (after);
1131 if (!after)
1132 break;
1133 }
1134
1135 /* Add NOP if third instruction is a store. */
1136 if (i == 1
1137 && ((set = single_set (after)) != NULL_RTX)
1138 && MEM_P (SET_DEST (set)))
1139 insert_nop = true;
1140 }
1141 }
1142 else
1143 /* Look for a single-word load into an odd-numbered FP register. */
1144 if (sparc_fix_at697f
1145 && NONJUMP_INSN_P (insn)
1146 && (set = single_set (insn)) != NULL_RTX
1147 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1148 && MEM_P (SET_SRC (set))
1149 && REG_P (SET_DEST (set))
1150 && REGNO (SET_DEST (set)) > 31
1151 && REGNO (SET_DEST (set)) % 2 != 0)
1152 {
1153 /* The wrong dependency is on the enclosing double register. */
1154 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1155 unsigned int src1, src2, dest;
1156 int code;
1157
1158 next = next_active_insn (insn);
1159 if (!next)
1160 break;
1161 /* If the insn is a branch, then it cannot be problematic. */
1162 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1163 continue;
1164
1165 extract_insn (next);
1166 code = INSN_CODE (next);
1167
1168 switch (code)
1169 {
1170 case CODE_FOR_adddf3:
1171 case CODE_FOR_subdf3:
1172 case CODE_FOR_muldf3:
1173 case CODE_FOR_divdf3:
1174 dest = REGNO (recog_data.operand[0]);
1175 src1 = REGNO (recog_data.operand[1]);
1176 src2 = REGNO (recog_data.operand[2]);
1177 if (src1 != src2)
1178 {
1179 /* Case [1-4]:
1180 ld [address], %fx+1
1181 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1182 if ((src1 == x || src2 == x)
1183 && (dest == src1 || dest == src2))
1184 insert_nop = true;
1185 }
1186 else
1187 {
1188 /* Case 5:
1189 ld [address], %fx+1
1190 FPOPd %fx, %fx, %fx */
1191 if (src1 == x
1192 && dest == src1
1193 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1194 insert_nop = true;
1195 }
1196 break;
1197
1198 case CODE_FOR_sqrtdf2:
1199 dest = REGNO (recog_data.operand[0]);
1200 src1 = REGNO (recog_data.operand[1]);
1201 /* Case 6:
1202 ld [address], %fx+1
1203 fsqrtd %fx, %fx */
1204 if (src1 == x && dest == src1)
1205 insert_nop = true;
1206 break;
1207
1208 default:
1209 break;
1210 }
1211 }
1212
1213 /* Look for a single-word load into an integer register. */
1214 else if (sparc_fix_ut699
1215 && NONJUMP_INSN_P (insn)
1216 && (set = single_set (insn)) != NULL_RTX
1217 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1218 && mem_ref (SET_SRC (set)) != NULL_RTX
1219 && REG_P (SET_DEST (set))
1220 && REGNO (SET_DEST (set)) < 32)
1221 {
1222 /* There is no problem if the second memory access has a data
1223 dependency on the first single-cycle load. */
1224 rtx x = SET_DEST (set);
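	  /* Illustrative instance (hypothetical registers): a single-cycle
	     load followed by a second memory access that does not use the
	     loaded value,

		 ld	[%o0], %o1
		 ld	[%o2], %o3	! independent of %o1

	     gets a NOP inserted between the two accesses.  */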
1225
1226 next = next_active_insn (insn);
1227 if (!next)
1228 break;
1229 /* If the insn is a branch, then it cannot be problematic. */
1230 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1231 continue;
1232
1233 /* Look for a second memory access to/from an integer register. */
1234 if ((set = single_set (next)) != NULL_RTX)
1235 {
1236 rtx src = SET_SRC (set);
1237 rtx dest = SET_DEST (set);
1238 rtx mem;
1239
1240 /* LDD is affected. */
1241 if ((mem = mem_ref (src)) != NULL_RTX
1242 && REG_P (dest)
1243 && REGNO (dest) < 32
1244 && !reg_mentioned_p (x, XEXP (mem, 0)))
1245 insert_nop = true;
1246
1247 /* STD is *not* affected. */
1248 else if (MEM_P (dest)
1249 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1250 && (src == CONST0_RTX (GET_MODE (dest))
1251 || (REG_P (src)
1252 && REGNO (src) < 32
1253 && REGNO (src) != REGNO (x)))
1254 && !reg_mentioned_p (x, XEXP (dest, 0)))
1255 insert_nop = true;
1256 }
1257 }
1258
1259 /* Look for a single-word load/operation into an FP register. */
1260 else if (sparc_fix_ut699
1261 && NONJUMP_INSN_P (insn)
1262 && (set = single_set (insn)) != NULL_RTX
1263 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1264 && REG_P (SET_DEST (set))
1265 && REGNO (SET_DEST (set)) > 31)
1266 {
1267 /* Number of instructions in the problematic window. */
1268 const int n_insns = 4;
1269 /* The problematic combination is with the sibling FP register. */
1270 const unsigned int x = REGNO (SET_DEST (set));
1271 const unsigned int y = x ^ 1;
1272 rtx_insn *after;
1273 int i;
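	  /* Illustrative instance (hypothetical registers): a single-word FP
	     load, a load into the sibling FP register, and then a single-word
	     store from the first register within the problematic window,

		 ld	[%o0], %f2
		 ld	[%o1], %f3	! sibling of %f2
		 st	%f2, [%o2]

	     gets a NOP inserted right after the first load.  */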
1274
1275 next = next_active_insn (insn);
1276 if (!next)
1277 break;
1278 /* If the insn is a branch, then it cannot be problematic. */
1279 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1280 continue;
1281
1282 /* Look for a second load/operation into the sibling FP register. */
1283 if (!((set = single_set (next)) != NULL_RTX
1284 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1285 && REG_P (SET_DEST (set))
1286 && REGNO (SET_DEST (set)) == y))
1287 continue;
1288
1289 /* Look for a (possible) store from the FP register in the next N
1290 instructions, but bail out if it is again modified or if there
1291 is a store from the sibling FP register before this store. */
1292 for (after = next, i = 0; i < n_insns; i++)
1293 {
1294 bool branch_p;
1295
1296 after = next_active_insn (after);
1297 if (!after)
1298 break;
1299
1300 /* This is a branch with an empty delay slot. */
1301 if (!NONJUMP_INSN_P (after))
1302 {
1303 if (++i == n_insns)
1304 break;
1305 branch_p = true;
1306 after = NULL;
1307 }
1308 /* This is a branch with a filled delay slot. */
1309 else if (rtx_sequence *seq =
1310 dyn_cast <rtx_sequence *> (PATTERN (after)))
1311 {
1312 if (++i == n_insns)
1313 break;
1314 branch_p = true;
1315 after = seq->insn (1);
1316 }
1317 /* This is a regular instruction. */
1318 else
1319 branch_p = false;
1320
1321 if (after && (set = single_set (after)) != NULL_RTX)
1322 {
1323 const rtx src = SET_SRC (set);
1324 const rtx dest = SET_DEST (set);
1325 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1326
1327 /* If the FP register is again modified before the store,
1328 then the store isn't affected. */
1329 if (REG_P (dest)
1330 && (REGNO (dest) == x
1331 || (REGNO (dest) == y && size == 8)))
1332 break;
1333
1334 if (MEM_P (dest) && REG_P (src))
1335 {
1336 /* If there is a store from the sibling FP register
1337 before the store, then the store is not affected. */
1338 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1339 break;
1340
1341 /* Otherwise, the store is affected. */
1342 if (REGNO (src) == x && size == 4)
1343 {
1344 insert_nop = true;
1345 break;
1346 }
1347 }
1348 }
1349
1350 /* If we have a branch in the first M instructions, then we
1351 cannot see the (M+2)th instruction so we play safe. */
1352 if (branch_p && i <= (n_insns - 2))
1353 {
1354 insert_nop = true;
1355 break;
1356 }
1357 }
1358 }
1359
1360 else
1361 next = NEXT_INSN (insn);
1362
1363 if (insert_nop)
1364 emit_insn_before (gen_nop (), next);
1365 }
1366
1367 return 0;
1368 }
1369
1370 namespace {
1371
1372 const pass_data pass_data_work_around_errata =
1373 {
1374 RTL_PASS, /* type */
1375 "errata", /* name */
1376 OPTGROUP_NONE, /* optinfo_flags */
1377 TV_MACH_DEP, /* tv_id */
1378 0, /* properties_required */
1379 0, /* properties_provided */
1380 0, /* properties_destroyed */
1381 0, /* todo_flags_start */
1382 0, /* todo_flags_finish */
1383 };
1384
1385 class pass_work_around_errata : public rtl_opt_pass
1386 {
1387 public:
1388 pass_work_around_errata(gcc::context *ctxt)
1389 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1390 {}
1391
1392 /* opt_pass methods: */
1393 virtual bool gate (function *)
1394 {
1395 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
1396 || sparc_fix_gr712rc || sparc_fix_ut700;
1397 }
1398
1399 virtual unsigned int execute (function *)
1400 {
1401 return sparc_do_work_around_errata ();
1402 }
1403
1404 }; // class pass_work_around_errata
1405
1406 } // anon namespace
1407
1408 rtl_opt_pass *
1409 make_pass_work_around_errata (gcc::context *ctxt)
1410 {
1411 return new pass_work_around_errata (ctxt);
1412 }
1413
1414 /* Helpers for TARGET_DEBUG_OPTIONS. */
1415 static void
1416 dump_target_flag_bits (const int flags)
1417 {
1418 if (flags & MASK_64BIT)
1419 fprintf (stderr, "64BIT ");
1420 if (flags & MASK_APP_REGS)
1421 fprintf (stderr, "APP_REGS ");
1422 if (flags & MASK_FASTER_STRUCTS)
1423 fprintf (stderr, "FASTER_STRUCTS ");
1424 if (flags & MASK_FLAT)
1425 fprintf (stderr, "FLAT ");
1426 if (flags & MASK_FMAF)
1427 fprintf (stderr, "FMAF ");
1428 if (flags & MASK_FSMULD)
1429 fprintf (stderr, "FSMULD ");
1430 if (flags & MASK_FPU)
1431 fprintf (stderr, "FPU ");
1432 if (flags & MASK_HARD_QUAD)
1433 fprintf (stderr, "HARD_QUAD ");
1434 if (flags & MASK_POPC)
1435 fprintf (stderr, "POPC ");
1436 if (flags & MASK_PTR64)
1437 fprintf (stderr, "PTR64 ");
1438 if (flags & MASK_STACK_BIAS)
1439 fprintf (stderr, "STACK_BIAS ");
1440 if (flags & MASK_UNALIGNED_DOUBLES)
1441 fprintf (stderr, "UNALIGNED_DOUBLES ");
1442 if (flags & MASK_V8PLUS)
1443 fprintf (stderr, "V8PLUS ");
1444 if (flags & MASK_VIS)
1445 fprintf (stderr, "VIS ");
1446 if (flags & MASK_VIS2)
1447 fprintf (stderr, "VIS2 ");
1448 if (flags & MASK_VIS3)
1449 fprintf (stderr, "VIS3 ");
1450 if (flags & MASK_VIS4)
1451 fprintf (stderr, "VIS4 ");
1452 if (flags & MASK_VIS4B)
1453 fprintf (stderr, "VIS4B ");
1454 if (flags & MASK_CBCOND)
1455 fprintf (stderr, "CBCOND ");
1456 if (flags & MASK_DEPRECATED_V8_INSNS)
1457 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1458 if (flags & MASK_SPARCLET)
1459 fprintf (stderr, "SPARCLET ");
1460 if (flags & MASK_SPARCLITE)
1461 fprintf (stderr, "SPARCLITE ");
1462 if (flags & MASK_V8)
1463 fprintf (stderr, "V8 ");
1464 if (flags & MASK_V9)
1465 fprintf (stderr, "V9 ");
1466 }
1467
1468 static void
1469 dump_target_flags (const char *prefix, const int flags)
1470 {
1471 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1472 dump_target_flag_bits (flags);
1473 fprintf (stderr, "]\n");
1474 }
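/* E.g. a call such as dump_target_flags ("cpu->enable", cpu->enable) prints
   a single line of the form (the hex value here is a placeholder):

     cpu->enable: (00000110) [ FPU V8 ]  */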
1475
1476 /* Validate and override various options, and do some machine dependent
1477 initialization. */
1478
1479 static void
1480 sparc_option_override (void)
1481 {
1482 static struct code_model {
1483 const char *const name;
1484 const enum cmodel value;
1485 } const cmodels[] = {
1486 { "32", CM_32 },
1487 { "medlow", CM_MEDLOW },
1488 { "medmid", CM_MEDMID },
1489 { "medany", CM_MEDANY },
1490 { "embmedany", CM_EMBMEDANY },
1491 { NULL, (enum cmodel) 0 }
1492 };
1493 const struct code_model *cmodel;
1494 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1495 static struct cpu_default {
1496 const int cpu;
1497 const enum processor_type processor;
1498 } const cpu_default[] = {
1499 /* There must be one entry here for each TARGET_CPU value. */
1500 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1501 { TARGET_CPU_v8, PROCESSOR_V8 },
1502 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1503 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1504 { TARGET_CPU_leon, PROCESSOR_LEON },
1505 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1506 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1507 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1508 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1509 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1510 { TARGET_CPU_v9, PROCESSOR_V9 },
1511 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1512 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1513 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1514 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1515 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1516 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1517 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1518 { TARGET_CPU_m8, PROCESSOR_M8 },
1519 { -1, PROCESSOR_V7 }
1520 };
1521 const struct cpu_default *def;
1522 /* Table of values for -m{cpu,tune}=. This must match the order of
1523 the enum processor_type in sparc-opts.h. */
1524 static struct cpu_table {
1525 const char *const name;
1526 const int disable;
1527 const int enable;
1528 } const cpu_table[] = {
1529 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1530 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1531 { "v8", MASK_ISA, MASK_V8 },
1532 /* TI TMS390Z55 supersparc */
1533 { "supersparc", MASK_ISA, MASK_V8 },
1534 { "hypersparc", MASK_ISA, MASK_V8 },
1535 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1536 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1537 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1538 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1539 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1540 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1541 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1542 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1543 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1544 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1545 /* TEMIC sparclet */
1546 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1547 { "v9", MASK_ISA, MASK_V9 },
1548 /* UltraSPARC I, II, IIi */
1549 { "ultrasparc", MASK_ISA,
1550 /* Although insns using %y are deprecated, it is a clear win. */
1551 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1552 /* UltraSPARC III */
1553 /* ??? Check if %y issue still holds true. */
1554 { "ultrasparc3", MASK_ISA,
1555 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1556 /* UltraSPARC T1 */
1557 { "niagara", MASK_ISA,
1558 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1559 /* UltraSPARC T2 */
1560 { "niagara2", MASK_ISA,
1561 MASK_V9|MASK_POPC|MASK_VIS2 },
1562 /* UltraSPARC T3 */
1563 { "niagara3", MASK_ISA,
1564 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1565 /* UltraSPARC T4 */
1566 { "niagara4", MASK_ISA,
1567 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1568 /* UltraSPARC M7 */
1569 { "niagara7", MASK_ISA,
1570 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1571 /* UltraSPARC M8 */
1572 { "m8", MASK_ISA,
1573 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1574 };
1575 const struct cpu_table *cpu;
1576 unsigned int i;
1577
1578 if (sparc_debug_string != NULL)
1579 {
1580 const char *q;
1581 char *p;
1582
1583 p = ASTRDUP (sparc_debug_string);
1584 while ((q = strtok (p, ",")) != NULL)
1585 {
1586 bool invert;
1587 int mask;
1588
1589 p = NULL;
1590 if (*q == '!')
1591 {
1592 invert = true;
1593 q++;
1594 }
1595 else
1596 invert = false;
1597
1598 if (! strcmp (q, "all"))
1599 mask = MASK_DEBUG_ALL;
1600 else if (! strcmp (q, "options"))
1601 mask = MASK_DEBUG_OPTIONS;
1602 else
1603 error ("unknown -mdebug-%s switch", q);
1604
1605 if (invert)
1606 sparc_debug &= ~mask;
1607 else
1608 sparc_debug |= mask;
1609 }
1610 }
1611
1612 /* Enable the FsMULd instruction by default if not explicitly specified by
1613 the user. It may be later disabled by the CPU (explicitly or not). */
1614 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1615 target_flags |= MASK_FSMULD;
1616
1617 if (TARGET_DEBUG_OPTIONS)
1618 {
1619 dump_target_flags ("Initial target_flags", target_flags);
1620 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1621 }
1622
1623 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1624 SUBTARGET_OVERRIDE_OPTIONS;
1625 #endif
1626
1627 #ifndef SPARC_BI_ARCH
1628 /* Check for unsupported architecture size. */
1629 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1630 error ("%s is not supported by this configuration",
1631 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1632 #endif
1633
1634 /* We force all 64-bit archs to use 128-bit long double. */
1635 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1636 {
1637 error ("-mlong-double-64 not allowed with -m64");
1638 target_flags |= MASK_LONG_DOUBLE_128;
1639 }
1640
1641 /* Code model selection. */
1642 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1643
1644 #ifdef SPARC_BI_ARCH
1645 if (TARGET_ARCH32)
1646 sparc_cmodel = CM_32;
1647 #endif
1648
1649 if (sparc_cmodel_string != NULL)
1650 {
1651 if (TARGET_ARCH64)
1652 {
1653 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1654 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1655 break;
1656 if (cmodel->name == NULL)
1657 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1658 else
1659 sparc_cmodel = cmodel->value;
1660 }
1661 else
1662 error ("-mcmodel= is not supported on 32-bit systems");
1663 }
1664
1665 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1666 for (i = 8; i < 16; i++)
1667 if (!call_used_regs [i])
1668 {
1669 error ("-fcall-saved-REG is not supported for out registers");
1670 call_used_regs [i] = 1;
1671 }
1672
1673 /* Set the default CPU if no -mcpu option was specified. */
1674 if (!global_options_set.x_sparc_cpu_and_features)
1675 {
1676 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1677 if (def->cpu == TARGET_CPU_DEFAULT)
1678 break;
1679 gcc_assert (def->cpu != -1);
1680 sparc_cpu_and_features = def->processor;
1681 }
1682
1683 /* Set the default CPU if no -mtune option was specified. */
1684 if (!global_options_set.x_sparc_cpu)
1685 sparc_cpu = sparc_cpu_and_features;
1686
1687 cpu = &cpu_table[(int) sparc_cpu_and_features];
1688
1689 if (TARGET_DEBUG_OPTIONS)
1690 {
1691 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1692 dump_target_flags ("cpu->disable", cpu->disable);
1693 dump_target_flags ("cpu->enable", cpu->enable);
1694 }
1695
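  /* Apply the ISA mask of the selected CPU: first clear the feature bits in
     its disable mask, then enable the features it provides, except those the
     assembler cannot handle (the HAVE_AS_* guards below) and those the user
     explicitly set or cleared on the command line.  */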
1696 target_flags &= ~cpu->disable;
1697 target_flags |= (cpu->enable
1698 #ifndef HAVE_AS_FMAF_HPC_VIS3
1699 & ~(MASK_FMAF | MASK_VIS3)
1700 #endif
1701 #ifndef HAVE_AS_SPARC4
1702 & ~MASK_CBCOND
1703 #endif
1704 #ifndef HAVE_AS_SPARC5_VIS4
1705 & ~(MASK_VIS4 | MASK_SUBXC)
1706 #endif
1707 #ifndef HAVE_AS_SPARC6
1708 & ~(MASK_VIS4B)
1709 #endif
1710 #ifndef HAVE_AS_LEON
1711 & ~(MASK_LEON | MASK_LEON3)
1712 #endif
1713 & ~(target_flags_explicit & MASK_FEATURES)
1714 );
1715
1716 /* -mvis2 implies -mvis. */
1717 if (TARGET_VIS2)
1718 target_flags |= MASK_VIS;
1719
1720 /* -mvis3 implies -mvis2 and -mvis. */
1721 if (TARGET_VIS3)
1722 target_flags |= MASK_VIS2 | MASK_VIS;
1723
1724 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1725 if (TARGET_VIS4)
1726 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1727
1728 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1729 if (TARGET_VIS4B)
1730 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1731
1732 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1733 FPU is disabled. */
1734 if (!TARGET_FPU)
1735 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1736 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1737
1738 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1739 are available; -m64 also implies v9. */
1740 if (TARGET_VIS || TARGET_ARCH64)
1741 {
1742 target_flags |= MASK_V9;
1743 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1744 }
1745
1746 /* -mvis also implies -mv8plus on 32-bit. */
1747 if (TARGET_VIS && !TARGET_ARCH64)
1748 target_flags |= MASK_V8PLUS;
1749
1750 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1751 if (TARGET_V9 && TARGET_ARCH32)
1752 target_flags |= MASK_DEPRECATED_V8_INSNS;
1753
1754 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1755 if (!TARGET_V9 || TARGET_ARCH64)
1756 target_flags &= ~MASK_V8PLUS;
1757
1758 /* Don't use stack biasing in 32-bit mode. */
1759 if (TARGET_ARCH32)
1760 target_flags &= ~MASK_STACK_BIAS;
1761
1762 /* Use LRA instead of reload, unless otherwise instructed. */
1763 if (!(target_flags_explicit & MASK_LRA))
1764 target_flags |= MASK_LRA;
1765
1766 /* Enable the back-to-back store errata workaround for LEON3FT. */
1767 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1768 sparc_fix_b2bst = 1;
1769
1770 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1771 if (sparc_fix_ut699)
1772 target_flags &= ~MASK_FSMULD;
1773
1774 /* Supply a default value for align_functions. */
1775 if (align_functions == 0)
1776 {
1777 if (sparc_cpu == PROCESSOR_ULTRASPARC
1778 || sparc_cpu == PROCESSOR_ULTRASPARC3
1779 || sparc_cpu == PROCESSOR_NIAGARA
1780 || sparc_cpu == PROCESSOR_NIAGARA2
1781 || sparc_cpu == PROCESSOR_NIAGARA3
1782 || sparc_cpu == PROCESSOR_NIAGARA4)
1783 align_functions = 32;
1784 else if (sparc_cpu == PROCESSOR_NIAGARA7
1785 || sparc_cpu == PROCESSOR_M8)
1786 align_functions = 64;
1787 }
1788
1789 /* Validate PCC_STRUCT_RETURN. */
1790 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1791 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1792
1793 /* Only use .uaxword when compiling for a 64-bit target. */
1794 if (!TARGET_ARCH64)
1795 targetm.asm_out.unaligned_op.di = NULL;
1796
1797 /* Do various machine dependent initializations. */
1798 sparc_init_modes ();
1799
1800 /* Set up function hooks. */
1801 init_machine_status = sparc_init_machine_status;
1802
1803 switch (sparc_cpu)
1804 {
1805 case PROCESSOR_V7:
1806 case PROCESSOR_CYPRESS:
1807 sparc_costs = &cypress_costs;
1808 break;
1809 case PROCESSOR_V8:
1810 case PROCESSOR_SPARCLITE:
1811 case PROCESSOR_SUPERSPARC:
1812 sparc_costs = &supersparc_costs;
1813 break;
1814 case PROCESSOR_F930:
1815 case PROCESSOR_F934:
1816 case PROCESSOR_HYPERSPARC:
1817 case PROCESSOR_SPARCLITE86X:
1818 sparc_costs = &hypersparc_costs;
1819 break;
1820 case PROCESSOR_LEON:
1821 sparc_costs = &leon_costs;
1822 break;
1823 case PROCESSOR_LEON3:
1824 case PROCESSOR_LEON3V7:
1825 sparc_costs = &leon3_costs;
1826 break;
1827 case PROCESSOR_SPARCLET:
1828 case PROCESSOR_TSC701:
1829 sparc_costs = &sparclet_costs;
1830 break;
1831 case PROCESSOR_V9:
1832 case PROCESSOR_ULTRASPARC:
1833 sparc_costs = &ultrasparc_costs;
1834 break;
1835 case PROCESSOR_ULTRASPARC3:
1836 sparc_costs = &ultrasparc3_costs;
1837 break;
1838 case PROCESSOR_NIAGARA:
1839 sparc_costs = &niagara_costs;
1840 break;
1841 case PROCESSOR_NIAGARA2:
1842 sparc_costs = &niagara2_costs;
1843 break;
1844 case PROCESSOR_NIAGARA3:
1845 sparc_costs = &niagara3_costs;
1846 break;
1847 case PROCESSOR_NIAGARA4:
1848 sparc_costs = &niagara4_costs;
1849 break;
1850 case PROCESSOR_NIAGARA7:
1851 sparc_costs = &niagara7_costs;
1852 break;
1853 case PROCESSOR_M8:
1854 sparc_costs = &m8_costs;
1855 break;
1856 case PROCESSOR_NATIVE:
1857 gcc_unreachable ();
1858 };
1859
1860 if (sparc_memory_model == SMM_DEFAULT)
1861 {
1862 /* Choose the memory model for the operating system. */
1863 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1864 if (os_default != SMM_DEFAULT)
1865 sparc_memory_model = os_default;
1866 /* Choose the most relaxed model for the processor. */
1867 else if (TARGET_V9)
1868 sparc_memory_model = SMM_RMO;
1869 else if (TARGET_LEON3)
1870 sparc_memory_model = SMM_TSO;
1871 else if (TARGET_LEON)
1872 sparc_memory_model = SMM_SC;
1873 else if (TARGET_V8)
1874 sparc_memory_model = SMM_PSO;
1875 else
1876 sparc_memory_model = SMM_SC;
1877 }
1878
1879 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1880 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1881 target_flags |= MASK_LONG_DOUBLE_128;
1882 #endif
1883
1884 if (TARGET_DEBUG_OPTIONS)
1885 dump_target_flags ("Final target_flags", target_flags);
1886
1887 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1888 can run at the same time. More important, it is the threshold
1889 defining when additional prefetches will be dropped by the
1890 hardware.
1891
1892 The UltraSPARC-III features a documented prefetch queue with a
1893 size of 8. Additional prefetches issued in the cpu are
1894 dropped.
1895
1896 Niagara processors are different. In these processors prefetches
1897 are handled much like regular loads. The L1 miss buffer is 32
1898 entries, but prefetches start getting affected when 30 entries
1899 become occupied. That occupation could be a mix of regular loads
1900 and prefetches though. And that buffer is shared by all threads.
1901 Once the threshold is reached, if the core is running a single
1902 thread the prefetch will retry. If more than one thread is
1903 running, the prefetch will be dropped.
1904
1905 All this makes it very difficult to determine how many
1906 prefetches can be issued simultaneously, even in a
1907 single-threaded program. Experimental results show that setting
1908 this parameter to 32 works well when the number of threads is not
1909 high. */
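  /* That is: 2 for UltraSPARC and Niagara 1-4, 8 for UltraSPARC-III,
     32 for Niagara-7 and M8, and 3 otherwise.  */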
1910 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1911 ((sparc_cpu == PROCESSOR_ULTRASPARC
1912 || sparc_cpu == PROCESSOR_NIAGARA
1913 || sparc_cpu == PROCESSOR_NIAGARA2
1914 || sparc_cpu == PROCESSOR_NIAGARA3
1915 || sparc_cpu == PROCESSOR_NIAGARA4)
1916 ? 2
1917 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1918 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1919 || sparc_cpu == PROCESSOR_M8)
1920 ? 32 : 3))),
1921 global_options.x_param_values,
1922 global_options_set.x_param_values);
1923
1924 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1925 bytes.
1926
1927 The Oracle SPARC Architecture (previously the UltraSPARC
1928 Architecture) specification states that when a PREFETCH[A]
1929 instruction is executed an implementation-specific amount of data
1930 is prefetched, and that it is at least 64 bytes long (aligned to
1931 at least 64 bytes).
1932
1933 However, this is not correct. The M7 (and implementations prior
1934 to that) does not guarantee a 64B prefetch into a cache if the
1935 line size is smaller. A single cache line is all that is ever
1936 prefetched. So for the M7, where the L1D$ has 32B lines and the
1937 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1938 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1939 is a read_n prefetch, which is the only type which allocates to
1940 the L1.) */
1941 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1942 (sparc_cpu == PROCESSOR_M8
1943 ? 64 : 32),
1944 global_options.x_param_values,
1945 global_options_set.x_param_values);
1946
1947 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1948 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1949 Niagara processors feature an L1D$ of 16KB. */
1950 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1951 ((sparc_cpu == PROCESSOR_ULTRASPARC
1952 || sparc_cpu == PROCESSOR_ULTRASPARC3
1953 || sparc_cpu == PROCESSOR_NIAGARA
1954 || sparc_cpu == PROCESSOR_NIAGARA2
1955 || sparc_cpu == PROCESSOR_NIAGARA3
1956 || sparc_cpu == PROCESSOR_NIAGARA4
1957 || sparc_cpu == PROCESSOR_NIAGARA7
1958 || sparc_cpu == PROCESSOR_M8)
1959 ? 16 : 64),
1960 global_options.x_param_values,
1961 global_options_set.x_param_values);
1962
1963
1964 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1965 that 512 is the default in params.def. */
1966 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1967 ((sparc_cpu == PROCESSOR_NIAGARA4
1968 || sparc_cpu == PROCESSOR_M8)
1969 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1970 ? 256 : 512)),
1971 global_options.x_param_values,
1972 global_options_set.x_param_values);
1973
1974
1975 /* Disable save slot sharing for call-clobbered registers by default.
1976 The IRA sharing algorithm works on single registers only and this
1977 pessimizes for double floating-point registers. */
1978 if (!global_options_set.x_flag_ira_share_save_slots)
1979 flag_ira_share_save_slots = 0;
1980
1981 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1982 redundant 32-to-64-bit extensions. */
1983 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1984 flag_ree = 0;
1985 }
1986 \f
1987 /* Miscellaneous utilities. */
1988
1989 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1990 or branch on register contents instructions. */
1991
1992 int
1993 v9_regcmp_p (enum rtx_code code)
1994 {
1995 return (code == EQ || code == NE || code == GE || code == LT
1996 || code == LE || code == GT);
1997 }
1998
1999 /* Nonzero if OP is a floating point constant which can
2000 be loaded into an integer register using a single
2001 sethi instruction. */
2002
2003 int
2004 fp_sethi_p (rtx op)
2005 {
2006 if (GET_CODE (op) == CONST_DOUBLE)
2007 {
2008 long i;
2009
2010 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2011 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2012 }
2013
2014 return 0;
2015 }
2016
2017 /* Nonzero if OP is a floating point constant which can
2018 be loaded into an integer register using a single
2019 mov instruction. */
2020
2021 int
2022 fp_mov_p (rtx op)
2023 {
2024 if (GET_CODE (op) == CONST_DOUBLE)
2025 {
2026 long i;
2027
2028 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2029 return SPARC_SIMM13_P (i);
2030 }
2031
2032 return 0;
2033 }
2034
2035 /* Nonzero if OP is a floating point constant which can
2036 be loaded into an integer register using a high/losum
2037 instruction sequence. */
2038
2039 int
2040 fp_high_losum_p (rtx op)
2041 {
2042 /* The constraints calling this should only be in
2043 SFmode move insns, so any constant which cannot
2044 be moved using a single insn will do. */
2045 if (GET_CODE (op) == CONST_DOUBLE)
2046 {
2047 long i;
2048
2049 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2050 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2051 }
2052
2053 return 0;
2054 }
2055
2056 /* Return true if the address of LABEL can be loaded by means of the
2057 mov{si,di}_pic_label_ref patterns in PIC mode. */
2058
2059 static bool
2060 can_use_mov_pic_label_ref (rtx label)
2061 {
2062 /* VxWorks does not impose a fixed gap between segments; the run-time
2063 gap can be different from the object-file gap. We therefore can't
2064 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2065 are absolutely sure that X is in the same segment as the GOT.
2066 Unfortunately, the flexibility of linker scripts means that we
2067 can't be sure of that in general, so assume that GOT-relative
2068 accesses are never valid on VxWorks. */
2069 if (TARGET_VXWORKS_RTP)
2070 return false;
2071
2072 /* Similarly, if the label is non-local, it might end up being placed
2073 in a different section than the current one; now mov_pic_label_ref
2074 requires the label and the code to be in the same section. */
2075 if (LABEL_REF_NONLOCAL_P (label))
2076 return false;
2077
2078 /* Finally, if we are reordering basic blocks and partitioning into hot
2079 and cold sections, this might happen for any label. */
2080 if (flag_reorder_blocks_and_partition)
2081 return false;
2082
2083 return true;
2084 }
2085
2086 /* Expand a move instruction. Return true if all work is done. */
2087
2088 bool
2089 sparc_expand_move (machine_mode mode, rtx *operands)
2090 {
2091 /* Handle sets of MEM first. */
2092 if (GET_CODE (operands[0]) == MEM)
2093 {
2094 /* 0 is a register (or a pair of registers) on SPARC. */
2095 if (register_or_zero_operand (operands[1], mode))
2096 return false;
2097
2098 if (!reload_in_progress)
2099 {
2100 operands[0] = validize_mem (operands[0]);
2101 operands[1] = force_reg (mode, operands[1]);
2102 }
2103 }
2104
2105 /* Fixup TLS cases. */
2106 if (TARGET_HAVE_TLS
2107 && CONSTANT_P (operands[1])
2108 && sparc_tls_referenced_p (operands [1]))
2109 {
2110 operands[1] = sparc_legitimize_tls_address (operands[1]);
2111 return false;
2112 }
2113
2114 /* Fixup PIC cases. */
2115 if (flag_pic && CONSTANT_P (operands[1]))
2116 {
2117 if (pic_address_needs_scratch (operands[1]))
2118 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2119
2120 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2121 if (GET_CODE (operands[1]) == LABEL_REF
2122 && can_use_mov_pic_label_ref (operands[1]))
2123 {
2124 if (mode == SImode)
2125 {
2126 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2127 return true;
2128 }
2129
2130 if (mode == DImode)
2131 {
2132 gcc_assert (TARGET_ARCH64);
2133 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2134 return true;
2135 }
2136 }
2137
2138 if (symbolic_operand (operands[1], mode))
2139 {
2140 operands[1]
2141 = sparc_legitimize_pic_address (operands[1],
2142 reload_in_progress
2143 ? operands[0] : NULL_RTX);
2144 return false;
2145 }
2146 }
2147
2148 /* If we are trying to toss an integer constant into FP registers,
2149 or to load an FP or vector constant, force it into memory. */
2150 if (CONSTANT_P (operands[1])
2151 && REG_P (operands[0])
2152 && (SPARC_FP_REG_P (REGNO (operands[0]))
2153 || SCALAR_FLOAT_MODE_P (mode)
2154 || VECTOR_MODE_P (mode)))
2155 {
2156 /* emit_group_store will send such bogosity to us when it is
2157 not storing directly into memory. So fix this up to avoid
2158 crashes in output_constant_pool. */
2159 if (operands [1] == const0_rtx)
2160 operands[1] = CONST0_RTX (mode);
2161
2162 /* We can clear FP registers, or set them to all-ones, if TARGET_VIS;
2163 we can always do that for the other registers. */
2164 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2165 && (const_zero_operand (operands[1], mode)
2166 || const_all_ones_operand (operands[1], mode)))
2167 return false;
2168
2169 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2170 /* We are able to build any SF constant in integer registers
2171 with at most 2 instructions. */
2172 && (mode == SFmode
2173 /* And any DF constant in integer registers if needed. */
2174 || (mode == DFmode && !can_create_pseudo_p ())))
2175 return false;
2176
2177 operands[1] = force_const_mem (mode, operands[1]);
2178 if (!reload_in_progress)
2179 operands[1] = validize_mem (operands[1]);
2180 return false;
2181 }
2182
2183 /* Accept non-constants and valid constants unmodified. */
2184 if (!CONSTANT_P (operands[1])
2185 || GET_CODE (operands[1]) == HIGH
2186 || input_operand (operands[1], mode))
2187 return false;
2188
2189 switch (mode)
2190 {
2191 case E_QImode:
2192 /* All QImode constants require only one insn, so proceed. */
2193 break;
2194
2195 case E_HImode:
2196 case E_SImode:
2197 sparc_emit_set_const32 (operands[0], operands[1]);
2198 return true;
2199
2200 case E_DImode:
2201 /* input_operand should have filtered out 32-bit mode. */
2202 sparc_emit_set_const64 (operands[0], operands[1]);
2203 return true;
2204
2205 case E_TImode:
2206 {
2207 rtx high, low;
2208 /* TImode isn't available in 32-bit mode. */
2209 split_double (operands[1], &high, &low);
2210 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2211 high));
2212 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2213 low));
2214 }
2215 return true;
2216
2217 default:
2218 gcc_unreachable ();
2219 }
2220
2221 return false;
2222 }
2223
2224 /* Load OP1, a 32-bit constant, into OP0, a register.
2225 We know it can't be done in one insn when we get
2226 here; the move expander guarantees this. */
2227
2228 static void
2229 sparc_emit_set_const32 (rtx op0, rtx op1)
2230 {
2231 machine_mode mode = GET_MODE (op0);
2232 rtx temp = op0;
2233
2234 if (can_create_pseudo_p ())
2235 temp = gen_reg_rtx (mode);
2236
2237 if (GET_CODE (op1) == CONST_INT)
2238 {
2239 gcc_assert (!small_int_operand (op1, mode)
2240 && !const_high_operand (op1, mode));
2241
2242 /* Emit them as real moves instead of a HIGH/LO_SUM;
2243 this way CSE can see everything and reuse intermediate
2244 values if it wants. */
2245 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2246 & ~(HOST_WIDE_INT) 0x3ff)));
2247
2248 emit_insn (gen_rtx_SET (op0,
2249 gen_rtx_IOR (mode, temp,
2250 GEN_INT (INTVAL (op1) & 0x3ff))));
2251 }
2252 else
2253 {
2254 /* A symbol, emit in the traditional way. */
2255 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2256 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2257 }
2258 }
2259
2260 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2261 If TEMP is nonzero, we are forbidden to use any other scratch
2262 registers. Otherwise, we are allowed to generate them as needed.
2263
2264 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2265 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2266
2267 void
2268 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2269 {
2270 rtx cst, temp1, temp2, temp3, temp4, temp5;
2271 rtx ti_temp = 0;
2272
2273 /* Deal with too large offsets. */
2274 if (GET_CODE (op1) == CONST
2275 && GET_CODE (XEXP (op1, 0)) == PLUS
2276 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2277 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2278 {
2279 gcc_assert (!temp);
2280 temp1 = gen_reg_rtx (DImode);
2281 temp2 = gen_reg_rtx (DImode);
2282 sparc_emit_set_const64 (temp2, cst);
2283 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2284 NULL_RTX);
2285 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2286 return;
2287 }
2288
2289 if (temp && GET_MODE (temp) == TImode)
2290 {
2291 ti_temp = temp;
2292 temp = gen_rtx_REG (DImode, REGNO (temp));
2293 }
2294
2295 /* SPARC-V9 code-model support. */
2296 switch (sparc_cmodel)
2297 {
2298 case CM_MEDLOW:
2299 /* The range spanned by all instructions in the object is less
2300 than 2^31 bytes (2GB) and the distance from any instruction
2301 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2302 than 2^31 bytes (2GB).
2303
2304 The executable must be in the low 4GB of the virtual address
2305 space.
2306
2307 sethi %hi(symbol), %temp1
2308 or %temp1, %lo(symbol), %reg */
2309 if (temp)
2310 temp1 = temp; /* op0 is allowed. */
2311 else
2312 temp1 = gen_reg_rtx (DImode);
2313
2314 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2315 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2316 break;
2317
2318 case CM_MEDMID:
2319 /* The range spanned by all instructions in the object is less
2320 than 2^31 bytes (2GB) and the distance from any instruction
2321 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2322 than 2^31 bytes (2GB).
2323
2324 The executable must be in the low 16TB of the virtual address
2325 space.
2326
2327 sethi %h44(symbol), %temp1
2328 or %temp1, %m44(symbol), %temp2
2329 sllx %temp2, 12, %temp3
2330 or %temp3, %l44(symbol), %reg */
2331 if (temp)
2332 {
2333 temp1 = op0;
2334 temp2 = op0;
2335 temp3 = temp; /* op0 is allowed. */
2336 }
2337 else
2338 {
2339 temp1 = gen_reg_rtx (DImode);
2340 temp2 = gen_reg_rtx (DImode);
2341 temp3 = gen_reg_rtx (DImode);
2342 }
2343
2344 emit_insn (gen_seth44 (temp1, op1));
2345 emit_insn (gen_setm44 (temp2, temp1, op1));
2346 emit_insn (gen_rtx_SET (temp3,
2347 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2348 emit_insn (gen_setl44 (op0, temp3, op1));
2349 break;
2350
2351 case CM_MEDANY:
2352 /* The range spanned by all instructions in the object is less
2353 than 2^31 bytes (2GB) and the distance from any instruction
2354 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2355 than 2^31 bytes (2GB).
2356
2357 The executable can be placed anywhere in the virtual address
2358 space.
2359
2360 sethi %hh(symbol), %temp1
2361 sethi %lm(symbol), %temp2
2362 or %temp1, %hm(symbol), %temp3
2363 sllx %temp3, 32, %temp4
2364 or %temp4, %temp2, %temp5
2365 or %temp5, %lo(symbol), %reg */
2366 if (temp)
2367 {
2368 /* It is possible that one of the registers we got for operands[2]
2369 might coincide with that of operands[0] (which is why we made
2370 it TImode). Pick the other one to use as our scratch. */
2371 if (rtx_equal_p (temp, op0))
2372 {
2373 gcc_assert (ti_temp);
2374 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2375 }
2376 temp1 = op0;
2377 temp2 = temp; /* op0 is _not_ allowed, see above. */
2378 temp3 = op0;
2379 temp4 = op0;
2380 temp5 = op0;
2381 }
2382 else
2383 {
2384 temp1 = gen_reg_rtx (DImode);
2385 temp2 = gen_reg_rtx (DImode);
2386 temp3 = gen_reg_rtx (DImode);
2387 temp4 = gen_reg_rtx (DImode);
2388 temp5 = gen_reg_rtx (DImode);
2389 }
2390
2391 emit_insn (gen_sethh (temp1, op1));
2392 emit_insn (gen_setlm (temp2, op1));
2393 emit_insn (gen_sethm (temp3, temp1, op1));
2394 emit_insn (gen_rtx_SET (temp4,
2395 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2396 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2397 emit_insn (gen_setlo (op0, temp5, op1));
2398 break;
2399
2400 case CM_EMBMEDANY:
2401 /* Old old old backwards compatibility cruft here.
2402 Essentially it is MEDLOW with a fixed 64-bit
2403 virtual base added to all data segment addresses.
2404 Text-segment stuff is computed like MEDANY; we can't
2405 reuse the code above because the relocation knobs
2406 look different.
2407
2408 Data segment: sethi %hi(symbol), %temp1
2409 add %temp1, EMBMEDANY_BASE_REG, %temp2
2410 or %temp2, %lo(symbol), %reg */
2411 if (data_segment_operand (op1, GET_MODE (op1)))
2412 {
2413 if (temp)
2414 {
2415 temp1 = temp; /* op0 is allowed. */
2416 temp2 = op0;
2417 }
2418 else
2419 {
2420 temp1 = gen_reg_rtx (DImode);
2421 temp2 = gen_reg_rtx (DImode);
2422 }
2423
2424 emit_insn (gen_embmedany_sethi (temp1, op1));
2425 emit_insn (gen_embmedany_brsum (temp2, temp1));
2426 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2427 }
2428
2429 /* Text segment: sethi %uhi(symbol), %temp1
2430 sethi %hi(symbol), %temp2
2431 or %temp1, %ulo(symbol), %temp3
2432 sllx %temp3, 32, %temp4
2433 or %temp4, %temp2, %temp5
2434 or %temp5, %lo(symbol), %reg */
2435 else
2436 {
2437 if (temp)
2438 {
2439 /* It is possible that one of the registers we got for operands[2]
2440 might coincide with that of operands[0] (which is why we made
2441 it TImode). Pick the other one to use as our scratch. */
2442 if (rtx_equal_p (temp, op0))
2443 {
2444 gcc_assert (ti_temp);
2445 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2446 }
2447 temp1 = op0;
2448 temp2 = temp; /* op0 is _not_ allowed, see above. */
2449 temp3 = op0;
2450 temp4 = op0;
2451 temp5 = op0;
2452 }
2453 else
2454 {
2455 temp1 = gen_reg_rtx (DImode);
2456 temp2 = gen_reg_rtx (DImode);
2457 temp3 = gen_reg_rtx (DImode);
2458 temp4 = gen_reg_rtx (DImode);
2459 temp5 = gen_reg_rtx (DImode);
2460 }
2461
2462 emit_insn (gen_embmedany_textuhi (temp1, op1));
2463 emit_insn (gen_embmedany_texthi (temp2, op1));
2464 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2465 emit_insn (gen_rtx_SET (temp4,
2466 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2467 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2468 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2469 }
2470 break;
2471
2472 default:
2473 gcc_unreachable ();
2474 }
2475 }
2476
2477 /* These avoid problems when cross compiling. If we do not
2478 go through all this hair then the optimizer will see
2479 invalid REG_EQUAL notes or in some cases none at all. */
2480 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2481 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2482 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2483 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2484
2485 /* The optimizer is not to assume anything about exactly
2486 which bits are set for a HIGH; they are unspecified.
2487 Unfortunately this leads to many missed optimizations
2488 during CSE. We mask out the non-HIGH bits so the result
2489 matches a plain movdi, to alleviate this problem. */
2490 static rtx
2491 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2492 {
2493 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2494 }
2495
2496 static rtx
2497 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2498 {
2499 return gen_rtx_SET (dest, GEN_INT (val));
2500 }
2501
2502 static rtx
2503 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2504 {
2505 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2506 }
2507
2508 static rtx
2509 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2510 {
2511 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2512 }
2513
2514 /* Worker routines for 64-bit constant formation on arch64.
2515 One of the key things to do in these emissions is
2516 to create as many temp REGs as possible. This makes it
2517 possible for half-built constants to be reused when
2518 such values are similar to something required later on.
2519 Without doing this, the optimizer cannot see such
2520 opportunities. */
2521
2522 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2523 unsigned HOST_WIDE_INT, int);
2524
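/* Load into OP0 a 64-bit constant whose upper 32 bits are all zeros
   (IS_NEG == 0) or all ones (IS_NEG != 0) and whose lower 32 bits are
   LOW_BITS, using TEMP as a scratch register: a sethi of the (possibly
   complemented) low word, finished off with an or, a one's complement
   or an xor.  */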
2525 static void
2526 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2527 unsigned HOST_WIDE_INT low_bits, int is_neg)
2528 {
2529 unsigned HOST_WIDE_INT high_bits;
2530
2531 if (is_neg)
2532 high_bits = (~low_bits) & 0xffffffff;
2533 else
2534 high_bits = low_bits;
2535
2536 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2537 if (!is_neg)
2538 {
2539 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2540 }
2541 else
2542 {
2543 /* If we are XOR'ing with -1, then we should emit a one's complement
2544 instead. This way the combiner will notice logical operations
2545 such as ANDN later on and substitute. */
2546 if ((low_bits & 0x3ff) == 0x3ff)
2547 {
2548 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2549 }
2550 else
2551 {
2552 emit_insn (gen_rtx_SET (op0,
2553 gen_safe_XOR64 (temp,
2554 (-(HOST_WIDE_INT)0x400
2555 | (low_bits & 0x3ff)))));
2556 }
2557 }
2558 }
2559
2560 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2561 unsigned HOST_WIDE_INT, int);
2562
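/* Build a 64-bit constant in OP0: load the 32-bit value HIGH_BITS (with
   a sethi/or pair or a single mov), shift it left by SHIFT_COUNT, and
   finally or in LOW_IMMEDIATE if it is nonzero.  TEMP is a scratch
   register.  */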
2563 static void
2564 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2565 unsigned HOST_WIDE_INT high_bits,
2566 unsigned HOST_WIDE_INT low_immediate,
2567 int shift_count)
2568 {
2569 rtx temp2 = op0;
2570
2571 if ((high_bits & 0xfffffc00) != 0)
2572 {
2573 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2574 if ((high_bits & ~0xfffffc00) != 0)
2575 emit_insn (gen_rtx_SET (op0,
2576 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2577 else
2578 temp2 = temp;
2579 }
2580 else
2581 {
2582 emit_insn (gen_safe_SET64 (temp, high_bits));
2583 temp2 = temp;
2584 }
2585
2586 /* Now shift it up into place. */
2587 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2588 GEN_INT (shift_count))));
2589
2590 /* If there is a low immediate part piece, finish up by
2591 putting that in as well. */
2592 if (low_immediate != 0)
2593 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2594 }
2595
2596 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2597 unsigned HOST_WIDE_INT);
2598
2599 /* Full 64-bit constant decomposition. Even though this is the
2600 'worst' case, we still optimize a few things away. */
2601 static void
2602 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2603 unsigned HOST_WIDE_INT high_bits,
2604 unsigned HOST_WIDE_INT low_bits)
2605 {
2606 rtx sub_temp = op0;
2607
2608 if (can_create_pseudo_p ())
2609 sub_temp = gen_reg_rtx (DImode);
2610
2611 if ((high_bits & 0xfffffc00) != 0)
2612 {
2613 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2614 if ((high_bits & ~0xfffffc00) != 0)
2615 emit_insn (gen_rtx_SET (sub_temp,
2616 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2617 else
2618 sub_temp = temp;
2619 }
2620 else
2621 {
2622 emit_insn (gen_safe_SET64 (temp, high_bits));
2623 sub_temp = temp;
2624 }
2625
2626 if (can_create_pseudo_p ())
2627 {
2628 rtx temp2 = gen_reg_rtx (DImode);
2629 rtx temp3 = gen_reg_rtx (DImode);
2630 rtx temp4 = gen_reg_rtx (DImode);
2631
2632 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2633 GEN_INT (32))));
2634
2635 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2636 if ((low_bits & ~0xfffffc00) != 0)
2637 {
2638 emit_insn (gen_rtx_SET (temp3,
2639 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2640 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2641 }
2642 else
2643 {
2644 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2645 }
2646 }
2647 else
2648 {
2649 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2650 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2651 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2652 int to_shift = 12;
2653
2654 /* We are in the middle of reload, so this is really
2655 painful. However we do still make an attempt to
2656 avoid emitting truly stupid code. */
2657 if (low1 != const0_rtx)
2658 {
2659 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2660 GEN_INT (to_shift))));
2661 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2662 sub_temp = op0;
2663 to_shift = 12;
2664 }
2665 else
2666 {
2667 to_shift += 12;
2668 }
2669 if (low2 != const0_rtx)
2670 {
2671 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2672 GEN_INT (to_shift))));
2673 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2674 sub_temp = op0;
2675 to_shift = 8;
2676 }
2677 else
2678 {
2679 to_shift += 8;
2680 }
2681 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2682 GEN_INT (to_shift))));
2683 if (low3 != const0_rtx)
2684 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2685 /* phew... */
2686 }
2687 }
2688
2689 /* Analyze a 64-bit constant for certain properties. */
2690 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2691 unsigned HOST_WIDE_INT,
2692 int *, int *, int *);
2693
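/* For the 64-bit constant whose upper word is HIGH_BITS and lower word
   is LOW_BITS, store in *HBSP and *LBSP the positions of its highest
   and lowest set bits, and set *ABBASP to 1 if every bit in between is
   also set.  */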
2694 static void
2695 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2696 unsigned HOST_WIDE_INT low_bits,
2697 int *hbsp, int *lbsp, int *abbasp)
2698 {
2699 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2700 int i;
2701
2702 lowest_bit_set = highest_bit_set = -1;
2703 i = 0;
2704 do
2705 {
2706 if ((lowest_bit_set == -1)
2707 && ((low_bits >> i) & 1))
2708 lowest_bit_set = i;
2709 if ((highest_bit_set == -1)
2710 && ((high_bits >> (32 - i - 1)) & 1))
2711 highest_bit_set = (64 - i - 1);
2712 }
2713 while (++i < 32
2714 && ((highest_bit_set == -1)
2715 || (lowest_bit_set == -1)));
2716 if (i == 32)
2717 {
2718 i = 0;
2719 do
2720 {
2721 if ((lowest_bit_set == -1)
2722 && ((high_bits >> i) & 1))
2723 lowest_bit_set = i + 32;
2724 if ((highest_bit_set == -1)
2725 && ((low_bits >> (32 - i - 1)) & 1))
2726 highest_bit_set = 32 - i - 1;
2727 }
2728 while (++i < 32
2729 && ((highest_bit_set == -1)
2730 || (lowest_bit_set == -1)));
2731 }
2732 /* If there are no bits set this should have gone out
2733 as one instruction! */
2734 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2735 all_bits_between_are_set = 1;
2736 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2737 {
2738 if (i < 32)
2739 {
2740 if ((low_bits & (1 << i)) != 0)
2741 continue;
2742 }
2743 else
2744 {
2745 if ((high_bits & (1 << (i - 32))) != 0)
2746 continue;
2747 }
2748 all_bits_between_are_set = 0;
2749 break;
2750 }
2751 *hbsp = highest_bit_set;
2752 *lbsp = lowest_bit_set;
2753 *abbasp = all_bits_between_are_set;
2754 }
2755
2756 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2757
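/* Return 1 if the 64-bit constant with upper word HIGH_BITS and lower
   word LOW_BITS can be loaded with a 2-insn sequence: its upper word is
   all zeros or all ones, its set bits form a contiguous run touching
   bit 0 or bit 63, or all of its set bits fit within the span of a
   single sethi.  */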
2758 static int
2759 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2760 unsigned HOST_WIDE_INT low_bits)
2761 {
2762 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2763
2764 if (high_bits == 0
2765 || high_bits == 0xffffffff)
2766 return 1;
2767
2768 analyze_64bit_constant (high_bits, low_bits,
2769 &highest_bit_set, &lowest_bit_set,
2770 &all_bits_between_are_set);
2771
2772 if ((highest_bit_set == 63
2773 || lowest_bit_set == 0)
2774 && all_bits_between_are_set != 0)
2775 return 1;
2776
2777 if ((highest_bit_set - lowest_bit_set) < 21)
2778 return 1;
2779
2780 return 0;
2781 }
2782
2783 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2784 unsigned HOST_WIDE_INT,
2785 int, int);
2786
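/* Return the 64-bit constant with upper word HIGH_BITS and lower word
   LOW_BITS shifted so that its lowest set bit, LOWEST_BIT_SET, lands at
   bit position SHIFT.  */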
2787 static unsigned HOST_WIDE_INT
2788 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2789 unsigned HOST_WIDE_INT low_bits,
2790 int lowest_bit_set, int shift)
2791 {
2792 HOST_WIDE_INT hi, lo;
2793
2794 if (lowest_bit_set < 32)
2795 {
2796 lo = (low_bits >> lowest_bit_set) << shift;
2797 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2798 }
2799 else
2800 {
2801 lo = 0;
2802 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2803 }
2804 gcc_assert (! (hi & lo));
2805 return (hi | lo);
2806 }
2807
2808 /* Here we are sure to be arch64 and this is an integer constant
2809 being loaded into a register. Emit the most efficient
2810 insn sequence possible. Detection of all the 1-insn cases
2811 has been done already. */
2812 static void
2813 sparc_emit_set_const64 (rtx op0, rtx op1)
2814 {
2815 unsigned HOST_WIDE_INT high_bits, low_bits;
2816 int lowest_bit_set, highest_bit_set;
2817 int all_bits_between_are_set;
2818 rtx temp = 0;
2819
2820 /* Sanity check that we know what we are working with. */
2821 gcc_assert (TARGET_ARCH64
2822 && (GET_CODE (op0) == SUBREG
2823 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2824
2825 if (! can_create_pseudo_p ())
2826 temp = op0;
2827
2828 if (GET_CODE (op1) != CONST_INT)
2829 {
2830 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2831 return;
2832 }
2833
2834 if (! temp)
2835 temp = gen_reg_rtx (DImode);
2836
2837 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2838 low_bits = (INTVAL (op1) & 0xffffffff);
2839
2840 /* low_bits bits 0 --> 31
2841 high_bits bits 32 --> 63 */
2842
2843 analyze_64bit_constant (high_bits, low_bits,
2844 &highest_bit_set, &lowest_bit_set,
2845 &all_bits_between_are_set);
2846
2847 /* First try for a 2-insn sequence. */
2848
2849 /* These situations are preferred because the optimizer can
2850 * do more things with them:
2851 * 1) mov -1, %reg
2852 * sllx %reg, shift, %reg
2853 * 2) mov -1, %reg
2854 * srlx %reg, shift, %reg
2855 * 3) mov some_small_const, %reg
2856 * sllx %reg, shift, %reg
2857 */
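  /* For example, 0x3fc000000 (0xff << 26) falls under case 3 and is
     emitted as: mov 0xff, %reg; sllx %reg, 26, %reg.  */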
2858 if (((highest_bit_set == 63
2859 || lowest_bit_set == 0)
2860 && all_bits_between_are_set != 0)
2861 || ((highest_bit_set - lowest_bit_set) < 12))
2862 {
2863 HOST_WIDE_INT the_const = -1;
2864 int shift = lowest_bit_set;
2865
2866 if ((highest_bit_set != 63
2867 && lowest_bit_set != 0)
2868 || all_bits_between_are_set == 0)
2869 {
2870 the_const =
2871 create_simple_focus_bits (high_bits, low_bits,
2872 lowest_bit_set, 0);
2873 }
2874 else if (lowest_bit_set == 0)
2875 shift = -(63 - highest_bit_set);
2876
2877 gcc_assert (SPARC_SIMM13_P (the_const));
2878 gcc_assert (shift != 0);
2879
2880 emit_insn (gen_safe_SET64 (temp, the_const));
2881 if (shift > 0)
2882 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2883 GEN_INT (shift))));
2884 else if (shift < 0)
2885 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2886 GEN_INT (-shift))));
2887 return;
2888 }
2889
2890 /* Now a range of 22 or fewer bits set somewhere.
2891 * 1) sethi %hi(focus_bits), %reg
2892 * sllx %reg, shift, %reg
2893 * 2) sethi %hi(focus_bits), %reg
2894 * srlx %reg, shift, %reg
2895 */
2896 if ((highest_bit_set - lowest_bit_set) < 21)
2897 {
2898 unsigned HOST_WIDE_INT focus_bits =
2899 create_simple_focus_bits (high_bits, low_bits,
2900 lowest_bit_set, 10);
2901
2902 gcc_assert (SPARC_SETHI_P (focus_bits));
2903 gcc_assert (lowest_bit_set != 10);
2904
2905 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2906
2907 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2908 if (lowest_bit_set < 10)
2909 emit_insn (gen_rtx_SET (op0,
2910 gen_rtx_LSHIFTRT (DImode, temp,
2911 GEN_INT (10 - lowest_bit_set))));
2912 else if (lowest_bit_set > 10)
2913 emit_insn (gen_rtx_SET (op0,
2914 gen_rtx_ASHIFT (DImode, temp,
2915 GEN_INT (lowest_bit_set - 10))));
2916 return;
2917 }
2918
2919 /* 1) sethi %hi(low_bits), %reg
2920 * or %reg, %lo(low_bits), %reg
2921 * 2) sethi %hi(~low_bits), %reg
2922 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2923 */
2924 if (high_bits == 0
2925 || high_bits == 0xffffffff)
2926 {
2927 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2928 (high_bits == 0xffffffff));
2929 return;
2930 }
2931
2932 /* Now, try 3-insn sequences. */
2933
2934 /* 1) sethi %hi(high_bits), %reg
2935 * or %reg, %lo(high_bits), %reg
2936 * sllx %reg, 32, %reg
2937 */
2938 if (low_bits == 0)
2939 {
2940 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2941 return;
2942 }
2943
2944 /* We may be able to do something quick
2945 when the constant is negated, so try that. */
2946 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2947 (~low_bits) & 0xfffffc00))
2948 {
2949 /* NOTE: The trailing bits get XOR'd so we need the
2950 non-negated bits, not the negated ones. */
2951 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2952
2953 if ((((~high_bits) & 0xffffffff) == 0
2954 && ((~low_bits) & 0x80000000) == 0)
2955 || (((~high_bits) & 0xffffffff) == 0xffffffff
2956 && ((~low_bits) & 0x80000000) != 0))
2957 {
2958 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2959
2960 if ((SPARC_SETHI_P (fast_int)
2961 && (~high_bits & 0xffffffff) == 0)
2962 || SPARC_SIMM13_P (fast_int))
2963 emit_insn (gen_safe_SET64 (temp, fast_int));
2964 else
2965 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2966 }
2967 else
2968 {
2969 rtx negated_const;
2970 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2971 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2972 sparc_emit_set_const64 (temp, negated_const);
2973 }
2974
2975 /* If we are XOR'ing with -1, then we should emit a one's complement
2976 instead. This way the combiner will notice logical operations
2977 such as ANDN later on and substitute. */
2978 if (trailing_bits == 0x3ff)
2979 {
2980 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2981 }
2982 else
2983 {
2984 emit_insn (gen_rtx_SET (op0,
2985 gen_safe_XOR64 (temp,
2986 (-0x400 | trailing_bits))));
2987 }
2988 return;
2989 }
2990
2991 /* 1) sethi %hi(xxx), %reg
2992 * or %reg, %lo(xxx), %reg
2993 * sllx %reg, yyy, %reg
2994 *
2995 * ??? This is just a generalized version of the low_bits==0
2996 * thing above, FIXME...
2997 */
2998 if ((highest_bit_set - lowest_bit_set) < 32)
2999 {
3000 unsigned HOST_WIDE_INT focus_bits =
3001 create_simple_focus_bits (high_bits, low_bits,
3002 lowest_bit_set, 0);
3003
3004 /* We can't get here in this state. */
3005 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3006
3007 /* So what we know is that the set bits straddle the
3008 middle of the 64-bit word. */
3009 sparc_emit_set_const64_quick2 (op0, temp,
3010 focus_bits, 0,
3011 lowest_bit_set);
3012 return;
3013 }
3014
3015 /* 1) sethi %hi(high_bits), %reg
3016 * or %reg, %lo(high_bits), %reg
3017 * sllx %reg, 32, %reg
3018 * or %reg, low_bits, %reg
3019 */
3020 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3021 {
3022 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3023 return;
3024 }
3025
3026 /* The easiest way when all else fails, is full decomposition. */
3027 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3028 }
3029
3030 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3031
3032 static bool
3033 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3034 {
3035 *p1 = SPARC_ICC_REG;
3036 *p2 = SPARC_FCC_REG;
3037 return true;
3038 }
3039
3040 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3041
3042 static unsigned int
3043 sparc_min_arithmetic_precision (void)
3044 {
3045 return 32;
3046 }
3047
3048 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3049 return the mode to be used for the comparison. For floating-point,
3050 CCFP[E]mode is used. CCNZmode should be used when the first operand
3051 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3052 processing is needed. */
3053
3054 machine_mode
3055 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3056 {
3057 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3058 {
3059 switch (op)
3060 {
3061 case EQ:
3062 case NE:
3063 case UNORDERED:
3064 case ORDERED:
3065 case UNLT:
3066 case UNLE:
3067 case UNGT:
3068 case UNGE:
3069 case UNEQ:
3070 case LTGT:
3071 return CCFPmode;
3072
3073 case LT:
3074 case LE:
3075 case GT:
3076 case GE:
3077 return CCFPEmode;
3078
3079 default:
3080 gcc_unreachable ();
3081 }
3082 }
3083 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3084 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3085 && y == const0_rtx)
3086 {
3087 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3088 return CCXNZmode;
3089 else
3090 return CCNZmode;
3091 }
3092 else
3093 {
3094 /* This is for the cmp<mode>_sne pattern. */
3095 if (GET_CODE (x) == NOT && y == constm1_rtx)
3096 {
3097 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3098 return CCXCmode;
3099 else
3100 return CCCmode;
3101 }
3102
3103 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3104 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3105 {
3106 if (GET_CODE (y) == UNSPEC
3107 && (XINT (y, 1) == UNSPEC_ADDV
3108 || XINT (y, 1) == UNSPEC_SUBV
3109 || XINT (y, 1) == UNSPEC_NEGV))
3110 return CCVmode;
3111 else
3112 return CCCmode;
3113 }
3114
3115 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3116 return CCXmode;
3117 else
3118 return CCmode;
3119 }
3120 }
3121
3122 /* Emit the compare insn and return the CC reg for a CODE comparison
3123 with operands X and Y. */
3124
3125 static rtx
3126 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3127 {
3128 machine_mode mode;
3129 rtx cc_reg;
3130
3131 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3132 return x;
3133
3134 mode = SELECT_CC_MODE (code, x, y);
3135
3136 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3137 fcc regs (cse can't tell they're really call clobbered regs and will
3138 remove a duplicate comparison even if there is an intervening function
3139 call - it will then try to reload the cc reg via an int reg which is why
3140 we need the movcc patterns). It is possible to provide the movcc
3141 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3142 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3143 to tell cse that CCFPE mode registers (even pseudos) are call
3144 clobbered. */
3145
3146 /* ??? This is an experiment. Rather than making changes to cse which may
3147 or may not be easy/clean, we do our own cse. This is possible because
3148 we will generate hard registers. Cse knows they're call clobbered (it
3149 doesn't know the same thing about pseudos). If we guess wrong, no big
3150 deal, but if we win, great! */
3151
3152 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3153 #if 1 /* experiment */
3154 {
3155 int reg;
3156 /* We cycle through the registers to ensure they're all exercised. */
3157 static int next_fcc_reg = 0;
3158 /* Previous x,y for each fcc reg. */
3159 static rtx prev_args[4][2];
3160
3161 /* Scan prev_args for x,y. */
3162 for (reg = 0; reg < 4; reg++)
3163 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3164 break;
3165 if (reg == 4)
3166 {
3167 reg = next_fcc_reg;
3168 prev_args[reg][0] = x;
3169 prev_args[reg][1] = y;
3170 next_fcc_reg = (next_fcc_reg + 1) & 3;
3171 }
3172 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3173 }
3174 #else
3175 cc_reg = gen_reg_rtx (mode);
3176 #endif /* ! experiment */
3177 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3178 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3179 else
3180 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3181
3182 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3183 will only result in an unrecognizable insn so no point in asserting. */
3184 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3185
3186 return cc_reg;
3187 }
3188
3189
3190 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3191
3192 rtx
3193 gen_compare_reg (rtx cmp)
3194 {
3195 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3196 }
3197
3198 /* This function is used for v9 only.
3199 DEST is the target of the Scc insn.
3200 CODE is the code for an Scc's comparison.
3201 X and Y are the values we compare.
3202
3203 This function is needed to turn
3204
3205 (set (reg:SI 110)
3206 (gt (reg:CCX 100 %icc)
3207 (const_int 0)))
3208 into
3209 (set (reg:SI 110)
3210 (gt:DI (reg:CCX 100 %icc)
3211 (const_int 0)))
3212
3213 IE: The instruction recognizer needs to see the mode of the comparison to
3214 find the right instruction. We could use "gt:DI" right in the
3215 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3216
3217 static int
3218 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3219 {
3220 if (! TARGET_ARCH64
3221 && (GET_MODE (x) == DImode
3222 || GET_MODE (dest) == DImode))
3223 return 0;
3224
3225 /* Try to use the movrCC insns. */
3226 if (TARGET_ARCH64
3227 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3228 && y == const0_rtx
3229 && v9_regcmp_p (compare_code))
3230 {
3231 rtx op0 = x;
3232 rtx temp;
3233
3234 /* Special case for op0 != 0. This can be done with one instruction if
3235 dest == x. */
3236
3237 if (compare_code == NE
3238 && GET_MODE (dest) == DImode
3239 && rtx_equal_p (op0, dest))
3240 {
3241 emit_insn (gen_rtx_SET (dest,
3242 gen_rtx_IF_THEN_ELSE (DImode,
3243 gen_rtx_fmt_ee (compare_code, DImode,
3244 op0, const0_rtx),
3245 const1_rtx,
3246 dest)));
3247 return 1;
3248 }
3249
3250 if (reg_overlap_mentioned_p (dest, op0))
3251 {
3252 /* Handle the case where dest == x.
3253 We "early clobber" the result. */
3254 op0 = gen_reg_rtx (GET_MODE (x));
3255 emit_move_insn (op0, x);
3256 }
3257
3258 emit_insn (gen_rtx_SET (dest, const0_rtx));
3259 if (GET_MODE (op0) != DImode)
3260 {
3261 temp = gen_reg_rtx (DImode);
3262 convert_move (temp, op0, 0);
3263 }
3264 else
3265 temp = op0;
3266 emit_insn (gen_rtx_SET (dest,
3267 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3268 gen_rtx_fmt_ee (compare_code, DImode,
3269 temp, const0_rtx),
3270 const1_rtx,
3271 dest)));
3272 return 1;
3273 }
3274 else
3275 {
3276 x = gen_compare_reg_1 (compare_code, x, y);
3277 y = const0_rtx;
3278
3279 emit_insn (gen_rtx_SET (dest, const0_rtx));
3280 emit_insn (gen_rtx_SET (dest,
3281 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3282 gen_rtx_fmt_ee (compare_code,
3283 GET_MODE (x), x, y),
3284 const1_rtx, dest)));
3285 return 1;
3286 }
3287 }
3288
3289
3290 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3291 without jumps using the addx/subx instructions. */
3292
3293 bool
3294 emit_scc_insn (rtx operands[])
3295 {
3296 rtx tem, x, y;
3297 enum rtx_code code;
3298 machine_mode mode;
3299
3300 /* The quad-word fp compare library routines all return nonzero to indicate
3301 true, which is different from the equivalent libgcc routines, so we must
3302 handle them specially here. */
3303 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3304 {
3305 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3306 GET_CODE (operands[1]));
3307 operands[2] = XEXP (operands[1], 0);
3308 operands[3] = XEXP (operands[1], 1);
3309 }
3310
3311 code = GET_CODE (operands[1]);
3312 x = operands[2];
3313 y = operands[3];
3314 mode = GET_MODE (x);
3315
3316 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3317 more applications). The exception to this is "reg != 0" which can
3318 be done in one instruction on v9 (so we do it). */
3319 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3320 {
3321 if (y != const0_rtx)
3322 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3323
3324 rtx pat = gen_rtx_SET (operands[0],
3325 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3326 x, const0_rtx));
3327
3328 /* If we can use addx/subx or addxc, add a clobber for CC. */
3329 if (mode == SImode || (code == NE && TARGET_VIS3))
3330 {
3331 rtx clobber
3332 = gen_rtx_CLOBBER (VOIDmode,
3333 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3334 SPARC_ICC_REG));
3335 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3336 }
3337
3338 emit_insn (pat);
3339 return true;
3340 }
3341
3342 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3343 if (TARGET_ARCH64
3344 && mode == DImode
3345 && !((code == LTU || code == GTU) && TARGET_VIS3)
3346 && gen_v9_scc (operands[0], code, x, y))
3347 return true;
3348
3349 /* We can do LTU and GEU using the addx/subx instructions too. And
3350 for GTU/LEU, if both operands are registers, swap them and fall
3351 back to the easy case. */
3352 if (code == GTU || code == LEU)
3353 {
3354 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3355 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3356 {
3357 tem = x;
3358 x = y;
3359 y = tem;
3360 code = swap_condition (code);
3361 }
3362 }
3363
3364 if (code == LTU || code == GEU)
3365 {
3366 emit_insn (gen_rtx_SET (operands[0],
3367 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3368 gen_compare_reg_1 (code, x, y),
3369 const0_rtx)));
3370 return true;
3371 }
3372
3373 /* All the possibilities to use addx/subx-based sequences have been
3374 exhausted; try for a 3-instruction sequence using v9 conditional
3375 moves. */
3376 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3377 return true;
3378
3379 /* Nope, do branches. */
3380 return false;
3381 }
3382
3383 /* Emit a conditional jump insn for the v9 architecture using comparison code
3384 CODE and jump target LABEL.
3385 This function exists to take advantage of the v9 brxx insns. */
3386
3387 static void
3388 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3389 {
3390 emit_jump_insn (gen_rtx_SET (pc_rtx,
3391 gen_rtx_IF_THEN_ELSE (VOIDmode,
3392 gen_rtx_fmt_ee (code, GET_MODE (op0),
3393 op0, const0_rtx),
3394 gen_rtx_LABEL_REF (VOIDmode, label),
3395 pc_rtx)));
3396 }
3397
3398 /* Emit a conditional jump insn for the UA2011 architecture using
3399 comparison code CODE and jump target LABEL. This function exists
3400 to take advantage of the UA2011 Compare and Branch insns. */
3401
3402 static void
3403 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3404 {
3405 rtx if_then_else;
3406
3407 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3408 gen_rtx_fmt_ee(code, GET_MODE(op0),
3409 op0, op1),
3410 gen_rtx_LABEL_REF (VOIDmode, label),
3411 pc_rtx);
3412
3413 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3414 }
3415
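/* Emit a conditional branch for the comparison code in OPERANDS[0],
   applied to OPERANDS[1] and OPERANDS[2], jumping to the label in
   OPERANDS[3].  Use a cbcond or brxx instruction when possible,
   otherwise emit a compare followed by a branch on condition codes.  */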
3416 void
3417 emit_conditional_branch_insn (rtx operands[])
3418 {
3419 /* The quad-word fp compare library routines all return nonzero to indicate
3420 true, which is different from the equivalent libgcc routines, so we must
3421 handle them specially here. */
3422 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3423 {
3424 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3425 GET_CODE (operands[0]));
3426 operands[1] = XEXP (operands[0], 0);
3427 operands[2] = XEXP (operands[0], 1);
3428 }
3429
3430 /* If we can tell early on that the comparison is against a constant
3431 that won't fit in the 5-bit signed immediate field of a cbcond,
3432 use one of the other v9 conditional branch sequences. */
3433 if (TARGET_CBCOND
3434 && GET_CODE (operands[1]) == REG
3435 && (GET_MODE (operands[1]) == SImode
3436 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3437 && (GET_CODE (operands[2]) != CONST_INT
3438 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3439 {
3440 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3441 return;
3442 }
3443
3444 if (TARGET_ARCH64 && operands[2] == const0_rtx
3445 && GET_CODE (operands[1]) == REG
3446 && GET_MODE (operands[1]) == DImode)
3447 {
3448 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3449 return;
3450 }
3451
3452 operands[1] = gen_compare_reg (operands[0]);
3453 operands[2] = const0_rtx;
3454 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3455 operands[1], operands[2]);
3456 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3457 operands[3]));
3458 }
3459
3460
3461 /* Generate a DFmode part of a hard TFmode register.
3462 REG is the TFmode hard register, LOW is 1 for the
3463 low 64 bits of the register and 0 otherwise.
3464 */
3465 rtx
3466 gen_df_reg (rtx reg, int low)
3467 {
3468 int regno = REGNO (reg);
3469
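  /* A TFmode value occupies four 32-bit FP registers, or two integer
     registers on 64-bit targets, so the other DFmode half is 2 or 1
     hard registers away, respectively.  */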
3470 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3471 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3472 return gen_rtx_REG (DFmode, regno);
3473 }
3474 \f
3475 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3476 Unlike normal calls, TFmode operands are passed by reference. It is
3477 assumed that no more than 3 operands are required. */
3478
3479 static void
3480 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3481 {
3482 rtx ret_slot = NULL, arg[3], func_sym;
3483 int i;
3484
3485 /* We only expect to be called for conversions, unary, and binary ops. */
3486 gcc_assert (nargs == 2 || nargs == 3);
3487
3488 for (i = 0; i < nargs; ++i)
3489 {
3490 rtx this_arg = operands[i];
3491 rtx this_slot;
3492
3493 /* TFmode arguments and return values are passed by reference. */
3494 if (GET_MODE (this_arg) == TFmode)
3495 {
3496 int force_stack_temp;
3497
3498 force_stack_temp = 0;
3499 if (TARGET_BUGGY_QP_LIB && i == 0)
3500 force_stack_temp = 1;
3501
3502 if (GET_CODE (this_arg) == MEM
3503 && ! force_stack_temp)
3504 {
3505 tree expr = MEM_EXPR (this_arg);
3506 if (expr)
3507 mark_addressable (expr);
3508 this_arg = XEXP (this_arg, 0);
3509 }
3510 else if (CONSTANT_P (this_arg)
3511 && ! force_stack_temp)
3512 {
3513 this_slot = force_const_mem (TFmode, this_arg);
3514 this_arg = XEXP (this_slot, 0);
3515 }
3516 else
3517 {
3518 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3519
3520 /* Operand 0 is the return value. We'll copy it out later. */
3521 if (i > 0)
3522 emit_move_insn (this_slot, this_arg);
3523 else
3524 ret_slot = this_slot;
3525
3526 this_arg = XEXP (this_slot, 0);
3527 }
3528 }
3529
3530 arg[i] = this_arg;
3531 }
3532
3533 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3534
3535 if (GET_MODE (operands[0]) == TFmode)
3536 {
3537 if (nargs == 2)
3538 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3539 arg[0], GET_MODE (arg[0]),
3540 arg[1], GET_MODE (arg[1]));
3541 else
3542 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3543 arg[0], GET_MODE (arg[0]),
3544 arg[1], GET_MODE (arg[1]),
3545 arg[2], GET_MODE (arg[2]));
3546
3547 if (ret_slot)
3548 emit_move_insn (operands[0], ret_slot);
3549 }
3550 else
3551 {
3552 rtx ret;
3553
3554 gcc_assert (nargs == 2);
3555
3556 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3557 GET_MODE (operands[0]),
3558 arg[1], GET_MODE (arg[1]));
3559
3560 if (ret != operands[0])
3561 emit_move_insn (operands[0], ret);
3562 }
3563 }
3564
3565 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3566
3567 static void
3568 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3569 {
3570 const char *func;
3571
3572 switch (code)
3573 {
3574 case PLUS:
3575 func = "_Qp_add";
3576 break;
3577 case MINUS:
3578 func = "_Qp_sub";
3579 break;
3580 case MULT:
3581 func = "_Qp_mul";
3582 break;
3583 case DIV:
3584 func = "_Qp_div";
3585 break;
3586 default:
3587 gcc_unreachable ();
3588 }
3589
3590 emit_soft_tfmode_libcall (func, 3, operands);
3591 }
3592
3593 static void
3594 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3595 {
3596 const char *func;
3597
3598 gcc_assert (code == SQRT);
3599 func = "_Qp_sqrt";
3600
3601 emit_soft_tfmode_libcall (func, 2, operands);
3602 }
3603
3604 static void
3605 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3606 {
3607 const char *func;
3608
3609 switch (code)
3610 {
3611 case FLOAT_EXTEND:
3612 switch (GET_MODE (operands[1]))
3613 {
3614 case E_SFmode:
3615 func = "_Qp_stoq";
3616 break;
3617 case E_DFmode:
3618 func = "_Qp_dtoq";
3619 break;
3620 default:
3621 gcc_unreachable ();
3622 }
3623 break;
3624
3625 case FLOAT_TRUNCATE:
3626 switch (GET_MODE (operands[0]))
3627 {
3628 case E_SFmode:
3629 func = "_Qp_qtos";
3630 break;
3631 case E_DFmode:
3632 func = "_Qp_qtod";
3633 break;
3634 default:
3635 gcc_unreachable ();
3636 }
3637 break;
3638
3639 case FLOAT:
3640 switch (GET_MODE (operands[1]))
3641 {
3642 case E_SImode:
3643 func = "_Qp_itoq";
3644 if (TARGET_ARCH64)
3645 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3646 break;
3647 case E_DImode:
3648 func = "_Qp_xtoq";
3649 break;
3650 default:
3651 gcc_unreachable ();
3652 }
3653 break;
3654
3655 case UNSIGNED_FLOAT:
3656 switch (GET_MODE (operands[1]))
3657 {
3658 case E_SImode:
3659 func = "_Qp_uitoq";
3660 if (TARGET_ARCH64)
3661 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3662 break;
3663 case E_DImode:
3664 func = "_Qp_uxtoq";
3665 break;
3666 default:
3667 gcc_unreachable ();
3668 }
3669 break;
3670
3671 case FIX:
3672 switch (GET_MODE (operands[0]))
3673 {
3674 case E_SImode:
3675 func = "_Qp_qtoi";
3676 break;
3677 case E_DImode:
3678 func = "_Qp_qtox";
3679 break;
3680 default:
3681 gcc_unreachable ();
3682 }
3683 break;
3684
3685 case UNSIGNED_FIX:
3686 switch (GET_MODE (operands[0]))
3687 {
3688 case E_SImode:
3689 func = "_Qp_qtoui";
3690 break;
3691 case E_DImode:
3692 func = "_Qp_qtoux";
3693 break;
3694 default:
3695 gcc_unreachable ();
3696 }
3697 break;
3698
3699 default:
3700 gcc_unreachable ();
3701 }
3702
3703 emit_soft_tfmode_libcall (func, 2, operands);
3704 }
3705
3706 /* Expand a hard-float TFmode operation. All arguments must be in
3707 registers. */
3708
3709 static void
3710 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3711 {
3712 rtx op, dest;
3713
3714 if (GET_RTX_CLASS (code) == RTX_UNARY)
3715 {
3716 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3717 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3718 }
3719 else
3720 {
3721 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3722 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3723 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3724 operands[1], operands[2]);
3725 }
3726
3727 if (register_operand (operands[0], VOIDmode))
3728 dest = operands[0];
3729 else
3730 dest = gen_reg_rtx (GET_MODE (operands[0]));
3731
3732 emit_insn (gen_rtx_SET (dest, op));
3733
3734 if (dest != operands[0])
3735 emit_move_insn (operands[0], dest);
3736 }
3737
3738 void
3739 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3740 {
3741 if (TARGET_HARD_QUAD)
3742 emit_hard_tfmode_operation (code, operands);
3743 else
3744 emit_soft_tfmode_binop (code, operands);
3745 }
3746
3747 void
3748 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3749 {
3750 if (TARGET_HARD_QUAD)
3751 emit_hard_tfmode_operation (code, operands);
3752 else
3753 emit_soft_tfmode_unop (code, operands);
3754 }
3755
3756 void
3757 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3758 {
3759 if (TARGET_HARD_QUAD)
3760 emit_hard_tfmode_operation (code, operands);
3761 else
3762 emit_soft_tfmode_cvt (code, operands);
3763 }
3764 \f
3765 /* Return nonzero if a branch/jump/call instruction will be emitting a
3766 nop into its delay slot. */
3767
3768 int
3769 empty_delay_slot (rtx_insn *insn)
3770 {
3771 rtx seq;
3772
3773 /* If there is no previous instruction (should not happen), return true. */
3774 if (PREV_INSN (insn) == NULL)
3775 return 1;
3776
3777 seq = NEXT_INSN (PREV_INSN (insn));
3778 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3779 return 0;
3780
3781 return 1;
3782 }
3783
3784 /* Return nonzero if we should emit a nop after a cbcond instruction.
3785 The cbcond instruction does not have a delay slot; however, there is
3786 a severe performance penalty if a control transfer appears right
3787 after a cbcond. Therefore we emit a nop when we detect this
3788 situation. */
3789
3790 int
3791 emit_cbcond_nop (rtx_insn *insn)
3792 {
3793 rtx next = next_active_insn (insn);
3794
3795 if (!next)
3796 return 1;
3797
3798 if (NONJUMP_INSN_P (next)
3799 && GET_CODE (PATTERN (next)) == SEQUENCE)
3800 next = XVECEXP (PATTERN (next), 0, 0);
3801 else if (CALL_P (next)
3802 && GET_CODE (PATTERN (next)) == PARALLEL)
3803 {
3804 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3805
3806 if (GET_CODE (delay) == RETURN)
3807 {
3808 /* It's a sibling call. Do not emit the nop if we're going
3809 to emit something other than the jump itself as the first
3810 instruction of the sibcall sequence. */
3811 if (sparc_leaf_function_p || TARGET_FLAT)
3812 return 0;
3813 }
3814 }
3815
3816 if (NONJUMP_INSN_P (next))
3817 return 0;
3818
3819 return 1;
3820 }
3821
3822 /* Return nonzero if TRIAL can go into the call delay slot. */
3823
3824 int
3825 eligible_for_call_delay (rtx_insn *trial)
3826 {
3827 rtx pat;
3828
3829 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3830 return 0;
3831
3832 /* Binutils allows
3833 call __tls_get_addr, %tgd_call (foo)
3834 add %l7, %o0, %o0, %tgd_add (foo)
3835 while Sun as/ld does not. */
3836 if (TARGET_GNU_TLS || !TARGET_TLS)
3837 return 1;
3838
3839 pat = PATTERN (trial);
3840
3841 /* We must reject tgd_add{32|64}, i.e.
3842 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3843 and tldm_add{32|64}, i.e.
3844 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3845 for Sun as/ld. */
3846 if (GET_CODE (pat) == SET
3847 && GET_CODE (SET_SRC (pat)) == PLUS)
3848 {
3849 rtx unspec = XEXP (SET_SRC (pat), 1);
3850
3851 if (GET_CODE (unspec) == UNSPEC
3852 && (XINT (unspec, 1) == UNSPEC_TLSGD
3853 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3854 return 0;
3855 }
3856
3857 return 1;
3858 }
3859
3860 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3861 instruction. RETURN_P is true if the v9 variant 'return' is to be
3862 considered in the test too.
3863
3864 TRIAL must be a SET whose destination is a REG appropriate for the
3865 'restore' instruction or, if RETURN_P is true, for the 'return'
3866 instruction. */
3867
3868 static int
3869 eligible_for_restore_insn (rtx trial, bool return_p)
3870 {
3871 rtx pat = PATTERN (trial);
3872 rtx src = SET_SRC (pat);
3873 bool src_is_freg = false;
3874 rtx src_reg;
3875
3876 /* Since we now can do moves between float and integer registers when
3877 VIS3 is enabled, we have to catch this case. We can allow such
3878 moves when doing a 'return' however. */
3879 src_reg = src;
3880 if (GET_CODE (src_reg) == SUBREG)
3881 src_reg = SUBREG_REG (src_reg);
3882 if (GET_CODE (src_reg) == REG
3883 && SPARC_FP_REG_P (REGNO (src_reg)))
3884 src_is_freg = true;
3885
3886 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3887 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3888 && arith_operand (src, GET_MODE (src))
3889 && ! src_is_freg)
3890 {
3891 if (TARGET_ARCH64)
3892 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3893 else
3894 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3895 }
3896
3897 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3898 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3899 && arith_double_operand (src, GET_MODE (src))
3900 && ! src_is_freg)
3901 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3902
3903 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3904 else if (! TARGET_FPU && register_operand (src, SFmode))
3905 return 1;
3906
3907 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3908 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3909 return 1;
3910
3911 /* If we have the 'return' instruction, anything that does not use
3912 local or output registers and can go into a delay slot wins. */
3913 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3914 return 1;
3915
3916 /* The 'restore src1,src2,dest' pattern for SImode. */
3917 else if (GET_CODE (src) == PLUS
3918 && register_operand (XEXP (src, 0), SImode)
3919 && arith_operand (XEXP (src, 1), SImode))
3920 return 1;
3921
3922 /* The 'restore src1,src2,dest' pattern for DImode. */
3923 else if (GET_CODE (src) == PLUS
3924 && register_operand (XEXP (src, 0), DImode)
3925 && arith_double_operand (XEXP (src, 1), DImode))
3926 return 1;
3927
3928 /* The 'restore src1,%lo(src2),dest' pattern. */
3929 else if (GET_CODE (src) == LO_SUM
3930 && ! TARGET_CM_MEDMID
3931 && ((register_operand (XEXP (src, 0), SImode)
3932 && immediate_operand (XEXP (src, 1), SImode))
3933 || (TARGET_ARCH64
3934 && register_operand (XEXP (src, 0), DImode)
3935 && immediate_operand (XEXP (src, 1), DImode))))
3936 return 1;
3937
3938 /* The 'restore src,src,dest' pattern. */
3939 else if (GET_CODE (src) == ASHIFT
3940 && (register_operand (XEXP (src, 0), SImode)
3941 || register_operand (XEXP (src, 0), DImode))
3942 && XEXP (src, 1) == const1_rtx)
3943 return 1;
3944
3945 return 0;
3946 }
3947
3948 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3949
3950 int
3951 eligible_for_return_delay (rtx_insn *trial)
3952 {
3953 int regno;
3954 rtx pat;
3955
3956 /* If the function uses __builtin_eh_return, the eh_return machinery
3957 occupies the delay slot. */
3958 if (crtl->calls_eh_return)
3959 return 0;
3960
3961 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3962 return 0;
3963
3964 /* In the case of a leaf or flat function, anything can go into the slot. */
3965 if (sparc_leaf_function_p || TARGET_FLAT)
3966 return 1;
3967
3968 if (!NONJUMP_INSN_P (trial))
3969 return 0;
3970
3971 pat = PATTERN (trial);
3972 if (GET_CODE (pat) == PARALLEL)
3973 {
3974 int i;
3975
3976 if (! TARGET_V9)
3977 return 0;
3978 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3979 {
3980 rtx expr = XVECEXP (pat, 0, i);
3981 if (GET_CODE (expr) != SET)
3982 return 0;
3983 if (GET_CODE (SET_DEST (expr)) != REG)
3984 return 0;
3985 regno = REGNO (SET_DEST (expr));
3986 if (regno >= 8 && regno < 24)
3987 return 0;
3988 }
3989 return !epilogue_renumber (&pat, 1);
3990 }
3991
3992 if (GET_CODE (pat) != SET)
3993 return 0;
3994
3995 if (GET_CODE (SET_DEST (pat)) != REG)
3996 return 0;
3997
3998 regno = REGNO (SET_DEST (pat));
3999
4000 /* Otherwise, only operations which can be done in tandem with
4001 a `restore' or `return' insn can go into the delay slot. */
4002 if (regno >= 8 && regno < 24)
4003 return 0;
4004
4005 /* If this instruction sets up a floating-point register and we have a return
4006 instruction, it can probably go in. But restore will not work
4007 with FP_REGS. */
4008 if (! SPARC_INT_REG_P (regno))
4009 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4010
4011 return eligible_for_restore_insn (trial, true);
4012 }
4013
4014 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4015
4016 int
4017 eligible_for_sibcall_delay (rtx_insn *trial)
4018 {
4019 rtx pat;
4020
4021 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4022 return 0;
4023
4024 if (!NONJUMP_INSN_P (trial))
4025 return 0;
4026
4027 pat = PATTERN (trial);
4028
4029 if (sparc_leaf_function_p || TARGET_FLAT)
4030 {
4031 /* If the tail call is done using the call instruction,
4032 we have to restore %o7 in the delay slot. */
4033 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4034 return 0;
4035
4036 /* %g1 is used to build the function address. */
4037 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4038 return 0;
4039
4040 return 1;
4041 }
4042
4043 if (GET_CODE (pat) != SET)
4044 return 0;
4045
4046 /* Otherwise, only operations which can be done in tandem with
4047 a `restore' insn can go into the delay slot. */
4048 if (GET_CODE (SET_DEST (pat)) != REG
4049 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4050 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4051 return 0;
4052
4053 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4054 in most cases. */
4055 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4056 return 0;
4057
4058 return eligible_for_restore_insn (trial, false);
4059 }
4060 \f
4061 /* Determine if it's legal to put X into the constant pool. This
4062 is not possible if X contains the address of a symbol that is
4063 not constant (TLS) or not known at final link time (PIC). */
4064
4065 static bool
4066 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4067 {
4068 switch (GET_CODE (x))
4069 {
4070 case CONST_INT:
4071 case CONST_WIDE_INT:
4072 case CONST_DOUBLE:
4073 case CONST_VECTOR:
4074 /* Accept all non-symbolic constants. */
4075 return false;
4076
4077 case LABEL_REF:
4078 /* Labels are OK iff we are non-PIC. */
4079 return flag_pic != 0;
4080
4081 case SYMBOL_REF:
4082 /* 'Naked' TLS symbol references are never OK,
4083 non-TLS symbols are OK iff we are non-PIC. */
4084 if (SYMBOL_REF_TLS_MODEL (x))
4085 return true;
4086 else
4087 return flag_pic != 0;
4088
4089 case CONST:
4090 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4091 case PLUS:
4092 case MINUS:
4093 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4094 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4095 case UNSPEC:
4096 return true;
4097 default:
4098 gcc_unreachable ();
4099 }
4100 }
4101 \f
4102 /* Global Offset Table support. */
4103 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4104 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4105
4106 /* Return the SYMBOL_REF for the Global Offset Table. */
4107
4108 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4109
4110 static rtx
4111 sparc_got (void)
4112 {
4113 if (!sparc_got_symbol)
4114 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4115
4116 return sparc_got_symbol;
4117 }
4118
4119 /* Ensure that we are not using patterns that are not OK with PIC. */
4120
4121 int
4122 check_pic (int i)
4123 {
4124 rtx op;
4125
4126 switch (flag_pic)
4127 {
4128 case 1:
4129 op = recog_data.operand[i];
4130 gcc_assert (GET_CODE (op) != SYMBOL_REF
4131 && (GET_CODE (op) != CONST
4132 || (GET_CODE (XEXP (op, 0)) == MINUS
4133 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4134 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4135 /* fallthrough */
4136 case 2:
4137 default:
4138 return 1;
4139 }
4140 }
4141
4142 /* Return true if X is an address which needs a temporary register when
4143 reloaded while generating PIC code. */
4144
4145 int
4146 pic_address_needs_scratch (rtx x)
4147 {
4148 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
4149 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4150 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4151 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4152 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4153 return 1;
4154
4155 return 0;
4156 }
4157
4158 /* Determine if a given RTX is a valid constant. We already know this
4159 satisfies CONSTANT_P. */
4160
4161 static bool
4162 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4163 {
4164 switch (GET_CODE (x))
4165 {
4166 case CONST:
4167 case SYMBOL_REF:
4168 if (sparc_tls_referenced_p (x))
4169 return false;
4170 break;
4171
4172 case CONST_DOUBLE:
4173 /* Floating point constants are generally not ok.
4174 The only exceptions are 0.0 and all-ones in VIS. */
4175 if (TARGET_VIS
4176 && SCALAR_FLOAT_MODE_P (mode)
4177 && (const_zero_operand (x, mode)
4178 || const_all_ones_operand (x, mode)))
4179 return true;
4180
4181 return false;
4182
4183 case CONST_VECTOR:
4184 /* Vector constants are generally not ok.
4185 The only exceptions are 0 and -1 in VIS. */
4186 if (TARGET_VIS
4187 && (const_zero_operand (x, mode)
4188 || const_all_ones_operand (x, mode)))
4189 return true;
4190
4191 return false;
4192
4193 default:
4194 break;
4195 }
4196
4197 return true;
4198 }
4199
4200 /* Determine if a given RTX is a valid constant address. */
4201
4202 bool
4203 constant_address_p (rtx x)
4204 {
4205 switch (GET_CODE (x))
4206 {
4207 case LABEL_REF:
4208 case CONST_INT:
4209 case HIGH:
4210 return true;
4211
4212 case CONST:
4213 if (flag_pic && pic_address_needs_scratch (x))
4214 return false;
4215 return sparc_legitimate_constant_p (Pmode, x);
4216
4217 case SYMBOL_REF:
4218 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4219
4220 default:
4221 return false;
4222 }
4223 }
4224
4225 /* Nonzero if the constant value X is a legitimate general operand
4226 when generating PIC code. It is given that flag_pic is on and
4227 that X satisfies CONSTANT_P. */
4228
4229 bool
4230 legitimate_pic_operand_p (rtx x)
4231 {
4232 if (pic_address_needs_scratch (x))
4233 return false;
4234 if (sparc_tls_referenced_p (x))
4235 return false;
4236 return true;
4237 }
4238
4239 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4240 (CONST_INT_P (X) \
4241 && INTVAL (X) >= -0x1000 \
4242 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4243
4244 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4245 (CONST_INT_P (X) \
4246 && INTVAL (X) >= -0x1000 \
4247 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4248
4249 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4250
4251 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4252 ordinarily. This changes a bit when generating PIC. */
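/* In concrete (purely illustrative) terms, the forms accepted here are the
   ones a SPARC load or store can encode directly, e.g.

       ld  [%o0 + %o1], %g1        ! REG + REG
       ld  [%o0 + 123], %g1        ! REG + signed 13-bit immediate

   subject to the additional per-mode restrictions checked below.  */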
4253
4254 static bool
4255 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4256 {
4257 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4258
4259 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4260 rs1 = addr;
4261 else if (GET_CODE (addr) == PLUS)
4262 {
4263 rs1 = XEXP (addr, 0);
4264 rs2 = XEXP (addr, 1);
4265
4266 /* Canonicalize. REG comes first; if there are no regs,
4267 LO_SUM comes first. */
4268 if (!REG_P (rs1)
4269 && GET_CODE (rs1) != SUBREG
4270 && (REG_P (rs2)
4271 || GET_CODE (rs2) == SUBREG
4272 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4273 {
4274 rs1 = XEXP (addr, 1);
4275 rs2 = XEXP (addr, 0);
4276 }
4277
4278 if ((flag_pic == 1
4279 && rs1 == pic_offset_table_rtx
4280 && !REG_P (rs2)
4281 && GET_CODE (rs2) != SUBREG
4282 && GET_CODE (rs2) != LO_SUM
4283 && GET_CODE (rs2) != MEM
4284 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4285 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4286 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4287 || ((REG_P (rs1)
4288 || GET_CODE (rs1) == SUBREG)
4289 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4290 {
4291 imm1 = rs2;
4292 rs2 = NULL;
4293 }
4294 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4295 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4296 {
4297 /* We prohibit REG + REG for TFmode when there are no quad move insns
4298 and we consequently need to split. We do this because REG+REG
4299 is not an offsettable address. If we get the situation in reload
4300 where source and destination of a movtf pattern are both MEMs with
4301 REG+REG address, then only one of them gets converted to an
4302 offsettable address. */
4303 if (mode == TFmode
4304 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4305 return 0;
4306
4307 /* Likewise for TImode, but in all cases. */
4308 if (mode == TImode)
4309 return 0;
4310
4311 /* We prohibit REG + REG on ARCH32 if not optimizing for
4312 DFmode/DImode because then mem_min_alignment is likely to be zero
4313 after reload and the forced split would lack a matching splitter
4314 pattern. */
4315 if (TARGET_ARCH32 && !optimize
4316 && (mode == DFmode || mode == DImode))
4317 return 0;
4318 }
4319 else if (USE_AS_OFFSETABLE_LO10
4320 && GET_CODE (rs1) == LO_SUM
4321 && TARGET_ARCH64
4322 && ! TARGET_CM_MEDMID
4323 && RTX_OK_FOR_OLO10_P (rs2, mode))
4324 {
4325 rs2 = NULL;
4326 imm1 = XEXP (rs1, 1);
4327 rs1 = XEXP (rs1, 0);
4328 if (!CONSTANT_P (imm1)
4329 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4330 return 0;
4331 }
4332 }
4333 else if (GET_CODE (addr) == LO_SUM)
4334 {
4335 rs1 = XEXP (addr, 0);
4336 imm1 = XEXP (addr, 1);
4337
4338 if (!CONSTANT_P (imm1)
4339 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4340 return 0;
4341
4342 /* We can't allow TFmode in 32-bit mode, because an offset greater
4343 than the alignment (8) may cause the LO_SUM to overflow. */
4344 if (mode == TFmode && TARGET_ARCH32)
4345 return 0;
4346
4347 /* During reload, accept the HIGH+LO_SUM construct generated by
4348 sparc_legitimize_reload_address. */
4349 if (reload_in_progress
4350 && GET_CODE (rs1) == HIGH
4351 && XEXP (rs1, 0) == imm1)
4352 return 1;
4353 }
4354 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4355 return 1;
4356 else
4357 return 0;
4358
4359 if (GET_CODE (rs1) == SUBREG)
4360 rs1 = SUBREG_REG (rs1);
4361 if (!REG_P (rs1))
4362 return 0;
4363
4364 if (rs2)
4365 {
4366 if (GET_CODE (rs2) == SUBREG)
4367 rs2 = SUBREG_REG (rs2);
4368 if (!REG_P (rs2))
4369 return 0;
4370 }
4371
4372 if (strict)
4373 {
4374 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4375 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4376 return 0;
4377 }
4378 else
4379 {
4380 if ((! SPARC_INT_REG_P (REGNO (rs1))
4381 && REGNO (rs1) != FRAME_POINTER_REGNUM
4382 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4383 || (rs2
4384 && (! SPARC_INT_REG_P (REGNO (rs2))
4385 && REGNO (rs2) != FRAME_POINTER_REGNUM
4386 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4387 return 0;
4388 }
4389 return 1;
4390 }
4391
4392 /* Return the SYMBOL_REF for the tls_get_addr function. */
4393
4394 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4395
4396 static rtx
4397 sparc_tls_get_addr (void)
4398 {
4399 if (!sparc_tls_symbol)
4400 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4401
4402 return sparc_tls_symbol;
4403 }
4404
4405 /* Return the Global Offset Table to be used in TLS mode. */
4406
4407 static rtx
4408 sparc_tls_got (void)
4409 {
4410 /* In PIC mode, this is just the PIC offset table. */
4411 if (flag_pic)
4412 {
4413 crtl->uses_pic_offset_table = 1;
4414 return pic_offset_table_rtx;
4415 }
4416
4417 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4418 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4419 if (TARGET_SUN_TLS && TARGET_ARCH32)
4420 {
4421 load_got_register ();
4422 return global_offset_table_rtx;
4423 }
4424
4425 /* In all other cases, we load a new pseudo with the GOT symbol. */
4426 return copy_to_reg (sparc_got ());
4427 }
4428
4429 /* Return true if X contains a thread-local symbol. */
4430
4431 static bool
4432 sparc_tls_referenced_p (rtx x)
4433 {
4434 if (!TARGET_HAVE_TLS)
4435 return false;
4436
4437 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4438 x = XEXP (XEXP (x, 0), 0);
4439
4440 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4441 return true;
4442
4443 /* That's all we handle in sparc_legitimize_tls_address for now. */
4444 return false;
4445 }
4446
4447 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4448 this (thread-local) address. */
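/* As a rough sketch (32-bit global-dynamic case; the actual insns are built
   from the patterns used below, and the temporary registers are shown
   symbolically):

       sethi  %tgd_hi22(sym), %t1
       add    %t1, %tgd_lo10(sym), %t2
       add    %l7, %t2, %o0, %tgd_add(sym)
       call   __tls_get_addr, %tgd_call(sym)
        nop

   with the resulting address returned in %o0.  */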
4449
4450 static rtx
4451 sparc_legitimize_tls_address (rtx addr)
4452 {
4453 rtx temp1, temp2, temp3, ret, o0, got;
4454 rtx_insn *insn;
4455
4456 gcc_assert (can_create_pseudo_p ());
4457
4458 if (GET_CODE (addr) == SYMBOL_REF)
4459 switch (SYMBOL_REF_TLS_MODEL (addr))
4460 {
4461 case TLS_MODEL_GLOBAL_DYNAMIC:
4462 start_sequence ();
4463 temp1 = gen_reg_rtx (SImode);
4464 temp2 = gen_reg_rtx (SImode);
4465 ret = gen_reg_rtx (Pmode);
4466 o0 = gen_rtx_REG (Pmode, 8);
4467 got = sparc_tls_got ();
4468 emit_insn (gen_tgd_hi22 (temp1, addr));
4469 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4470 if (TARGET_ARCH32)
4471 {
4472 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4473 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4474 addr, const1_rtx));
4475 }
4476 else
4477 {
4478 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4479 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4480 addr, const1_rtx));
4481 }
4482 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4483 insn = get_insns ();
4484 end_sequence ();
4485 emit_libcall_block (insn, ret, o0, addr);
4486 break;
4487
4488 case TLS_MODEL_LOCAL_DYNAMIC:
4489 start_sequence ();
4490 temp1 = gen_reg_rtx (SImode);
4491 temp2 = gen_reg_rtx (SImode);
4492 temp3 = gen_reg_rtx (Pmode);
4493 ret = gen_reg_rtx (Pmode);
4494 o0 = gen_rtx_REG (Pmode, 8);
4495 got = sparc_tls_got ();
4496 emit_insn (gen_tldm_hi22 (temp1));
4497 emit_insn (gen_tldm_lo10 (temp2, temp1));
4498 if (TARGET_ARCH32)
4499 {
4500 emit_insn (gen_tldm_add32 (o0, got, temp2));
4501 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4502 const1_rtx));
4503 }
4504 else
4505 {
4506 emit_insn (gen_tldm_add64 (o0, got, temp2));
4507 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4508 const1_rtx));
4509 }
4510 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4511 insn = get_insns ();
4512 end_sequence ();
4513 emit_libcall_block (insn, temp3, o0,
4514 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4515 UNSPEC_TLSLD_BASE));
4516 temp1 = gen_reg_rtx (SImode);
4517 temp2 = gen_reg_rtx (SImode);
4518 emit_insn (gen_tldo_hix22 (temp1, addr));
4519 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4520 if (TARGET_ARCH32)
4521 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4522 else
4523 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4524 break;
4525
4526 case TLS_MODEL_INITIAL_EXEC:
4527 temp1 = gen_reg_rtx (SImode);
4528 temp2 = gen_reg_rtx (SImode);
4529 temp3 = gen_reg_rtx (Pmode);
4530 got = sparc_tls_got ();
4531 emit_insn (gen_tie_hi22 (temp1, addr));
4532 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4533 if (TARGET_ARCH32)
4534 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4535 else
4536 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4537 if (TARGET_SUN_TLS)
4538 {
4539 ret = gen_reg_rtx (Pmode);
4540 if (TARGET_ARCH32)
4541 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4542 temp3, addr));
4543 else
4544 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4545 temp3, addr));
4546 }
4547 else
4548 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4549 break;
4550
4551 case TLS_MODEL_LOCAL_EXEC:
4552 temp1 = gen_reg_rtx (Pmode);
4553 temp2 = gen_reg_rtx (Pmode);
4554 if (TARGET_ARCH32)
4555 {
4556 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4557 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4558 }
4559 else
4560 {
4561 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4562 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4563 }
4564 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4565 break;
4566
4567 default:
4568 gcc_unreachable ();
4569 }
4570
4571 else if (GET_CODE (addr) == CONST)
4572 {
4573 rtx base, offset;
4574
4575 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4576
4577 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4578 offset = XEXP (XEXP (addr, 0), 1);
4579
4580 base = force_operand (base, NULL_RTX);
4581 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4582 offset = force_reg (Pmode, offset);
4583 ret = gen_rtx_PLUS (Pmode, base, offset);
4584 }
4585
4586 else
4587 gcc_unreachable (); /* for now ... */
4588
4589 return ret;
4590 }
4591
4592 /* Legitimize PIC addresses. If the address is already position-independent,
4593 we return ORIG. Newly generated position-independent addresses go into a
4594 reg. This is REG if nonzero, otherwise we allocate register(s) as
4595 necessary. */
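/* Roughly speaking (a sketch, not the exact patterns): with -fpic a symbol
   is reached through a single load from the GOT at [%l7 + small offset],
   while with -fPIC a high/lo_sum pair first builds the larger GOT offset in
   a temporary register and the load then uses [%l7 + temp].  */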
4596
4597 static rtx
4598 sparc_legitimize_pic_address (rtx orig, rtx reg)
4599 {
4600 bool gotdata_op = false;
4601
4602 if (GET_CODE (orig) == SYMBOL_REF
4603 /* See the comment in sparc_expand_move. */
4604 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4605 {
4606 rtx pic_ref, address;
4607 rtx_insn *insn;
4608
4609 if (reg == 0)
4610 {
4611 gcc_assert (can_create_pseudo_p ());
4612 reg = gen_reg_rtx (Pmode);
4613 }
4614
4615 if (flag_pic == 2)
4616 {
4617 /* If not during reload, allocate another temp reg here for loading
4618 in the address, so that these instructions can be optimized
4619 properly. */
4620 rtx temp_reg = (! can_create_pseudo_p ()
4621 ? reg : gen_reg_rtx (Pmode));
4622
4623 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4624 won't get confused into thinking that these two instructions
4625 are loading in the true address of the symbol. If in the
4626 future a PIC rtx exists, that should be used instead. */
4627 if (TARGET_ARCH64)
4628 {
4629 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4630 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4631 }
4632 else
4633 {
4634 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4635 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4636 }
4637 address = temp_reg;
4638 gotdata_op = true;
4639 }
4640 else
4641 address = orig;
4642
4643 crtl->uses_pic_offset_table = 1;
4644 if (gotdata_op)
4645 {
4646 if (TARGET_ARCH64)
4647 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4648 pic_offset_table_rtx,
4649 address, orig));
4650 else
4651 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4652 pic_offset_table_rtx,
4653 address, orig));
4654 }
4655 else
4656 {
4657 pic_ref
4658 = gen_const_mem (Pmode,
4659 gen_rtx_PLUS (Pmode,
4660 pic_offset_table_rtx, address));
4661 insn = emit_move_insn (reg, pic_ref);
4662 }
4663
4664 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4665 by loop. */
4666 set_unique_reg_note (insn, REG_EQUAL, orig);
4667 return reg;
4668 }
4669 else if (GET_CODE (orig) == CONST)
4670 {
4671 rtx base, offset;
4672
4673 if (GET_CODE (XEXP (orig, 0)) == PLUS
4674 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4675 return orig;
4676
4677 if (reg == 0)
4678 {
4679 gcc_assert (can_create_pseudo_p ());
4680 reg = gen_reg_rtx (Pmode);
4681 }
4682
4683 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4684 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4685 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4686 base == reg ? NULL_RTX : reg);
4687
4688 if (GET_CODE (offset) == CONST_INT)
4689 {
4690 if (SMALL_INT (offset))
4691 return plus_constant (Pmode, base, INTVAL (offset));
4692 else if (can_create_pseudo_p ())
4693 offset = force_reg (Pmode, offset);
4694 else
4695 /* If we reach here, then something is seriously wrong. */
4696 gcc_unreachable ();
4697 }
4698 return gen_rtx_PLUS (Pmode, base, offset);
4699 }
4700 else if (GET_CODE (orig) == LABEL_REF)
4701 /* ??? We ought to be checking that the register is live instead, in case
4702 it is eliminated. */
4703 crtl->uses_pic_offset_table = 1;
4704
4705 return orig;
4706 }
4707
4708 /* Try machine-dependent ways of modifying an illegitimate address X
4709 to be legitimate. If we find one, return the new, valid address.
4710
4711 OLDX is the address as it was before break_out_memory_refs was called.
4712 In some cases it is useful to look at this to decide what needs to be done.
4713
4714 MODE is the mode of the operand pointed to by X.
4715
4716 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4717
4718 static rtx
4719 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4720 machine_mode mode)
4721 {
4722 rtx orig_x = x;
4723
4724 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4725 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4726 force_operand (XEXP (x, 0), NULL_RTX));
4727 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4728 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4729 force_operand (XEXP (x, 1), NULL_RTX));
4730 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4731 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4732 XEXP (x, 1));
4733 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4734 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4735 force_operand (XEXP (x, 1), NULL_RTX));
4736
4737 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4738 return x;
4739
4740 if (sparc_tls_referenced_p (x))
4741 x = sparc_legitimize_tls_address (x);
4742 else if (flag_pic)
4743 x = sparc_legitimize_pic_address (x, NULL_RTX);
4744 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4745 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4746 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4747 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4748 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4749 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4750 else if (GET_CODE (x) == SYMBOL_REF
4751 || GET_CODE (x) == CONST
4752 || GET_CODE (x) == LABEL_REF)
4753 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4754
4755 return x;
4756 }
4757
4758 /* Delegitimize an address that was legitimized by the above function. */
4759
4760 static rtx
4761 sparc_delegitimize_address (rtx x)
4762 {
4763 x = delegitimize_mem_from_attrs (x);
4764
4765 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4766 switch (XINT (XEXP (x, 1), 1))
4767 {
4768 case UNSPEC_MOVE_PIC:
4769 case UNSPEC_TLSLE:
4770 x = XVECEXP (XEXP (x, 1), 0, 0);
4771 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4772 break;
4773 default:
4774 break;
4775 }
4776
4777 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4778 if (GET_CODE (x) == MINUS
4779 && REG_P (XEXP (x, 0))
4780 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4781 && GET_CODE (XEXP (x, 1)) == LO_SUM
4782 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4783 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4784 {
4785 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4786 gcc_assert (GET_CODE (x) == LABEL_REF);
4787 }
4788
4789 return x;
4790 }
4791
4792 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4793 replace the input X, or the original X if no replacement is called for.
4794 The output parameter *WIN is 1 if the calling macro should goto WIN,
4795 0 if it should not.
4796
4797 For SPARC, we wish to handle addresses by splitting them into
4798 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4799 This cuts the number of extra insns by one.
4800
4801 Do nothing when generating PIC code and the address is a symbolic
4802 operand or requires a scratch register. */
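/* In other words (non-PIC sketch; the register is whatever reload picks),
   a constant address is reloaded as

       sethi  %hi(sym), %g1
       ld     [%g1 + %lo(sym)], ...

   keeping the %lo() part in the memory reference rather than materializing
   the full address with a separate add first.  */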
4803
4804 rtx
4805 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4806 int opnum, int type,
4807 int ind_levels ATTRIBUTE_UNUSED, int *win)
4808 {
4809 /* Decompose SImode constants into HIGH+LO_SUM. */
4810 if (CONSTANT_P (x)
4811 && (mode != TFmode || TARGET_ARCH64)
4812 && GET_MODE (x) == SImode
4813 && GET_CODE (x) != LO_SUM
4814 && GET_CODE (x) != HIGH
4815 && sparc_cmodel <= CM_MEDLOW
4816 && !(flag_pic
4817 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4818 {
4819 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4820 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4821 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4822 opnum, (enum reload_type)type);
4823 *win = 1;
4824 return x;
4825 }
4826
4827 /* We have to recognize what we have already generated above. */
4828 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4829 {
4830 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4831 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4832 opnum, (enum reload_type)type);
4833 *win = 1;
4834 return x;
4835 }
4836
4837 *win = 0;
4838 return x;
4839 }
4840
4841 /* Return true if ADDR (a legitimate address expression)
4842 has an effect that depends on the machine mode it is used for.
4843
4844 In PIC mode,
4845
4846 (mem:HI [%l7+a])
4847
4848 is not equivalent to
4849
4850 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4851
4852 because [%l7+a+1] is interpreted as the address of (a+1). */
4853
4854
4855 static bool
4856 sparc_mode_dependent_address_p (const_rtx addr,
4857 addr_space_t as ATTRIBUTE_UNUSED)
4858 {
4859 if (flag_pic && GET_CODE (addr) == PLUS)
4860 {
4861 rtx op0 = XEXP (addr, 0);
4862 rtx op1 = XEXP (addr, 1);
4863 if (op0 == pic_offset_table_rtx
4864 && symbolic_operand (op1, VOIDmode))
4865 return true;
4866 }
4867
4868 return false;
4869 }
4870
4871 #ifdef HAVE_GAS_HIDDEN
4872 # define USE_HIDDEN_LINKONCE 1
4873 #else
4874 # define USE_HIDDEN_LINKONCE 0
4875 #endif
4876
4877 static void
4878 get_pc_thunk_name (char name[32], unsigned int regno)
4879 {
4880 const char *reg_name = reg_names[regno];
4881
4882 /* Skip the leading '%' as that cannot be used in a
4883 symbol name. */
4884 reg_name += 1;
4885
4886 if (USE_HIDDEN_LINKONCE)
4887 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4888 else
4889 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4890 }
4891
4892 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4893
4894 static rtx
4895 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4896 {
4897 int orig_flag_pic = flag_pic;
4898 rtx insn;
4899
4900 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4901 flag_pic = 0;
4902 if (TARGET_ARCH64)
4903 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4904 else
4905 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4906 flag_pic = orig_flag_pic;
4907
4908 return insn;
4909 }
4910
4911 /* Emit code to load the GOT register. */
4912
4913 void
4914 load_got_register (void)
4915 {
4916 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4917 if (!global_offset_table_rtx)
4918 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4919
4920 if (TARGET_VXWORKS_RTP)
4921 emit_insn (gen_vxworks_load_got ());
4922 else
4923 {
4924 /* The GOT symbol is subject to a PC-relative relocation so we need a
4925 helper function to add the PC value and thus get the final value. */
4926 if (!got_helper_rtx)
4927 {
4928 char name[32];
4929 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4930 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4931 }
4932
4933 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4934 got_helper_rtx,
4935 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4936 }
4937
4938 /* Need to emit this whether or not we obey regdecls,
4939 since setjmp/longjmp can cause life info to screw up.
4940 ??? In the case where we don't obey regdecls, this is not sufficient
4941 since we may not fall out the bottom. */
4942 emit_use (global_offset_table_rtx);
4943 }
4944
4945 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4946 address of the call target. */
4947
4948 void
4949 sparc_emit_call_insn (rtx pat, rtx addr)
4950 {
4951 rtx_insn *insn;
4952
4953 insn = emit_call_insn (pat);
4954
4955 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4956 if (TARGET_VXWORKS_RTP
4957 && flag_pic
4958 && GET_CODE (addr) == SYMBOL_REF
4959 && (SYMBOL_REF_DECL (addr)
4960 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4961 : !SYMBOL_REF_LOCAL_P (addr)))
4962 {
4963 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4964 crtl->uses_pic_offset_table = 1;
4965 }
4966 }
4967 \f
4968 /* Return 1 if RTX is a MEM which is known to be aligned to at
4969 least a DESIRED byte boundary. */
4970
4971 int
4972 mem_min_alignment (rtx mem, int desired)
4973 {
4974 rtx addr, base, offset;
4975
4976 /* If it's not a MEM we can't accept it. */
4977 if (GET_CODE (mem) != MEM)
4978 return 0;
4979
4980 /* Obviously... */
4981 if (!TARGET_UNALIGNED_DOUBLES
4982 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4983 return 1;
4984
4985 /* ??? The rest of the function predates MEM_ALIGN so
4986 there is probably a bit of redundancy. */
4987 addr = XEXP (mem, 0);
4988 base = offset = NULL_RTX;
4989 if (GET_CODE (addr) == PLUS)
4990 {
4991 if (GET_CODE (XEXP (addr, 0)) == REG)
4992 {
4993 base = XEXP (addr, 0);
4994
4995 /* What we are saying here is that if the base
4996 REG is aligned properly, the compiler will make
4997 sure any REG based index upon it will be so
4998 as well. */
4999 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5000 offset = XEXP (addr, 1);
5001 else
5002 offset = const0_rtx;
5003 }
5004 }
5005 else if (GET_CODE (addr) == REG)
5006 {
5007 base = addr;
5008 offset = const0_rtx;
5009 }
5010
5011 if (base != NULL_RTX)
5012 {
5013 int regno = REGNO (base);
5014
5015 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5016 {
5017 /* Check if the compiler has recorded some information
5018 about the alignment of the base REG. If reload has
5019 completed, we already matched with proper alignments.
5020 If not running global_alloc, reload might give us an
5021 unaligned pointer to the local stack, though. */
5022 if (((cfun != 0
5023 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5024 || (optimize && reload_completed))
5025 && (INTVAL (offset) & (desired - 1)) == 0)
5026 return 1;
5027 }
5028 else
5029 {
5030 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5031 return 1;
5032 }
5033 }
5034 else if (! TARGET_UNALIGNED_DOUBLES
5035 || CONSTANT_P (addr)
5036 || GET_CODE (addr) == LO_SUM)
5037 {
5038 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5039 is true, in which case we can only assume that an access is aligned if
5040 it is to a constant address, or the address involves a LO_SUM. */
5041 return 1;
5042 }
5043
5044 /* An obviously unaligned address. */
5045 return 0;
5046 }
5047
5048 \f
5049 /* Vectors to keep interesting information about registers where it can easily
5050 be obtained. We used to use the actual mode value as the bit number, but there
5051 are more than 32 modes now. Instead we use two tables: one indexed by
5052 hard register number, and one indexed by mode. */
5053
5054 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5055 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5056 mapped into one sparc_mode_class mode. */
5057
5058 enum sparc_mode_class {
5059 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5060 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5061 CC_MODE, CCFP_MODE
5062 };
5063
5064 /* Modes for single-word and smaller quantities. */
5065 #define S_MODES \
5066 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5067
5068 /* Modes for double-word and smaller quantities. */
5069 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5070
5071 /* Modes for quad-word and smaller quantities. */
5072 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5073
5074 /* Modes for 8-word and smaller quantities. */
5075 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5076
5077 /* Modes for single-float quantities. */
5078 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5079
5080 /* Modes for double-float and smaller quantities. */
5081 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5082
5083 /* Modes for quad-float and smaller quantities. */
5084 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5085
5086 /* Modes for quad-float pairs and smaller quantities. */
5087 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5088
5089 /* Modes for double-float only quantities. */
5090 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5091
5092 /* Modes for quad-float and double-float only quantities. */
5093 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5094
5095 /* Modes for quad-float pairs and double-float only quantities. */
5096 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5097
5098 /* Modes for condition codes. */
5099 #define CC_MODES (1 << (int) CC_MODE)
5100 #define CCFP_MODES (1 << (int) CCFP_MODE)
5101
5102 /* Value is 1 if register/mode pair is acceptable on sparc.
5103
5104 The funny mixture of D and T modes is because integer operations
5105 do not specially operate on tetra quantities, so non-quad-aligned
5106 registers can hold quadword quantities (except %o4 and %i4 because
5107 they cross fixed registers).
5108
5109 ??? Note that, despite the settings, non-double-aligned parameter
5110 registers can hold double-word quantities in 32-bit mode. */
5111
5112 /* This points to either the 32-bit or the 64-bit version. */
5113 static const int *hard_regno_mode_classes;
5114
5115 static const int hard_32bit_mode_classes[] = {
5116 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5117 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5118 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5119 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5120
5121 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5122 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5123 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5124 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5125
5126 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5127 and none can hold SFmode/SImode values. */
5128 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5129 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5130 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5131 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5132
5133 /* %fcc[0123] */
5134 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5135
5136 /* %icc, %sfp, %gsr */
5137 CC_MODES, 0, D_MODES
5138 };
5139
5140 static const int hard_64bit_mode_classes[] = {
5141 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5142 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5143 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5144 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5145
5146 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5147 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5148 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5149 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5150
5151 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5152 and none can hold SFmode/SImode values. */
5153 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5154 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5155 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5156 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5157
5158 /* %fcc[0123] */
5159 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5160
5161 /* %icc, %sfp, %gsr */
5162 CC_MODES, 0, D_MODES
5163 };
5164
5165 static int sparc_mode_class [NUM_MACHINE_MODES];
5166
5167 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5168
5169 static void
5170 sparc_init_modes (void)
5171 {
5172 int i;
5173
5174 for (i = 0; i < NUM_MACHINE_MODES; i++)
5175 {
5176 machine_mode m = (machine_mode) i;
5177 unsigned int size = GET_MODE_SIZE (m);
5178
5179 switch (GET_MODE_CLASS (m))
5180 {
5181 case MODE_INT:
5182 case MODE_PARTIAL_INT:
5183 case MODE_COMPLEX_INT:
5184 if (size < 4)
5185 sparc_mode_class[i] = 1 << (int) H_MODE;
5186 else if (size == 4)
5187 sparc_mode_class[i] = 1 << (int) S_MODE;
5188 else if (size == 8)
5189 sparc_mode_class[i] = 1 << (int) D_MODE;
5190 else if (size == 16)
5191 sparc_mode_class[i] = 1 << (int) T_MODE;
5192 else if (size == 32)
5193 sparc_mode_class[i] = 1 << (int) O_MODE;
5194 else
5195 sparc_mode_class[i] = 0;
5196 break;
5197 case MODE_VECTOR_INT:
5198 if (size == 4)
5199 sparc_mode_class[i] = 1 << (int) SF_MODE;
5200 else if (size == 8)
5201 sparc_mode_class[i] = 1 << (int) DF_MODE;
5202 else
5203 sparc_mode_class[i] = 0;
5204 break;
5205 case MODE_FLOAT:
5206 case MODE_COMPLEX_FLOAT:
5207 if (size == 4)
5208 sparc_mode_class[i] = 1 << (int) SF_MODE;
5209 else if (size == 8)
5210 sparc_mode_class[i] = 1 << (int) DF_MODE;
5211 else if (size == 16)
5212 sparc_mode_class[i] = 1 << (int) TF_MODE;
5213 else if (size == 32)
5214 sparc_mode_class[i] = 1 << (int) OF_MODE;
5215 else
5216 sparc_mode_class[i] = 0;
5217 break;
5218 case MODE_CC:
5219 if (m == CCFPmode || m == CCFPEmode)
5220 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5221 else
5222 sparc_mode_class[i] = 1 << (int) CC_MODE;
5223 break;
5224 default:
5225 sparc_mode_class[i] = 0;
5226 break;
5227 }
5228 }
5229
5230 if (TARGET_ARCH64)
5231 hard_regno_mode_classes = hard_64bit_mode_classes;
5232 else
5233 hard_regno_mode_classes = hard_32bit_mode_classes;
5234
5235 /* Initialize the array used by REGNO_REG_CLASS. */
5236 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5237 {
5238 if (i < 16 && TARGET_V8PLUS)
5239 sparc_regno_reg_class[i] = I64_REGS;
5240 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5241 sparc_regno_reg_class[i] = GENERAL_REGS;
5242 else if (i < 64)
5243 sparc_regno_reg_class[i] = FP_REGS;
5244 else if (i < 96)
5245 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5246 else if (i < 100)
5247 sparc_regno_reg_class[i] = FPCC_REGS;
5248 else
5249 sparc_regno_reg_class[i] = NO_REGS;
5250 }
5251 }
5252 \f
5253 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5254
5255 static inline bool
5256 save_global_or_fp_reg_p (unsigned int regno,
5257 int leaf_function ATTRIBUTE_UNUSED)
5258 {
5259 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5260 }
5261
5262 /* Return whether the return address register (%i7) is needed. */
5263
5264 static inline bool
5265 return_addr_reg_needed_p (int leaf_function)
5266 {
5267 /* If it is live, for example because of __builtin_return_address (0). */
5268 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5269 return true;
5270
5271 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5272 if (!leaf_function
5273 /* Loading the GOT register clobbers %o7. */
5274 || crtl->uses_pic_offset_table
5275 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5276 return true;
5277
5278 return false;
5279 }
5280
5281 /* Return whether REGNO, a local or in register, must be saved/restored. */
5282
5283 static bool
5284 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5285 {
5286 /* General case: call-saved registers live at some point. */
5287 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5288 return true;
5289
5290 /* Frame pointer register (%fp) if needed. */
5291 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5292 return true;
5293
5294 /* Return address register (%i7) if needed. */
5295 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5296 return true;
5297
5298 /* GOT register (%l7) if needed. */
5299 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5300 return true;
5301
5302 /* If the function accesses prior frames, the frame pointer and the return
5303 address of the previous frame must be saved on the stack. */
5304 if (crtl->accesses_prior_frames
5305 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5306 return true;
5307
5308 return false;
5309 }
5310
5311 /* Compute the frame size required by the function. This function is called
5312 during the reload pass and also by sparc_expand_prologue. */
5313
5314 HOST_WIDE_INT
5315 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5316 {
5317 HOST_WIDE_INT frame_size, apparent_frame_size;
5318 int args_size, n_global_fp_regs = 0;
5319 bool save_local_in_regs_p = false;
5320 unsigned int i;
5321
5322 /* If the function allocates dynamic stack space, the dynamic offset is
5323 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5324 if (leaf_function && !cfun->calls_alloca)
5325 args_size = 0;
5326 else
5327 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5328
5329 /* Calculate space needed for global registers. */
5330 if (TARGET_ARCH64)
5331 {
5332 for (i = 0; i < 8; i++)
5333 if (save_global_or_fp_reg_p (i, 0))
5334 n_global_fp_regs += 2;
5335 }
5336 else
5337 {
5338 for (i = 0; i < 8; i += 2)
5339 if (save_global_or_fp_reg_p (i, 0)
5340 || save_global_or_fp_reg_p (i + 1, 0))
5341 n_global_fp_regs += 2;
5342 }
5343
5344 /* In the flat window model, find out which local and in registers need to
5345 be saved. We don't reserve space in the current frame for them as they
5346 will be spilled into the register window save area of the caller's frame.
5347 However, as soon as we use this register window save area, we must create
5348 that of the current frame to make it the live one. */
5349 if (TARGET_FLAT)
5350 for (i = 16; i < 32; i++)
5351 if (save_local_or_in_reg_p (i, leaf_function))
5352 {
5353 save_local_in_regs_p = true;
5354 break;
5355 }
5356
5357 /* Calculate space needed for FP registers. */
5358 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5359 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5360 n_global_fp_regs += 2;
5361
5362 if (size == 0
5363 && n_global_fp_regs == 0
5364 && args_size == 0
5365 && !save_local_in_regs_p)
5366 frame_size = apparent_frame_size = 0;
5367 else
5368 {
5369 /* We subtract TARGET_STARTING_FRAME_OFFSET, remember it's negative. */
5370 apparent_frame_size
5371 = ROUND_UP (size - targetm.starting_frame_offset (), 8);
5372 apparent_frame_size += n_global_fp_regs * 4;
5373
5374 /* We need to add the size of the outgoing argument area. */
5375 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5376
5377 /* And that of the register window save area. */
5378 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5379
5380 /* Finally, bump to the appropriate alignment. */
5381 frame_size = SPARC_STACK_ALIGN (frame_size);
5382 }
5383
5384 /* Set up values for use in prologue and epilogue. */
5385 sparc_frame_size = frame_size;
5386 sparc_apparent_frame_size = apparent_frame_size;
5387 sparc_n_global_fp_regs = n_global_fp_regs;
5388 sparc_save_local_in_regs_p = save_local_in_regs_p;
5389
5390 return frame_size;
5391 }
5392
5393 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5394
5395 int
5396 sparc_initial_elimination_offset (int to)
5397 {
5398 int offset;
5399
5400 if (to == STACK_POINTER_REGNUM)
5401 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5402 else
5403 offset = 0;
5404
5405 offset += SPARC_STACK_BIAS;
5406 return offset;
5407 }
5408
5409 /* Output any necessary .register pseudo-ops. */
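/* For example, a 64-bit function in which %g2 and %g7 are live would get

       .register  %g2, #scratch
       .register  %g7, #ignore

   emitted by the loop below.  */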
5410
5411 void
5412 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5413 {
5414 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5415 int i;
5416
5417 if (TARGET_ARCH32)
5418 return;
5419
5420 /* Check if %g[2367] were used without
5421 .register being printed for them already. */
5422 for (i = 2; i < 8; i++)
5423 {
5424 if (df_regs_ever_live_p (i)
5425 && ! sparc_hard_reg_printed [i])
5426 {
5427 sparc_hard_reg_printed [i] = 1;
5428 /* %g7 is used as the TLS base register, so use #ignore
5429 for it instead of #scratch. */
5430 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5431 i == 7 ? "ignore" : "scratch");
5432 }
5433 if (i == 3) i = 5;
5434 }
5435 #endif
5436 }
5437
5438 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5439
5440 #if PROBE_INTERVAL > 4096
5441 #error Cannot use indexed addressing mode for stack probing
5442 #endif
5443
5444 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5445 inclusive. These are offsets from the current stack pointer.
5446
5447 Note that we don't use the REG+REG addressing mode for the probes because
5448 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5449 anyway, so the advantage of having a single code path wins here. */
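/* For example (assuming the usual 4096-byte PROBE_INTERVAL), probing a
   10240-byte range starting at offset FIRST emits probes at FIRST + 4096,
   FIRST + 8192 and FIRST + 10240 below the stack pointer.  */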
5450
5451 static void
5452 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5453 {
5454 rtx g1 = gen_rtx_REG (Pmode, 1);
5455
5456 /* See if we have a constant small number of probes to generate. If so,
5457 that's the easy case. */
5458 if (size <= PROBE_INTERVAL)
5459 {
5460 emit_move_insn (g1, GEN_INT (first));
5461 emit_insn (gen_rtx_SET (g1,
5462 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5463 emit_stack_probe (plus_constant (Pmode, g1, -size));
5464 }
5465
5466 /* The run-time loop is made up of 9 insns in the generic case while the
5467 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5468 else if (size <= 4 * PROBE_INTERVAL)
5469 {
5470 HOST_WIDE_INT i;
5471
5472 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5473 emit_insn (gen_rtx_SET (g1,
5474 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5475 emit_stack_probe (g1);
5476
5477 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5478 it exceeds SIZE. If only two probes are needed, this will not
5479 generate any code. Then probe at FIRST + SIZE. */
5480 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5481 {
5482 emit_insn (gen_rtx_SET (g1,
5483 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5484 emit_stack_probe (g1);
5485 }
5486
5487 emit_stack_probe (plus_constant (Pmode, g1,
5488 (i - PROBE_INTERVAL) - size));
5489 }
5490
5491 /* Otherwise, do the same as above, but in a loop. Note that we must be
5492 extra careful with variables wrapping around because we might be at
5493 the very top (or the very bottom) of the address space and we have
5494 to be able to handle this case properly; in particular, we use an
5495 equality test for the loop condition. */
5496 else
5497 {
5498 HOST_WIDE_INT rounded_size;
5499 rtx g4 = gen_rtx_REG (Pmode, 4);
5500
5501 emit_move_insn (g1, GEN_INT (first));
5502
5503
5504 /* Step 1: round SIZE to the previous multiple of the interval. */
5505
5506 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5507 emit_move_insn (g4, GEN_INT (rounded_size));
5508
5509
5510 /* Step 2: compute initial and final value of the loop counter. */
5511
5512 /* TEST_ADDR = SP + FIRST. */
5513 emit_insn (gen_rtx_SET (g1,
5514 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5515
5516 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5517 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5518
5519
5520 /* Step 3: the loop
5521
5522 while (TEST_ADDR != LAST_ADDR)
5523 {
5524 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5525 probe at TEST_ADDR
5526 }
5527
5528 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5529 until it is equal to ROUNDED_SIZE. */
5530
5531 if (TARGET_ARCH64)
5532 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5533 else
5534 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5535
5536
5537 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5538 that SIZE is equal to ROUNDED_SIZE. */
5539
5540 if (size != rounded_size)
5541 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5542 }
5543
5544 /* Make sure nothing is scheduled before we are done. */
5545 emit_insn (gen_blockage ());
5546 }
5547
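/* As a worked example of the above (assuming PROBE_INTERVAL == 4096), a call
   with FIRST == 4096 and SIZE == 12288 takes the unrolled branch and probes
   at SP - 8192, SP - 12288 and finally SP - 16384, i.e. down to
   SP - (FIRST + SIZE).  */
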
5548 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5549 absolute addresses. */
5550
5551 const char *
5552 output_probe_stack_range (rtx reg1, rtx reg2)
5553 {
5554 static int labelno = 0;
5555 char loop_lab[32];
5556 rtx xops[2];
5557
5558 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5559
5560 /* Loop. */
5561 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5562
5563 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5564 xops[0] = reg1;
5565 xops[1] = GEN_INT (-PROBE_INTERVAL);
5566 output_asm_insn ("add\t%0, %1, %0", xops);
5567
5568 /* Test if TEST_ADDR == LAST_ADDR. */
5569 xops[1] = reg2;
5570 output_asm_insn ("cmp\t%0, %1", xops);
5571
5572 /* Probe at TEST_ADDR and branch. */
5573 if (TARGET_ARCH64)
5574 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5575 else
5576 fputs ("\tbne\t", asm_out_file);
5577 assemble_name_raw (asm_out_file, loop_lab);
5578 fputc ('\n', asm_out_file);
5579 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5580 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5581
5582 return "";
5583 }
5584
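/* The emitted sequence is roughly the following, assuming %g1/%g4 as the
   two registers, PROBE_INTERVAL == 4096 and the 64-bit stack bias of 2047:

     .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne,pt	%xcc, .LPSRL0
	 st	%g0, [%g1+2047]  */
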
5585 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5586 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5587 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5588 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5589 the action to be performed if it returns false. Return the new offset. */
5590
5591 typedef bool (*sorr_pred_t) (unsigned int, int);
5592 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5593
5594 static int
5595 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5596 int offset, int leaf_function, sorr_pred_t save_p,
5597 sorr_act_t action_true, sorr_act_t action_false)
5598 {
5599 unsigned int i;
5600 rtx mem;
5601 rtx_insn *insn;
5602
5603 if (TARGET_ARCH64 && high <= 32)
5604 {
5605 int fp_offset = -1;
5606
5607 for (i = low; i < high; i++)
5608 {
5609 if (save_p (i, leaf_function))
5610 {
5611 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5612 base, offset));
5613 if (action_true == SORR_SAVE)
5614 {
5615 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5616 RTX_FRAME_RELATED_P (insn) = 1;
5617 }
5618 else /* action_true == SORR_RESTORE */
5619 {
5620 /* The frame pointer must be restored last since its old
5621 value may be used as the base address for the frame. This
5622 is only problematic in 64-bit mode because of the lack
5623 of a double-word load instruction. */
5624 if (i == HARD_FRAME_POINTER_REGNUM)
5625 fp_offset = offset;
5626 else
5627 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5628 }
5629 offset += 8;
5630 }
5631 else if (action_false == SORR_ADVANCE)
5632 offset += 8;
5633 }
5634
5635 if (fp_offset >= 0)
5636 {
5637 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5638 emit_move_insn (hard_frame_pointer_rtx, mem);
5639 }
5640 }
5641 else
5642 {
5643 for (i = low; i < high; i += 2)
5644 {
5645 bool reg0 = save_p (i, leaf_function);
5646 bool reg1 = save_p (i + 1, leaf_function);
5647 machine_mode mode;
5648 int regno;
5649
5650 if (reg0 && reg1)
5651 {
5652 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5653 regno = i;
5654 }
5655 else if (reg0)
5656 {
5657 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5658 regno = i;
5659 }
5660 else if (reg1)
5661 {
5662 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5663 regno = i + 1;
5664 offset += 4;
5665 }
5666 else
5667 {
5668 if (action_false == SORR_ADVANCE)
5669 offset += 8;
5670 continue;
5671 }
5672
5673 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5674 if (action_true == SORR_SAVE)
5675 {
5676 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5677 RTX_FRAME_RELATED_P (insn) = 1;
5678 if (mode == DImode)
5679 {
5680 rtx set1, set2;
5681 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5682 offset));
5683 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5684 RTX_FRAME_RELATED_P (set1) = 1;
5685 mem
5686 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5687 offset + 4));
5688 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5689 RTX_FRAME_RELATED_P (set2) = 1;
5690 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5691 gen_rtx_PARALLEL (VOIDmode,
5692 gen_rtvec (2, set1, set2)));
5693 }
5694 }
5695 else /* action_true == SORR_RESTORE */
5696 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5697
5698 /* Bump and round down to double word
5699 in case we already bumped by 4. */
5700 offset = ROUND_DOWN (offset + 8, 8);
5701 }
5702 }
5703
5704 return offset;
5705 }
5706
5707 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5708
5709 static rtx
5710 emit_adjust_base_to_offset (rtx base, int offset)
5711 {
5712 /* ??? This might be optimized a little as %g1 might already have a
5713 value close enough that a single add insn will do. */
5714 /* ??? Although, all of this is probably only a temporary fix because
5715 if %g1 can hold a function result, then sparc_expand_epilogue will
5716 lose (the result will be clobbered). */
5717 rtx new_base = gen_rtx_REG (Pmode, 1);
5718 emit_move_insn (new_base, GEN_INT (offset));
5719 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5720 return new_base;
5721 }
5722
5723 /* Emit code to save/restore call-saved global and FP registers. */
5724
5725 static void
5726 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5727 {
5728 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5729 {
5730 base = emit_adjust_base_to_offset (base, offset);
5731 offset = 0;
5732 }
5733
5734 offset
5735 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5736 save_global_or_fp_reg_p, action, SORR_NONE);
5737 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5738 save_global_or_fp_reg_p, action, SORR_NONE);
5739 }
5740
5741 /* Emit code to save/restore call-saved local and in registers. */
5742
5743 static void
5744 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5745 {
5746 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5747 {
5748 base = emit_adjust_base_to_offset (base, offset);
5749 offset = 0;
5750 }
5751
5752 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5753 save_local_or_in_reg_p, action, SORR_ADVANCE);
5754 }
5755
5756 /* Emit a window_save insn. */
5757
5758 static rtx_insn *
5759 emit_window_save (rtx increment)
5760 {
5761 rtx_insn *insn = emit_insn (gen_window_save (increment));
5762 RTX_FRAME_RELATED_P (insn) = 1;
5763
5764 /* The incoming return address (%o7) is saved in %i7. */
5765 add_reg_note (insn, REG_CFA_REGISTER,
5766 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5767 gen_rtx_REG (Pmode,
5768 INCOMING_RETURN_ADDR_REGNUM)));
5769
5770 /* The window save event. */
5771 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5772
5773 /* The CFA is %fp, the hard frame pointer. */
5774 add_reg_note (insn, REG_CFA_DEF_CFA,
5775 plus_constant (Pmode, hard_frame_pointer_rtx,
5776 INCOMING_FRAME_SP_OFFSET));
5777
5778 return insn;
5779 }
5780
5781 /* Generate an increment for the stack pointer. */
5782
5783 static rtx
5784 gen_stack_pointer_inc (rtx increment)
5785 {
5786 return gen_rtx_SET (stack_pointer_rtx,
5787 gen_rtx_PLUS (Pmode,
5788 stack_pointer_rtx,
5789 increment));
5790 }
5791
5792 /* Expand the function prologue. The prologue is responsible for reserving
5793 storage for the frame, saving the call-saved registers and loading the
5794 GOT register if needed. */
5795
5796 void
5797 sparc_expand_prologue (void)
5798 {
5799 HOST_WIDE_INT size;
5800 rtx_insn *insn;
5801
5802 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5803 on the final value of the flag means deferring the prologue/epilogue
5804 expansion until just before the second scheduling pass, which is too
5805 late to emit multiple epilogues or return insns.
5806
5807 Of course we are making the assumption that the value of the flag
5808 will not change between now and its final value. Of the three parts
5809 of the formula, only the last one can reasonably vary. Let's take a
5810 closer look, after assuming that the first two are set to true
5811 (otherwise the last value is effectively silenced).
5812
5813 If only_leaf_regs_used returns false, the global predicate will also
5814 be false so the actual frame size calculated below will be positive.
5815 As a consequence, the save_register_window insn will be emitted in
5816 the instruction stream; now this insn explicitly references %fp
5817 which is not a leaf register so only_leaf_regs_used will always
5818 return false subsequently.
5819
5820 If only_leaf_regs_used returns true, we hope that the subsequent
5821 optimization passes won't cause non-leaf registers to pop up. For
5822 example, the regrename pass has special provisions to not rename to
5823 non-leaf registers in a leaf function. */
5824 sparc_leaf_function_p
5825 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5826
5827 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5828
5829 if (flag_stack_usage_info)
5830 current_function_static_stack_size = size;
5831
5832 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5833 || flag_stack_clash_protection)
5834 {
5835 if (crtl->is_leaf && !cfun->calls_alloca)
5836 {
5837 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5838 sparc_emit_probe_stack_range (get_stack_check_protect (),
5839 size - get_stack_check_protect ());
5840 }
5841 else if (size > 0)
5842 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5843 }
5844
5845 if (size == 0)
5846 ; /* do nothing. */
5847 else if (sparc_leaf_function_p)
5848 {
5849 rtx size_int_rtx = GEN_INT (-size);
5850
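/* -4096 still fits in the signed 13-bit immediate field of an add, so
   frames up to 4096 bytes need a single increment and frames up to 8192
   need two; larger amounts are loaded into %g1 first.  */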
5851 if (size <= 4096)
5852 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5853 else if (size <= 8192)
5854 {
5855 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5856 RTX_FRAME_RELATED_P (insn) = 1;
5857
5858 /* %sp is still the CFA register. */
5859 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5860 }
5861 else
5862 {
5863 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5864 emit_move_insn (size_rtx, size_int_rtx);
5865 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5866 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5867 gen_stack_pointer_inc (size_int_rtx));
5868 }
5869
5870 RTX_FRAME_RELATED_P (insn) = 1;
5871 }
5872 else
5873 {
5874 rtx size_int_rtx = GEN_INT (-size);
5875
5876 if (size <= 4096)
5877 emit_window_save (size_int_rtx);
5878 else if (size <= 8192)
5879 {
5880 emit_window_save (GEN_INT (-4096));
5881
5882 /* %sp is not the CFA register anymore. */
5883 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5884
5885 /* Make sure no %fp-based store is issued until after the frame is
5886 established. The offset between the frame pointer and the stack
5887 pointer is calculated relative to the value of the stack pointer
5888 at the end of the function prologue, and moving instructions that
5889 access the stack via the frame pointer between the instructions
5890 that decrement the stack pointer could result in accessing the
5891 register window save area, which is volatile. */
5892 emit_insn (gen_frame_blockage ());
5893 }
5894 else
5895 {
5896 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5897 emit_move_insn (size_rtx, size_int_rtx);
5898 emit_window_save (size_rtx);
5899 }
5900 }
5901
5902 if (sparc_leaf_function_p)
5903 {
5904 sparc_frame_base_reg = stack_pointer_rtx;
5905 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5906 }
5907 else
5908 {
5909 sparc_frame_base_reg = hard_frame_pointer_rtx;
5910 sparc_frame_base_offset = SPARC_STACK_BIAS;
5911 }
5912
5913 if (sparc_n_global_fp_regs > 0)
5914 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5915 sparc_frame_base_offset
5916 - sparc_apparent_frame_size,
5917 SORR_SAVE);
5918
5919 /* Load the GOT register if needed. */
5920 if (crtl->uses_pic_offset_table)
5921 load_got_register ();
5922
5923 /* Advertise that the data calculated just above are now valid. */
5924 sparc_prologue_data_valid_p = true;
5925 }
5926
5927 /* Expand the function prologue. The prologue is responsible for reserving
5928 storage for the frame, saving the call-saved registers and loading the
5929 GOT register if needed. */
5930
5931 void
5932 sparc_flat_expand_prologue (void)
5933 {
5934 HOST_WIDE_INT size;
5935 rtx_insn *insn;
5936
5937 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5938
5939 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5940
5941 if (flag_stack_usage_info)
5942 current_function_static_stack_size = size;
5943
5944 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5945 || flag_stack_clash_protection)
5946 {
5947 if (crtl->is_leaf && !cfun->calls_alloca)
5948 {
5949 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5950 sparc_emit_probe_stack_range (get_stack_check_protect (),
5951 size - get_stack_check_protect ());
5952 }
5953 else if (size > 0)
5954 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5955 }
5956
5957 if (sparc_save_local_in_regs_p)
5958 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5959 SORR_SAVE);
5960
5961 if (size == 0)
5962 ; /* do nothing. */
5963 else
5964 {
5965 rtx size_int_rtx, size_rtx;
5966
5967 size_rtx = size_int_rtx = GEN_INT (-size);
5968
5969 /* We establish the frame (i.e. decrement the stack pointer) first, even
5970 if we use a frame pointer, because we cannot clobber any call-saved
5971 registers, including the frame pointer, if we haven't created a new
5972 register save area, for the sake of compatibility with the ABI. */
5973 if (size <= 4096)
5974 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5975 else if (size <= 8192 && !frame_pointer_needed)
5976 {
5977 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5978 RTX_FRAME_RELATED_P (insn) = 1;
5979 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5980 }
5981 else
5982 {
5983 size_rtx = gen_rtx_REG (Pmode, 1);
5984 emit_move_insn (size_rtx, size_int_rtx);
5985 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5986 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5987 gen_stack_pointer_inc (size_int_rtx));
5988 }
5989 RTX_FRAME_RELATED_P (insn) = 1;
5990
5991 /* Ensure nothing is scheduled until after the frame is established. */
5992 emit_insn (gen_blockage ());
5993
5994 if (frame_pointer_needed)
5995 {
5996 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5997 gen_rtx_MINUS (Pmode,
5998 stack_pointer_rtx,
5999 size_rtx)));
6000 RTX_FRAME_RELATED_P (insn) = 1;
6001
6002 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6003 gen_rtx_SET (hard_frame_pointer_rtx,
6004 plus_constant (Pmode, stack_pointer_rtx,
6005 size)));
6006 }
6007
6008 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6009 {
6010 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6011 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6012
6013 insn = emit_move_insn (i7, o7);
6014 RTX_FRAME_RELATED_P (insn) = 1;
6015
6016 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6017
6018 /* Prevent this instruction from ever being considered dead,
6019 even if this function has no epilogue. */
6020 emit_use (i7);
6021 }
6022 }
6023
6024 if (frame_pointer_needed)
6025 {
6026 sparc_frame_base_reg = hard_frame_pointer_rtx;
6027 sparc_frame_base_offset = SPARC_STACK_BIAS;
6028 }
6029 else
6030 {
6031 sparc_frame_base_reg = stack_pointer_rtx;
6032 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6033 }
6034
6035 if (sparc_n_global_fp_regs > 0)
6036 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6037 sparc_frame_base_offset
6038 - sparc_apparent_frame_size,
6039 SORR_SAVE);
6040
6041 /* Load the GOT register if needed. */
6042 if (crtl->uses_pic_offset_table)
6043 load_got_register ();
6044
6045 /* Advertise that the data calculated just above are now valid. */
6046 sparc_prologue_data_valid_p = true;
6047 }
6048
6049 /* This function generates the assembly code for function entry, which boils
6050 down to emitting the necessary .register directives. */
6051
6052 static void
6053 sparc_asm_function_prologue (FILE *file)
6054 {
6055 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6056 if (!TARGET_FLAT)
6057 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6058
6059 sparc_output_scratch_registers (file);
6060 }
6061
6062 /* Expand the function epilogue, either normal or part of a sibcall.
6063 We emit all the instructions except the return or the call. */
6064
6065 void
6066 sparc_expand_epilogue (bool for_eh)
6067 {
6068 HOST_WIDE_INT size = sparc_frame_size;
6069
6070 if (cfun->calls_alloca)
6071 emit_insn (gen_frame_blockage ());
6072
6073 if (sparc_n_global_fp_regs > 0)
6074 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6075 sparc_frame_base_offset
6076 - sparc_apparent_frame_size,
6077 SORR_RESTORE);
6078
6079 if (size == 0 || for_eh)
6080 ; /* do nothing. */
6081 else if (sparc_leaf_function_p)
6082 {
6083 if (size <= 4096)
6084 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6085 else if (size <= 8192)
6086 {
6087 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6088 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6089 }
6090 else
6091 {
6092 rtx reg = gen_rtx_REG (Pmode, 1);
6093 emit_move_insn (reg, GEN_INT (size));
6094 emit_insn (gen_stack_pointer_inc (reg));
6095 }
6096 }
6097 }
6098
6099 /* Expand the function epilogue, either normal or part of a sibcall.
6100 We emit all the instructions except the return or the call. */
6101
6102 void
6103 sparc_flat_expand_epilogue (bool for_eh)
6104 {
6105 HOST_WIDE_INT size = sparc_frame_size;
6106
6107 if (sparc_n_global_fp_regs > 0)
6108 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6109 sparc_frame_base_offset
6110 - sparc_apparent_frame_size,
6111 SORR_RESTORE);
6112
6113 /* If we have a frame pointer, we'll need both to restore it before the
6114 frame is destroyed and to use its current value in destroying the frame.
6115 Since we don't have an atomic way to do that in the flat window model,
6116 we save the current value into a temporary register (%g1). */
6117 if (frame_pointer_needed && !for_eh)
6118 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6119
6120 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6121 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6122 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6123
6124 if (sparc_save_local_in_regs_p)
6125 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6126 sparc_frame_base_offset,
6127 SORR_RESTORE);
6128
6129 if (size == 0 || for_eh)
6130 ; /* do nothing. */
6131 else if (frame_pointer_needed)
6132 {
6133 /* Make sure the frame is destroyed after everything else is done. */
6134 emit_insn (gen_blockage ());
6135
6136 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6137 }
6138 else
6139 {
6140 /* Likewise. */
6141 emit_insn (gen_blockage ());
6142
6143 if (size <= 4096)
6144 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6145 else if (size <= 8192)
6146 {
6147 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6148 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6149 }
6150 else
6151 {
6152 rtx reg = gen_rtx_REG (Pmode, 1);
6153 emit_move_insn (reg, GEN_INT (size));
6154 emit_insn (gen_stack_pointer_inc (reg));
6155 }
6156 }
6157 }
6158
6159 /* Return true if it is appropriate to emit `return' instructions in the
6160 body of a function. */
6161
6162 bool
6163 sparc_can_use_return_insn_p (void)
6164 {
6165 return sparc_prologue_data_valid_p
6166 && sparc_n_global_fp_regs == 0
6167 && TARGET_FLAT
6168 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6169 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6170 }
6171
6172 /* This function generates the assembly code for function exit. */
6173
6174 static void
6175 sparc_asm_function_epilogue (FILE *file)
6176 {
6177 /* If the last two instructions of a function are "call foo; dslot;"
6178 the return address might point to the first instruction in the next
6179 function and we have to output a dummy nop for the sake of sane
6180 backtraces in such cases. This is pointless for sibling calls since
6181 the return address is explicitly adjusted. */
6182
6183 rtx_insn *insn = get_last_insn ();
6184
6185 rtx last_real_insn = prev_real_insn (insn);
6186 if (last_real_insn
6187 && NONJUMP_INSN_P (last_real_insn)
6188 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6189 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6190
6191 if (last_real_insn
6192 && CALL_P (last_real_insn)
6193 && !SIBLING_CALL_P (last_real_insn))
6194 fputs("\tnop\n", file);
6195
6196 sparc_output_deferred_case_vectors ();
6197 }
6198
6199 /* Output a 'restore' instruction. */
6200
6201 static void
6202 output_restore (rtx pat)
6203 {
6204 rtx operands[3];
6205
6206 if (! pat)
6207 {
6208 fputs ("\t restore\n", asm_out_file);
6209 return;
6210 }
6211
6212 gcc_assert (GET_CODE (pat) == SET);
6213
6214 operands[0] = SET_DEST (pat);
6215 pat = SET_SRC (pat);
6216
6217 switch (GET_CODE (pat))
6218 {
6219 case PLUS:
6220 operands[1] = XEXP (pat, 0);
6221 operands[2] = XEXP (pat, 1);
6222 output_asm_insn (" restore %r1, %2, %Y0", operands);
6223 break;
6224 case LO_SUM:
6225 operands[1] = XEXP (pat, 0);
6226 operands[2] = XEXP (pat, 1);
6227 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6228 break;
6229 case ASHIFT:
6230 operands[1] = XEXP (pat, 0);
6231 gcc_assert (XEXP (pat, 1) == const1_rtx);
6232 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6233 break;
6234 default:
6235 operands[1] = pat;
6236 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6237 break;
6238 }
6239 }
6240
6241 /* Output a return. */
6242
6243 const char *
6244 output_return (rtx_insn *insn)
6245 {
6246 if (crtl->calls_eh_return)
6247 {
6248 /* If the function uses __builtin_eh_return, the eh_return
6249 machinery occupies the delay slot. */
6250 gcc_assert (!final_sequence);
6251
6252 if (flag_delayed_branch)
6253 {
6254 if (!TARGET_FLAT && TARGET_V9)
6255 fputs ("\treturn\t%i7+8\n", asm_out_file);
6256 else
6257 {
6258 if (!TARGET_FLAT)
6259 fputs ("\trestore\n", asm_out_file);
6260
6261 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6262 }
6263
6264 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6265 }
6266 else
6267 {
6268 if (!TARGET_FLAT)
6269 fputs ("\trestore\n", asm_out_file);
6270
6271 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6272 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6273 }
6274 }
6275 else if (sparc_leaf_function_p || TARGET_FLAT)
6276 {
6277 /* This is a leaf or flat function so we don't have to bother restoring
6278 the register window, which frees us from dealing with the convoluted
6279 semantics of restore/return. We simply output the jump to the
6280 return address and the insn in the delay slot (if any). */
6281
6282 return "jmp\t%%o7+%)%#";
6283 }
6284 else
6285 {
6286 /* This is a regular function so we have to restore the register window.
6287 We may have a pending insn for the delay slot, which will be either
6288 combined with the 'restore' instruction or put in the delay slot of
6289 the 'return' instruction. */
6290
6291 if (final_sequence)
6292 {
6293 rtx_insn *delay;
6294 rtx pat;
6295 int seen;
6296
6297 delay = NEXT_INSN (insn);
6298 gcc_assert (delay);
6299
6300 pat = PATTERN (delay);
6301
6302 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6303 {
6304 epilogue_renumber (&pat, 0);
6305 return "return\t%%i7+%)%#";
6306 }
6307 else
6308 {
6309 output_asm_insn ("jmp\t%%i7+%)", NULL);
6310
6311 /* We're going to output the insn in the delay slot manually.
6312 Make sure to output its source location first. */
6313 PATTERN (delay) = gen_blockage ();
6314 INSN_CODE (delay) = -1;
6315 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6316 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6317
6318 output_restore (pat);
6319 }
6320 }
6321 else
6322 {
6323 /* The delay slot is empty. */
6324 if (TARGET_V9)
6325 return "return\t%%i7+%)\n\t nop";
6326 else if (flag_delayed_branch)
6327 return "jmp\t%%i7+%)\n\t restore";
6328 else
6329 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6330 }
6331 }
6332
6333 return "";
6334 }
6335
6336 /* Output a sibling call. */
6337
6338 const char *
6339 output_sibcall (rtx_insn *insn, rtx call_operand)
6340 {
6341 rtx operands[1];
6342
6343 gcc_assert (flag_delayed_branch);
6344
6345 operands[0] = call_operand;
6346
6347 if (sparc_leaf_function_p || TARGET_FLAT)
6348 {
6349 /* This is a leaf or flat function so we don't have to bother restoring
6350 the register window. We simply output the jump to the function and
6351 the insn in the delay slot (if any). */
6352
6353 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6354
6355 if (final_sequence)
6356 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6357 operands);
6358 else
6359 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6360 it into a branch if possible. */
6361 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6362 operands);
6363 }
6364 else
6365 {
6366 /* This is a regular function so we have to restore the register window.
6367 We may have a pending insn for the delay slot, which will be combined
6368 with the 'restore' instruction. */
6369
6370 output_asm_insn ("call\t%a0, 0", operands);
6371
6372 if (final_sequence)
6373 {
6374 rtx_insn *delay;
6375 rtx pat;
6376 int seen;
6377
6378 delay = NEXT_INSN (insn);
6379 gcc_assert (delay);
6380
6381 pat = PATTERN (delay);
6382
6383 /* We're going to output the insn in the delay slot manually.
6384 Make sure to output its source location first. */
6385 PATTERN (delay) = gen_blockage ();
6386 INSN_CODE (delay) = -1;
6387 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6388 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6389
6390 output_restore (pat);
6391 }
6392 else
6393 output_restore (NULL_RTX);
6394 }
6395
6396 return "";
6397 }
6398 \f
6399 /* Functions for handling argument passing.
6400
6401 For 32-bit, the first 6 args are normally in registers and the rest are
6402 pushed. Any arg that starts within the first 6 words is at least
6403 partially passed in a register unless its data type forbids it.
6404
6405 For 64-bit, the argument registers are laid out as an array of 16 elements
6406 and arguments are added sequentially. The first 6 int args and up to the
6407 first 16 fp args (depending on size) are passed in regs.
6408
6409 Slot Stack Integral Float Float in structure Double Long Double
6410 ---- ----- -------- ----- ------------------ ------ -----------
6411 15 [SP+248] %f31 %f30,%f31 %d30
6412 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6413 13 [SP+232] %f27 %f26,%f27 %d26
6414 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6415 11 [SP+216] %f23 %f22,%f23 %d22
6416 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6417 9 [SP+200] %f19 %f18,%f19 %d18
6418 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6419 7 [SP+184] %f15 %f14,%f15 %d14
6420 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6421 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6422 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6423 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6424 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6425 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6426 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6427
6428 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6429
6430 Integral arguments are always passed as 64-bit quantities appropriately
6431 extended.
6432
6433 Passing of floating point values is handled as follows.
6434 If a prototype is in scope:
6435 If the value is in a named argument (i.e. not a stdarg function or a
6436 value not part of the `...') then the value is passed in the appropriate
6437 fp reg.
6438 If the value is part of the `...' and is passed in one of the first 6
6439 slots then the value is passed in the appropriate int reg.
6440 If the value is part of the `...' and is not passed in one of the first 6
6441 slots then the value is passed in memory.
6442 If a prototype is not in scope:
6443 If the value is one of the first 6 arguments the value is passed in the
6444 appropriate integer reg and the appropriate fp reg.
6445 If the value is not one of the first 6 arguments the value is passed in
6446 the appropriate fp reg and in memory.
6447
6448
6449 Summary of the calling conventions implemented by GCC on the SPARC:
6450
6451 32-bit ABI:
6452 size argument return value
6453
6454 small integer <4 int. reg. int. reg.
6455 word 4 int. reg. int. reg.
6456 double word 8 int. reg. int. reg.
6457
6458 _Complex small integer <8 int. reg. int. reg.
6459 _Complex word 8 int. reg. int. reg.
6460 _Complex double word 16 memory int. reg.
6461
6462 vector integer <=8 int. reg. FP reg.
6463 vector integer >8 memory memory
6464
6465 float 4 int. reg. FP reg.
6466 double 8 int. reg. FP reg.
6467 long double 16 memory memory
6468
6469 _Complex float 8 memory FP reg.
6470 _Complex double 16 memory FP reg.
6471 _Complex long double 32 memory FP reg.
6472
6473 vector float any memory memory
6474
6475 aggregate any memory memory
6476
6477
6478
6479 64-bit ABI:
6480 size argument return value
6481
6482 small integer <8 int. reg. int. reg.
6483 word 8 int. reg. int. reg.
6484 double word 16 int. reg. int. reg.
6485
6486 _Complex small integer <16 int. reg. int. reg.
6487 _Complex word 16 int. reg. int. reg.
6488 _Complex double word 32 memory int. reg.
6489
6490 vector integer <=16 FP reg. FP reg.
6491 vector integer 16<s<=32 memory FP reg.
6492 vector integer >32 memory memory
6493
6494 float 4 FP reg. FP reg.
6495 double 8 FP reg. FP reg.
6496 long double 16 FP reg. FP reg.
6497
6498 _Complex float 8 FP reg. FP reg.
6499 _Complex double 16 FP reg. FP reg.
6500 _Complex long double 32 memory FP reg.
6501
6502 vector float <=16 FP reg. FP reg.
6503 vector float 16<s<=32 memory FP reg.
6504 vector float >32 memory memory
6505
6506 aggregate <=16 reg. reg.
6507 aggregate 16<s<=32 memory reg.
6508 aggregate >32 memory memory
6509
6510
6511
6512 Note #1: complex floating-point types follow the extended SPARC ABIs as
6513 implemented by the Sun compiler.
6514
6515 Note #2: integral vector types follow the scalar floating-point types
6516 conventions to match what is implemented by the Sun VIS SDK.
6517
6518 Note #3: floating-point vector types follow the aggregate types
6519 conventions. */
6520
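/* A few concrete 64-bit cases implied by the tables above (a sketch,
   assuming named arguments, a prototype in scope and the FPU enabled):

     double                    -> slot 0, passed in %d0
     struct { double a, b; }   -> 16 bytes, slots 0-1, passed in %d0/%d2
     struct { char c[24]; }    -> over 16 bytes, passed in memory  */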
6521
6522 /* Maximum number of int regs for args. */
6523 #define SPARC_INT_ARG_MAX 6
6524 /* Maximum number of fp regs for args. */
6525 #define SPARC_FP_ARG_MAX 16
6526 /* Number of words (partially) occupied for a given size in units. */
6527 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6528
6529 /* Handle the INIT_CUMULATIVE_ARGS macro.
6530 Initialize a variable CUM of type CUMULATIVE_ARGS
6531 for a call to a function whose data type is FNTYPE.
6532 For a library call, FNTYPE is 0. */
6533
6534 void
6535 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6536 {
6537 cum->words = 0;
6538 cum->prototype_p = fntype && prototype_p (fntype);
6539 cum->libcall_p = !fntype;
6540 }
6541
6542 /* Handle promotion of pointer and integer arguments. */
6543
6544 static machine_mode
6545 sparc_promote_function_mode (const_tree type, machine_mode mode,
6546 int *punsignedp, const_tree, int)
6547 {
6548 if (type && POINTER_TYPE_P (type))
6549 {
6550 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6551 return Pmode;
6552 }
6553
6554 /* Integral arguments are passed as full words, as per the ABI. */
6555 if (GET_MODE_CLASS (mode) == MODE_INT
6556 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6557 return word_mode;
6558
6559 return mode;
6560 }
6561
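/* For example, a 'short' argument is widened to SImode under the 32-bit ABI
   and to DImode under the 64-bit ABI, with sign or zero extension according
   to its type; pointers are extended to Pmode.  */
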
6562 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6563
6564 static bool
6565 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6566 {
6567 return TARGET_ARCH64 ? true : false;
6568 }
6569
6570 /* Traverse the record TYPE recursively and call FUNC on its fields.
6571 NAMED is true if this is for a named parameter. DATA is passed
6572 to FUNC for each field. OFFSET is the starting position and
6573 PACKED is true if we are inside a packed record. */
6574
6575 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6576 static void
6577 traverse_record_type (const_tree type, bool named, T *data,
6578 HOST_WIDE_INT offset = 0, bool packed = false)
6579 {
6580 /* The ABI obviously doesn't specify how packed structures are passed.
6581 These are passed in integer regs if possible, otherwise memory. */
6582 if (!packed)
6583 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6584 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6585 {
6586 packed = true;
6587 break;
6588 }
6589
6590 /* Walk the real fields, but skip those with no size or a zero size.
6591 ??? Fields with variable offset are handled as having zero offset. */
6592 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6593 if (TREE_CODE (field) == FIELD_DECL)
6594 {
6595 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6596 continue;
6597
6598 HOST_WIDE_INT bitpos = offset;
6599 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6600 bitpos += int_bit_position (field);
6601
6602 tree field_type = TREE_TYPE (field);
6603 if (TREE_CODE (field_type) == RECORD_TYPE)
6604 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6605 packed);
6606 else
6607 {
6608 const bool fp_type
6609 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6610 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6611 data);
6612 }
6613 }
6614 }
6615
6616 /* Handle recursive register classifying for structure layout. */
6617
6618 typedef struct
6619 {
6620 bool fp_regs; /* true if field eligible to FP registers. */
6621 bool fp_regs_in_first_word; /* true if such field in first word. */
6622 } classify_data_t;
6623
6624 /* A subroutine of function_arg_slotno. Classify the field. */
6625
6626 inline void
6627 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6628 classify_data_t *data)
6629 {
6630 if (fp)
6631 {
6632 data->fp_regs = true;
6633 if (bitpos < BITS_PER_WORD)
6634 data->fp_regs_in_first_word = true;
6635 }
6636 }
6637
6638 /* Compute the slot number to pass an argument in.
6639 Return the slot number or -1 if passing on the stack.
6640
6641 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6642 the preceding args and about the function being called.
6643 MODE is the argument's machine mode.
6644 TYPE is the data type of the argument (as a tree).
6645 This is null for libcalls where that information may
6646 not be available.
6647 NAMED is nonzero if this argument is a named parameter
6648 (otherwise it is an extra parameter matching an ellipsis).
6649 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6650 *PREGNO records the register number to use if scalar type.
6651 *PPADDING records the amount of padding needed in words. */
6652
6653 static int
6654 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6655 const_tree type, bool named, bool incoming,
6656 int *pregno, int *ppadding)
6657 {
6658 int regbase = (incoming
6659 ? SPARC_INCOMING_INT_ARG_FIRST
6660 : SPARC_OUTGOING_INT_ARG_FIRST);
6661 int slotno = cum->words;
6662 enum mode_class mclass;
6663 int regno;
6664
6665 *ppadding = 0;
6666
6667 if (type && TREE_ADDRESSABLE (type))
6668 return -1;
6669
6670 if (TARGET_ARCH32
6671 && mode == BLKmode
6672 && type
6673 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6674 return -1;
6675
6676 /* For SPARC64, objects requiring 16-byte alignment get it. */
6677 if (TARGET_ARCH64
6678 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6679 && (slotno & 1) != 0)
6680 slotno++, *ppadding = 1;
6681
6682 mclass = GET_MODE_CLASS (mode);
6683 if (type && TREE_CODE (type) == VECTOR_TYPE)
6684 {
6685 /* Vector types deserve special treatment because they are
6686 polymorphic wrt their mode, depending upon whether VIS
6687 instructions are enabled. */
6688 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6689 {
6690 /* The SPARC port defines no floating-point vector modes. */
6691 gcc_assert (mode == BLKmode);
6692 }
6693 else
6694 {
6695 /* Integral vector types should either have a vector
6696 mode or an integral mode, because we are guaranteed
6697 by pass_by_reference that their size is not greater
6698 than 16 bytes and TImode is 16-byte wide. */
6699 gcc_assert (mode != BLKmode);
6700
6701 /* Vector integers are handled like floats according to
6702 the Sun VIS SDK. */
6703 mclass = MODE_FLOAT;
6704 }
6705 }
6706
6707 switch (mclass)
6708 {
6709 case MODE_FLOAT:
6710 case MODE_COMPLEX_FLOAT:
6711 case MODE_VECTOR_INT:
6712 if (TARGET_ARCH64 && TARGET_FPU && named)
6713 {
6714 /* If all arg slots are filled, then must pass on stack. */
6715 if (slotno >= SPARC_FP_ARG_MAX)
6716 return -1;
6717
6718 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6719 /* Arguments filling only a single FP register are
6720 right-justified in the outer double FP register. */
6721 if (GET_MODE_SIZE (mode) <= 4)
6722 regno++;
6723 break;
6724 }
6725 /* fallthrough */
6726
6727 case MODE_INT:
6728 case MODE_COMPLEX_INT:
6729 /* If all arg slots are filled, then must pass on stack. */
6730 if (slotno >= SPARC_INT_ARG_MAX)
6731 return -1;
6732
6733 regno = regbase + slotno;
6734 break;
6735
6736 case MODE_RANDOM:
6737 if (mode == VOIDmode)
6738 /* MODE is VOIDmode when generating the actual call. */
6739 return -1;
6740
6741 gcc_assert (mode == BLKmode);
6742
6743 if (TARGET_ARCH32
6744 || !type
6745 || (TREE_CODE (type) != RECORD_TYPE
6746 && TREE_CODE (type) != VECTOR_TYPE))
6747 {
6748 /* If all arg slots are filled, then must pass on stack. */
6749 if (slotno >= SPARC_INT_ARG_MAX)
6750 return -1;
6751
6752 regno = regbase + slotno;
6753 }
6754 else /* TARGET_ARCH64 && type */
6755 {
6756 /* If all arg slots are filled, then must pass on stack. */
6757 if (slotno >= SPARC_FP_ARG_MAX)
6758 return -1;
6759
6760 if (TREE_CODE (type) == RECORD_TYPE)
6761 {
6762 classify_data_t data = { false, false };
6763 traverse_record_type<classify_data_t, classify_registers>
6764 (type, named, &data);
6765
6766 if (data.fp_regs)
6767 {
6768 /* If all FP slots are filled except for the last one and
6769 there is no FP field in the first word, then must pass
6770 on stack. */
6771 if (slotno >= SPARC_FP_ARG_MAX - 1
6772 && !data.fp_regs_in_first_word)
6773 return -1;
6774 }
6775 else
6776 {
6777 /* If all int slots are filled, then must pass on stack. */
6778 if (slotno >= SPARC_INT_ARG_MAX)
6779 return -1;
6780 }
6781 }
6782
6783 /* PREGNO isn't set since both int and FP regs can be used. */
6784 return slotno;
6785 }
6786 break;
6787
6788 default :
6789 gcc_unreachable ();
6790 }
6791
6792 *pregno = regno;
6793 return slotno;
6794 }
6795
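/* For example (a sketch of the 64-bit ABI with FPU), a named 'double'
   occupying slot 2 yields regno = SPARC_FP_ARG_FIRST + 2 * 2, i.e. %d4,
   matching the argument slot table given further above.  */
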
6796 /* Handle recursive register counting/assigning for structure layout. */
6797
6798 typedef struct
6799 {
6800 int slotno; /* slot number of the argument. */
6801 int regbase; /* regno of the base register. */
6802 int intoffset; /* offset of the first pending integer field. */
6803 int nregs; /* number of words passed in registers. */
6804 bool stack; /* true if part of the argument is on the stack. */
6805 rtx ret; /* return expression being built. */
6806 } assign_data_t;
6807
6808 /* A subroutine of function_arg_record_value. Compute the number of integer
6809 registers to be assigned between PARMS->intoffset and BITPOS. Return
6810 true if at least one integer register is assigned or false otherwise. */
6811
6812 static bool
6813 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6814 {
6815 if (data->intoffset < 0)
6816 return false;
6817
6818 const int intoffset = data->intoffset;
6819 data->intoffset = -1;
6820
6821 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6822 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6823 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6824 int nregs = (endbit - startbit) / BITS_PER_WORD;
6825
6826 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6827 {
6828 nregs = SPARC_INT_ARG_MAX - this_slotno;
6829
6830 /* We need to pass this field (partly) on the stack. */
6831 data->stack = 1;
6832 }
6833
6834 if (nregs <= 0)
6835 return false;
6836
6837 *pnregs = nregs;
6838 return true;
6839 }
6840
6841 /* A subroutine of function_arg_record_value. Compute the number and the mode
6842 of the FP registers to be assigned for FIELD. Return true if at least one
6843 FP register is assigned or false otherwise. */
6844
6845 static bool
6846 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6847 assign_data_t *data,
6848 int *pnregs, machine_mode *pmode)
6849 {
6850 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6851 machine_mode mode = DECL_MODE (field);
6852 int nregs, nslots;
6853
6854 /* Slots are counted as words while regs are counted as having the size of
6855 the (inner) mode. */
6856 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6857 {
6858 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6859 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6860 }
6861 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6862 {
6863 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6864 nregs = 2;
6865 }
6866 else
6867 nregs = 1;
6868
6869 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6870
6871 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6872 {
6873 nslots = SPARC_FP_ARG_MAX - this_slotno;
6874 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6875
6876 /* We need to pass this field (partly) on the stack. */
6877 data->stack = 1;
6878
6879 if (nregs <= 0)
6880 return false;
6881 }
6882
6883 *pnregs = nregs;
6884 *pmode = mode;
6885 return true;
6886 }
6887
6888 /* A subroutine of function_arg_record_value. Count the number of registers
6889 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6890
6891 inline void
6892 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6893 assign_data_t *data)
6894 {
6895 if (fp)
6896 {
6897 int nregs;
6898 machine_mode mode;
6899
6900 if (compute_int_layout (bitpos, data, &nregs))
6901 data->nregs += nregs;
6902
6903 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6904 data->nregs += nregs;
6905 }
6906 else
6907 {
6908 if (data->intoffset < 0)
6909 data->intoffset = bitpos;
6910 }
6911 }
6912
6913 /* A subroutine of function_arg_record_value. Assign the bits of the
6914 structure between PARMS->intoffset and BITPOS to integer registers. */
6915
6916 static void
6917 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6918 {
6919 int intoffset = data->intoffset;
6920 machine_mode mode;
6921 int nregs;
6922
6923 if (!compute_int_layout (bitpos, data, &nregs))
6924 return;
6925
6926 /* If this is the trailing part of a word, only load that much into
6927 the register. Otherwise load the whole register. Note that in
6928 the latter case we may pick up unwanted bits. It's not a problem
6929 at the moment but we may wish to revisit this. */
6930 if (intoffset % BITS_PER_WORD != 0)
6931 mode = smallest_int_mode_for_size (BITS_PER_WORD
6932 - intoffset % BITS_PER_WORD);
6933 else
6934 mode = word_mode;
6935
6936 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6937 unsigned int regno = data->regbase + this_slotno;
6938 intoffset /= BITS_PER_UNIT;
6939
6940 do
6941 {
6942 rtx reg = gen_rtx_REG (mode, regno);
6943 XVECEXP (data->ret, 0, data->stack + data->nregs)
6944 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6945 data->nregs += 1;
6946 mode = word_mode;
6947 regno += 1;
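/* Advance to the start of the next word. */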
6948 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6949 }
6950 while (--nregs > 0);
6951 }
6952
6953 /* A subroutine of function_arg_record_value. Assign FIELD at position
6954 BITPOS to FP registers. */
6955
6956 static void
6957 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6958 assign_data_t *data)
6959 {
6960 int nregs;
6961 machine_mode mode;
6962
6963 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6964 return;
6965
6966 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6967 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6968 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6969 regno++;
6970 int pos = bitpos / BITS_PER_UNIT;
6971
6972 do
6973 {
6974 rtx reg = gen_rtx_REG (mode, regno);
6975 XVECEXP (data->ret, 0, data->stack + data->nregs)
6976 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6977 data->nregs += 1;
6978 regno += GET_MODE_SIZE (mode) / 4;
6979 pos += GET_MODE_SIZE (mode);
6980 }
6981 while (--nregs > 0);
6982 }
6983
6984 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6985 the structure between PARMS->intoffset and BITPOS to registers. */
6986
6987 inline void
6988 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6989 assign_data_t *data)
6990 {
6991 if (fp)
6992 {
6993 assign_int_registers (bitpos, data);
6994
6995 assign_fp_registers (field, bitpos, data);
6996 }
6997 else
6998 {
6999 if (data->intoffset < 0)
7000 data->intoffset = bitpos;
7001 }
7002 }
7003
7004 /* Used by function_arg and sparc_function_value_1 to implement the complex
7005 conventions of the 64-bit ABI for passing and returning structures.
7006 Return an expression valid as a return value for the FUNCTION_ARG
7007 and TARGET_FUNCTION_VALUE.
7008
7009 TYPE is the data type of the argument (as a tree).
7010 This is null for libcalls where that information may
7011 not be available.
7012 MODE is the argument's machine mode.
7013 SLOTNO is the index number of the argument's slot in the parameter array.
7014 NAMED is true if this argument is a named parameter
7015 (otherwise it is an extra parameter matching an ellipsis).
7016 REGBASE is the regno of the base register for the parameter array. */
7017
7018 static rtx
7019 function_arg_record_value (const_tree type, machine_mode mode,
7020 int slotno, bool named, int regbase)
7021 {
7022 HOST_WIDE_INT typesize = int_size_in_bytes (type);
7023 assign_data_t data;
7024 int nregs;
7025
7026 data.slotno = slotno;
7027 data.regbase = regbase;
7028
7029 /* Count how many registers we need. */
7030 data.nregs = 0;
7031 data.intoffset = 0;
7032 data.stack = false;
7033 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7034
7035 /* Take into account pending integer fields. */
7036 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
7037 data.nregs += nregs;
7038
7039 /* Allocate the vector and handle some annoying special cases. */
7040 nregs = data.nregs;
7041
7042 if (nregs == 0)
7043 {
7044 /* ??? Empty structure has no value? Duh? */
7045 if (typesize <= 0)
7046 {
7047 /* Though there's nothing really to store, return a word register
7048 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7049 leads to breakage due to the fact that there are zero bytes to
7050 load. */
7051 return gen_rtx_REG (mode, regbase);
7052 }
7053
7054 /* ??? C++ has structures with no fields, and yet a size. Give up
7055 for now and pass everything back in integer registers. */
7056 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7057 if (nregs + slotno > SPARC_INT_ARG_MAX)
7058 nregs = SPARC_INT_ARG_MAX - slotno;
7059 }
7060
7061 gcc_assert (nregs > 0);
7062
7063 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7064
7065 /* If at least one field must be passed on the stack, generate
7066 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7067 also be passed on the stack. We can't do much better because the
7068 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7069 of structures for which the fields passed exclusively in registers
7070 are not at the beginning of the structure. */
7071 if (data.stack)
7072 XVECEXP (data.ret, 0, 0)
7073 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7074
7075 /* Assign the registers. */
7076 data.nregs = 0;
7077 data.intoffset = 0;
7078 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7079
7080 /* Assign pending integer fields. */
7081 assign_int_registers (typesize * BITS_PER_UNIT, &data);
7082
7083 gcc_assert (data.nregs == nregs);
7084
7085 return data.ret;
7086 }
7087
7088 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7089 of the 64-bit ABI for passing and returning unions.
7090 Return an expression valid as a return value for the FUNCTION_ARG
7091 and TARGET_FUNCTION_VALUE.
7092
7093 SIZE is the size in bytes of the union.
7094 MODE is the argument's machine mode.
7095 REGNO is the hard register the union will be passed in. */
7096
7097 static rtx
7098 function_arg_union_value (int size, machine_mode mode, int slotno,
7099 int regno)
7100 {
7101 int nwords = CEIL_NWORDS (size), i;
7102 rtx regs;
7103
7104 /* See comment in previous function for empty structures. */
7105 if (nwords == 0)
7106 return gen_rtx_REG (mode, regno);
7107
7108 if (slotno == SPARC_INT_ARG_MAX - 1)
7109 nwords = 1;
7110
7111 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7112
7113 for (i = 0; i < nwords; i++)
7114 {
7115 /* Unions are passed left-justified. */
7116 XVECEXP (regs, 0, i)
7117 = gen_rtx_EXPR_LIST (VOIDmode,
7118 gen_rtx_REG (word_mode, regno),
7119 GEN_INT (UNITS_PER_WORD * i));
7120 regno++;
7121 }
7122
7123 return regs;
7124 }
7125
7126 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7127 for passing and returning BLKmode vectors.
7128 Return an expression valid as a return value for the FUNCTION_ARG
7129 and TARGET_FUNCTION_VALUE.
7130
7131 SIZE is the size in bytes of the vector.
7132 REGNO is the FP hard register the vector will be passed in. */
7133
7134 static rtx
7135 function_arg_vector_value (int size, int regno)
7136 {
7137 const int nregs = MAX (1, size / 8);
7138 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7139
7140 if (size < 8)
7141 XVECEXP (regs, 0, 0)
7142 = gen_rtx_EXPR_LIST (VOIDmode,
7143 gen_rtx_REG (SImode, regno),
7144 const0_rtx);
7145 else
7146 for (int i = 0; i < nregs; i++)
7147 XVECEXP (regs, 0, i)
7148 = gen_rtx_EXPR_LIST (VOIDmode,
7149 gen_rtx_REG (DImode, regno + 2*i),
7150 GEN_INT (i*8));
7151
7152 return regs;
7153 }
7154
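/* For instance (a sketch), an 8-byte vector starting at %f0 is passed as a
   single DImode piece in %d0, a 16-byte one as two DImode pieces in %d0 and
   %d2, and a 4-byte vector in a single SImode FP register.  */
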
7155 /* Determine where to put an argument to a function.
7156 Value is zero to push the argument on the stack,
7157 or a hard register in which to store the argument.
7158
7159 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7160 the preceding args and about the function being called.
7161 MODE is the argument's machine mode.
7162 TYPE is the data type of the argument (as a tree).
7163 This is null for libcalls where that information may
7164 not be available.
7165 NAMED is true if this argument is a named parameter
7166 (otherwise it is an extra parameter matching an ellipsis).
7167 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7168 TARGET_FUNCTION_INCOMING_ARG. */
7169
7170 static rtx
7171 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7172 const_tree type, bool named, bool incoming)
7173 {
7174 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7175
7176 int regbase = (incoming
7177 ? SPARC_INCOMING_INT_ARG_FIRST
7178 : SPARC_OUTGOING_INT_ARG_FIRST);
7179 int slotno, regno, padding;
7180 enum mode_class mclass = GET_MODE_CLASS (mode);
7181
7182 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7183 &regno, &padding);
7184 if (slotno == -1)
7185 return 0;
7186
7187 /* Vector types deserve special treatment because they are polymorphic wrt
7188 their mode, depending upon whether VIS instructions are enabled. */
7189 if (type && TREE_CODE (type) == VECTOR_TYPE)
7190 {
7191 HOST_WIDE_INT size = int_size_in_bytes (type);
7192 gcc_assert ((TARGET_ARCH32 && size <= 8)
7193 || (TARGET_ARCH64 && size <= 16));
7194
7195 if (mode == BLKmode)
7196 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7197
7198 mclass = MODE_FLOAT;
7199 }
7200
7201 if (TARGET_ARCH32)
7202 return gen_rtx_REG (mode, regno);
7203
7204 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7205 and are promoted to registers if possible. */
7206 if (type && TREE_CODE (type) == RECORD_TYPE)
7207 {
7208 HOST_WIDE_INT size = int_size_in_bytes (type);
7209 gcc_assert (size <= 16);
7210
7211 return function_arg_record_value (type, mode, slotno, named, regbase);
7212 }
7213
7214 /* Unions up to 16 bytes in size are passed in integer registers. */
7215 else if (type && TREE_CODE (type) == UNION_TYPE)
7216 {
7217 HOST_WIDE_INT size = int_size_in_bytes (type);
7218 gcc_assert (size <= 16);
7219
7220 return function_arg_union_value (size, mode, slotno, regno);
7221 }
7222
7223 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7224 but also have the slot allocated for them.
7225 If no prototype is in scope, fp values in register slots get passed
7226 in two places, either fp regs and int regs or fp regs and memory. */
7227 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7228 && SPARC_FP_REG_P (regno))
7229 {
7230 rtx reg = gen_rtx_REG (mode, regno);
7231 if (cum->prototype_p || cum->libcall_p)
7232 return reg;
7233 else
7234 {
7235 rtx v0, v1;
7236
7237 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7238 {
7239 int intreg;
7240
7241 /* On incoming, we don't need to know that the value
7242 is passed in %f0 and %i0, and it confuses other parts
7243 causing needless spillage even on the simplest cases. */
7244 if (incoming)
7245 return reg;
7246
7247 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7248 + (regno - SPARC_FP_ARG_FIRST) / 2);
7249
7250 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7251 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7252 const0_rtx);
7253 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7254 }
7255 else
7256 {
7257 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7258 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7259 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7260 }
7261 }
7262 }
7263
7264 /* All other aggregate types are passed in an integer register in a mode
7265 corresponding to the size of the type. */
7266 else if (type && AGGREGATE_TYPE_P (type))
7267 {
7268 HOST_WIDE_INT size = int_size_in_bytes (type);
7269 gcc_assert (size <= 16);
7270
7271 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7272 }
7273
7274 return gen_rtx_REG (mode, regno);
7275 }
7276
7277 /* Handle the TARGET_FUNCTION_ARG target hook. */
7278
7279 static rtx
7280 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7281 const_tree type, bool named)
7282 {
7283 return sparc_function_arg_1 (cum, mode, type, named, false);
7284 }
7285
7286 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7287
7288 static rtx
7289 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7290 const_tree type, bool named)
7291 {
7292 return sparc_function_arg_1 (cum, mode, type, named, true);
7293 }
7294
7295 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7296
7297 static unsigned int
7298 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7299 {
7300 return ((TARGET_ARCH64
7301 && (GET_MODE_ALIGNMENT (mode) == 128
7302 || (type && TYPE_ALIGN (type) == 128)))
7303 ? 128
7304 : PARM_BOUNDARY);
7305 }
7306
7307 /* For an arg passed partly in registers and partly in memory,
7308 this is the number of bytes of registers used.
7309 For args passed entirely in registers or entirely in memory, zero.
7310
7311 Any arg that starts in the first 6 regs but won't entirely fit in them
7312 needs partial registers on v8. On v9, structures with integer
7313 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7314 values that begin in the last fp reg [where "last fp reg" varies with the
7315 mode] will be split between that reg and memory. */
7316
7317 static int
7318 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7319 tree type, bool named)
7320 {
7321 int slotno, regno, padding;
7322
7323 /* We pass false for incoming here; it doesn't matter. */
7324 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7325 false, &regno, &padding);
7326
7327 if (slotno == -1)
7328 return 0;
7329
7330 if (TARGET_ARCH32)
7331 {
7332 if ((slotno + (mode == BLKmode
7333 ? CEIL_NWORDS (int_size_in_bytes (type))
7334 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7335 > SPARC_INT_ARG_MAX)
7336 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7337 }
7338 else
7339 {
7340 /* We are guaranteed by pass_by_reference that the size of the
7341 argument is not greater than 16 bytes, so we only need to return
7342 one word if the argument is partially passed in registers. */
7343
7344 if (type && AGGREGATE_TYPE_P (type))
7345 {
7346 int size = int_size_in_bytes (type);
7347
7348 if (size > UNITS_PER_WORD
7349 && (slotno == SPARC_INT_ARG_MAX - 1
7350 || slotno == SPARC_FP_ARG_MAX - 1))
7351 return UNITS_PER_WORD;
7352 }
7353 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7354 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7355 && ! (TARGET_FPU && named)))
7356 {
7357 /* The complex types are passed as packed types. */
7358 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7359 && slotno == SPARC_INT_ARG_MAX - 1)
7360 return UNITS_PER_WORD;
7361 }
7362 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7363 {
7364 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7365 > SPARC_FP_ARG_MAX)
7366 return UNITS_PER_WORD;
7367 }
7368 }
7369
7370 return 0;
7371 }
7372
7373 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7374 Specify whether to pass the argument by reference. */
7375
7376 static bool
7377 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7378 machine_mode mode, const_tree type,
7379 bool named ATTRIBUTE_UNUSED)
7380 {
7381 if (TARGET_ARCH32)
7382 /* Original SPARC 32-bit ABI says that structures and unions,
7383 and quad-precision floats are passed by reference. For Pascal,
7384 also pass arrays by reference. All other base types are passed
7385 in registers.
7386
7387 Extended ABI (as implemented by the Sun compiler) says that all
7388 complex floats are passed by reference. Pass complex integers
7389 in registers up to 8 bytes. More generally, enforce the 2-word
7390 cap for passing arguments in registers.
7391
7392 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7393 integers are passed like floats of the same size, that is in
7394 registers up to 8 bytes. Pass all vector floats by reference
7395 like structure and unions. */
7396 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7397 || mode == SCmode
7398 /* Catch CDImode, TFmode, DCmode and TCmode. */
7399 || GET_MODE_SIZE (mode) > 8
7400 || (type
7401 && TREE_CODE (type) == VECTOR_TYPE
7402 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7403 else
7404 /* Original SPARC 64-bit ABI says that structures and unions
7405 smaller than 16 bytes are passed in registers, as well as
7406 all other base types.
7407
7408 Extended ABI (as implemented by the Sun compiler) says that
7409 complex floats are passed in registers up to 16 bytes. Pass
7410 all complex integers in registers up to 16 bytes. More generally,
7411 enforce the 2-word cap for passing arguments in registers.
7412
7413 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7414 integers are passed like floats of the same size, that is in
7415 registers (up to 16 bytes). Pass all vector floats like structure
7416 and unions. */
7417 return ((type
7418 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7419 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7420 /* Catch CTImode and TCmode. */
7421 || GET_MODE_SIZE (mode) > 16);
7422 }
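
/* Illustrative examples (an added sketch, not from the original sources,
   derived from the rules spelled out above): in 32-bit mode a
   'struct { int a, b; }', a 'long double' (TFmode, 16 bytes) and a
   '_Complex double' (16 bytes) are all passed by reference, while an
   8-byte '_Complex int' still travels in registers; in 64-bit mode those
   first three fit under the 16-byte cap and go in registers, and only
   larger objects such as a '_Complex long double' (TCmode, 32 bytes) or
   a 24-byte structure are passed by reference.  */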
7423
7424 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7425 Update the data in CUM to advance over an argument
7426 of mode MODE and data type TYPE.
7427 TYPE is null for libcalls where that information may not be available. */
7428
7429 static void
7430 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7431 const_tree type, bool named)
7432 {
7433 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7434 int regno, padding;
7435
7436 /* We pass false for incoming here; it doesn't matter. */
7437 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7438
7439 /* If argument requires leading padding, add it. */
7440 cum->words += padding;
7441
7442 if (TARGET_ARCH32)
7443 cum->words += (mode == BLKmode
7444 ? CEIL_NWORDS (int_size_in_bytes (type))
7445 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7446 else
7447 {
7448 if (type && AGGREGATE_TYPE_P (type))
7449 {
7450 int size = int_size_in_bytes (type);
7451
7452 if (size <= 8)
7453 ++cum->words;
7454 else if (size <= 16)
7455 cum->words += 2;
7456 else /* passed by reference */
7457 ++cum->words;
7458 }
7459 else
7460 cum->words += (mode == BLKmode
7461 ? CEIL_NWORDS (int_size_in_bytes (type))
7462 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7463 }
7464 }
7465
7466 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7467 are always stored left shifted in their argument slot. */
7468
7469 static pad_direction
7470 sparc_function_arg_padding (machine_mode mode, const_tree type)
7471 {
7472 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7473 return PAD_UPWARD;
7474
7475 /* Fall back to the default. */
7476 return default_function_arg_padding (mode, type);
7477 }
7478
7479 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7480 Specify whether to return the return value in memory. */
7481
7482 static bool
7483 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7484 {
7485 if (TARGET_ARCH32)
7486 /* Original SPARC 32-bit ABI says that structures and unions,
7487 and quad-precision floats are returned in memory. All other
7488 base types are returned in registers.
7489
7490 Extended ABI (as implemented by the Sun compiler) says that
7491 all complex floats are returned in registers (8 FP registers
7492 at most for '_Complex long double'). Return all complex integers
7493 in registers (4 at most for '_Complex long long').
7494
7495 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7496 integers are returned like floats of the same size, that is in
7497 registers up to 8 bytes and in memory otherwise. Return all
7498 vector floats in memory like structure and unions; note that
7499 they always have BLKmode like the latter. */
7500 return (TYPE_MODE (type) == BLKmode
7501 || TYPE_MODE (type) == TFmode
7502 || (TREE_CODE (type) == VECTOR_TYPE
7503 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7504 else
7505 /* Original SPARC 64-bit ABI says that structures and unions
7506 smaller than 32 bytes are returned in registers, as well as
7507 all other base types.
7508
7509 Extended ABI (as implemented by the Sun compiler) says that all
7510 complex floats are returned in registers (8 FP registers at most
7511 for '_Complex long double'). Return all complex integers in
7512 registers (4 at most for '_Complex TItype').
7513
7514 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7515 integers are returned like floats of the same size, that is in
7516 registers. Return all vector floats like structure and unions;
7517 note that they always have BLKmode like the latter. */
7518 return (TYPE_MODE (type) == BLKmode
7519 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7520 }
7521
7522 /* Handle the TARGET_STRUCT_VALUE target hook.
7523 Return where to find the structure return value address. */
7524
7525 static rtx
7526 sparc_struct_value_rtx (tree fndecl, int incoming)
7527 {
7528 if (TARGET_ARCH64)
7529 return 0;
7530 else
7531 {
7532 rtx mem;
7533
7534 if (incoming)
7535 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7536 STRUCT_VALUE_OFFSET));
7537 else
7538 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7539 STRUCT_VALUE_OFFSET));
7540
7541 /* Only follow the SPARC ABI for fixed-size structure returns.
7542 Variable size structure returns are handled per the normal
7543 procedures in GCC. This is enabled by -mstd-struct-return. */
7544 if (incoming == 2
7545 && sparc_std_struct_return
7546 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7547 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7548 {
7549 /* We must check and adjust the return address, since the caller is
7550 not required to actually provide the return object. */
7551 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7552 rtx scratch = gen_reg_rtx (SImode);
7553 rtx_code_label *endlab = gen_label_rtx ();
7554
7555 /* Calculate the return object size. */
7556 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7557 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7558 /* Construct a temporary return value. */
7559 rtx temp_val
7560 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7561
7562 /* Implement SPARC 32-bit psABI callee return struct checking:
7563
7564 Fetch the instruction we will return to and see if
7565 it's an unimp instruction (the most significant 10 bits
7566 will be zero). */
7567 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7568 plus_constant (Pmode,
7569 ret_reg, 8)));
7570 /* Assume the size is valid and pre-adjust. */
7571 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7572 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7573 0, endlab);
7574 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7575 /* Write the address of the memory pointed to by temp_val into
7576 the memory pointed to by mem. */
7577 emit_move_insn (mem, XEXP (temp_val, 0));
7578 emit_label (endlab);
7579 }
7580
7581 return mem;
7582 }
7583 }
7584
7585 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7586 For v9, function return values are subject to the same rules as arguments,
7587 except that up to 32 bytes may be returned in registers. */
7588
7589 static rtx
7590 sparc_function_value_1 (const_tree type, machine_mode mode,
7591 bool outgoing)
7592 {
7593 /* Beware that the two values are swapped here wrt function_arg. */
7594 int regbase = (outgoing
7595 ? SPARC_INCOMING_INT_ARG_FIRST
7596 : SPARC_OUTGOING_INT_ARG_FIRST);
7597 enum mode_class mclass = GET_MODE_CLASS (mode);
7598 int regno;
7599
7600 /* Vector types deserve special treatment because they are polymorphic wrt
7601 their mode, depending upon whether VIS instructions are enabled. */
7602 if (type && TREE_CODE (type) == VECTOR_TYPE)
7603 {
7604 HOST_WIDE_INT size = int_size_in_bytes (type);
7605 gcc_assert ((TARGET_ARCH32 && size <= 8)
7606 || (TARGET_ARCH64 && size <= 32));
7607
7608 if (mode == BLKmode)
7609 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7610
7611 mclass = MODE_FLOAT;
7612 }
7613
7614 if (TARGET_ARCH64 && type)
7615 {
7616 /* Structures up to 32 bytes in size are returned in registers. */
7617 if (TREE_CODE (type) == RECORD_TYPE)
7618 {
7619 HOST_WIDE_INT size = int_size_in_bytes (type);
7620 gcc_assert (size <= 32);
7621
7622 return function_arg_record_value (type, mode, 0, 1, regbase);
7623 }
7624
7625 /* Unions up to 32 bytes in size are returned in integer registers. */
7626 else if (TREE_CODE (type) == UNION_TYPE)
7627 {
7628 HOST_WIDE_INT size = int_size_in_bytes (type);
7629 gcc_assert (size <= 32);
7630
7631 return function_arg_union_value (size, mode, 0, regbase);
7632 }
7633
7634 /* Objects that require it are returned in FP registers. */
7635 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7636 ;
7637
7638 /* All other aggregate types are returned in an integer register in a
7639 mode corresponding to the size of the type. */
7640 else if (AGGREGATE_TYPE_P (type))
7641 {
7642 /* All other aggregate types are passed in an integer register
7643 in a mode corresponding to the size of the type. */
7644 HOST_WIDE_INT size = int_size_in_bytes (type);
7645 gcc_assert (size <= 32);
7646
7647 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7648
7649 /* ??? We probably should have made the same ABI change in
7650 3.4.0 as the one we made for unions. The latter was
7651 required by the SCD though, while the former is not
7652 specified, so we favored compatibility and efficiency.
7653
7654 Now we're stuck for aggregates larger than 16 bytes,
7655 because OImode vanished in the meantime. Let's not
7656 try to be unduly clever, and simply follow the ABI
7657 for unions in that case. */
7658 if (mode == BLKmode)
7659 return function_arg_union_value (size, mode, 0, regbase);
7660 else
7661 mclass = MODE_INT;
7662 }
7663
7664 /* We should only have pointer and integer types at this point. This
7665 must match sparc_promote_function_mode. */
7666 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7667 mode = word_mode;
7668 }
7669
7670 /* We should only have pointer and integer types at this point, except with
7671 -freg-struct-return. This must match sparc_promote_function_mode. */
7672 else if (TARGET_ARCH32
7673 && !(type && AGGREGATE_TYPE_P (type))
7674 && mclass == MODE_INT
7675 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7676 mode = word_mode;
7677
7678 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7679 regno = SPARC_FP_ARG_FIRST;
7680 else
7681 regno = regbase;
7682
7683 return gen_rtx_REG (mode, regno);
7684 }
7685
7686 /* Handle TARGET_FUNCTION_VALUE.
7687 On the SPARC, the value is found in the first "output" register, but the
7688 called function leaves it in the first "input" register. */
7689
7690 static rtx
7691 sparc_function_value (const_tree valtype,
7692 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7693 bool outgoing)
7694 {
7695 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7696 }
7697
7698 /* Handle TARGET_LIBCALL_VALUE. */
7699
7700 static rtx
7701 sparc_libcall_value (machine_mode mode,
7702 const_rtx fun ATTRIBUTE_UNUSED)
7703 {
7704 return sparc_function_value_1 (NULL_TREE, mode, false);
7705 }
7706
7707 /* Handle FUNCTION_VALUE_REGNO_P.
7708 On the SPARC, the first "output" reg is used for integer values, and the
7709 first floating point register is used for floating point values. */
7710
7711 static bool
7712 sparc_function_value_regno_p (const unsigned int regno)
7713 {
7714 return (regno == 8 || (TARGET_FPU && regno == 32));
7715 }
7716
7717 /* Do what is necessary for `va_start'. We look at the current function
7718 to determine if stdarg or varargs is used and return the address of
7719 the first unnamed parameter. */
7720
7721 static rtx
7722 sparc_builtin_saveregs (void)
7723 {
7724 int first_reg = crtl->args.info.words;
7725 rtx address;
7726 int regno;
7727
7728 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7729 emit_move_insn (gen_rtx_MEM (word_mode,
7730 gen_rtx_PLUS (Pmode,
7731 frame_pointer_rtx,
7732 GEN_INT (FIRST_PARM_OFFSET (0)
7733 + (UNITS_PER_WORD
7734 * regno)))),
7735 gen_rtx_REG (word_mode,
7736 SPARC_INCOMING_INT_ARG_FIRST + regno));
7737
7738 address = gen_rtx_PLUS (Pmode,
7739 frame_pointer_rtx,
7740 GEN_INT (FIRST_PARM_OFFSET (0)
7741 + UNITS_PER_WORD * first_reg));
7742
7743 return address;
7744 }
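
/* Worked example (an added, illustrative note; SPARC_INT_ARG_MAX is assumed
   to be 6 here): for 'int f (int a, ...)' crtl->args.info.words is 1 after
   the single named argument, so the loop above dumps the incoming registers
   %i1 ... %i5 into their reserved stack slots and the returned address
   points at the slot of the first anonymous argument.  */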
7745
7746 /* Implement `va_start' for stdarg. */
7747
7748 static void
7749 sparc_va_start (tree valist, rtx nextarg)
7750 {
7751 nextarg = expand_builtin_saveregs ();
7752 std_expand_builtin_va_start (valist, nextarg);
7753 }
7754
7755 /* Implement `va_arg' for stdarg. */
7756
7757 static tree
7758 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7759 gimple_seq *post_p)
7760 {
7761 HOST_WIDE_INT size, rsize, align;
7762 tree addr, incr;
7763 bool indirect;
7764 tree ptrtype = build_pointer_type (type);
7765
7766 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7767 {
7768 indirect = true;
7769 size = rsize = UNITS_PER_WORD;
7770 align = 0;
7771 }
7772 else
7773 {
7774 indirect = false;
7775 size = int_size_in_bytes (type);
7776 rsize = ROUND_UP (size, UNITS_PER_WORD);
7777 align = 0;
7778
7779 if (TARGET_ARCH64)
7780 {
7781 /* For SPARC64, objects requiring 16-byte alignment get it. */
7782 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7783 align = 2 * UNITS_PER_WORD;
7784
7785 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7786 are left-justified in their slots. */
7787 if (AGGREGATE_TYPE_P (type))
7788 {
7789 if (size == 0)
7790 size = rsize = UNITS_PER_WORD;
7791 else
7792 size = rsize;
7793 }
7794 }
7795 }
7796
7797 incr = valist;
7798 if (align)
7799 {
7800 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7801 incr = fold_convert (sizetype, incr);
7802 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7803 size_int (-align));
7804 incr = fold_convert (ptr_type_node, incr);
7805 }
7806
7807 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7808 addr = incr;
7809
7810 if (BYTES_BIG_ENDIAN && size < rsize)
7811 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7812
7813 if (indirect)
7814 {
7815 addr = fold_convert (build_pointer_type (ptrtype), addr);
7816 addr = build_va_arg_indirect_ref (addr);
7817 }
7818
7819 /* If the address isn't aligned properly for the type, we need a temporary.
7820 FIXME: This is inefficient, usually we can do this in registers. */
7821 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7822 {
7823 tree tmp = create_tmp_var (type, "va_arg_tmp");
7824 tree dest_addr = build_fold_addr_expr (tmp);
7825 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7826 3, dest_addr, addr, size_int (rsize));
7827 TREE_ADDRESSABLE (tmp) = 1;
7828 gimplify_and_add (copy, pre_p);
7829 addr = dest_addr;
7830 }
7831
7832 else
7833 addr = fold_convert (ptrtype, addr);
7834
7835 incr = fold_build_pointer_plus_hwi (incr, rsize);
7836 gimplify_assign (valist, incr, post_p);
7837
7838 return build_va_arg_indirect_ref (addr);
7839 }
7840 \f
7841 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7842 Specify whether the vector mode is supported by the hardware. */
7843
7844 static bool
7845 sparc_vector_mode_supported_p (machine_mode mode)
7846 {
7847 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7848 }
7849 \f
7850 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7851
7852 static machine_mode
7853 sparc_preferred_simd_mode (scalar_mode mode)
7854 {
7855 if (TARGET_VIS)
7856 switch (mode)
7857 {
7858 case E_SImode:
7859 return V2SImode;
7860 case E_HImode:
7861 return V4HImode;
7862 case E_QImode:
7863 return V8QImode;
7864
7865 default:;
7866 }
7867
7868 return word_mode;
7869 }
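
/* Added note (informal): VIS operates on the 8-byte FP registers, so the
   preferred vector modes above are all 64 bits wide (V2SI, V4HI, V8QI);
   returning word_mode is the hook's conventional way of expressing "no
   SIMD preference" when VIS is not available.  */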
7870 \f
7871 /* Return the string to output an unconditional branch to LABEL, which is
7872 the operand number of the label.
7873
7874 DEST is the destination insn (i.e. the label), INSN is the source. */
7875
7876 const char *
7877 output_ubranch (rtx dest, rtx_insn *insn)
7878 {
7879 static char string[64];
7880 bool v9_form = false;
7881 int delta;
7882 char *p;
7883
7884 /* Even if we are trying to use cbcond for this, evaluate
7885 whether we can use V9 branches as our backup plan. */
7886
7887 delta = 5000000;
7888 if (INSN_ADDRESSES_SET_P ())
7889 delta = (INSN_ADDRESSES (INSN_UID (dest))
7890 - INSN_ADDRESSES (INSN_UID (insn)));
7891
7892 /* Leave some instructions for "slop". */
7893 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7894 v9_form = true;
7895
7896 if (TARGET_CBCOND)
7897 {
7898 bool emit_nop = emit_cbcond_nop (insn);
7899 bool far = false;
7900 const char *rval;
7901
7902 if (delta < -500 || delta > 500)
7903 far = true;
7904
7905 if (far)
7906 {
7907 if (v9_form)
7908 rval = "ba,a,pt\t%%xcc, %l0";
7909 else
7910 rval = "b,a\t%l0";
7911 }
7912 else
7913 {
7914 if (emit_nop)
7915 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7916 else
7917 rval = "cwbe\t%%g0, %%g0, %l0";
7918 }
7919 return rval;
7920 }
7921
7922 if (v9_form)
7923 strcpy (string, "ba%*,pt\t%%xcc, ");
7924 else
7925 strcpy (string, "b%*\t");
7926
7927 p = strchr (string, '\0');
7928 *p++ = '%';
7929 *p++ = 'l';
7930 *p++ = '0';
7931 *p++ = '%';
7932 *p++ = '(';
7933 *p = '\0';
7934
7935 return string;
7936 }
7937
7938 /* Return the string to output a conditional branch to LABEL, which is
7939 the operand number of the label. OP is the conditional expression.
7940 XEXP (OP, 0) is assumed to be a condition code register (integer or
7941 floating point) and its mode specifies what kind of comparison we made.
7942
7943 DEST is the destination insn (i.e. the label), INSN is the source.
7944
7945 REVERSED is nonzero if we should reverse the sense of the comparison.
7946
7947 ANNUL is nonzero if we should generate an annulling branch. */
7948
7949 const char *
7950 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7951 rtx_insn *insn)
7952 {
7953 static char string[64];
7954 enum rtx_code code = GET_CODE (op);
7955 rtx cc_reg = XEXP (op, 0);
7956 machine_mode mode = GET_MODE (cc_reg);
7957 const char *labelno, *branch;
7958 int spaces = 8, far;
7959 char *p;
7960
7961 /* v9 branches are limited to +-1MB. If it is too far away,
7962 change
7963
7964 bne,pt %xcc, .LC30
7965
7966 to
7967
7968 be,pn %xcc, .+12
7969 nop
7970 ba .LC30
7971
7972 and
7973
7974 fbne,a,pn %fcc2, .LC29
7975
7976 to
7977
7978 fbe,pt %fcc2, .+16
7979 nop
7980 ba .LC29 */
7981
7982 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7983 if (reversed ^ far)
7984 {
7985 /* Reversal of FP compares takes care -- an ordered compare
7986 becomes an unordered compare and vice versa. */
7987 if (mode == CCFPmode || mode == CCFPEmode)
7988 code = reverse_condition_maybe_unordered (code);
7989 else
7990 code = reverse_condition (code);
7991 }
7992
7993 /* Start by writing the branch condition. */
7994 if (mode == CCFPmode || mode == CCFPEmode)
7995 {
7996 switch (code)
7997 {
7998 case NE:
7999 branch = "fbne";
8000 break;
8001 case EQ:
8002 branch = "fbe";
8003 break;
8004 case GE:
8005 branch = "fbge";
8006 break;
8007 case GT:
8008 branch = "fbg";
8009 break;
8010 case LE:
8011 branch = "fble";
8012 break;
8013 case LT:
8014 branch = "fbl";
8015 break;
8016 case UNORDERED:
8017 branch = "fbu";
8018 break;
8019 case ORDERED:
8020 branch = "fbo";
8021 break;
8022 case UNGT:
8023 branch = "fbug";
8024 break;
8025 case UNLT:
8026 branch = "fbul";
8027 break;
8028 case UNEQ:
8029 branch = "fbue";
8030 break;
8031 case UNGE:
8032 branch = "fbuge";
8033 break;
8034 case UNLE:
8035 branch = "fbule";
8036 break;
8037 case LTGT:
8038 branch = "fblg";
8039 break;
8040 default:
8041 gcc_unreachable ();
8042 }
8043
8044 /* ??? !v9: FP branches cannot be preceded by another floating point
8045 insn. Because there is currently no concept of pre-delay slots,
8046 we can fix this only by always emitting a nop before a floating
8047 point branch. */
8048
8049 string[0] = '\0';
8050 if (! TARGET_V9)
8051 strcpy (string, "nop\n\t");
8052 strcat (string, branch);
8053 }
8054 else
8055 {
8056 switch (code)
8057 {
8058 case NE:
8059 if (mode == CCVmode || mode == CCXVmode)
8060 branch = "bvs";
8061 else
8062 branch = "bne";
8063 break;
8064 case EQ:
8065 if (mode == CCVmode || mode == CCXVmode)
8066 branch = "bvc";
8067 else
8068 branch = "be";
8069 break;
8070 case GE:
8071 if (mode == CCNZmode || mode == CCXNZmode)
8072 branch = "bpos";
8073 else
8074 branch = "bge";
8075 break;
8076 case GT:
8077 branch = "bg";
8078 break;
8079 case LE:
8080 branch = "ble";
8081 break;
8082 case LT:
8083 if (mode == CCNZmode || mode == CCXNZmode)
8084 branch = "bneg";
8085 else
8086 branch = "bl";
8087 break;
8088 case GEU:
8089 branch = "bgeu";
8090 break;
8091 case GTU:
8092 branch = "bgu";
8093 break;
8094 case LEU:
8095 branch = "bleu";
8096 break;
8097 case LTU:
8098 branch = "blu";
8099 break;
8100 default:
8101 gcc_unreachable ();
8102 }
8103 strcpy (string, branch);
8104 }
8105 spaces -= strlen (branch);
8106 p = strchr (string, '\0');
8107
8108 /* Now add the annulling, the label, and a possible noop. */
8109 if (annul && ! far)
8110 {
8111 strcpy (p, ",a");
8112 p += 2;
8113 spaces -= 2;
8114 }
8115
8116 if (TARGET_V9)
8117 {
8118 rtx note;
8119 int v8 = 0;
8120
8121 if (! far && insn && INSN_ADDRESSES_SET_P ())
8122 {
8123 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8124 - INSN_ADDRESSES (INSN_UID (insn)));
8125 /* Leave some instructions for "slop". */
8126 if (delta < -260000 || delta >= 260000)
8127 v8 = 1;
8128 }
8129
8130 switch (mode)
8131 {
8132 case E_CCmode:
8133 case E_CCNZmode:
8134 case E_CCCmode:
8135 case E_CCVmode:
8136 labelno = "%%icc, ";
8137 if (v8)
8138 labelno = "";
8139 break;
8140 case E_CCXmode:
8141 case E_CCXNZmode:
8142 case E_CCXCmode:
8143 case E_CCXVmode:
8144 labelno = "%%xcc, ";
8145 gcc_assert (!v8);
8146 break;
8147 case E_CCFPmode:
8148 case E_CCFPEmode:
8149 {
8150 static char v9_fcc_labelno[] = "%%fccX, ";
8151 /* Set the char indicating the number of the fcc reg to use. */
8152 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8153 labelno = v9_fcc_labelno;
8154 if (v8)
8155 {
8156 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8157 labelno = "";
8158 }
8159 }
8160 break;
8161 default:
8162 gcc_unreachable ();
8163 }
8164
8165 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8166 {
8167 strcpy (p,
8168 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8169 >= profile_probability::even ()) ^ far)
8170 ? ",pt" : ",pn");
8171 p += 3;
8172 spaces -= 3;
8173 }
8174 }
8175 else
8176 labelno = "";
8177
8178 if (spaces > 0)
8179 *p++ = '\t';
8180 else
8181 *p++ = ' ';
8182 strcpy (p, labelno);
8183 p = strchr (p, '\0');
8184 if (far)
8185 {
8186 strcpy (p, ".+12\n\t nop\n\tb\t");
8187 /* Skip the next insn if requested or
8188 if we know that it will be a nop. */
8189 if (annul || ! final_sequence)
8190 p[3] = '6';
8191 p += 14;
8192 }
8193 *p++ = '%';
8194 *p++ = 'l';
8195 *p++ = label + '0';
8196 *p++ = '%';
8197 *p++ = '#';
8198 *p = '\0';
8199
8200 return string;
8201 }
8202
8203 /* Emit a library call comparison between floating point X and Y.
8204 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8205 Return the new operator to be used in the comparison sequence.
8206
8207 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8208 values as arguments instead of the TFmode registers themselves;
8209 that is why we cannot call emit_float_lib_cmp. */
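
/* Added note (informal, inferred from the constants used below): for the
   unordered comparisons the library routine _Qp_cmp/_Q_cmp is assumed to
   return 0 for equal, 1 for less, 2 for greater and 3 for unordered, which
   is why e.g. UNORDERED tests the result against 3 and UNLT tests its low
   bit.  */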
8210
8211 rtx
8212 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8213 {
8214 const char *qpfunc;
8215 rtx slot0, slot1, result, tem, tem2, libfunc;
8216 machine_mode mode;
8217 enum rtx_code new_comparison;
8218
8219 switch (comparison)
8220 {
8221 case EQ:
8222 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8223 break;
8224
8225 case NE:
8226 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8227 break;
8228
8229 case GT:
8230 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8231 break;
8232
8233 case GE:
8234 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8235 break;
8236
8237 case LT:
8238 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8239 break;
8240
8241 case LE:
8242 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8243 break;
8244
8245 case ORDERED:
8246 case UNORDERED:
8247 case UNGT:
8248 case UNLT:
8249 case UNEQ:
8250 case UNGE:
8251 case UNLE:
8252 case LTGT:
8253 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8254 break;
8255
8256 default:
8257 gcc_unreachable ();
8258 }
8259
8260 if (TARGET_ARCH64)
8261 {
8262 if (MEM_P (x))
8263 {
8264 tree expr = MEM_EXPR (x);
8265 if (expr)
8266 mark_addressable (expr);
8267 slot0 = x;
8268 }
8269 else
8270 {
8271 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8272 emit_move_insn (slot0, x);
8273 }
8274
8275 if (MEM_P (y))
8276 {
8277 tree expr = MEM_EXPR (y);
8278 if (expr)
8279 mark_addressable (expr);
8280 slot1 = y;
8281 }
8282 else
8283 {
8284 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8285 emit_move_insn (slot1, y);
8286 }
8287
8288 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8289 emit_library_call (libfunc, LCT_NORMAL,
8290 DImode,
8291 XEXP (slot0, 0), Pmode,
8292 XEXP (slot1, 0), Pmode);
8293 mode = DImode;
8294 }
8295 else
8296 {
8297 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8298 emit_library_call (libfunc, LCT_NORMAL,
8299 SImode,
8300 x, TFmode, y, TFmode);
8301 mode = SImode;
8302 }
8303
8304
8305 /* Immediately move the result of the libcall into a pseudo
8306 register so reload doesn't clobber the value if it needs
8307 the return register for a spill reg. */
8308 result = gen_reg_rtx (mode);
8309 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8310
8311 switch (comparison)
8312 {
8313 default:
8314 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8315 case ORDERED:
8316 case UNORDERED:
8317 new_comparison = (comparison == UNORDERED ? EQ : NE);
8318 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8319 case UNGT:
8320 case UNGE:
8321 new_comparison = (comparison == UNGT ? GT : NE);
8322 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8323 case UNLE:
8324 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8325 case UNLT:
8326 tem = gen_reg_rtx (mode);
8327 if (TARGET_ARCH32)
8328 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8329 else
8330 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8331 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8332 case UNEQ:
8333 case LTGT:
8334 tem = gen_reg_rtx (mode);
8335 if (TARGET_ARCH32)
8336 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8337 else
8338 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8339 tem2 = gen_reg_rtx (mode);
8340 if (TARGET_ARCH32)
8341 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8342 else
8343 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8344 new_comparison = (comparison == UNEQ ? EQ : NE);
8345 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8346 }
8347
8348 gcc_unreachable ();
8349 }
8350
8351 /* Generate an unsigned DImode to FP conversion. This is the same code
8352 optabs would emit if we didn't have TFmode patterns. */
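
/* Added explanatory note (informal): when the input has its top bit set it
   cannot be converted directly as a signed value, so the code below halves
   it with (in >> 1) | (in & 1) -- keeping the discarded low bit sticky so
   that rounding still comes out correctly -- converts that to floating
   point, and then doubles the result.  */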
8353
8354 void
8355 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8356 {
8357 rtx i0, i1, f0, in, out;
8358
8359 out = operands[0];
8360 in = force_reg (DImode, operands[1]);
8361 rtx_code_label *neglab = gen_label_rtx ();
8362 rtx_code_label *donelab = gen_label_rtx ();
8363 i0 = gen_reg_rtx (DImode);
8364 i1 = gen_reg_rtx (DImode);
8365 f0 = gen_reg_rtx (mode);
8366
8367 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8368
8369 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8370 emit_jump_insn (gen_jump (donelab));
8371 emit_barrier ();
8372
8373 emit_label (neglab);
8374
8375 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8376 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8377 emit_insn (gen_iordi3 (i0, i0, i1));
8378 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8379 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8380
8381 emit_label (donelab);
8382 }
8383
8384 /* Generate an FP to unsigned DImode conversion. This is the same code
8385 optabs would emit if we didn't have TFmode patterns. */
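
/* Added explanatory note (informal): values below 2^63 are converted with
   an ordinary signed fix; for larger values the code below subtracts 2^63
   (the 'limit' constant), converts the difference, and then adds 2^63 back
   into the integer result by XORing in the sign bit.  */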
8386
8387 void
8388 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8389 {
8390 rtx i0, i1, f0, in, out, limit;
8391
8392 out = operands[0];
8393 in = force_reg (mode, operands[1]);
8394 rtx_code_label *neglab = gen_label_rtx ();
8395 rtx_code_label *donelab = gen_label_rtx ();
8396 i0 = gen_reg_rtx (DImode);
8397 i1 = gen_reg_rtx (DImode);
8398 limit = gen_reg_rtx (mode);
8399 f0 = gen_reg_rtx (mode);
8400
8401 emit_move_insn (limit,
8402 const_double_from_real_value (
8403 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8404 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8405
8406 emit_insn (gen_rtx_SET (out,
8407 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8408 emit_jump_insn (gen_jump (donelab));
8409 emit_barrier ();
8410
8411 emit_label (neglab);
8412
8413 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8414 emit_insn (gen_rtx_SET (i0,
8415 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8416 emit_insn (gen_movdi (i1, const1_rtx));
8417 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8418 emit_insn (gen_xordi3 (out, i0, i1));
8419
8420 emit_label (donelab);
8421 }
8422
8423 /* Return the string to output a compare and branch instruction to DEST.
8424 DEST is the destination insn (i.e. the label), INSN is the source,
8425 and OP is the conditional expression. */
8426
8427 const char *
8428 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8429 {
8430 machine_mode mode = GET_MODE (XEXP (op, 0));
8431 enum rtx_code code = GET_CODE (op);
8432 const char *cond_str, *tmpl;
8433 int far, emit_nop, len;
8434 static char string[64];
8435 char size_char;
8436
8437 /* Compare and Branch is limited to +-2KB. If it is too far away,
8438 change
8439
8440 cxbne X, Y, .LC30
8441
8442 to
8443
8444 cxbe X, Y, .+16
8445 nop
8446 ba,pt xcc, .LC30
8447 nop */
8448
8449 len = get_attr_length (insn);
8450
8451 far = len == 4;
8452 emit_nop = len == 2;
8453
8454 if (far)
8455 code = reverse_condition (code);
8456
8457 size_char = ((mode == SImode) ? 'w' : 'x');
8458
8459 switch (code)
8460 {
8461 case NE:
8462 cond_str = "ne";
8463 break;
8464
8465 case EQ:
8466 cond_str = "e";
8467 break;
8468
8469 case GE:
8470 cond_str = "ge";
8471 break;
8472
8473 case GT:
8474 cond_str = "g";
8475 break;
8476
8477 case LE:
8478 cond_str = "le";
8479 break;
8480
8481 case LT:
8482 cond_str = "l";
8483 break;
8484
8485 case GEU:
8486 cond_str = "cc";
8487 break;
8488
8489 case GTU:
8490 cond_str = "gu";
8491 break;
8492
8493 case LEU:
8494 cond_str = "leu";
8495 break;
8496
8497 case LTU:
8498 cond_str = "cs";
8499 break;
8500
8501 default:
8502 gcc_unreachable ();
8503 }
8504
8505 if (far)
8506 {
8507 int veryfar = 1, delta;
8508
8509 if (INSN_ADDRESSES_SET_P ())
8510 {
8511 delta = (INSN_ADDRESSES (INSN_UID (dest))
8512 - INSN_ADDRESSES (INSN_UID (insn)));
8513 /* Leave some instructions for "slop". */
8514 if (delta >= -260000 && delta < 260000)
8515 veryfar = 0;
8516 }
8517
8518 if (veryfar)
8519 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8520 else
8521 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8522 }
8523 else
8524 {
8525 if (emit_nop)
8526 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8527 else
8528 tmpl = "c%cb%s\t%%1, %%2, %%3";
8529 }
8530
8531 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8532
8533 return string;
8534 }
8535
8536 /* Return the string to output a conditional branch to LABEL, testing
8537 register REG. LABEL is the operand number of the label; REG is the
8538 operand number of the reg. OP is the conditional expression. The mode
8539 of REG says what kind of comparison we made.
8540
8541 DEST is the destination insn (i.e. the label), INSN is the source.
8542
8543 REVERSED is nonzero if we should reverse the sense of the comparison.
8544
8545 ANNUL is nonzero if we should generate an annulling branch. */
8546
8547 const char *
8548 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8549 int annul, rtx_insn *insn)
8550 {
8551 static char string[64];
8552 enum rtx_code code = GET_CODE (op);
8553 machine_mode mode = GET_MODE (XEXP (op, 0));
8554 rtx note;
8555 int far;
8556 char *p;
8557
8558 /* Branch-on-register instructions are limited to +-128KB. If it is too far away,
8559 change
8560
8561 brnz,pt %g1, .LC30
8562
8563 to
8564
8565 brz,pn %g1, .+12
8566 nop
8567 ba,pt %xcc, .LC30
8568
8569 and
8570
8571 brgez,a,pn %o1, .LC29
8572
8573 to
8574
8575 brlz,pt %o1, .+16
8576 nop
8577 ba,pt %xcc, .LC29 */
8578
8579 far = get_attr_length (insn) >= 3;
8580
8581 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8582 if (reversed ^ far)
8583 code = reverse_condition (code);
8584
8585 /* Only 64-bit versions of these instructions exist. */
8586 gcc_assert (mode == DImode);
8587
8588 /* Start by writing the branch condition. */
8589
8590 switch (code)
8591 {
8592 case NE:
8593 strcpy (string, "brnz");
8594 break;
8595
8596 case EQ:
8597 strcpy (string, "brz");
8598 break;
8599
8600 case GE:
8601 strcpy (string, "brgez");
8602 break;
8603
8604 case LT:
8605 strcpy (string, "brlz");
8606 break;
8607
8608 case LE:
8609 strcpy (string, "brlez");
8610 break;
8611
8612 case GT:
8613 strcpy (string, "brgz");
8614 break;
8615
8616 default:
8617 gcc_unreachable ();
8618 }
8619
8620 p = strchr (string, '\0');
8621
8622 /* Now add the annulling, reg, label, and nop. */
8623 if (annul && ! far)
8624 {
8625 strcpy (p, ",a");
8626 p += 2;
8627 }
8628
8629 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8630 {
8631 strcpy (p,
8632 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8633 >= profile_probability::even ()) ^ far)
8634 ? ",pt" : ",pn");
8635 p += 3;
8636 }
8637
8638 *p = p < string + 8 ? '\t' : ' ';
8639 p++;
8640 *p++ = '%';
8641 *p++ = '0' + reg;
8642 *p++ = ',';
8643 *p++ = ' ';
8644 if (far)
8645 {
8646 int veryfar = 1, delta;
8647
8648 if (INSN_ADDRESSES_SET_P ())
8649 {
8650 delta = (INSN_ADDRESSES (INSN_UID (dest))
8651 - INSN_ADDRESSES (INSN_UID (insn)));
8652 /* Leave some instructions for "slop". */
8653 if (delta >= -260000 && delta < 260000)
8654 veryfar = 0;
8655 }
8656
8657 strcpy (p, ".+12\n\t nop\n\t");
8658 /* Skip the next insn if requested or
8659 if we know that it will be a nop. */
8660 if (annul || ! final_sequence)
8661 p[3] = '6';
8662 p += 12;
8663 if (veryfar)
8664 {
8665 strcpy (p, "b\t");
8666 p += 2;
8667 }
8668 else
8669 {
8670 strcpy (p, "ba,pt\t%%xcc, ");
8671 p += 13;
8672 }
8673 }
8674 *p++ = '%';
8675 *p++ = 'l';
8676 *p++ = '0' + label;
8677 *p++ = '%';
8678 *p++ = '#';
8679 *p = '\0';
8680
8681 return string;
8682 }
8683
8684 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8685 Such instructions cannot be used in the delay slot of a return insn on v9.
8686 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8687 */
8688
8689 static int
8690 epilogue_renumber (register rtx *where, int test)
8691 {
8692 register const char *fmt;
8693 register int i;
8694 register enum rtx_code code;
8695
8696 if (*where == 0)
8697 return 0;
8698
8699 code = GET_CODE (*where);
8700
8701 switch (code)
8702 {
8703 case REG:
8704 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8705 return 1;
8706 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8707 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8708 /* fallthrough */
8709 case SCRATCH:
8710 case CC0:
8711 case PC:
8712 case CONST_INT:
8713 case CONST_WIDE_INT:
8714 case CONST_DOUBLE:
8715 return 0;
8716
8717 /* Do not replace the frame pointer with the stack pointer because
8718 it can cause the delayed instruction to load below the stack.
8719 This occurs when instructions like:
8720
8721 (set (reg/i:SI 24 %i0)
8722 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8723 (const_int -20 [0xffffffec])) 0))
8724
8725 are in the return delayed slot. */
8726 case PLUS:
8727 if (GET_CODE (XEXP (*where, 0)) == REG
8728 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8729 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8730 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8731 return 1;
8732 break;
8733
8734 case MEM:
8735 if (SPARC_STACK_BIAS
8736 && GET_CODE (XEXP (*where, 0)) == REG
8737 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8738 return 1;
8739 break;
8740
8741 default:
8742 break;
8743 }
8744
8745 fmt = GET_RTX_FORMAT (code);
8746
8747 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8748 {
8749 if (fmt[i] == 'E')
8750 {
8751 register int j;
8752 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8753 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8754 return 1;
8755 }
8756 else if (fmt[i] == 'e'
8757 && epilogue_renumber (&(XEXP (*where, i)), test))
8758 return 1;
8759 }
8760 return 0;
8761 }
8762 \f
8763 /* Leaf functions and non-leaf functions have different needs. */
8764
8765 static const int
8766 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8767
8768 static const int
8769 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8770
8771 static const int *const reg_alloc_orders[] = {
8772 reg_leaf_alloc_order,
8773 reg_nonleaf_alloc_order};
8774
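/* Added note (informal): hard register 15 is %o7, the register that the
   'call' instruction writes its return address into, so it is only ever
   live in functions that make calls.  Testing it below is therefore a
   cheap leaf/non-leaf check that selects between the two allocation
   orders.  */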
8775 void
8776 order_regs_for_local_alloc (void)
8777 {
8778 static int last_order_nonleaf = 1;
8779
8780 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8781 {
8782 last_order_nonleaf = !last_order_nonleaf;
8783 memcpy ((char *) reg_alloc_order,
8784 (const char *) reg_alloc_orders[last_order_nonleaf],
8785 FIRST_PSEUDO_REGISTER * sizeof (int));
8786 }
8787 }
8788 \f
8789 /* Return 1 if REG and MEM are legitimate enough to allow the various
8790 MEM<-->REG splits to be run. */
8791
8792 int
8793 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8794 {
8795 /* Punt if we are here by mistake. */
8796 gcc_assert (reload_completed);
8797
8798 /* We must have an offsettable memory reference. */
8799 if (!offsettable_memref_p (mem))
8800 return 0;
8801
8802 /* If we have legitimate args for ldd/std, we do not want
8803 the split to happen. */
8804 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8805 return 0;
8806
8807 /* Success. */
8808 return 1;
8809 }
8810
8811 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8812
8813 void
8814 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8815 {
8816 rtx high_part = gen_highpart (mode, dest);
8817 rtx low_part = gen_lowpart (mode, dest);
8818 rtx word0 = adjust_address (src, mode, 0);
8819 rtx word1 = adjust_address (src, mode, 4);
8820
8821 if (reg_overlap_mentioned_p (high_part, word1))
8822 {
8823 emit_move_insn_1 (low_part, word1);
8824 emit_move_insn_1 (high_part, word0);
8825 }
8826 else
8827 {
8828 emit_move_insn_1 (high_part, word0);
8829 emit_move_insn_1 (low_part, word1);
8830 }
8831 }
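
/* Illustrative case (an added note): splitting a DImode load of %o0/%o1
   from an address based on %o0, i.e. word0 = [%o0] and word1 = [%o0 + 4],
   hits the overlap check above because the high half of the destination
   is also the address register; loading the low half from [%o0 + 4] first
   keeps the address intact before %o0 itself is overwritten.  */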
8832
8833 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8834
8835 void
8836 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8837 {
8838 rtx word0 = adjust_address (dest, mode, 0);
8839 rtx word1 = adjust_address (dest, mode, 4);
8840 rtx high_part = gen_highpart (mode, src);
8841 rtx low_part = gen_lowpart (mode, src);
8842
8843 emit_move_insn_1 (word0, high_part);
8844 emit_move_insn_1 (word1, low_part);
8845 }
8846
8847 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8848
8849 int
8850 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8851 {
8852 /* Punt if we are here by mistake. */
8853 gcc_assert (reload_completed);
8854
8855 if (GET_CODE (reg1) == SUBREG)
8856 reg1 = SUBREG_REG (reg1);
8857 if (GET_CODE (reg1) != REG)
8858 return 0;
8859 const int regno1 = REGNO (reg1);
8860
8861 if (GET_CODE (reg2) == SUBREG)
8862 reg2 = SUBREG_REG (reg2);
8863 if (GET_CODE (reg2) != REG)
8864 return 0;
8865 const int regno2 = REGNO (reg2);
8866
8867 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8868 return 1;
8869
8870 if (TARGET_VIS3)
8871 {
8872 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8873 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8874 return 1;
8875 }
8876
8877 return 0;
8878 }
8879
8880 /* Split a REG <--> REG move into a pair of moves in MODE. */
8881
8882 void
8883 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8884 {
8885 rtx dest1 = gen_highpart (mode, dest);
8886 rtx dest2 = gen_lowpart (mode, dest);
8887 rtx src1 = gen_highpart (mode, src);
8888 rtx src2 = gen_lowpart (mode, src);
8889
8890 /* Now emit using the real source and destination we found, swapping
8891 the order if we detect overlap. */
8892 if (reg_overlap_mentioned_p (dest1, src2))
8893 {
8894 emit_move_insn_1 (dest2, src2);
8895 emit_move_insn_1 (dest1, src1);
8896 }
8897 else
8898 {
8899 emit_move_insn_1 (dest1, src1);
8900 emit_move_insn_1 (dest2, src2);
8901 }
8902 }
8903
8904 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8905 This makes them candidates for using ldd and std insns.
8906
8907 Note reg1 and reg2 *must* be hard registers. */
8908
8909 int
8910 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8911 {
8912 /* We might have been passed a SUBREG. */
8913 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8914 return 0;
8915
8916 if (REGNO (reg1) % 2 != 0)
8917 return 0;
8918
8919 /* Integer ldd is deprecated in SPARC V9. */
8920 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8921 return 0;
8922
8923 return (REGNO (reg1) == REGNO (reg2) - 1);
8924 }
8925
8926 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8927 an ldd or std insn.
8928
8929 This can only happen when addr1 and addr2, the addresses in mem1
8930 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8931 addr1 must also be aligned on a 64-bit boundary.
8932
8933 Also, if dependent_reg_rtx is not null, it should not be used to
8934 compute the address for mem1, i.e. we cannot optimize a sequence
8935 like:
8936 ld [%o0], %o0
8937 ld [%o0 + 4], %o1
8938 to
8939 ldd [%o0], %o0
8940 nor:
8941 ld [%g3 + 4], %g3
8942 ld [%g3], %g2
8943 to
8944 ldd [%g3], %g2
8945
8946 But, note that the transformation from:
8947 ld [%g2 + 4], %g3
8948 ld [%g2], %g2
8949 to
8950 ldd [%g2], %g2
8951 is perfectly fine. Thus, the peephole2 patterns always pass us
8952 the destination register of the first load, never the second one.
8953
8954 For stores we don't have a similar problem, so dependent_reg_rtx is
8955 NULL_RTX. */
8956
8957 int
8958 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8959 {
8960 rtx addr1, addr2;
8961 unsigned int reg1;
8962 HOST_WIDE_INT offset1;
8963
8964 /* The mems cannot be volatile. */
8965 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8966 return 0;
8967
8968 /* MEM1 should be aligned on a 64-bit boundary. */
8969 if (MEM_ALIGN (mem1) < 64)
8970 return 0;
8971
8972 addr1 = XEXP (mem1, 0);
8973 addr2 = XEXP (mem2, 0);
8974
8975 /* Extract a register number and offset (if used) from the first addr. */
8976 if (GET_CODE (addr1) == PLUS)
8977 {
8978 /* If not a REG, return zero. */
8979 if (GET_CODE (XEXP (addr1, 0)) != REG)
8980 return 0;
8981 else
8982 {
8983 reg1 = REGNO (XEXP (addr1, 0));
8984 /* The offset must be constant! */
8985 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8986 return 0;
8987 offset1 = INTVAL (XEXP (addr1, 1));
8988 }
8989 }
8990 else if (GET_CODE (addr1) != REG)
8991 return 0;
8992 else
8993 {
8994 reg1 = REGNO (addr1);
8995 /* This was a simple (mem (reg)) expression. Offset is 0. */
8996 offset1 = 0;
8997 }
8998
8999 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9000 if (GET_CODE (addr2) != PLUS)
9001 return 0;
9002
9003 if (GET_CODE (XEXP (addr2, 0)) != REG
9004 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9005 return 0;
9006
9007 if (reg1 != REGNO (XEXP (addr2, 0)))
9008 return 0;
9009
9010 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9011 return 0;
9012
9013 /* The first offset must be evenly divisible by 8 to ensure the
9014 address is 64-bit aligned. */
9015 if (offset1 % 8 != 0)
9016 return 0;
9017
9018 /* The offset for the second addr must be 4 more than the first addr. */
9019 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9020 return 0;
9021
9022 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9023 instructions. */
9024 return 1;
9025 }
9026
9027 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9028
9029 rtx
9030 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9031 {
9032 rtx x = widen_memory_access (mem1, mode, 0);
9033 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9034 return x;
9035 }
9036
9037 /* Return 1 if reg is a pseudo, or is the first register in
9038 a hard register pair. This makes it suitable for use in
9039 ldd and std insns. */
9040
9041 int
9042 register_ok_for_ldd (rtx reg)
9043 {
9044 /* We might have been passed a SUBREG. */
9045 if (!REG_P (reg))
9046 return 0;
9047
9048 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9049 return (REGNO (reg) % 2 == 0);
9050
9051 return 1;
9052 }
9053
9054 /* Return 1 if OP, a MEM, has an address which is known to be
9055 aligned to an 8-byte boundary. */
9056
9057 int
9058 memory_ok_for_ldd (rtx op)
9059 {
9060 /* In 64-bit mode, we assume that the address is word-aligned. */
9061 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9062 return 0;
9063
9064 if (! can_create_pseudo_p ()
9065 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9066 return 0;
9067
9068 return 1;
9069 }
9070 \f
9071 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9072
9073 static bool
9074 sparc_print_operand_punct_valid_p (unsigned char code)
9075 {
9076 if (code == '#'
9077 || code == '*'
9078 || code == '('
9079 || code == ')'
9080 || code == '_'
9081 || code == '&')
9082 return true;
9083
9084 return false;
9085 }
9086
9087 /* Implement TARGET_PRINT_OPERAND.
9088 Print operand X (an rtx) in assembler syntax to file FILE.
9089 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9090 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9091
9092 static void
9093 sparc_print_operand (FILE *file, rtx x, int code)
9094 {
9095 const char *s;
9096
9097 switch (code)
9098 {
9099 case '#':
9100 /* Output an insn in a delay slot. */
9101 if (final_sequence)
9102 sparc_indent_opcode = 1;
9103 else
9104 fputs ("\n\t nop", file);
9105 return;
9106 case '*':
9107 /* Output an annul flag if there's nothing for the delay slot and we
9108 are optimizing. This is always used with '(' below.
9109 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9110 this is a dbx bug. So, we only do this when optimizing.
9111 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9112 Always emit a nop in case the next instruction is a branch. */
9113 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9114 fputs (",a", file);
9115 return;
9116 case '(':
9117 /* Output a 'nop' if there's nothing for the delay slot and we are
9118 not optimizing. This is always used with '*' above. */
9119 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9120 fputs ("\n\t nop", file);
9121 else if (final_sequence)
9122 sparc_indent_opcode = 1;
9123 return;
9124 case ')':
9125 /* Output the right displacement from the saved PC on function return.
9126 The caller may have placed an "unimp" insn immediately after the call
9127 so we have to account for it. This insn is used in the 32-bit ABI
9128 when calling a function that returns a non-zero-sized structure. The
9129 64-bit ABI doesn't have it. Be careful to have this test be the same
9130 as that for the call. The exception is when sparc_std_struct_return
9131 is enabled, the psABI is followed exactly and the adjustment is made
9132 by the code in sparc_struct_value_rtx. The call emitted is the same
9133 when sparc_std_struct_return is enabled. */
9134 if (!TARGET_ARCH64
9135 && cfun->returns_struct
9136 && !sparc_std_struct_return
9137 && DECL_SIZE (DECL_RESULT (current_function_decl))
9138 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9139 == INTEGER_CST
9140 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9141 fputs ("12", file);
9142 else
9143 fputc ('8', file);
9144 return;
9145 case '_':
9146 /* Output the Embedded Medium/Anywhere code model base register. */
9147 fputs (EMBMEDANY_BASE_REG, file);
9148 return;
9149 case '&':
9150 /* Print some local dynamic TLS name. */
9151 if (const char *name = get_some_local_dynamic_name ())
9152 assemble_name (file, name);
9153 else
9154 output_operand_lossage ("'%%&' used without any "
9155 "local dynamic TLS references");
9156 return;
9157
9158 case 'Y':
9159 /* Adjust the operand to take into account a RESTORE operation. */
9160 if (GET_CODE (x) == CONST_INT)
9161 break;
9162 else if (GET_CODE (x) != REG)
9163 output_operand_lossage ("invalid %%Y operand");
9164 else if (REGNO (x) < 8)
9165 fputs (reg_names[REGNO (x)], file);
9166 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9167 fputs (reg_names[REGNO (x)-16], file);
9168 else
9169 output_operand_lossage ("invalid %%Y operand");
9170 return;
9171 case 'L':
9172 /* Print out the low order register name of a register pair. */
9173 if (WORDS_BIG_ENDIAN)
9174 fputs (reg_names[REGNO (x)+1], file);
9175 else
9176 fputs (reg_names[REGNO (x)], file);
9177 return;
9178 case 'H':
9179 /* Print out the high order register name of a register pair. */
9180 if (WORDS_BIG_ENDIAN)
9181 fputs (reg_names[REGNO (x)], file);
9182 else
9183 fputs (reg_names[REGNO (x)+1], file);
9184 return;
9185 case 'R':
9186 /* Print out the second register name of a register pair or quad.
9187 I.e., R (%o0) => %o1. */
9188 fputs (reg_names[REGNO (x)+1], file);
9189 return;
9190 case 'S':
9191 /* Print out the third register name of a register quad.
9192 I.e., S (%o0) => %o2. */
9193 fputs (reg_names[REGNO (x)+2], file);
9194 return;
9195 case 'T':
9196 /* Print out the fourth register name of a register quad.
9197 I.e., T (%o0) => %o3. */
9198 fputs (reg_names[REGNO (x)+3], file);
9199 return;
9200 case 'x':
9201 /* Print a condition code register. */
9202 if (REGNO (x) == SPARC_ICC_REG)
9203 {
9204 switch (GET_MODE (x))
9205 {
9206 case E_CCmode:
9207 case E_CCNZmode:
9208 case E_CCCmode:
9209 case E_CCVmode:
9210 s = "%icc";
9211 break;
9212 case E_CCXmode:
9213 case E_CCXNZmode:
9214 case E_CCXCmode:
9215 case E_CCXVmode:
9216 s = "%xcc";
9217 break;
9218 default:
9219 gcc_unreachable ();
9220 }
9221 fputs (s, file);
9222 }
9223 else
9224 /* %fccN register */
9225 fputs (reg_names[REGNO (x)], file);
9226 return;
9227 case 'm':
9228 /* Print the operand's address only. */
9229 output_address (GET_MODE (x), XEXP (x, 0));
9230 return;
9231 case 'r':
9232 /* In this case we need a register. Use %g0 if the
9233 operand is const0_rtx. */
9234 if (x == const0_rtx
9235 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9236 {
9237 fputs ("%g0", file);
9238 return;
9239 }
9240 else
9241 break;
9242
9243 case 'A':
9244 switch (GET_CODE (x))
9245 {
9246 case IOR:
9247 s = "or";
9248 break;
9249 case AND:
9250 s = "and";
9251 break;
9252 case XOR:
9253 s = "xor";
9254 break;
9255 default:
9256 output_operand_lossage ("invalid %%A operand");
9257 s = "";
9258 break;
9259 }
9260 fputs (s, file);
9261 return;
9262
9263 case 'B':
9264 switch (GET_CODE (x))
9265 {
9266 case IOR:
9267 s = "orn";
9268 break;
9269 case AND:
9270 s = "andn";
9271 break;
9272 case XOR:
9273 s = "xnor";
9274 break;
9275 default:
9276 output_operand_lossage ("invalid %%B operand");
9277 s = "";
9278 break;
9279 }
9280 fputs (s, file);
9281 return;
9282
9283 /* This is used by the conditional move instructions. */
9284 case 'C':
9285 {
9286 machine_mode mode = GET_MODE (XEXP (x, 0));
9287 switch (GET_CODE (x))
9288 {
9289 case NE:
9290 if (mode == CCVmode || mode == CCXVmode)
9291 s = "vs";
9292 else
9293 s = "ne";
9294 break;
9295 case EQ:
9296 if (mode == CCVmode || mode == CCXVmode)
9297 s = "vc";
9298 else
9299 s = "e";
9300 break;
9301 case GE:
9302 if (mode == CCNZmode || mode == CCXNZmode)
9303 s = "pos";
9304 else
9305 s = "ge";
9306 break;
9307 case GT:
9308 s = "g";
9309 break;
9310 case LE:
9311 s = "le";
9312 break;
9313 case LT:
9314 if (mode == CCNZmode || mode == CCXNZmode)
9315 s = "neg";
9316 else
9317 s = "l";
9318 break;
9319 case GEU:
9320 s = "geu";
9321 break;
9322 case GTU:
9323 s = "gu";
9324 break;
9325 case LEU:
9326 s = "leu";
9327 break;
9328 case LTU:
9329 s = "lu";
9330 break;
9331 case LTGT:
9332 s = "lg";
9333 break;
9334 case UNORDERED:
9335 s = "u";
9336 break;
9337 case ORDERED:
9338 s = "o";
9339 break;
9340 case UNLT:
9341 s = "ul";
9342 break;
9343 case UNLE:
9344 s = "ule";
9345 break;
9346 case UNGT:
9347 s = "ug";
9348 break;
9349 case UNGE:
9350 s = "uge";
9351 break;
9352 case UNEQ:
9353 s = "ue";
9354 break;
9355 default:
9356 output_operand_lossage ("invalid %%C operand");
9357 s = "";
9358 break;
9359 }
9360 fputs (s, file);
9361 return;
9362 }
9363
9364 /* These are used by the movr instruction pattern. */
9365 case 'D':
9366 {
9367 switch (GET_CODE (x))
9368 {
9369 case NE:
9370 s = "ne";
9371 break;
9372 case EQ:
9373 s = "e";
9374 break;
9375 case GE:
9376 s = "gez";
9377 break;
9378 case LT:
9379 s = "lz";
9380 break;
9381 case LE:
9382 s = "lez";
9383 break;
9384 case GT:
9385 s = "gz";
9386 break;
9387 default:
9388 output_operand_lossage ("invalid %%D operand");
9389 s = "";
9390 break;
9391 }
9392 fputs (s, file);
9393 return;
9394 }
9395
9396 case 'b':
9397 {
9398 /* Print a sign-extended character. */
9399 int i = trunc_int_for_mode (INTVAL (x), QImode);
9400 fprintf (file, "%d", i);
9401 return;
9402 }
9403
9404 case 'f':
9405 /* Operand must be a MEM; write its address. */
9406 if (GET_CODE (x) != MEM)
9407 output_operand_lossage ("invalid %%f operand");
9408 output_address (GET_MODE (x), XEXP (x, 0));
9409 return;
9410
9411 case 's':
9412 {
9413 /* Print a sign-extended 32-bit value. */
9414 HOST_WIDE_INT i;
9415 if (GET_CODE (x) == CONST_INT)
9416 i = INTVAL (x);
9417 else
9418 {
9419 output_operand_lossage ("invalid %%s operand");
9420 return;
9421 }
9422 i = trunc_int_for_mode (i, SImode);
9423 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9424 return;
9425 }
9426
9427 case 0:
9428 /* Do nothing special. */
9429 break;
9430
9431 default:
9432 /* Undocumented flag. */
9433 output_operand_lossage ("invalid operand output code");
9434 }
9435
9436 if (GET_CODE (x) == REG)
9437 fputs (reg_names[REGNO (x)], file);
9438 else if (GET_CODE (x) == MEM)
9439 {
9440 fputc ('[', file);
9441 /* Poor Sun assembler doesn't understand absolute addressing. */
9442 if (CONSTANT_P (XEXP (x, 0)))
9443 fputs ("%g0+", file);
9444 output_address (GET_MODE (x), XEXP (x, 0));
9445 fputc (']', file);
9446 }
9447 else if (GET_CODE (x) == HIGH)
9448 {
9449 fputs ("%hi(", file);
9450 output_addr_const (file, XEXP (x, 0));
9451 fputc (')', file);
9452 }
9453 else if (GET_CODE (x) == LO_SUM)
9454 {
9455 sparc_print_operand (file, XEXP (x, 0), 0);
9456 if (TARGET_CM_MEDMID)
9457 fputs ("+%l44(", file);
9458 else
9459 fputs ("+%lo(", file);
9460 output_addr_const (file, XEXP (x, 1));
9461 fputc (')', file);
9462 }
9463 else if (GET_CODE (x) == CONST_DOUBLE)
9464 output_operand_lossage ("floating-point constant not a valid immediate operand");
9465 else
9466 output_addr_const (file, x);
9467 }
9468
9469 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9470
9471 static void
9472 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9473 {
9474 register rtx base, index = 0;
9475 int offset = 0;
9476 register rtx addr = x;
9477
9478 if (REG_P (addr))
9479 fputs (reg_names[REGNO (addr)], file);
9480 else if (GET_CODE (addr) == PLUS)
9481 {
9482 if (CONST_INT_P (XEXP (addr, 0)))
9483 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9484 else if (CONST_INT_P (XEXP (addr, 1)))
9485 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9486 else
9487 base = XEXP (addr, 0), index = XEXP (addr, 1);
9488 if (GET_CODE (base) == LO_SUM)
9489 {
9490 gcc_assert (USE_AS_OFFSETABLE_LO10
9491 && TARGET_ARCH64
9492 && ! TARGET_CM_MEDMID);
9493 output_operand (XEXP (base, 0), 0);
9494 fputs ("+%lo(", file);
9495 output_address (VOIDmode, XEXP (base, 1));
9496 fprintf (file, ")+%d", offset);
9497 }
9498 else
9499 {
9500 fputs (reg_names[REGNO (base)], file);
9501 if (index == 0)
9502 fprintf (file, "%+d", offset);
9503 else if (REG_P (index))
9504 fprintf (file, "+%s", reg_names[REGNO (index)]);
9505 else if (GET_CODE (index) == SYMBOL_REF
9506 || GET_CODE (index) == LABEL_REF
9507 || GET_CODE (index) == CONST)
9508 fputc ('+', file), output_addr_const (file, index);
9509 else gcc_unreachable ();
9510 }
9511 }
9512 else if (GET_CODE (addr) == MINUS
9513 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9514 {
9515 output_addr_const (file, XEXP (addr, 0));
9516 fputs ("-(", file);
9517 output_addr_const (file, XEXP (addr, 1));
9518 fputs ("-.)", file);
9519 }
9520 else if (GET_CODE (addr) == LO_SUM)
9521 {
9522 output_operand (XEXP (addr, 0), 0);
9523 if (TARGET_CM_MEDMID)
9524 fputs ("+%l44(", file);
9525 else
9526 fputs ("+%lo(", file);
9527 output_address (VOIDmode, XEXP (addr, 1));
9528 fputc (')', file);
9529 }
9530 else if (flag_pic
9531 && GET_CODE (addr) == CONST
9532 && GET_CODE (XEXP (addr, 0)) == MINUS
9533 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9534 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9535 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9536 {
9537 addr = XEXP (addr, 0);
9538 output_addr_const (file, XEXP (addr, 0));
9539 /* Group the args of the second CONST in parenthesis. */
9540 fputs ("-(", file);
9541 /* Skip past the second CONST--it does nothing for us. */
9542 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9543 /* Close the parenthesis. */
9544 fputc (')', file);
9545 }
9546 else
9547 {
9548 output_addr_const (file, addr);
9549 }
9550 }
9551 \f
9552 /* Target hook for assembling integer objects. The sparc version has
9553 special handling for aligned DI-mode objects. */
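/* Concretely, an aligned 8-byte symbolic constant is emitted with a single
   ".xword" directive on V9 targets, and as two 4-byte words (a zero word
   followed by the value) otherwise.  */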
9554
9555 static bool
9556 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9557 {
9558 /* ??? We only output .xword's for symbols and only then in environments
9559 where the assembler can handle them. */
9560 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9561 {
9562 if (TARGET_V9)
9563 {
9564 assemble_integer_with_op ("\t.xword\t", x);
9565 return true;
9566 }
9567 else
9568 {
9569 assemble_aligned_integer (4, const0_rtx);
9570 assemble_aligned_integer (4, x);
9571 return true;
9572 }
9573 }
9574 return default_assemble_integer (x, size, aligned_p);
9575 }
9576 \f
9577 /* Return the value of a code used in the .proc pseudo-op that says
9578 what kind of result this function returns. For non-C types, we pick
9579 the closest C type. */
9580
9581 #ifndef SHORT_TYPE_SIZE
9582 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9583 #endif
9584
9585 #ifndef INT_TYPE_SIZE
9586 #define INT_TYPE_SIZE BITS_PER_WORD
9587 #endif
9588
9589 #ifndef LONG_TYPE_SIZE
9590 #define LONG_TYPE_SIZE BITS_PER_WORD
9591 #endif
9592
9593 #ifndef LONG_LONG_TYPE_SIZE
9594 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9595 #endif
9596
9597 #ifndef FLOAT_TYPE_SIZE
9598 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9599 #endif
9600
9601 #ifndef DOUBLE_TYPE_SIZE
9602 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9603 #endif
9604
9605 #ifndef LONG_DOUBLE_TYPE_SIZE
9606 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9607 #endif
9608
9609 unsigned long
9610 sparc_type_code (register tree type)
9611 {
9612 register unsigned long qualifiers = 0;
9613 register unsigned shift;
9614
9615 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9616 setting more, since some assemblers will give an error for this. Also,
9617 we must be careful to avoid shifts of 32 bits or more to avoid getting
9618 unpredictable results. */
9619
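/* As a worked illustration (not from the original commentary): for a
   parameter of type "unsigned int **", the loop below peels two
   POINTER_TYPEs, ORing in (1 << 6) and (1 << 8), and then hits the
   unsigned INTEGER_TYPE case, returning qualifiers | 14 == 0x14e.  */
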
9620 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9621 {
9622 switch (TREE_CODE (type))
9623 {
9624 case ERROR_MARK:
9625 return qualifiers;
9626
9627 case ARRAY_TYPE:
9628 qualifiers |= (3 << shift);
9629 break;
9630
9631 case FUNCTION_TYPE:
9632 case METHOD_TYPE:
9633 qualifiers |= (2 << shift);
9634 break;
9635
9636 case POINTER_TYPE:
9637 case REFERENCE_TYPE:
9638 case OFFSET_TYPE:
9639 qualifiers |= (1 << shift);
9640 break;
9641
9642 case RECORD_TYPE:
9643 return (qualifiers | 8);
9644
9645 case UNION_TYPE:
9646 case QUAL_UNION_TYPE:
9647 return (qualifiers | 9);
9648
9649 case ENUMERAL_TYPE:
9650 return (qualifiers | 10);
9651
9652 case VOID_TYPE:
9653 return (qualifiers | 16);
9654
9655 case INTEGER_TYPE:
9656 /* If this is a range type, consider it to be the underlying
9657 type. */
9658 if (TREE_TYPE (type) != 0)
9659 break;
9660
9661 /* Carefully distinguish all the standard types of C,
9662 without messing up if the language is not C. We do this by
9663 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9664 look at both the names and the above fields, but that's redundant.
9665 Any type whose size is between two C types will be considered
9666 to be the wider of the two types. Also, we do not have a
9667 special code to use for "long long", so anything wider than
9668 long is treated the same. Note that we can't distinguish
9669 between "int" and "long" in this code if they are the same
9670 size, but that's fine, since neither can the assembler. */
9671
9672 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9673 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9674
9675 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9676 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9677
9678 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9679 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9680
9681 else
9682 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9683
9684 case REAL_TYPE:
9685 /* If this is a range type, consider it to be the underlying
9686 type. */
9687 if (TREE_TYPE (type) != 0)
9688 break;
9689
9690 /* Carefully distinguish all the standard types of C,
9691 without messing up if the language is not C. */
9692
9693 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9694 return (qualifiers | 6);
9695
9696 else
9697 return (qualifiers | 7);
9698
9699 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9700 /* ??? We need to distinguish between double and float complex types,
9701 but I don't know how yet because I can't reach this code from
9702 existing front-ends. */
9703 return (qualifiers | 7); /* Who knows? */
9704
9705 case VECTOR_TYPE:
9706 case BOOLEAN_TYPE: /* Boolean truth value type. */
9707 case LANG_TYPE:
9708 case NULLPTR_TYPE:
9709 return qualifiers;
9710
9711 default:
9712 gcc_unreachable (); /* Not a type! */
9713 }
9714 }
9715
9716 return qualifiers;
9717 }
9718 \f
9719 /* Nested function support. */
9720
9721 /* Emit RTL insns to initialize the variable parts of a trampoline.
9722 FNADDR is an RTX for the address of the function's pure code.
9723 CXT is an RTX for the static chain value for the function.
9724
9725 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9726 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9727 (to store insns). This is a bit excessive. Perhaps a different
9728 mechanism would be better here.
9729
9730 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9731
9732 static void
9733 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9734 {
9735 /* SPARC 32-bit trampoline:
9736
9737 sethi %hi(fn), %g1
9738 sethi %hi(static), %g2
9739 jmp %g1+%lo(fn)
9740 or %g2, %lo(static), %g2
9741
9742 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9743 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9744 */
9745
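/* The opcode constants used below are the instruction templates above with
   their immediate fields cleared: 0x03000000 is "sethi 0, %g1", 0x05000000
   is "sethi 0, %g2", 0x81c06000 is "jmp %g1+0" and 0x8410a000 is
   "or %g2, 0, %g2".  The shift/mask/ior arithmetic merges the pieces of
   FNADDR and CXT into those immediate fields.  */
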
9746 emit_move_insn
9747 (adjust_address (m_tramp, SImode, 0),
9748 expand_binop (SImode, ior_optab,
9749 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9750 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9751 NULL_RTX, 1, OPTAB_DIRECT));
9752
9753 emit_move_insn
9754 (adjust_address (m_tramp, SImode, 4),
9755 expand_binop (SImode, ior_optab,
9756 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9757 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9758 NULL_RTX, 1, OPTAB_DIRECT));
9759
9760 emit_move_insn
9761 (adjust_address (m_tramp, SImode, 8),
9762 expand_binop (SImode, ior_optab,
9763 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9764 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9765 NULL_RTX, 1, OPTAB_DIRECT));
9766
9767 emit_move_insn
9768 (adjust_address (m_tramp, SImode, 12),
9769 expand_binop (SImode, ior_optab,
9770 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9771 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9772 NULL_RTX, 1, OPTAB_DIRECT));
9773
9774 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9775 aligned on a 16 byte boundary so one flush clears it all. */
9776 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9777 if (sparc_cpu != PROCESSOR_ULTRASPARC
9778 && sparc_cpu != PROCESSOR_ULTRASPARC3
9779 && sparc_cpu != PROCESSOR_NIAGARA
9780 && sparc_cpu != PROCESSOR_NIAGARA2
9781 && sparc_cpu != PROCESSOR_NIAGARA3
9782 && sparc_cpu != PROCESSOR_NIAGARA4
9783 && sparc_cpu != PROCESSOR_NIAGARA7
9784 && sparc_cpu != PROCESSOR_M8)
9785 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9786
9787 /* Call __enable_execute_stack after writing onto the stack to make sure
9788 the stack address is accessible. */
9789 #ifdef HAVE_ENABLE_EXECUTE_STACK
9790 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9791 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9792 #endif
9793
9794 }
9795
9796 /* The 64-bit version is simpler because it makes more sense to load the
9797 values as "immediate" data out of the trampoline. It's also easier since
9798 we can read the PC without clobbering a register. */
9799
9800 static void
9801 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9802 {
9803 /* SPARC 64-bit trampoline:
9804
9805 rd %pc, %g1
9806 ldx [%g1+24], %g5
9807 jmp %g5
9808 ldx [%g1+16], %g5
9809 +16 bytes data
9810 */
9811
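/* The four words stored below are the fixed instructions shown above:
   0x83414000 is "rd %pc, %g1", 0xca586018 is "ldx [%g1+24], %g5",
   0x81c14000 is "jmp %g5" and 0xca586010 is "ldx [%g1+16], %g5".  The
   static chain and the function address follow as literal data at
   offsets 16 and 24.  */
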
9812 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9813 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9814 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9815 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9816 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9817 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9818 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9819 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9820 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9821 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9822 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9823
9824 if (sparc_cpu != PROCESSOR_ULTRASPARC
9825 && sparc_cpu != PROCESSOR_ULTRASPARC3
9826 && sparc_cpu != PROCESSOR_NIAGARA
9827 && sparc_cpu != PROCESSOR_NIAGARA2
9828 && sparc_cpu != PROCESSOR_NIAGARA3
9829 && sparc_cpu != PROCESSOR_NIAGARA4
9830 && sparc_cpu != PROCESSOR_NIAGARA7
9831 && sparc_cpu != PROCESSOR_M8)
9832 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9833
9834 /* Call __enable_execute_stack after writing onto the stack to make sure
9835 the stack address is accessible. */
9836 #ifdef HAVE_ENABLE_EXECUTE_STACK
9837 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9838 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9839 #endif
9840 }
9841
9842 /* Worker for TARGET_TRAMPOLINE_INIT. */
9843
9844 static void
9845 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9846 {
9847 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9848 cxt = force_reg (Pmode, cxt);
9849 if (TARGET_ARCH64)
9850 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9851 else
9852 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9853 }
9854 \f
9855 /* Adjust the cost of a scheduling dependency. Return the new cost of
9856 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9857
9858 static int
9859 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9860 int cost)
9861 {
9862 enum attr_type insn_type;
9863
9864 if (recog_memoized (insn) < 0)
9865 return cost;
9866
9867 insn_type = get_attr_type (insn);
9868
9869 if (dep_type == 0)
9870 {
9871 /* Data dependency; DEP_INSN writes a register that INSN reads some
9872 cycles later. */
9873
9874 /* If a load, then the dependence must be on the memory address;
9875 add an extra "cycle". Note that the cost could be two cycles
9876 if the reg was written late in an instruction group; we cannot tell
9877 here. */
9878 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9879 return cost + 3;
9880
9881 /* Get the delay only if the address of the store is the dependence. */
9882 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9883 {
9884 rtx pat = PATTERN (insn);
9885 rtx dep_pat = PATTERN (dep_insn);
9886
9887 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9888 return cost; /* This should not happen! */
9889
9890 /* The dependency between the two instructions was on the data that
9891 is being stored. Assume that this implies that the address of the
9892 store is not dependent. */
9893 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9894 return cost;
9895
9896 return cost + 3; /* An approximation. */
9897 }
9898
9899 /* A shift instruction cannot receive its data from an instruction
9900 in the same cycle; add a one cycle penalty. */
9901 if (insn_type == TYPE_SHIFT)
9902 return cost + 3; /* Split before cascade into shift. */
9903 }
9904 else
9905 {
9906 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9907 INSN writes some cycles later. */
9908
9909 /* These are only significant for the fpu unit; writing a fp reg before
9910 the fpu has finished with it stalls the processor. */
9911
9912 /* Reusing an integer register causes no problems. */
9913 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9914 return 0;
9915 }
9916
9917 return cost;
9918 }
9919
9920 static int
9921 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9922 int cost)
9923 {
9924 enum attr_type insn_type, dep_type;
9925 rtx pat = PATTERN (insn);
9926 rtx dep_pat = PATTERN (dep_insn);
9927
9928 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9929 return cost;
9930
9931 insn_type = get_attr_type (insn);
9932 dep_type = get_attr_type (dep_insn);
9933
9934 switch (dtype)
9935 {
9936 case 0:
9937 /* Data dependency; DEP_INSN writes a register that INSN reads some
9938 cycles later. */
9939
9940 switch (insn_type)
9941 {
9942 case TYPE_STORE:
9943 case TYPE_FPSTORE:
9944 /* Get the delay iff the address of the store is the dependence. */
9945 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9946 return cost;
9947
9948 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9949 return cost;
9950 return cost + 3;
9951
9952 case TYPE_LOAD:
9953 case TYPE_SLOAD:
9954 case TYPE_FPLOAD:
9955 /* If a load, then the dependence must be on the memory address. If
9956 the addresses aren't equal, then it might be a false dependency */
9957 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9958 {
9959 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9960 || GET_CODE (SET_DEST (dep_pat)) != MEM
9961 || GET_CODE (SET_SRC (pat)) != MEM
9962 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9963 XEXP (SET_SRC (pat), 0)))
9964 return cost + 2;
9965
9966 return cost + 8;
9967 }
9968 break;
9969
9970 case TYPE_BRANCH:
9971 /* Compare to branch latency is 0. There is no benefit from
9972 separating compare and branch. */
9973 if (dep_type == TYPE_COMPARE)
9974 return 0;
9975 /* Floating point compare to branch latency is less than
9976 compare to conditional move. */
9977 if (dep_type == TYPE_FPCMP)
9978 return cost - 1;
9979 break;
9980 default:
9981 break;
9982 }
9983 break;
9984
9985 case REG_DEP_ANTI:
9986 /* Anti-dependencies only penalize the fpu unit. */
9987 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9988 return 0;
9989 break;
9990
9991 default:
9992 break;
9993 }
9994
9995 return cost;
9996 }
9997
9998 static int
9999 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10000 unsigned int)
10001 {
10002 switch (sparc_cpu)
10003 {
10004 case PROCESSOR_SUPERSPARC:
10005 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10006 break;
10007 case PROCESSOR_HYPERSPARC:
10008 case PROCESSOR_SPARCLITE86X:
10009 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10010 break;
10011 default:
10012 break;
10013 }
10014 return cost;
10015 }
10016
10017 static void
10018 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10019 int sched_verbose ATTRIBUTE_UNUSED,
10020 int max_ready ATTRIBUTE_UNUSED)
10021 {}
10022
10023 static int
10024 sparc_use_sched_lookahead (void)
10025 {
10026 if (sparc_cpu == PROCESSOR_NIAGARA
10027 || sparc_cpu == PROCESSOR_NIAGARA2
10028 || sparc_cpu == PROCESSOR_NIAGARA3)
10029 return 0;
10030 if (sparc_cpu == PROCESSOR_NIAGARA4
10031 || sparc_cpu == PROCESSOR_NIAGARA7
10032 || sparc_cpu == PROCESSOR_M8)
10033 return 2;
10034 if (sparc_cpu == PROCESSOR_ULTRASPARC
10035 || sparc_cpu == PROCESSOR_ULTRASPARC3)
10036 return 4;
10037 if ((1 << sparc_cpu) &
10038 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
10039 (1 << PROCESSOR_SPARCLITE86X)))
10040 return 3;
10041 return 0;
10042 }
10043
10044 static int
10045 sparc_issue_rate (void)
10046 {
10047 switch (sparc_cpu)
10048 {
10049 case PROCESSOR_NIAGARA:
10050 case PROCESSOR_NIAGARA2:
10051 case PROCESSOR_NIAGARA3:
10052 default:
10053 return 1;
10054 case PROCESSOR_NIAGARA4:
10055 case PROCESSOR_NIAGARA7:
10056 case PROCESSOR_V9:
10057 /* Assume V9 processors are capable of at least dual-issue. */
10058 return 2;
10059 case PROCESSOR_SUPERSPARC:
10060 return 3;
10061 case PROCESSOR_HYPERSPARC:
10062 case PROCESSOR_SPARCLITE86X:
10063 return 2;
10064 case PROCESSOR_ULTRASPARC:
10065 case PROCESSOR_ULTRASPARC3:
10066 case PROCESSOR_M8:
10067 return 4;
10068 }
10069 }
10070
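/* Subroutine of sparc_check_64 below.  Return 1 if the single SET in INSN
   is known to leave the high 32 bits of its destination zero, -1 if it
   sign-extends a 32-bit value, and 0 if nothing can be deduced.  */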
10071 static int
10072 set_extends (rtx_insn *insn)
10073 {
10074 register rtx pat = PATTERN (insn);
10075
10076 switch (GET_CODE (SET_SRC (pat)))
10077 {
10078 /* Load and some shift instructions zero extend. */
10079 case MEM:
10080 case ZERO_EXTEND:
10081 /* sethi clears the high bits */
10082 case HIGH:
10083 /* LO_SUM is used with sethi. sethi cleared the high
10084 bits and the values used with lo_sum are positive */
10085 case LO_SUM:
10086 /* Store flag stores 0 or 1 */
10087 case LT: case LTU:
10088 case GT: case GTU:
10089 case LE: case LEU:
10090 case GE: case GEU:
10091 case EQ:
10092 case NE:
10093 return 1;
10094 case AND:
10095 {
10096 rtx op0 = XEXP (SET_SRC (pat), 0);
10097 rtx op1 = XEXP (SET_SRC (pat), 1);
10098 if (GET_CODE (op1) == CONST_INT)
10099 return INTVAL (op1) >= 0;
10100 if (GET_CODE (op0) != REG)
10101 return 0;
10102 if (sparc_check_64 (op0, insn) == 1)
10103 return 1;
10104 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10105 }
10106 case IOR:
10107 case XOR:
10108 {
10109 rtx op0 = XEXP (SET_SRC (pat), 0);
10110 rtx op1 = XEXP (SET_SRC (pat), 1);
10111 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10112 return 0;
10113 if (GET_CODE (op1) == CONST_INT)
10114 return INTVAL (op1) >= 0;
10115 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10116 }
10117 case LSHIFTRT:
10118 return GET_MODE (SET_SRC (pat)) == SImode;
10119 /* Positive integers leave the high bits zero. */
10120 case CONST_INT:
10121 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10122 case ASHIFTRT:
10123 case SIGN_EXTEND:
10124 return - (GET_MODE (SET_SRC (pat)) == SImode);
10125 case REG:
10126 return sparc_check_64 (SET_SRC (pat), insn);
10127 default:
10128 return 0;
10129 }
10130 }
10131
10132 /* We _ought_ to have only one kind per function, but... */
10133 static GTY(()) rtx sparc_addr_diff_list;
10134 static GTY(()) rtx sparc_addr_list;
10135
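/* Record the jump table VEC attached to case label LAB so that it can be
   emitted later by sparc_output_deferred_case_vectors; DIFF is nonzero
   for an ADDR_DIFF_VEC table.  */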
10136 void
10137 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10138 {
10139 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10140 if (diff)
10141 sparc_addr_diff_list
10142 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10143 else
10144 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10145 }
10146
10147 static void
10148 sparc_output_addr_vec (rtx vec)
10149 {
10150 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10151 int idx, vlen = XVECLEN (body, 0);
10152
10153 #ifdef ASM_OUTPUT_ADDR_VEC_START
10154 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10155 #endif
10156
10157 #ifdef ASM_OUTPUT_CASE_LABEL
10158 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10159 NEXT_INSN (lab));
10160 #else
10161 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10162 #endif
10163
10164 for (idx = 0; idx < vlen; idx++)
10165 {
10166 ASM_OUTPUT_ADDR_VEC_ELT
10167 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10168 }
10169
10170 #ifdef ASM_OUTPUT_ADDR_VEC_END
10171 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10172 #endif
10173 }
10174
10175 static void
10176 sparc_output_addr_diff_vec (rtx vec)
10177 {
10178 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10179 rtx base = XEXP (XEXP (body, 0), 0);
10180 int idx, vlen = XVECLEN (body, 1);
10181
10182 #ifdef ASM_OUTPUT_ADDR_VEC_START
10183 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10184 #endif
10185
10186 #ifdef ASM_OUTPUT_CASE_LABEL
10187 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10188 NEXT_INSN (lab));
10189 #else
10190 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10191 #endif
10192
10193 for (idx = 0; idx < vlen; idx++)
10194 {
10195 ASM_OUTPUT_ADDR_DIFF_ELT
10196 (asm_out_file,
10197 body,
10198 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10199 CODE_LABEL_NUMBER (base));
10200 }
10201
10202 #ifdef ASM_OUTPUT_ADDR_VEC_END
10203 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10204 #endif
10205 }
10206
10207 static void
10208 sparc_output_deferred_case_vectors (void)
10209 {
10210 rtx t;
10211 int align;
10212
10213 if (sparc_addr_list == NULL_RTX
10214 && sparc_addr_diff_list == NULL_RTX)
10215 return;
10216
10217 /* Align to cache line in the function's code section. */
10218 switch_to_section (current_function_section ());
10219
10220 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10221 if (align > 0)
10222 ASM_OUTPUT_ALIGN (asm_out_file, align);
10223
10224 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10225 sparc_output_addr_vec (XEXP (t, 0));
10226 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10227 sparc_output_addr_diff_vec (XEXP (t, 0));
10228
10229 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10230 }
10231
10232 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10233 unknown. Return 1 if the high bits are zero, -1 if the register is
10234 sign extended. */
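/* For instance, output_v8plus_shift below uses this to decide whether the
   low word of a 64-bit operand still needs an explicit "srl ..., 0, ..."
   to clear its high bits before the two halves are merged.  */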
10235 int
10236 sparc_check_64 (rtx x, rtx_insn *insn)
10237 {
10238 /* If a register is set only once it is safe to ignore insns this
10239 code does not know how to handle. The loop will either recognize
10240 the single set and return the correct value or fail to recognize
10241 it and return 0. */
10242 int set_once = 0;
10243 rtx y = x;
10244
10245 gcc_assert (GET_CODE (x) == REG);
10246
10247 if (GET_MODE (x) == DImode)
10248 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10249
10250 if (flag_expensive_optimizations
10251 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10252 set_once = 1;
10253
10254 if (insn == 0)
10255 {
10256 if (set_once)
10257 insn = get_last_insn_anywhere ();
10258 else
10259 return 0;
10260 }
10261
10262 while ((insn = PREV_INSN (insn)))
10263 {
10264 switch (GET_CODE (insn))
10265 {
10266 case JUMP_INSN:
10267 case NOTE:
10268 break;
10269 case CODE_LABEL:
10270 case CALL_INSN:
10271 default:
10272 if (! set_once)
10273 return 0;
10274 break;
10275 case INSN:
10276 {
10277 rtx pat = PATTERN (insn);
10278 if (GET_CODE (pat) != SET)
10279 return 0;
10280 if (rtx_equal_p (x, SET_DEST (pat)))
10281 return set_extends (insn);
10282 if (y && rtx_equal_p (y, SET_DEST (pat)))
10283 return set_extends (insn);
10284 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10285 return 0;
10286 }
10287 }
10288 }
10289 return 0;
10290 }
10291
10292 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10293 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
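/* In outline: the two 32-bit halves of operand 1 are first merged into a
   single 64-bit value in a scratch (or the destination itself), the 64-bit
   shift is performed there, and the result is split back into high and low
   words with srlx.  */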
10294
10295 const char *
10296 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10297 {
10298 static char asm_code[60];
10299
10300 /* The scratch register is only required when the destination
10301 register is not a 64-bit global or out register. */
10302 if (which_alternative != 2)
10303 operands[3] = operands[0];
10304
10305 /* We can only shift by constants <= 63. */
10306 if (GET_CODE (operands[2]) == CONST_INT)
10307 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10308
10309 if (GET_CODE (operands[1]) == CONST_INT)
10310 {
10311 output_asm_insn ("mov\t%1, %3", operands);
10312 }
10313 else
10314 {
10315 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10316 if (sparc_check_64 (operands[1], insn) <= 0)
10317 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10318 output_asm_insn ("or\t%L1, %3, %3", operands);
10319 }
10320
10321 strcpy (asm_code, opcode);
10322
10323 if (which_alternative != 2)
10324 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10325 else
10326 return
10327 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10328 }
10329 \f
10330 /* Output rtl to increment the profiler label LABELNO
10331 for profiling a function entry. */
10332
10333 void
10334 sparc_profile_hook (int labelno)
10335 {
10336 char buf[32];
10337 rtx lab, fun;
10338
10339 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10340 if (NO_PROFILE_COUNTERS)
10341 {
10342 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10343 }
10344 else
10345 {
10346 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10347 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10348 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10349 }
10350 }
10351 \f
10352 #ifdef TARGET_SOLARIS
10353 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
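/* As an illustration, a writable TLS data section named ".tdata" would come
   out as

     .section ".tdata",#alloc,#write,#tls,#progbits

   while a code section gets #alloc,#execinstr,#progbits instead.  */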
10354
10355 static void
10356 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10357 tree decl ATTRIBUTE_UNUSED)
10358 {
10359 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10360 {
10361 solaris_elf_asm_comdat_section (name, flags, decl);
10362 return;
10363 }
10364
10365 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10366
10367 if (!(flags & SECTION_DEBUG))
10368 fputs (",#alloc", asm_out_file);
10369 if (flags & SECTION_WRITE)
10370 fputs (",#write", asm_out_file);
10371 if (flags & SECTION_TLS)
10372 fputs (",#tls", asm_out_file);
10373 if (flags & SECTION_CODE)
10374 fputs (",#execinstr", asm_out_file);
10375
10376 if (flags & SECTION_NOTYPE)
10377 ;
10378 else if (flags & SECTION_BSS)
10379 fputs (",#nobits", asm_out_file);
10380 else
10381 fputs (",#progbits", asm_out_file);
10382
10383 fputc ('\n', asm_out_file);
10384 }
10385 #endif /* TARGET_SOLARIS */
10386
10387 /* We do not allow indirect calls to be optimized into sibling calls.
10388
10389 We cannot use sibling calls when delayed branches are disabled
10390 because they will likely require the call delay slot to be filled.
10391
10392 Also, on SPARC 32-bit we cannot emit a sibling call when the
10393 current function returns a structure. This is because the "unimp
10394 after call" convention would cause the callee to return to the
10395 wrong place. The generic code already disallows cases where the
10396 function being called returns a structure.
10397
10398 It may seem strange how this last case could occur. Usually there
10399 is code after the call which jumps to epilogue code which dumps the
10400 return value into the struct return area. That ought to invalidate
10401 the sibling call, right? Well, in the C++ case we can end up passing
10402 the pointer to the struct return area to a constructor (which returns
10403 void) and then nothing else happens. Such a sibling call would look
10404 valid without the added check here.
10405
10406 VxWorks PIC PLT entries require the global pointer to be initialized
10407 on entry. We therefore can't emit sibling calls to them. */
10408 static bool
10409 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10410 {
10411 return (decl
10412 && flag_delayed_branch
10413 && (TARGET_ARCH64 || ! cfun->returns_struct)
10414 && !(TARGET_VXWORKS_RTP
10415 && flag_pic
10416 && !targetm.binds_local_p (decl)));
10417 }
10418 \f
10419 /* libfunc renaming. */
10420
10421 static void
10422 sparc_init_libfuncs (void)
10423 {
10424 if (TARGET_ARCH32)
10425 {
10426 /* Use the subroutines that Sun's library provides for integer
10427 multiply and divide. The `*' prevents an underscore from
10428 being prepended by the compiler. .umul is a little faster
10429 than .mul. */
10430 set_optab_libfunc (smul_optab, SImode, "*.umul");
10431 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10432 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10433 set_optab_libfunc (smod_optab, SImode, "*.rem");
10434 set_optab_libfunc (umod_optab, SImode, "*.urem");
10435
10436 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10437 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10438 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10439 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10440 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10441 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10442
10443 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10444 is because with soft-float, the SFmode and DFmode sqrt
10445 instructions will be absent, and the compiler will notice and
10446 try to use the TFmode sqrt instruction for calls to the
10447 builtin function sqrt, but this fails. */
10448 if (TARGET_FPU)
10449 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10450
10451 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10452 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10453 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10454 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10455 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10456 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10457
10458 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10459 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10460 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10461 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10462
10463 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10464 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10465 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10466 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10467
10468 if (DITF_CONVERSION_LIBFUNCS)
10469 {
10470 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10471 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10472 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10473 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10474 }
10475
10476 if (SUN_CONVERSION_LIBFUNCS)
10477 {
10478 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10479 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10480 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10481 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10482 }
10483 }
10484 if (TARGET_ARCH64)
10485 {
10486 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10487 do not exist in the library. Make sure the compiler does not
10488 emit calls to them by accident. (It should always use the
10489 hardware instructions.) */
10490 set_optab_libfunc (smul_optab, SImode, 0);
10491 set_optab_libfunc (sdiv_optab, SImode, 0);
10492 set_optab_libfunc (udiv_optab, SImode, 0);
10493 set_optab_libfunc (smod_optab, SImode, 0);
10494 set_optab_libfunc (umod_optab, SImode, 0);
10495
10496 if (SUN_INTEGER_MULTIPLY_64)
10497 {
10498 set_optab_libfunc (smul_optab, DImode, "__mul64");
10499 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10500 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10501 set_optab_libfunc (smod_optab, DImode, "__rem64");
10502 set_optab_libfunc (umod_optab, DImode, "__urem64");
10503 }
10504
10505 if (SUN_CONVERSION_LIBFUNCS)
10506 {
10507 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10508 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10509 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10510 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10511 }
10512 }
10513 }
10514 \f
10515 /* SPARC builtins. */
10516 enum sparc_builtins
10517 {
10518 /* FPU builtins. */
10519 SPARC_BUILTIN_LDFSR,
10520 SPARC_BUILTIN_STFSR,
10521
10522 /* VIS 1.0 builtins. */
10523 SPARC_BUILTIN_FPACK16,
10524 SPARC_BUILTIN_FPACK32,
10525 SPARC_BUILTIN_FPACKFIX,
10526 SPARC_BUILTIN_FEXPAND,
10527 SPARC_BUILTIN_FPMERGE,
10528 SPARC_BUILTIN_FMUL8X16,
10529 SPARC_BUILTIN_FMUL8X16AU,
10530 SPARC_BUILTIN_FMUL8X16AL,
10531 SPARC_BUILTIN_FMUL8SUX16,
10532 SPARC_BUILTIN_FMUL8ULX16,
10533 SPARC_BUILTIN_FMULD8SUX16,
10534 SPARC_BUILTIN_FMULD8ULX16,
10535 SPARC_BUILTIN_FALIGNDATAV4HI,
10536 SPARC_BUILTIN_FALIGNDATAV8QI,
10537 SPARC_BUILTIN_FALIGNDATAV2SI,
10538 SPARC_BUILTIN_FALIGNDATADI,
10539 SPARC_BUILTIN_WRGSR,
10540 SPARC_BUILTIN_RDGSR,
10541 SPARC_BUILTIN_ALIGNADDR,
10542 SPARC_BUILTIN_ALIGNADDRL,
10543 SPARC_BUILTIN_PDIST,
10544 SPARC_BUILTIN_EDGE8,
10545 SPARC_BUILTIN_EDGE8L,
10546 SPARC_BUILTIN_EDGE16,
10547 SPARC_BUILTIN_EDGE16L,
10548 SPARC_BUILTIN_EDGE32,
10549 SPARC_BUILTIN_EDGE32L,
10550 SPARC_BUILTIN_FCMPLE16,
10551 SPARC_BUILTIN_FCMPLE32,
10552 SPARC_BUILTIN_FCMPNE16,
10553 SPARC_BUILTIN_FCMPNE32,
10554 SPARC_BUILTIN_FCMPGT16,
10555 SPARC_BUILTIN_FCMPGT32,
10556 SPARC_BUILTIN_FCMPEQ16,
10557 SPARC_BUILTIN_FCMPEQ32,
10558 SPARC_BUILTIN_FPADD16,
10559 SPARC_BUILTIN_FPADD16S,
10560 SPARC_BUILTIN_FPADD32,
10561 SPARC_BUILTIN_FPADD32S,
10562 SPARC_BUILTIN_FPSUB16,
10563 SPARC_BUILTIN_FPSUB16S,
10564 SPARC_BUILTIN_FPSUB32,
10565 SPARC_BUILTIN_FPSUB32S,
10566 SPARC_BUILTIN_ARRAY8,
10567 SPARC_BUILTIN_ARRAY16,
10568 SPARC_BUILTIN_ARRAY32,
10569
10570 /* VIS 2.0 builtins. */
10571 SPARC_BUILTIN_EDGE8N,
10572 SPARC_BUILTIN_EDGE8LN,
10573 SPARC_BUILTIN_EDGE16N,
10574 SPARC_BUILTIN_EDGE16LN,
10575 SPARC_BUILTIN_EDGE32N,
10576 SPARC_BUILTIN_EDGE32LN,
10577 SPARC_BUILTIN_BMASK,
10578 SPARC_BUILTIN_BSHUFFLEV4HI,
10579 SPARC_BUILTIN_BSHUFFLEV8QI,
10580 SPARC_BUILTIN_BSHUFFLEV2SI,
10581 SPARC_BUILTIN_BSHUFFLEDI,
10582
10583 /* VIS 3.0 builtins. */
10584 SPARC_BUILTIN_CMASK8,
10585 SPARC_BUILTIN_CMASK16,
10586 SPARC_BUILTIN_CMASK32,
10587 SPARC_BUILTIN_FCHKSM16,
10588 SPARC_BUILTIN_FSLL16,
10589 SPARC_BUILTIN_FSLAS16,
10590 SPARC_BUILTIN_FSRL16,
10591 SPARC_BUILTIN_FSRA16,
10592 SPARC_BUILTIN_FSLL32,
10593 SPARC_BUILTIN_FSLAS32,
10594 SPARC_BUILTIN_FSRL32,
10595 SPARC_BUILTIN_FSRA32,
10596 SPARC_BUILTIN_PDISTN,
10597 SPARC_BUILTIN_FMEAN16,
10598 SPARC_BUILTIN_FPADD64,
10599 SPARC_BUILTIN_FPSUB64,
10600 SPARC_BUILTIN_FPADDS16,
10601 SPARC_BUILTIN_FPADDS16S,
10602 SPARC_BUILTIN_FPSUBS16,
10603 SPARC_BUILTIN_FPSUBS16S,
10604 SPARC_BUILTIN_FPADDS32,
10605 SPARC_BUILTIN_FPADDS32S,
10606 SPARC_BUILTIN_FPSUBS32,
10607 SPARC_BUILTIN_FPSUBS32S,
10608 SPARC_BUILTIN_FUCMPLE8,
10609 SPARC_BUILTIN_FUCMPNE8,
10610 SPARC_BUILTIN_FUCMPGT8,
10611 SPARC_BUILTIN_FUCMPEQ8,
10612 SPARC_BUILTIN_FHADDS,
10613 SPARC_BUILTIN_FHADDD,
10614 SPARC_BUILTIN_FHSUBS,
10615 SPARC_BUILTIN_FHSUBD,
10616 SPARC_BUILTIN_FNHADDS,
10617 SPARC_BUILTIN_FNHADDD,
10618 SPARC_BUILTIN_UMULXHI,
10619 SPARC_BUILTIN_XMULX,
10620 SPARC_BUILTIN_XMULXHI,
10621
10622 /* VIS 4.0 builtins. */
10623 SPARC_BUILTIN_FPADD8,
10624 SPARC_BUILTIN_FPADDS8,
10625 SPARC_BUILTIN_FPADDUS8,
10626 SPARC_BUILTIN_FPADDUS16,
10627 SPARC_BUILTIN_FPCMPLE8,
10628 SPARC_BUILTIN_FPCMPGT8,
10629 SPARC_BUILTIN_FPCMPULE16,
10630 SPARC_BUILTIN_FPCMPUGT16,
10631 SPARC_BUILTIN_FPCMPULE32,
10632 SPARC_BUILTIN_FPCMPUGT32,
10633 SPARC_BUILTIN_FPMAX8,
10634 SPARC_BUILTIN_FPMAX16,
10635 SPARC_BUILTIN_FPMAX32,
10636 SPARC_BUILTIN_FPMAXU8,
10637 SPARC_BUILTIN_FPMAXU16,
10638 SPARC_BUILTIN_FPMAXU32,
10639 SPARC_BUILTIN_FPMIN8,
10640 SPARC_BUILTIN_FPMIN16,
10641 SPARC_BUILTIN_FPMIN32,
10642 SPARC_BUILTIN_FPMINU8,
10643 SPARC_BUILTIN_FPMINU16,
10644 SPARC_BUILTIN_FPMINU32,
10645 SPARC_BUILTIN_FPSUB8,
10646 SPARC_BUILTIN_FPSUBS8,
10647 SPARC_BUILTIN_FPSUBUS8,
10648 SPARC_BUILTIN_FPSUBUS16,
10649
10650 /* VIS 4.0B builtins. */
10651
10652 /* Note that all the DICTUNPACK* entries should be kept
10653 contiguous. */
10654 SPARC_BUILTIN_FIRST_DICTUNPACK,
10655 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10656 SPARC_BUILTIN_DICTUNPACK16,
10657 SPARC_BUILTIN_DICTUNPACK32,
10658 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10659
10660 /* Note that all the FPCMP*SHL entries should be kept
10661 contiguous. */
10662 SPARC_BUILTIN_FIRST_FPCMPSHL,
10663 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10664 SPARC_BUILTIN_FPCMPGT8SHL,
10665 SPARC_BUILTIN_FPCMPEQ8SHL,
10666 SPARC_BUILTIN_FPCMPNE8SHL,
10667 SPARC_BUILTIN_FPCMPLE16SHL,
10668 SPARC_BUILTIN_FPCMPGT16SHL,
10669 SPARC_BUILTIN_FPCMPEQ16SHL,
10670 SPARC_BUILTIN_FPCMPNE16SHL,
10671 SPARC_BUILTIN_FPCMPLE32SHL,
10672 SPARC_BUILTIN_FPCMPGT32SHL,
10673 SPARC_BUILTIN_FPCMPEQ32SHL,
10674 SPARC_BUILTIN_FPCMPNE32SHL,
10675 SPARC_BUILTIN_FPCMPULE8SHL,
10676 SPARC_BUILTIN_FPCMPUGT8SHL,
10677 SPARC_BUILTIN_FPCMPULE16SHL,
10678 SPARC_BUILTIN_FPCMPUGT16SHL,
10679 SPARC_BUILTIN_FPCMPULE32SHL,
10680 SPARC_BUILTIN_FPCMPUGT32SHL,
10681 SPARC_BUILTIN_FPCMPDE8SHL,
10682 SPARC_BUILTIN_FPCMPDE16SHL,
10683 SPARC_BUILTIN_FPCMPDE32SHL,
10684 SPARC_BUILTIN_FPCMPUR8SHL,
10685 SPARC_BUILTIN_FPCMPUR16SHL,
10686 SPARC_BUILTIN_FPCMPUR32SHL,
10687 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10688
10689 SPARC_BUILTIN_MAX
10690 };
10691
10692 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10693 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10694
10695 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10696 The instruction should require a constant operand of some sort. The
10697 function prints an error if OPVAL is not valid. */
10698
10699 static int
10700 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10701 {
10702 if (GET_CODE (opval) != CONST_INT)
10703 {
10704 error ("%qs expects a constant argument", insn_data[icode].name);
10705 return false;
10706 }
10707
10708 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10709 {
10710 error ("constant argument out of range for %qs", insn_data[icode].name);
10711 return false;
10712 }
10713 return true;
10714 }
10715
10716 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10717 function decl or NULL_TREE if the builtin was not added. */
10718
10719 static tree
10720 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10721 tree type)
10722 {
10723 tree t
10724 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10725
10726 if (t)
10727 {
10728 sparc_builtins[code] = t;
10729 sparc_builtins_icode[code] = icode;
10730 }
10731
10732 return t;
10733 }
10734
10735 /* Likewise, but also marks the function as "const". */
10736
10737 static tree
10738 def_builtin_const (const char *name, enum insn_code icode,
10739 enum sparc_builtins code, tree type)
10740 {
10741 tree t = def_builtin (name, icode, code, type);
10742
10743 if (t)
10744 TREE_READONLY (t) = 1;
10745
10746 return t;
10747 }
10748
10749 /* Implement the TARGET_INIT_BUILTINS target hook.
10750 Create builtin functions for special SPARC instructions. */
10751
10752 static void
10753 sparc_init_builtins (void)
10754 {
10755 if (TARGET_FPU)
10756 sparc_fpu_init_builtins ();
10757
10758 if (TARGET_VIS)
10759 sparc_vis_init_builtins ();
10760 }
10761
10762 /* Create builtin functions for FPU instructions. */
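/* Both builtins take a pointer to an unsigned int.  As an illustrative
   note (not in the original sources): "__builtin_store_fsr (&w)" stores
   the current %fsr contents into w via stfsr, and "__builtin_load_fsr (&w)"
   reloads %fsr from w via ldfsr.  */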
10763
10764 static void
10765 sparc_fpu_init_builtins (void)
10766 {
10767 tree ftype
10768 = build_function_type_list (void_type_node,
10769 build_pointer_type (unsigned_type_node), 0);
10770 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10771 SPARC_BUILTIN_LDFSR, ftype);
10772 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10773 SPARC_BUILTIN_STFSR, ftype);
10774 }
10775
10776 /* Create builtin functions for VIS instructions. */
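/* As an illustrative example (not part of the original sources), the
   partitioned-add builtin declared below can be used from C as

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi sum (v4hi a, v4hi b) { return __builtin_vis_fpadd16 (a, b); }

   which normally expands to a single fpadd16 instruction when VIS is
   enabled.  */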
10777
10778 static void
10779 sparc_vis_init_builtins (void)
10780 {
10781 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10782 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10783 tree v4hi = build_vector_type (intHI_type_node, 4);
10784 tree v2hi = build_vector_type (intHI_type_node, 2);
10785 tree v2si = build_vector_type (intSI_type_node, 2);
10786 tree v1si = build_vector_type (intSI_type_node, 1);
10787
10788 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10789 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10790 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10791 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10792 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10793 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10794 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10795 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10796 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10797 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10798 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10799 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10800 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10801 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10802 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10803 v8qi, v8qi,
10804 intDI_type_node, 0);
10805 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10806 v8qi, v8qi, 0);
10807 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10808 v8qi, v8qi, 0);
10809 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10810 intSI_type_node, 0);
10811 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10812 intSI_type_node, 0);
10813 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10814 intDI_type_node, 0);
10815 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10816 intDI_type_node,
10817 intDI_type_node, 0);
10818 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10819 intSI_type_node,
10820 intSI_type_node, 0);
10821 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10822 ptr_type_node,
10823 intSI_type_node, 0);
10824 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10825 ptr_type_node,
10826 intDI_type_node, 0);
10827 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10828 ptr_type_node,
10829 ptr_type_node, 0);
10830 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10831 ptr_type_node,
10832 ptr_type_node, 0);
10833 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10834 v4hi, v4hi, 0);
10835 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10836 v2si, v2si, 0);
10837 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10838 v4hi, v4hi, 0);
10839 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10840 v2si, v2si, 0);
10841 tree void_ftype_di = build_function_type_list (void_type_node,
10842 intDI_type_node, 0);
10843 tree di_ftype_void = build_function_type_list (intDI_type_node,
10844 void_type_node, 0);
10845 tree void_ftype_si = build_function_type_list (void_type_node,
10846 intSI_type_node, 0);
10847 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10848 float_type_node,
10849 float_type_node, 0);
10850 tree df_ftype_df_df = build_function_type_list (double_type_node,
10851 double_type_node,
10852 double_type_node, 0);
10853
10854 /* Packing and expanding vectors. */
10855 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10856 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10857 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10858 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10859 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10860 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10861 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10862 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10863 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10864 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10865
10866 /* Multiplications. */
10867 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10868 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10869 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10870 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10871 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10872 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10873 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10874 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10875 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10876 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10877 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10878 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10879 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10880 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10881
10882 /* Data aligning. */
10883 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10884 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10885 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10886 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10887 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10888 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10889 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10890 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10891
10892 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10893 SPARC_BUILTIN_WRGSR, void_ftype_di);
10894 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10895 SPARC_BUILTIN_RDGSR, di_ftype_void);
10896
10897 if (TARGET_ARCH64)
10898 {
10899 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10900 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10901 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10902 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10903 }
10904 else
10905 {
10906 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10907 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10908 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10909 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10910 }
10911
10912 /* Pixel distance. */
10913 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10914 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10915
10916 /* Edge handling. */
10917 if (TARGET_ARCH64)
10918 {
10919 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10920 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10921 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10922 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10923 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10924 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10925 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10926 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10927 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10928 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10929 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10930 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10931 }
10932 else
10933 {
10934 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10935 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10936 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10937 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10938 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10939 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10940 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10941 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10942 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10943 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10944 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10945 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10946 }
10947
10948 /* Pixel compare. */
10949 if (TARGET_ARCH64)
10950 {
10951 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10952 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10953 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10954 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10955 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10956 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10957 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10958 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10959 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10960 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10961 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10962 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10963 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10964 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10965 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10966 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10967 }
10968 else
10969 {
10970 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10971 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10972 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10973 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10974 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10975 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10976 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10977 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10978 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10979 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10980 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10981 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10982 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10983 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10984 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10985 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10986 }
10987
10988 /* Addition and subtraction. */
10989 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10990 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10991 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10992 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10993 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10994 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10995 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10996 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10997 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10998 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10999 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11000 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11001 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11002 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11003 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11004 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11005
11006 /* Three-dimensional array addressing. */
11007 if (TARGET_ARCH64)
11008 {
11009 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11010 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11011 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11012 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11013 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11014 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11015 }
11016 else
11017 {
11018 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11019 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11020 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11021 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11022 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11023 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11024 }
11025
11026 if (TARGET_VIS2)
11027 {
11028 /* Edge handling. */
11029 if (TARGET_ARCH64)
11030 {
11031 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11032 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11033 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11034 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11035 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11036 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11037 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11038 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11039 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11040 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11041 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11042 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11043 }
11044 else
11045 {
11046 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11047 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11048 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11049 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11050 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11051 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11052 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11053 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11054 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11055 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11056 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11057 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11058 }
11059
11060 /* Byte mask and shuffle. */
11061 if (TARGET_ARCH64)
11062 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11063 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11064 else
11065 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11066 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11067 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11068 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11069 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11070 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11071 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11072 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11073 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11074 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11075 }
11076
11077 if (TARGET_VIS3)
11078 {
11079 if (TARGET_ARCH64)
11080 {
11081 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11082 SPARC_BUILTIN_CMASK8, void_ftype_di);
11083 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11084 SPARC_BUILTIN_CMASK16, void_ftype_di);
11085 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11086 SPARC_BUILTIN_CMASK32, void_ftype_di);
11087 }
11088 else
11089 {
11090 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11091 SPARC_BUILTIN_CMASK8, void_ftype_si);
11092 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11093 SPARC_BUILTIN_CMASK16, void_ftype_si);
11094 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11095 SPARC_BUILTIN_CMASK32, void_ftype_si);
11096 }
11097
11098 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11099 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11100
11101 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11102 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11103 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11104 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11105 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11106 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11107 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11108 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11109 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11110 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11111 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11112 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11113 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11114 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11115 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11116 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11117
11118 if (TARGET_ARCH64)
11119 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11120 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11121 else
11122 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11123 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11124
11125 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11126 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11127 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11128 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11129 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11130 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11131
11132 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11133 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11134 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11135 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11136 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11137 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11138 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11139 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11140 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11141 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11142 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11143 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11144 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11145 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11146 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11147 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11148
11149 if (TARGET_ARCH64)
11150 {
11151 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11152 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11153 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11154 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11155 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11156 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11157 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11158 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11159 }
11160 else
11161 {
11162 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11163 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11164 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11165 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11166 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11167 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11168 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11169 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11170 }
11171
11172 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11173 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11174 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11175 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11176 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11177 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11178 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11179 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11180 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11181 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11182 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11183 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11184
11185 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11186 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11187 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11188 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11189 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11190 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11191 }
11192
11193 if (TARGET_VIS4)
11194 {
11195 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11196 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11197 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11198 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11199 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11200 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11201 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11202 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11203
11204
11205 if (TARGET_ARCH64)
11206 {
11207 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11208 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11209 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11210 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11211 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11212 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11213 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11214 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11215 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11216 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11217 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11218 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11219 }
11220 else
11221 {
11222 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11223 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11224 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11225 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11226 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11227 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11228 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11229 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11230 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11231 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11232 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11233 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11234 }
11235
11236 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11237 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11238 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11239 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11240 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11241 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11242 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11243 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11244 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11245 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11246 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11247 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11248 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11249 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11250 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11251 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11252 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11253 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11254 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11255 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11256 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11257 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11258 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11259 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11260 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11261 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11262 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11263 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11264 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11265 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11266 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11267 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11268 }
11269
11270 if (TARGET_VIS4B)
11271 {
11272 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11273 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11274 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11275 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11276 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11277 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11278
11279 if (TARGET_ARCH64)
11280 {
11281 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11282 v8qi, v8qi,
11283 intSI_type_node, 0);
11284 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11285 v4hi, v4hi,
11286 intSI_type_node, 0);
11287 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11288 v2si, v2si,
11289 intSI_type_node, 0);
11290
11291 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11292 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11293 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11294 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11295 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11296 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11297 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11298 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11299
11300 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11301 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11302 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11303 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11304 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11305 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11306 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11307 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11308
11309 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11310 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11311 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11312 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11313 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11314 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11315 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11316 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11317
11318
11319 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11320 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11321 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11322 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11323
11324 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11325 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11326 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11327 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11328
11329 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11330 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11331 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11332 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11333
11334 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11335 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11336 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11337 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11338 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11339 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11340
11341 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11342 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11343 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11344 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11345 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11346 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11347
11348 }
11349 else
11350 {
11351 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11352 v8qi, v8qi,
11353 intSI_type_node, 0);
11354 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11355 v4hi, v4hi,
11356 intSI_type_node, 0);
11357 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11358 v2si, v2si,
11359 intSI_type_node, 0);
11360
11361 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11362 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11363 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11364 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11365 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11366 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11367 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11368 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11369
11370 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11371 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11372 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11373 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11374 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11375 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11376 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11377 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11378
11379 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11380 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11381 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11382 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11383 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11384 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11385 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11386 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11387
11388
11389 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11390 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11391 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11392 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11393
11394 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11395 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11396 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11397 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11398
11399 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11400 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11401 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11402 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11403
11404 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11405 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11406 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11407 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11408 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11409 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11410
11411 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11412 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11413 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11414 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11415 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11416 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11417 }
11418 }
11419 }
11420
11421 /* Implement TARGET_BUILTIN_DECL hook. */
11422
11423 static tree
11424 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11425 {
11426 if (code >= SPARC_BUILTIN_MAX)
11427 return error_mark_node;
11428
11429 return sparc_builtins[code];
11430 }
11431
11432 /* Implement TARGET_EXPAND_BUILTIN hook. */
11433
11434 static rtx
11435 sparc_expand_builtin (tree exp, rtx target,
11436 rtx subtarget ATTRIBUTE_UNUSED,
11437 machine_mode tmode ATTRIBUTE_UNUSED,
11438 int ignore ATTRIBUTE_UNUSED)
11439 {
11440 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11441 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11442 enum insn_code icode = sparc_builtins_icode[code];
11443 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11444 call_expr_arg_iterator iter;
11445 int arg_count = 0;
11446 rtx pat, op[4];
11447 tree arg;
11448
11449 if (nonvoid)
11450 {
11451 machine_mode tmode = insn_data[icode].operand[0].mode;
11452 if (!target
11453 || GET_MODE (target) != tmode
11454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11455 op[0] = gen_reg_rtx (tmode);
11456 else
11457 op[0] = target;
11458 }
11459
11460 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11461 {
11462 const struct insn_operand_data *insn_op;
11463 int idx;
11464
11465 if (arg == error_mark_node)
11466 return NULL_RTX;
11467
11468 arg_count++;
11469 idx = arg_count - !nonvoid;
11470 insn_op = &insn_data[icode].operand[idx];
11471 op[arg_count] = expand_normal (arg);
11472
11473 /* Some of the builtins require constant arguments. We check
11474 for this here. */
11475 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11476 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11477 && arg_count == 3)
11478 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11479 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11480 && arg_count == 2))
11481 {
11482 if (!check_constant_argument (icode, idx, op[arg_count]))
11483 return const0_rtx;
11484 }
11485
11486 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11487 {
11488 if (!address_operand (op[arg_count], SImode))
11489 {
11490 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11491 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11492 }
11493 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11494 }
11495
11496 else if (insn_op->mode == V1DImode
11497 && GET_MODE (op[arg_count]) == DImode)
11498 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11499
11500 else if (insn_op->mode == V1SImode
11501 && GET_MODE (op[arg_count]) == SImode)
11502 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11503
11504 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11505 insn_op->mode))
11506 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11507 }
11508
11509 switch (arg_count)
11510 {
11511 case 0:
11512 pat = GEN_FCN (icode) (op[0]);
11513 break;
11514 case 1:
11515 if (nonvoid)
11516 pat = GEN_FCN (icode) (op[0], op[1]);
11517 else
11518 pat = GEN_FCN (icode) (op[1]);
11519 break;
11520 case 2:
11521 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11522 break;
11523 case 3:
11524 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11525 break;
11526 default:
11527 gcc_unreachable ();
11528 }
11529
11530 if (!pat)
11531 return NULL_RTX;
11532
11533 emit_insn (pat);
11534
11535 return (nonvoid ? op[0] : const0_rtx);
11536 }
11537
11538 /* Return the upper 16 bits of the 8x16 multiplication. */
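/* For instance, with E8 = 100 and E16 = 515 the raw product is 51500;
   adding the rounding constant 128 and dividing by 256 yields 201, the
   upper 16 bits of the 24-bit product rounded to nearest.  (The operand
   values here are purely illustrative.)  */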
11539
11540 static int
11541 sparc_vis_mul8x16 (int e8, int e16)
11542 {
11543 return (e8 * e16 + 128) / 256;
11544 }
11545
11546 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11547 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11548
11549 static void
11550 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11551 tree inner_type, tree cst0, tree cst1)
11552 {
11553 unsigned i, num = VECTOR_CST_NELTS (cst0);
11554 int scale;
11555
11556 switch (fncode)
11557 {
11558 case SPARC_BUILTIN_FMUL8X16:
11559 for (i = 0; i < num; ++i)
11560 {
11561 int val
11562 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11563 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11564 n_elts->quick_push (build_int_cst (inner_type, val));
11565 }
11566 break;
11567
11568 case SPARC_BUILTIN_FMUL8X16AU:
11569 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11570
11571 for (i = 0; i < num; ++i)
11572 {
11573 int val
11574 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11575 scale);
11576 n_elts->quick_push (build_int_cst (inner_type, val));
11577 }
11578 break;
11579
11580 case SPARC_BUILTIN_FMUL8X16AL:
11581 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11582
11583 for (i = 0; i < num; ++i)
11584 {
11585 int val
11586 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11587 scale);
11588 n_elts->quick_push (build_int_cst (inner_type, val));
11589 }
11590 break;
11591
11592 default:
11593 gcc_unreachable ();
11594 }
11595 }
11596
11597 /* Implement TARGET_FOLD_BUILTIN hook.
11598
11599 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11600 result of the function call is ignored. NULL_TREE is returned if the
11601 function could not be folded. */
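/* As a concrete illustration of the folding below, a call to
   __builtin_vis_fexpand whose argument is the constant vector
   { 1, 2, 3, 4 } is replaced at compile time by the constant vector
   { 16, 32, 48, 64 }: each element is widened and shifted left by 4,
   matching the SPARC_BUILTIN_FEXPAND case.  (Element values are
   illustrative only.)  */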
11602
11603 static tree
11604 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11605 tree *args, bool ignore)
11606 {
11607 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11608 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11609 tree arg0, arg1, arg2;
11610
11611 if (ignore)
11612 switch (code)
11613 {
11614 case SPARC_BUILTIN_LDFSR:
11615 case SPARC_BUILTIN_STFSR:
11616 case SPARC_BUILTIN_ALIGNADDR:
11617 case SPARC_BUILTIN_WRGSR:
11618 case SPARC_BUILTIN_BMASK:
11619 case SPARC_BUILTIN_CMASK8:
11620 case SPARC_BUILTIN_CMASK16:
11621 case SPARC_BUILTIN_CMASK32:
11622 break;
11623
11624 default:
11625 return build_zero_cst (rtype);
11626 }
11627
11628 switch (code)
11629 {
11630 case SPARC_BUILTIN_FEXPAND:
11631 arg0 = args[0];
11632 STRIP_NOPS (arg0);
11633
11634 if (TREE_CODE (arg0) == VECTOR_CST)
11635 {
11636 tree inner_type = TREE_TYPE (rtype);
11637 unsigned i;
11638
11639 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11640 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11641 {
11642 unsigned HOST_WIDE_INT val
11643 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11644 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11645 }
11646 return build_vector (rtype, n_elts);
11647 }
11648 break;
11649
11650 case SPARC_BUILTIN_FMUL8X16:
11651 case SPARC_BUILTIN_FMUL8X16AU:
11652 case SPARC_BUILTIN_FMUL8X16AL:
11653 arg0 = args[0];
11654 arg1 = args[1];
11655 STRIP_NOPS (arg0);
11656 STRIP_NOPS (arg1);
11657
11658 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11659 {
11660 tree inner_type = TREE_TYPE (rtype);
11661 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11662 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11663 return build_vector (rtype, n_elts);
11664 }
11665 break;
11666
11667 case SPARC_BUILTIN_FPMERGE:
11668 arg0 = args[0];
11669 arg1 = args[1];
11670 STRIP_NOPS (arg0);
11671 STRIP_NOPS (arg1);
11672
11673 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11674 {
11675 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11676 unsigned i;
11677 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11678 {
11679 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11680 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11681 }
11682
11683 return build_vector (rtype, n_elts);
11684 }
11685 break;
11686
11687 case SPARC_BUILTIN_PDIST:
11688 case SPARC_BUILTIN_PDISTN:
11689 arg0 = args[0];
11690 arg1 = args[1];
11691 STRIP_NOPS (arg0);
11692 STRIP_NOPS (arg1);
11693 if (code == SPARC_BUILTIN_PDIST)
11694 {
11695 arg2 = args[2];
11696 STRIP_NOPS (arg2);
11697 }
11698 else
11699 arg2 = integer_zero_node;
11700
11701 if (TREE_CODE (arg0) == VECTOR_CST
11702 && TREE_CODE (arg1) == VECTOR_CST
11703 && TREE_CODE (arg2) == INTEGER_CST)
11704 {
11705 bool overflow = false;
11706 widest_int result = wi::to_widest (arg2);
11707 widest_int tmp;
11708 unsigned i;
11709
11710 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11711 {
11712 tree e0 = VECTOR_CST_ELT (arg0, i);
11713 tree e1 = VECTOR_CST_ELT (arg1, i);
11714
11715 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11716
11717 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11718 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11719 if (wi::neg_p (tmp))
11720 tmp = wi::neg (tmp, &neg2_ovf);
11721 else
11722 neg2_ovf = false;
11723 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11724 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11725 }
11726
11727 gcc_assert (!overflow);
11728
11729 return wide_int_to_tree (rtype, result);
11730 }
11731
11732 default:
11733 break;
11734 }
11735
11736 return NULL_TREE;
11737 }
11738 \f
11739 /* ??? This duplicates information provided to the compiler by the
11740 ??? scheduler description. Some day, teach genautomata to output
11741 ??? the latencies and then CSE will just use that. */
11742
11743 static bool
11744 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11745 int opno ATTRIBUTE_UNUSED,
11746 int *total, bool speed ATTRIBUTE_UNUSED)
11747 {
11748 int code = GET_CODE (x);
11749 bool float_mode_p = FLOAT_MODE_P (mode);
11750
11751 switch (code)
11752 {
11753 case CONST_INT:
11754 if (SMALL_INT (x))
11755 *total = 0;
11756 else
11757 *total = 2;
11758 return true;
11759
11760 case CONST_WIDE_INT:
11761 *total = 0;
11762 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11763 *total += 2;
11764 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11765 *total += 2;
11766 return true;
11767
11768 case HIGH:
11769 *total = 2;
11770 return true;
11771
11772 case CONST:
11773 case LABEL_REF:
11774 case SYMBOL_REF:
11775 *total = 4;
11776 return true;
11777
11778 case CONST_DOUBLE:
11779 *total = 8;
11780 return true;
11781
11782 case MEM:
11783 /* If outer-code was a sign or zero extension, a cost
11784 of COSTS_N_INSNS (1) was already added in. This is
11785 why we are subtracting it back out. */
11786 if (outer_code == ZERO_EXTEND)
11787 {
11788 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11789 }
11790 else if (outer_code == SIGN_EXTEND)
11791 {
11792 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11793 }
11794 else if (float_mode_p)
11795 {
11796 *total = sparc_costs->float_load;
11797 }
11798 else
11799 {
11800 *total = sparc_costs->int_load;
11801 }
11802
11803 return true;
11804
11805 case PLUS:
11806 case MINUS:
11807 if (float_mode_p)
11808 *total = sparc_costs->float_plusminus;
11809 else
11810 *total = COSTS_N_INSNS (1);
11811 return false;
11812
11813 case FMA:
11814 {
11815 rtx sub;
11816
11817 gcc_assert (float_mode_p);
11818 *total = sparc_costs->float_mul;
11819
11820 sub = XEXP (x, 0);
11821 if (GET_CODE (sub) == NEG)
11822 sub = XEXP (sub, 0);
11823 *total += rtx_cost (sub, mode, FMA, 0, speed);
11824
11825 sub = XEXP (x, 2);
11826 if (GET_CODE (sub) == NEG)
11827 sub = XEXP (sub, 0);
11828 *total += rtx_cost (sub, mode, FMA, 2, speed);
11829 return true;
11830 }
11831
11832 case MULT:
11833 if (float_mode_p)
11834 *total = sparc_costs->float_mul;
11835 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11836 *total = COSTS_N_INSNS (25);
11837 else
11838 {
11839 int bit_cost;
11840
11841 bit_cost = 0;
11842 if (sparc_costs->int_mul_bit_factor)
11843 {
11844 int nbits;
11845
11846 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11847 {
11848 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11849 for (nbits = 0; value != 0; value &= value - 1)
11850 nbits++;
11851 }
11852 else
11853 nbits = 7;
11854
11855 if (nbits < 3)
11856 nbits = 3;
11857 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11858 bit_cost = COSTS_N_INSNS (bit_cost);
11859 }
11860
11861 if (mode == DImode || !TARGET_HARD_MUL)
11862 *total = sparc_costs->int_mulX + bit_cost;
11863 else
11864 *total = sparc_costs->int_mul + bit_cost;
11865 }
11866 return false;
11867
11868 case ASHIFT:
11869 case ASHIFTRT:
11870 case LSHIFTRT:
11871 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11872 return false;
11873
11874 case DIV:
11875 case UDIV:
11876 case MOD:
11877 case UMOD:
11878 if (float_mode_p)
11879 {
11880 if (mode == DFmode)
11881 *total = sparc_costs->float_div_df;
11882 else
11883 *total = sparc_costs->float_div_sf;
11884 }
11885 else
11886 {
11887 if (mode == DImode)
11888 *total = sparc_costs->int_divX;
11889 else
11890 *total = sparc_costs->int_div;
11891 }
11892 return false;
11893
11894 case NEG:
11895 if (! float_mode_p)
11896 {
11897 *total = COSTS_N_INSNS (1);
11898 return false;
11899 }
11900 /* FALLTHRU */
11901
11902 case ABS:
11903 case FLOAT:
11904 case UNSIGNED_FLOAT:
11905 case FIX:
11906 case UNSIGNED_FIX:
11907 case FLOAT_EXTEND:
11908 case FLOAT_TRUNCATE:
11909 *total = sparc_costs->float_move;
11910 return false;
11911
11912 case SQRT:
11913 if (mode == DFmode)
11914 *total = sparc_costs->float_sqrt_df;
11915 else
11916 *total = sparc_costs->float_sqrt_sf;
11917 return false;
11918
11919 case COMPARE:
11920 if (float_mode_p)
11921 *total = sparc_costs->float_cmp;
11922 else
11923 *total = COSTS_N_INSNS (1);
11924 return false;
11925
11926 case IF_THEN_ELSE:
11927 if (float_mode_p)
11928 *total = sparc_costs->float_cmove;
11929 else
11930 *total = sparc_costs->int_cmove;
11931 return false;
11932
11933 case IOR:
11934 /* Handle the NAND vector patterns. */
11935 if (sparc_vector_mode_supported_p (mode)
11936 && GET_CODE (XEXP (x, 0)) == NOT
11937 && GET_CODE (XEXP (x, 1)) == NOT)
11938 {
11939 *total = COSTS_N_INSNS (1);
11940 return true;
11941 }
11942 else
11943 return false;
11944
11945 default:
11946 return false;
11947 }
11948 }
11949
11950 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11951
11952 static inline bool
11953 general_or_i64_p (reg_class_t rclass)
11954 {
11955 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11956 }
11957
11958 /* Implement TARGET_REGISTER_MOVE_COST. */
11959
11960 static int
11961 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11962 reg_class_t from, reg_class_t to)
11963 {
11964 bool need_memory = false;
11965
11966 /* This helps postreload CSE to eliminate redundant comparisons. */
11967 if (from == NO_REGS || to == NO_REGS)
11968 return 100;
11969
11970 if (from == FPCC_REGS || to == FPCC_REGS)
11971 need_memory = true;
11972 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11973 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11974 {
11975 if (TARGET_VIS3)
11976 {
11977 int size = GET_MODE_SIZE (mode);
11978 if (size == 8 || size == 4)
11979 {
11980 if (! TARGET_ARCH32 || size == 4)
11981 return 4;
11982 else
11983 return 6;
11984 }
11985 }
11986 need_memory = true;
11987 }
11988
11989 if (need_memory)
11990 {
11991 if (sparc_cpu == PROCESSOR_ULTRASPARC
11992 || sparc_cpu == PROCESSOR_ULTRASPARC3
11993 || sparc_cpu == PROCESSOR_NIAGARA
11994 || sparc_cpu == PROCESSOR_NIAGARA2
11995 || sparc_cpu == PROCESSOR_NIAGARA3
11996 || sparc_cpu == PROCESSOR_NIAGARA4
11997 || sparc_cpu == PROCESSOR_NIAGARA7
11998 || sparc_cpu == PROCESSOR_M8)
11999 return 12;
12000
12001 return 6;
12002 }
12003
12004 return 2;
12005 }
12006
12007 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12008 This is achieved by means of a manual dynamic stack space allocation in
12009 the current frame. We make the assumption that SEQ doesn't contain any
12010 function calls, with the possible exception of calls to the GOT helper. */
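/* In outline, the sequence emitted below is:

     add  %sp, -SIZE, %sp                             ! open scratch area
     st   REG,  [%sp + SPARC_STACK_BIAS + 16*WORD]    ! above the register
     st   REG2, [%sp + SPARC_STACK_BIAS + 17*WORD]    !  save area (if REG2)
     ... SEQ ...
     ld   [%sp + SPARC_STACK_BIAS + 17*WORD], REG2
     ld   [%sp + SPARC_STACK_BIAS + 16*WORD], REG
     add  %sp, SIZE, %sp

   This is pseudo-assembly for illustration only; the actual load/store
   instructions depend on word_mode and the stack bias.  */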
12011
12012 static void
12013 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12014 {
12015 /* We must preserve the lowest 16 words for the register save area. */
12016 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12017 /* We really need only 2 words of fresh stack space. */
12018 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12019
12020 rtx slot
12021 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12022 SPARC_STACK_BIAS + offset));
12023
12024 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12025 emit_insn (gen_rtx_SET (slot, reg));
12026 if (reg2)
12027 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12028 reg2));
12029 emit_insn (seq);
12030 if (reg2)
12031 emit_insn (gen_rtx_SET (reg2,
12032 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12033 emit_insn (gen_rtx_SET (reg, slot));
12034 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12035 }
12036
12037 /* Output the assembler code for a thunk function. THUNK_DECL is the
12038 declaration for the thunk function itself, FUNCTION is the decl for
12039 the target function. DELTA is an immediate constant offset to be
12040 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12041 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
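/* In C-like pseudo-code the emitted thunk behaves as if it executed

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     return FUNCTION (this, ...);

   where the final call is a tail call.  This is a sketch for illustration
   only; the routine below emits the RTL directly.  */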
12042
12043 static void
12044 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12045 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12046 tree function)
12047 {
12048 rtx this_rtx, funexp;
12049 rtx_insn *insn;
12050 unsigned int int_arg_first;
12051
12052 reload_completed = 1;
12053 epilogue_completed = 1;
12054
12055 emit_note (NOTE_INSN_PROLOGUE_END);
12056
12057 if (TARGET_FLAT)
12058 {
12059 sparc_leaf_function_p = 1;
12060
12061 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12062 }
12063 else if (flag_delayed_branch)
12064 {
12065 /* We will emit a regular sibcall below, so we need to instruct
12066 output_sibcall that we are in a leaf function. */
12067 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12068
12069 /* This will cause final.c to invoke leaf_renumber_regs so we
12070 must behave as if we were in a not-yet-leafified function. */
12071 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12072 }
12073 else
12074 {
12075 /* We will emit the sibcall manually below, so we will need to
12076 manually spill non-leaf registers. */
12077 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12078
12079 /* We really are in a leaf function. */
12080 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12081 }
12082
12083 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12084 returns a structure, the structure return pointer is there instead. */
12085 if (TARGET_ARCH64
12086 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12087 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12088 else
12089 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12090
12091 /* Add DELTA. When possible use a plain add, otherwise load it into
12092 a register first. */
12093 if (delta)
12094 {
12095 rtx delta_rtx = GEN_INT (delta);
12096
12097 if (! SPARC_SIMM13_P (delta))
12098 {
12099 rtx scratch = gen_rtx_REG (Pmode, 1);
12100 emit_move_insn (scratch, delta_rtx);
12101 delta_rtx = scratch;
12102 }
12103
12104 /* THIS_RTX += DELTA. */
12105 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12106 }
12107
12108 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12109 if (vcall_offset)
12110 {
12111 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12112 rtx scratch = gen_rtx_REG (Pmode, 1);
12113
12114 gcc_assert (vcall_offset < 0);
12115
12116 /* SCRATCH = *THIS_RTX. */
12117 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12118
12119 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12120 may not have any available scratch register at this point. */
12121 if (SPARC_SIMM13_P (vcall_offset))
12122 ;
12123 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12124 else if (! fixed_regs[5]
12125 /* The below sequence is made up of at least 2 insns,
12126 while the default method may need only one. */
12127 && vcall_offset < -8192)
12128 {
12129 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12130 emit_move_insn (scratch2, vcall_offset_rtx);
12131 vcall_offset_rtx = scratch2;
12132 }
12133 else
12134 {
12135 rtx increment = GEN_INT (-4096);
12136
12137 /* VCALL_OFFSET is a negative number whose typical range can be
12138 estimated as -32768..0 in 32-bit mode. In almost all cases
12139 it is therefore cheaper to emit multiple add insns than
12140 spilling and loading the constant into a register (at least
12141 6 insns). */
12142 while (! SPARC_SIMM13_P (vcall_offset))
12143 {
12144 emit_insn (gen_add2_insn (scratch, increment));
12145 vcall_offset += 4096;
12146 }
12147 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12148 }
12149
12150 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12151 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12152 gen_rtx_PLUS (Pmode,
12153 scratch,
12154 vcall_offset_rtx)));
12155
12156 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12157 emit_insn (gen_add2_insn (this_rtx, scratch));
12158 }
12159
12160 /* Generate a tail call to the target function. */
12161 if (! TREE_USED (function))
12162 {
12163 assemble_external (function);
12164 TREE_USED (function) = 1;
12165 }
12166 funexp = XEXP (DECL_RTL (function), 0);
12167
12168 if (flag_delayed_branch)
12169 {
12170 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12171 insn = emit_call_insn (gen_sibcall (funexp));
12172 SIBLING_CALL_P (insn) = 1;
12173 }
12174 else
12175 {
12176 /* The hoops we have to jump through in order to generate a sibcall
12177 without using delay slots... */
12178 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12179
12180 if (flag_pic)
12181 {
12182 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12183 start_sequence ();
12184 load_got_register (); /* clobbers %o7 */
12185 scratch = sparc_legitimize_pic_address (funexp, scratch);
12186 seq = get_insns ();
12187 end_sequence ();
12188 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12189 }
12190 else if (TARGET_ARCH32)
12191 {
12192 emit_insn (gen_rtx_SET (scratch,
12193 gen_rtx_HIGH (SImode, funexp)));
12194 emit_insn (gen_rtx_SET (scratch,
12195 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12196 }
12197 else /* TARGET_ARCH64 */
12198 {
12199 switch (sparc_cmodel)
12200 {
12201 case CM_MEDLOW:
12202 case CM_MEDMID:
12203 /* The destination can serve as a temporary. */
12204 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12205 break;
12206
12207 case CM_MEDANY:
12208 case CM_EMBMEDANY:
12209 /* The destination cannot serve as a temporary. */
12210 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12211 start_sequence ();
12212 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12213 seq = get_insns ();
12214 end_sequence ();
12215 emit_and_preserve (seq, spill_reg, 0);
12216 break;
12217
12218 default:
12219 gcc_unreachable ();
12220 }
12221 }
12222
12223 emit_jump_insn (gen_indirect_jump (scratch));
12224 }
12225
12226 emit_barrier ();
12227
12228 /* Run just enough of rest_of_compilation to get the insns emitted.
12229 There's not really enough bulk here to make other passes such as
12230 instruction scheduling worth while. Note that use_thunk calls
12231 assemble_start_function and assemble_end_function. */
12232 insn = get_insns ();
12233 shorten_branches (insn);
12234 final_start_function (insn, file, 1);
12235 final (insn, file, 1);
12236 final_end_function ();
12237
12238 reload_completed = 0;
12239 epilogue_completed = 0;
12240 }
12241
12242 /* Return true if sparc_output_mi_thunk would be able to output the
12243 assembler code for the thunk function specified by the arguments
12244 it is passed, and false otherwise. */
12245 static bool
12246 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12247 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12248 HOST_WIDE_INT vcall_offset,
12249 const_tree function ATTRIBUTE_UNUSED)
12250 {
12251 /* Bound the loop used in the default method above. */
12252 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12253 }
12254
12255 /* How to allocate a 'struct machine_function'. */
12256
12257 static struct machine_function *
12258 sparc_init_machine_status (void)
12259 {
12260 return ggc_cleared_alloc<machine_function> ();
12261 }
12262
12263 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12264 We need to emit DTP-relative relocations. */
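/* For SIZE == 4 and a symbol x this prints

     .word	%r_tls_dtpoff32(x)

   and the SIZE == 8 case uses .xword with %r_tls_dtpoff64 instead.  */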
12265
12266 static void
12267 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12268 {
12269 switch (size)
12270 {
12271 case 4:
12272 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12273 break;
12274 case 8:
12275 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12276 break;
12277 default:
12278 gcc_unreachable ();
12279 }
12280 output_addr_const (file, x);
12281 fputs (")", file);
12282 }
12283
12284 /* Do whatever processing is required at the end of a file. */
12285
12286 static void
12287 sparc_file_end (void)
12288 {
12289 /* If we need to emit the special GOT helper function, do so now. */
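  /* The helper body emitted further down is essentially

	 jmp	%o7+8
	  add	%o7, <got-reg>, <got-reg>

     (or the add/jmp/nop variant when delayed branches are disabled): it
     returns to its caller while folding the return address into the GOT
     register.  <got-reg> stands for reg_names[GLOBAL_OFFSET_TABLE_REGNUM].  */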
12290 if (got_helper_rtx)
12291 {
12292 const char *name = XSTR (got_helper_rtx, 0);
12293 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12294 #ifdef DWARF2_UNWIND_INFO
12295 bool do_cfi;
12296 #endif
12297
12298 if (USE_HIDDEN_LINKONCE)
12299 {
12300 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12301 get_identifier (name),
12302 build_function_type_list (void_type_node,
12303 NULL_TREE));
12304 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12305 NULL_TREE, void_type_node);
12306 TREE_PUBLIC (decl) = 1;
12307 TREE_STATIC (decl) = 1;
12308 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12309 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12310 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12311 resolve_unique_section (decl, 0, flag_function_sections);
12312 allocate_struct_function (decl, true);
12313 cfun->is_thunk = 1;
12314 current_function_decl = decl;
12315 init_varasm_status ();
12316 assemble_start_function (decl, name);
12317 }
12318 else
12319 {
12320 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12321 switch_to_section (text_section);
12322 if (align > 0)
12323 ASM_OUTPUT_ALIGN (asm_out_file, align);
12324 ASM_OUTPUT_LABEL (asm_out_file, name);
12325 }
12326
12327 #ifdef DWARF2_UNWIND_INFO
12328 do_cfi = dwarf2out_do_cfi_asm ();
12329 if (do_cfi)
12330 fprintf (asm_out_file, "\t.cfi_startproc\n");
12331 #endif
12332 if (flag_delayed_branch)
12333 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12334 reg_name, reg_name);
12335 else
12336 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12337 reg_name, reg_name);
12338 #ifdef DWARF2_UNWIND_INFO
12339 if (do_cfi)
12340 fprintf (asm_out_file, "\t.cfi_endproc\n");
12341 #endif
12342 }
12343
12344 if (NEED_INDICATE_EXEC_STACK)
12345 file_end_indicate_exec_stack ();
12346
12347 #ifdef TARGET_SOLARIS
12348 solaris_file_end ();
12349 #endif
12350 }
12351
12352 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12353 /* Implement TARGET_MANGLE_TYPE. */
12354
12355 static const char *
12356 sparc_mangle_type (const_tree type)
12357 {
12358 if (TARGET_ARCH32
12359 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12360 && TARGET_LONG_DOUBLE_128)
12361 return "g";
12362
12363 /* For all other types, use normal C++ mangling. */
12364 return NULL;
12365 }
12366 #endif
12367
12368 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12369 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12370 bit 0 indicates that X is true, and bit 1 indicates that Y is true. */
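/* For example, sparc_expand_compare_and_swap below calls this with
   LOAD_STORE == 3 and BEFORE_AFTER == 1 to request the barrier needed
   before an operation that acts as both a load and a store, and again
   with BEFORE_AFTER == 2 for the barrier needed after it.  */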
12371
12372 void
12373 sparc_emit_membar_for_model (enum memmodel model,
12374 int load_store, int before_after)
12375 {
12376 /* Bits for the MEMBAR mmask field. */
12377 const int LoadLoad = 1;
12378 const int StoreLoad = 2;
12379 const int LoadStore = 4;
12380 const int StoreStore = 8;
12381
12382 int mm = 0, implied = 0;
12383
12384 switch (sparc_memory_model)
12385 {
12386 case SMM_SC:
12387 /* Sequential Consistency. All memory transactions are immediately
12388 visible in sequential execution order. No barriers needed. */
12389 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12390 break;
12391
12392 case SMM_TSO:
12393 /* Total Store Ordering: all memory transactions with store semantics
12394 are followed by an implied StoreStore. */
12395 implied |= StoreStore;
12396
12397 /* If we're not looking for a raw barrier (before+after), then atomic
12398 operations get the benefit of being both load and store. */
12399 if (load_store == 3 && before_after == 1)
12400 implied |= StoreLoad;
12401 /* FALLTHRU */
12402
12403 case SMM_PSO:
12404 /* Partial Store Ordering: all memory transactions with load semantics
12405 are followed by an implied LoadLoad | LoadStore. */
12406 implied |= LoadLoad | LoadStore;
12407
12408 /* If we're not looking for a raw barrier (before+after), then atomic
12409 operations get the benefit of being both load and store. */
12410 if (load_store == 3 && before_after == 2)
12411 implied |= StoreLoad | StoreStore;
12412 /* FALLTHRU */
12413
12414 case SMM_RMO:
12415 /* Relaxed Memory Ordering: no implicit bits. */
12416 break;
12417
12418 default:
12419 gcc_unreachable ();
12420 }
12421
12422 if (before_after & 1)
12423 {
12424 if (is_mm_release (model) || is_mm_acq_rel (model)
12425 || is_mm_seq_cst (model))
12426 {
12427 if (load_store & 1)
12428 mm |= LoadLoad | StoreLoad;
12429 if (load_store & 2)
12430 mm |= LoadStore | StoreStore;
12431 }
12432 }
12433 if (before_after & 2)
12434 {
12435 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12436 || is_mm_seq_cst (model))
12437 {
12438 if (load_store & 1)
12439 mm |= LoadLoad | LoadStore;
12440 if (load_store & 2)
12441 mm |= StoreLoad | StoreStore;
12442 }
12443 }
12444
12445 /* Remove the bits implied by the system memory model. */
12446 mm &= ~implied;
12447
12448 /* For raw barriers (before+after), always emit a barrier.
12449 This will become a compile-time barrier if needed. */
12450 if (mm || before_after == 3)
12451 emit_insn (gen_membar (GEN_INT (mm)));
12452 }
12453
12454 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12455 compare and swap on the word containing the byte or half-word. */
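/* The strategy below: align the address down to the enclosing word, build
   a shifted mask covering the byte or half-word, splice the shifted old
   and new values into the background bits read from memory, and loop on a
   full-word compare-and-swap, retrying whenever the swap fails only
   because the surrounding background bits changed under us.  */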
12456
12457 static void
12458 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12459 rtx oldval, rtx newval)
12460 {
12461 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12462 rtx addr = gen_reg_rtx (Pmode);
12463 rtx off = gen_reg_rtx (SImode);
12464 rtx oldv = gen_reg_rtx (SImode);
12465 rtx newv = gen_reg_rtx (SImode);
12466 rtx oldvalue = gen_reg_rtx (SImode);
12467 rtx newvalue = gen_reg_rtx (SImode);
12468 rtx res = gen_reg_rtx (SImode);
12469 rtx resv = gen_reg_rtx (SImode);
12470 rtx memsi, val, mask, cc;
12471
12472 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12473
12474 if (Pmode != SImode)
12475 addr1 = gen_lowpart (SImode, addr1);
12476 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12477
12478 memsi = gen_rtx_MEM (SImode, addr);
12479 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12480 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12481
12482 val = copy_to_reg (memsi);
12483
12484 emit_insn (gen_rtx_SET (off,
12485 gen_rtx_XOR (SImode, off,
12486 GEN_INT (GET_MODE (mem) == QImode
12487 ? 3 : 2))));
12488
12489 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12490
12491 if (GET_MODE (mem) == QImode)
12492 mask = force_reg (SImode, GEN_INT (0xff));
12493 else
12494 mask = force_reg (SImode, GEN_INT (0xffff));
12495
12496 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12497
12498 emit_insn (gen_rtx_SET (val,
12499 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12500 val)));
12501
12502 oldval = gen_lowpart (SImode, oldval);
12503 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12504
12505 newval = gen_lowpart_common (SImode, newval);
12506 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12507
12508 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12509
12510 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12511
12512 rtx_code_label *end_label = gen_label_rtx ();
12513 rtx_code_label *loop_label = gen_label_rtx ();
12514 emit_label (loop_label);
12515
12516 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12517
12518 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12519
12520 emit_move_insn (bool_result, const1_rtx);
12521
12522 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12523
12524 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12525
12526 emit_insn (gen_rtx_SET (resv,
12527 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12528 res)));
12529
12530 emit_move_insn (bool_result, const0_rtx);
12531
12532 cc = gen_compare_reg_1 (NE, resv, val);
12533 emit_insn (gen_rtx_SET (val, resv));
12534
12535 /* Use cbranchcc4 to separate the compare and branch! */
12536 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12537 cc, const0_rtx, loop_label));
12538
12539 emit_label (end_label);
12540
12541 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12542
12543 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12544
12545 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12546 }
12547
12548 /* Expand code to perform a compare-and-swap. */
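/* OPERANDS appears to be laid out as in the standard
   atomic_compare_and_swap pattern: 0 is the success flag, 1 the output
   value, 2 the memory operand, 3 the expected value, 4 the desired value
   and 6 the success memory model; the is-weak and failure-model operands
   are not used here.  */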
12549
12550 void
12551 sparc_expand_compare_and_swap (rtx operands[])
12552 {
12553 rtx bval, retval, mem, oldval, newval;
12554 machine_mode mode;
12555 enum memmodel model;
12556
12557 bval = operands[0];
12558 retval = operands[1];
12559 mem = operands[2];
12560 oldval = operands[3];
12561 newval = operands[4];
12562 model = (enum memmodel) INTVAL (operands[6]);
12563 mode = GET_MODE (mem);
12564
12565 sparc_emit_membar_for_model (model, 3, 1);
12566
12567 if (reg_overlap_mentioned_p (retval, oldval))
12568 oldval = copy_to_reg (oldval);
12569
12570 if (mode == QImode || mode == HImode)
12571 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12572 else
12573 {
12574 rtx (*gen) (rtx, rtx, rtx, rtx);
12575 rtx x;
12576
12577 if (mode == SImode)
12578 gen = gen_atomic_compare_and_swapsi_1;
12579 else
12580 gen = gen_atomic_compare_and_swapdi_1;
12581 emit_insn (gen (retval, mem, oldval, newval));
12582
12583 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12584 if (x != bval)
12585 convert_move (bval, x, 1);
12586 }
12587
12588 sparc_emit_membar_for_model (model, 3, 2);
12589 }
12590
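/* Compute the BMASK operand that realizes the element permutation SEL for
   vectors of mode VMODE and emit the bmask insn that loads it into the
   %gsr mask field, ready for a subsequent bshuffle.  */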
12591 void
12592 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12593 {
12594 rtx t_1, t_2, t_3;
12595
12596 sel = gen_lowpart (DImode, sel);
12597 switch (vmode)
12598 {
12599 case E_V2SImode:
12600 /* inp = xxxxxxxAxxxxxxxB */
12601 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12602 NULL_RTX, 1, OPTAB_DIRECT);
12603 /* t_1 = ....xxxxxxxAxxx. */
12604 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12605 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12606 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12607 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12608 /* sel = .......B */
12609 /* t_1 = ...A.... */
12610 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12611 /* sel = ...A...B */
12612 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12613 /* sel = AAAABBBB * 4 */
12614 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12615 /* sel = { A*4, A*4+1, A*4+2, ... } */
12616 break;
12617
12618 case E_V4HImode:
12619 /* inp = xxxAxxxBxxxCxxxD */
12620 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12621 NULL_RTX, 1, OPTAB_DIRECT);
12622 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12623 NULL_RTX, 1, OPTAB_DIRECT);
12624 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12625 NULL_RTX, 1, OPTAB_DIRECT);
12626 /* t_1 = ..xxxAxxxBxxxCxx */
12627 /* t_2 = ....xxxAxxxBxxxC */
12628 /* t_3 = ......xxxAxxxBxx */
12629 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12630 GEN_INT (0x07),
12631 NULL_RTX, 1, OPTAB_DIRECT);
12632 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12633 GEN_INT (0x0700),
12634 NULL_RTX, 1, OPTAB_DIRECT);
12635 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12636 GEN_INT (0x070000),
12637 NULL_RTX, 1, OPTAB_DIRECT);
12638 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12639 GEN_INT (0x07000000),
12640 NULL_RTX, 1, OPTAB_DIRECT);
12641 /* sel = .......D */
12642 /* t_1 = .....C.. */
12643 /* t_2 = ...B.... */
12644 /* t_3 = .A...... */
12645 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12646 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12647 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12648 /* sel = .A.B.C.D */
12649 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12650 /* sel = AABBCCDD * 2 */
12651 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12652 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12653 break;
12654
12655 case E_V8QImode:
12656 /* input = xAxBxCxDxExFxGxH */
12657 sel = expand_simple_binop (DImode, AND, sel,
12658 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12659 | 0x0f0f0f0f),
12660 NULL_RTX, 1, OPTAB_DIRECT);
12661 /* sel = .A.B.C.D.E.F.G.H */
12662 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12663 NULL_RTX, 1, OPTAB_DIRECT);
12664 /* t_1 = ..A.B.C.D.E.F.G. */
12665 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12666 NULL_RTX, 1, OPTAB_DIRECT);
12667 /* sel = .AABBCCDDEEFFGGH */
12668 sel = expand_simple_binop (DImode, AND, sel,
12669 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12670 | 0xff00ff),
12671 NULL_RTX, 1, OPTAB_DIRECT);
12672 /* sel = ..AB..CD..EF..GH */
12673 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12674 NULL_RTX, 1, OPTAB_DIRECT);
12675 /* t_1 = ....AB..CD..EF.. */
12676 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12677 NULL_RTX, 1, OPTAB_DIRECT);
12678 /* sel = ..ABABCDCDEFEFGH */
12679 sel = expand_simple_binop (DImode, AND, sel,
12680 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12681 NULL_RTX, 1, OPTAB_DIRECT);
12682 /* sel = ....ABCD....EFGH */
12683 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12684 NULL_RTX, 1, OPTAB_DIRECT);
12685 /* t_1 = ........ABCD.... */
12686 sel = gen_lowpart (SImode, sel);
12687 t_1 = gen_lowpart (SImode, t_1);
12688 break;
12689
12690 default:
12691 gcc_unreachable ();
12692 }
12693
12694 /* Always perform the final addition/merge within the bmask insn. */
12695 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12696 }
12697
12698 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12699
12700 static bool
12701 sparc_frame_pointer_required (void)
12702 {
12703 /* If the stack pointer is dynamically modified in the function, it cannot
12704 serve as the frame pointer. */
12705 if (cfun->calls_alloca)
12706 return true;
12707
12708 /* If the function receives nonlocal gotos, it needs to save the frame
12709 pointer in the nonlocal_goto_save_area object. */
12710 if (cfun->has_nonlocal_label)
12711 return true;
12712
12713 /* In flat mode, that's it. */
12714 if (TARGET_FLAT)
12715 return false;
12716
12717 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12718 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12719 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12720 }
12721
12722 /* The way this is structured, we can't eliminate SFP in favor of SP
12723 if the frame pointer is required: we want to use the SFP->HFP elimination
12724 in that case. But the test in update_eliminables doesn't know we are
12725 assuming below that we only do the former elimination. */
12726
12727 static bool
12728 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12729 {
12730 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12731 }
12732
12733 /* Return the hard frame pointer directly to bypass the stack bias. */
12734
12735 static rtx
12736 sparc_builtin_setjmp_frame_value (void)
12737 {
12738 return hard_frame_pointer_rtx;
12739 }
12740
12741 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12742 they won't be allocated. */
12743
12744 static void
12745 sparc_conditional_register_usage (void)
12746 {
12747 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12748 {
12749 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12750 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12751 }
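/* In the tests below, a fixed_regs[] value of 2 is the conditional default
from the FIXED_REGISTERS table and means the user gave no explicit
-ffixed/-fcall-used/-fcall-saved option for the register, so its status is
decided here. */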
12752 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12753 then honor it. */
12754 if (TARGET_ARCH32 && fixed_regs[5])
12755 fixed_regs[5] = 1;
12756 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12757 fixed_regs[5] = 0;
12758 if (! TARGET_V9)
12759 {
12760 int regno;
12761 for (regno = SPARC_FIRST_V9_FP_REG;
12762 regno <= SPARC_LAST_V9_FP_REG;
12763 regno++)
12764 fixed_regs[regno] = 1;
12765 /* %fcc0 is used by v8 and v9. */
12766 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12767 regno <= SPARC_LAST_V9_FCC_REG;
12768 regno++)
12769 fixed_regs[regno] = 1;
12770 }
12771 if (! TARGET_FPU)
12772 {
12773 int regno;
12774 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12775 fixed_regs[regno] = 1;
12776 }
12777 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12778 then honor it. Likewise with g3 and g4. */
12779 if (fixed_regs[2] == 2)
12780 fixed_regs[2] = ! TARGET_APP_REGS;
12781 if (fixed_regs[3] == 2)
12782 fixed_regs[3] = ! TARGET_APP_REGS;
12783 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12784 fixed_regs[4] = ! TARGET_APP_REGS;
12785 else if (TARGET_CM_EMBMEDANY)
12786 fixed_regs[4] = 1;
12787 else if (fixed_regs[4] == 2)
12788 fixed_regs[4] = 0;
12789 if (TARGET_FLAT)
12790 {
12791 int regno;
12792 /* Disable leaf functions. */
12793 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12794 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12795 leaf_reg_remap [regno] = regno;
12796 }
12797 if (TARGET_VIS)
12798 global_regs[SPARC_GSR_REG] = 1;
12799 }
12800
12801 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12802
12803 - We can't load constants into FP registers.
12804 - We can't load FP constants into integer registers when soft-float,
12805 because there is no soft-float pattern with a r/F constraint.
12806 - We can't load FP constants into integer registers for TFmode unless
12807 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12808 - Try to reload integer constants (symbolic or otherwise) back into
12809 registers directly, rather than having them dumped to memory. */
12810
12811 static reg_class_t
12812 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12813 {
12814 machine_mode mode = GET_MODE (x);
12815 if (CONSTANT_P (x))
12816 {
12817 if (FP_REG_CLASS_P (rclass)
12818 || rclass == GENERAL_OR_FP_REGS
12819 || rclass == GENERAL_OR_EXTRA_FP_REGS
12820 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12821 || (mode == TFmode && ! const_zero_operand (x, mode)))
12822 return NO_REGS;
12823
12824 if (GET_MODE_CLASS (mode) == MODE_INT)
12825 return GENERAL_REGS;
12826
12827 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12828 {
12829 if (! FP_REG_CLASS_P (rclass)
12830 || !(const_zero_operand (x, mode)
12831 || const_all_ones_operand (x, mode)))
12832 return NO_REGS;
12833 }
12834 }
12835
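/* With VIS3 on 32-bit, integer <-> FP moves of 8-byte values only work
through the lower FP registers (see sparc_secondary_reload below), so when
X is in an integer register steer the reload away from the upper half. */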
12836 if (TARGET_VIS3
12837 && ! TARGET_ARCH64
12838 && (rclass == EXTRA_FP_REGS
12839 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12840 {
12841 int regno = true_regnum (x);
12842
12843 if (SPARC_INT_REG_P (regno))
12844 return (rclass == EXTRA_FP_REGS
12845 ? FP_REGS : GENERAL_OR_FP_REGS);
12846 }
12847
12848 return rclass;
12849 }
12850
12851 /* Return true if we use LRA instead of reload pass. */
12852
12853 static bool
12854 sparc_lra_p (void)
12855 {
12856 return TARGET_LRA;
12857 }
12858
12859 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12860 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
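/* In V8+ code a 64-bit value lives in a pair of 32-bit registers: %H<n>
prints the register holding the high word and %L<n> the low word. The
sequences below assemble each operand into a single 64-bit register with
sllx/or, multiply there, and split the result back into %H0/%L0 with srlx
and a move. */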
12861
12862 const char *
12863 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12864 {
12865 char mulstr[32];
12866
12867 gcc_assert (! TARGET_ARCH64);
12868
12869 if (sparc_check_64 (operands[1], insn) <= 0)
12870 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12871 if (which_alternative == 1)
12872 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12873 if (GET_CODE (operands[2]) == CONST_INT)
12874 {
12875 if (which_alternative == 1)
12876 {
12877 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12878 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12879 output_asm_insn (mulstr, operands);
12880 return "srlx\t%L0, 32, %H0";
12881 }
12882 else
12883 {
12884 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12885 output_asm_insn ("or\t%L1, %3, %3", operands);
12886 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12887 output_asm_insn (mulstr, operands);
12888 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12889 return "mov\t%3, %L0";
12890 }
12891 }
12892 else if (rtx_equal_p (operands[1], operands[2]))
12893 {
12894 if (which_alternative == 1)
12895 {
12896 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12897 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12898 output_asm_insn (mulstr, operands);
12899 return "srlx\t%L0, 32, %H0";
12900 }
12901 else
12902 {
12903 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12904 output_asm_insn ("or\t%L1, %3, %3", operands);
12905 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12906 output_asm_insn (mulstr, operands);
12907 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12908 return "mov\t%3, %L0";
12909 }
12910 }
12911 if (sparc_check_64 (operands[2], insn) <= 0)
12912 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12913 if (which_alternative == 1)
12914 {
12915 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12916 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12917 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12918 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12919 output_asm_insn (mulstr, operands);
12920 return "srlx\t%L0, 32, %H0";
12921 }
12922 else
12923 {
12924 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12925 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12926 output_asm_insn ("or\t%L1, %3, %3", operands);
12927 output_asm_insn ("or\t%L2, %4, %4", operands);
12928 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12929 output_asm_insn (mulstr, operands);
12930 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12931 return "mov\t%3, %L0";
12932 }
12933 }
12934
12935 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12936 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12937 and INNER_MODE are the modes describing TARGET. */
12938
12939 static void
12940 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12941 machine_mode inner_mode)
12942 {
12943 rtx t1, final_insn, sel;
12944 int bmask;
12945
12946 t1 = gen_reg_rtx (mode);
12947
12948 elt = convert_modes (SImode, inner_mode, elt, true);
12949 emit_move_insn (gen_lowpart (SImode, t1), elt);
12950
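/* ELT now occupies the low 32 bits of T1, i.e. bytes 4..7 of the 8-byte
register. The BMASK constants below are BSHUFFLE byte selectors that
replicate just those bytes: 0x45674567 copies the SImode element into both
words, 0x67676767 copies the HImode element (bytes 6-7) into every
halfword, and 0x77777777 copies the QImode element (byte 7) into every
byte. */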
12951 switch (mode)
12952 {
12953 case E_V2SImode:
12954 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12955 bmask = 0x45674567;
12956 break;
12957 case E_V4HImode:
12958 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12959 bmask = 0x67676767;
12960 break;
12961 case E_V8QImode:
12962 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12963 bmask = 0x77777777;
12964 break;
12965 default:
12966 gcc_unreachable ();
12967 }
12968
12969 sel = force_reg (SImode, GEN_INT (bmask));
12970 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12971 emit_insn (final_insn);
12972 }
12973
12974 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12975 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12976
12977 static void
12978 vector_init_fpmerge (rtx target, rtx elt)
12979 {
12980 rtx t1, t2, t2_low, t3, t3_low;
12981
12982 t1 = gen_reg_rtx (V4QImode);
12983 elt = convert_modes (SImode, QImode, elt, true);
12984 emit_move_insn (gen_lowpart (SImode, t1), elt);
12985
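/* FPMERGE of a register with itself duplicates each of its bytes. The
element starts in the low byte of T1, so each of the three merges below
doubles the number of copies present in the low half of the result; after
the last one all eight bytes of TARGET hold the element. */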
12986 t2 = gen_reg_rtx (V8QImode);
12987 t2_low = gen_lowpart (V4QImode, t2);
12988 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12989
12990 t3 = gen_reg_rtx (V8QImode);
12991 t3_low = gen_lowpart (V4QImode, t3);
12992 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12993
12994 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12995 }
12996
12997 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12998 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12999
13000 static void
13001 vector_init_faligndata (rtx target, rtx elt)
13002 {
13003 rtx t1 = gen_reg_rtx (V4HImode);
13004 int i;
13005
13006 elt = convert_modes (SImode, HImode, elt, true);
13007 emit_move_insn (gen_lowpart (SImode, t1), elt);
13008
13009 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13010 force_reg (SImode, GEN_INT (6)),
13011 const0_rtx));
13012
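/* The alignaddr above sets the GSR alignment offset to 6, so each
FALIGNDATA extracts 8 bytes starting at byte 6 of the T1:TARGET
concatenation. The halfword element (in bytes 6-7 of T1) is thus shifted
into TARGET from the most significant end on every iteration, and four
iterations fill all four lanes. */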
13013 for (i = 0; i < 4; i++)
13014 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13015 }
13016
13017 /* Emit code to initialize TARGET to values for individual fields VALS. */
13018
13019 void
13020 sparc_expand_vector_init (rtx target, rtx vals)
13021 {
13022 const machine_mode mode = GET_MODE (target);
13023 const machine_mode inner_mode = GET_MODE_INNER (mode);
13024 const int n_elts = GET_MODE_NUNITS (mode);
13025 int i, n_var = 0;
13026 bool all_same = true;
13027 rtx mem;
13028
13029 for (i = 0; i < n_elts; i++)
13030 {
13031 rtx x = XVECEXP (vals, 0, i);
13032 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13033 n_var++;
13034
13035 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13036 all_same = false;
13037 }
13038
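/* All elements constant: emit the whole vector as a CONST_VECTOR and let
the move patterns handle it. */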
13039 if (n_var == 0)
13040 {
13041 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13042 return;
13043 }
13044
13045 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13046 {
13047 if (GET_MODE_SIZE (inner_mode) == 4)
13048 {
13049 emit_move_insn (gen_lowpart (SImode, target),
13050 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13051 return;
13052 }
13053 else if (GET_MODE_SIZE (inner_mode) == 8)
13054 {
13055 emit_move_insn (gen_lowpart (DImode, target),
13056 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13057 return;
13058 }
13059 }
13060 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13061 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13062 {
13063 emit_move_insn (gen_highpart (word_mode, target),
13064 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13065 emit_move_insn (gen_lowpart (word_mode, target),
13066 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13067 return;
13068 }
13069
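/* An 8-byte vector whose elements are all the same can be built with the
VIS replication helpers above. */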
13070 if (all_same && GET_MODE_SIZE (mode) == 8)
13071 {
13072 if (TARGET_VIS2)
13073 {
13074 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13075 return;
13076 }
13077 if (mode == V8QImode)
13078 {
13079 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13080 return;
13081 }
13082 if (mode == V4HImode)
13083 {
13084 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13085 return;
13086 }
13087 }
13088
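/* Otherwise build the vector in a stack temporary, one element at a time,
and load it back as a whole. */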
13089 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13090 for (i = 0; i < n_elts; i++)
13091 emit_move_insn (adjust_address_nv (mem, inner_mode,
13092 i * GET_MODE_SIZE (inner_mode)),
13093 XVECEXP (vals, 0, i));
13094 emit_move_insn (target, mem);
13095 }
13096
13097 /* Implement TARGET_SECONDARY_RELOAD. */
13098
13099 static reg_class_t
13100 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13101 machine_mode mode, secondary_reload_info *sri)
13102 {
13103 enum reg_class rclass = (enum reg_class) rclass_i;
13104
13105 sri->icode = CODE_FOR_nothing;
13106 sri->extra_cost = 0;
13107
13108 /* We need a temporary when loading/storing a HImode/QImode value
13109 between memory and the FPU registers. This can happen when combine puts
13110 a paradoxical subreg in a float/fix conversion insn. */
13111 if (FP_REG_CLASS_P (rclass)
13112 && (mode == HImode || mode == QImode)
13113 && (GET_CODE (x) == MEM
13114 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13115 && true_regnum (x) == -1)))
13116 return GENERAL_REGS;
13117
13118 /* On 32-bit we need a temporary when loading/storing a DFmode value
13119 between unaligned memory and the upper FPU registers. */
13120 if (TARGET_ARCH32
13121 && rclass == EXTRA_FP_REGS
13122 && mode == DFmode
13123 && GET_CODE (x) == MEM
13124 && ! mem_min_alignment (x, 8))
13125 return FP_REGS;
13126
13127 if (((TARGET_CM_MEDANY
13128 && symbolic_operand (x, mode))
13129 || (TARGET_CM_EMBMEDANY
13130 && text_segment_operand (x, mode)))
13131 && ! flag_pic)
13132 {
13133 if (in_p)
13134 sri->icode = direct_optab_handler (reload_in_optab, mode);
13135 else
13136 sri->icode = direct_optab_handler (reload_out_optab, mode);
13137 return NO_REGS;
13138 }
13139
13140 if (TARGET_VIS3 && TARGET_ARCH32)
13141 {
13142 int regno = true_regnum (x);
13143
13144 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13145 to move 8-byte values in 4-byte pieces. This only works via
13146 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13147 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13148 an FP_REGS intermediate move. */
13149 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13150 || ((general_or_i64_p (rclass)
13151 || rclass == GENERAL_OR_FP_REGS)
13152 && SPARC_FP_REG_P (regno)))
13153 {
13154 sri->extra_cost = 2;
13155 return FP_REGS;
13156 }
13157 }
13158
13159 return NO_REGS;
13160 }
13161
13162 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13163
13164 On SPARC when not VIS3 it is not possible to directly move data
13165 between GENERAL_REGS and FP_REGS. */
13166
13167 static bool
13168 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13169 reg_class_t class2)
13170 {
13171 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13172 && (! TARGET_VIS3
13173 || GET_MODE_SIZE (mode) > 8
13174 || GET_MODE_SIZE (mode) < 4));
13175 }
13176
13177 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13178
13179 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13180 because the movsi and movsf patterns don't handle r/f moves.
13181 For v8 we copy the default definition. */
13182
13183 static machine_mode
13184 sparc_secondary_memory_needed_mode (machine_mode mode)
13185 {
13186 if (TARGET_ARCH64)
13187 {
13188 if (GET_MODE_BITSIZE (mode) < 32)
13189 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13190 return mode;
13191 }
13192 else
13193 {
13194 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13195 return mode_for_size (BITS_PER_WORD,
13196 GET_MODE_CLASS (mode), 0).require ();
13197 return mode;
13198 }
13199 }
13200
13201 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13202 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13203
13204 bool
13205 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13206 {
13207 enum rtx_code rc = GET_CODE (operands[1]);
13208 machine_mode cmp_mode;
13209 rtx cc_reg, dst, cmp;
13210
13211 cmp = operands[1];
13212 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13213 return false;
13214
13215 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13216 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13217
13218 cmp_mode = GET_MODE (XEXP (cmp, 0));
13219 rc = GET_CODE (cmp);
13220
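/* Arrange for the 'false' value to already be in the destination (using a
scratch if the destination overlaps the comparison), so that a single
conditional move of the 'true' value completes the operation; if the 'true'
value is the destination itself, move the 'false' value under the reversed
condition instead. */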
13221 dst = operands[0];
13222 if (! rtx_equal_p (operands[2], dst)
13223 && ! rtx_equal_p (operands[3], dst))
13224 {
13225 if (reg_overlap_mentioned_p (dst, cmp))
13226 dst = gen_reg_rtx (mode);
13227
13228 emit_move_insn (dst, operands[3]);
13229 }
13230 else if (operands[2] == dst)
13231 {
13232 operands[2] = operands[3];
13233
13234 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13235 rc = reverse_condition_maybe_unordered (rc);
13236 else
13237 rc = reverse_condition (rc);
13238 }
13239
13240 if (XEXP (cmp, 1) == const0_rtx
13241 && GET_CODE (XEXP (cmp, 0)) == REG
13242 && cmp_mode == DImode
13243 && v9_regcmp_p (rc))
13244 cc_reg = XEXP (cmp, 0);
13245 else
13246 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13247
13248 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13249
13250 emit_insn (gen_rtx_SET (dst,
13251 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13252
13253 if (dst != operands[0])
13254 emit_move_insn (operands[0], dst);
13255
13256 return true;
13257 }
13258
13259 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13260 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13261 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13262 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13263 code to be used for the condition mask. */
13264
13265 void
13266 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13267 {
13268 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13269 enum rtx_code code = GET_CODE (operands[3]);
13270
13271 mask = gen_reg_rtx (Pmode);
13272 cop0 = operands[4];
13273 cop1 = operands[5];
13274 if (code == LT || code == GE)
13275 {
13276 rtx t;
13277
13278 code = swap_condition (code);
13279 t = cop0; cop0 = cop1; cop1 = t;
13280 }
13281
13282 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13283
13284 fcmp = gen_rtx_UNSPEC (Pmode,
13285 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13286 fcode);
13287
13288 cmask = gen_rtx_UNSPEC (DImode,
13289 gen_rtvec (2, mask, gsr),
13290 ccode);
13291
13292 bshuf = gen_rtx_UNSPEC (mode,
13293 gen_rtvec (3, operands[1], operands[2], gsr),
13294 UNSPEC_BSHUFFLE);
13295
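/* The FCMP pattern writes a per-element comparison mask into MASK, the
CMASK pattern turns it into a byte selector in the GSR, and the final
BSHUFFLE then picks each element of the result from operands[1] or
operands[2]. */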
13296 emit_insn (gen_rtx_SET (mask, fcmp));
13297 emit_insn (gen_rtx_SET (gsr, cmask));
13298
13299 emit_insn (gen_rtx_SET (operands[0], bshuf));
13300 }
13301
13302 /* On sparc, any mode which naturally allocates into the float
13303 registers should return 4 here. */
13304
13305 unsigned int
13306 sparc_regmode_natural_size (machine_mode mode)
13307 {
13308 int size = UNITS_PER_WORD;
13309
13310 if (TARGET_ARCH64)
13311 {
13312 enum mode_class mclass = GET_MODE_CLASS (mode);
13313
13314 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13315 size = 4;
13316 }
13317
13318 return size;
13319 }
13320
13321 /* Implement TARGET_HARD_REGNO_NREGS.
13322
13323 On SPARC, ordinary registers hold 32 bits worth; this means both
13324 integer and floating point registers. On v9, integer regs hold 64
13325 bits worth; floating point regs hold 32 bits worth (this includes the
13326 new fp regs as even the odd ones are included in the hard register
13327 count). */
13328
13329 static unsigned int
13330 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13331 {
13332 if (regno == SPARC_GSR_REG)
13333 return 1;
13334 if (TARGET_ARCH64)
13335 {
13336 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13337 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13338 return CEIL (GET_MODE_SIZE (mode), 4);
13339 }
13340 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13341 }
13342
13343 /* Implement TARGET_HARD_REGNO_MODE_OK.
13344
13345 ??? Because of the funny way we pass parameters we should allow certain
13346 ??? types of float/complex values to be in integer registers during
13347 ??? RTL generation. This only matters on arch32. */
13348
13349 static bool
13350 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13351 {
13352 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13353 }
13354
13355 /* Implement TARGET_MODES_TIEABLE_P.
13356
13357 For V9 we have to deal with the fact that only the lower 32 floating
13358 point registers are 32-bit addressable. */
13359
13360 static bool
13361 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13362 {
13363 enum mode_class mclass1, mclass2;
13364 unsigned short size1, size2;
13365
13366 if (mode1 == mode2)
13367 return true;
13368
13369 mclass1 = GET_MODE_CLASS (mode1);
13370 mclass2 = GET_MODE_CLASS (mode2);
13371 if (mclass1 != mclass2)
13372 return false;
13373
13374 if (! TARGET_V9)
13375 return true;
13376
13377 /* Classes are the same and we are V9 so we have to deal with upper
13378 vs. lower floating point registers. If one of the modes is a
13379 4-byte mode, and the other is not, we have to mark them as not
13380 tieable because only the lower 32 floating point registers are
13381 addressable 32 bits at a time.
13382
13383 We can't just test explicitly for SFmode, otherwise we won't
13384 cover the vector mode cases properly. */
13385
13386 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13387 return true;
13388
13389 size1 = GET_MODE_SIZE (mode1);
13390 size2 = GET_MODE_SIZE (mode2);
13391 if ((size1 > 4 && size2 == 4)
13392 || (size2 > 4 && size1 == 4))
13393 return false;
13394
13395 return true;
13396 }
13397
13398 /* Implement TARGET_CSTORE_MODE. */
13399
13400 static scalar_int_mode
13401 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13402 {
13403 return (TARGET_ARCH64 ? DImode : SImode);
13404 }
13405
13406 /* Return the compound expression made of T1 and T2. */
13407
13408 static inline tree
13409 compound_expr (tree t1, tree t2)
13410 {
13411 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13412 }
13413
13414 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13415
13416 static void
13417 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13418 {
13419 if (!TARGET_FPU)
13420 return;
13421
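/* In the FSR, bits 9:5 hold the accrued exception (aexc) field and bits
27:23 the trap enable mask (TEM). */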
13422 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13423 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13424
13425 /* We generate the equivalent of feholdexcept (&fenv_var):
13426
13427 unsigned int fenv_var;
13428 __builtin_store_fsr (&fenv_var);
13429
13430 unsigned int tmp1_var;
13431 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13432
13433 __builtin_load_fsr (&tmp1_var); */
13434
13435 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13436 TREE_ADDRESSABLE (fenv_var) = 1;
13437 tree fenv_addr = build_fold_addr_expr (fenv_var);
13438 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13439 tree hold_stfsr
13440 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13441 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13442
13443 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13444 TREE_ADDRESSABLE (tmp1_var) = 1;
13445 tree masked_fenv_var
13446 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13447 build_int_cst (unsigned_type_node,
13448 ~(accrued_exception_mask | trap_enable_mask)));
13449 tree hold_mask
13450 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13451 NULL_TREE, NULL_TREE);
13452
13453 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13454 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13455 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13456
13457 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13458
13459 /* We reload the value of tmp1_var to clear the exceptions:
13460
13461 __builtin_load_fsr (&tmp1_var); */
13462
13463 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13464
13465 /* We generate the equivalent of feupdateenv (&fenv_var):
13466
13467 unsigned int tmp2_var;
13468 __builtin_store_fsr (&tmp2_var);
13469
13470 __builtin_load_fsr (&fenv_var);
13471
13472 if (SPARC_LOW_FE_EXCEPT_VALUES)
13473 tmp2_var >>= 5;
13474 __atomic_feraiseexcept ((int) tmp2_var); */
13475
13476 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13477 TREE_ADDRESSABLE (tmp2_var) = 1;
13478 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13479 tree update_stfsr
13480 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13481 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13482
13483 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13484
13485 tree atomic_feraiseexcept
13486 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13487 tree update_call
13488 = build_call_expr (atomic_feraiseexcept, 1,
13489 fold_convert (integer_type_node, tmp2_var));
13490
13491 if (SPARC_LOW_FE_EXCEPT_VALUES)
13492 {
13493 tree shifted_tmp2_var
13494 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13495 build_int_cst (unsigned_type_node, 5));
13496 tree update_shift
13497 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13498 update_call = compound_expr (update_shift, update_call);
13499 }
13500
13501 *update
13502 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13503 }
13504
13505 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13506
13507 SImode loads to floating-point registers are not zero-extended.
13508 The definition for LOAD_EXTEND_OP specifies that integer loads
13509 narrower than BITS_PER_WORD will be zero-extended. As a result,
13510 we inhibit changes from SImode unless they are to a mode that is
13511 identical in size.
13512
13513 Likewise for SFmode, since word-mode paradoxical subregs are
13514 problematic on big-endian architectures. */
13515
13516 static bool
13517 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13518 reg_class_t rclass)
13519 {
13520 if (TARGET_ARCH64
13521 && GET_MODE_SIZE (from) == 4
13522 && GET_MODE_SIZE (to) != 4)
13523 return !reg_classes_intersect_p (rclass, FP_REGS);
13524 return true;
13525 }
13526
13527 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13528
13529 static HOST_WIDE_INT
13530 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13531 {
13532 if (TREE_CODE (exp) == STRING_CST)
13533 return MAX (align, FASTEST_ALIGNMENT);
13534 return align;
13535 }
13536
13537 #include "gt-sparc.h"