]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/sparc/sparc.c
Turn CONSTANT_ALIGNMENT into a hook
[thirdparty/gcc.git] / gcc / config / sparc / sparc.c
1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "debug.h"
52 #include "common/common-target.h"
53 #include "gimplify.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "params.h"
57 #include "tree-pass.h"
58 #include "context.h"
59 #include "builtins.h"
60
61 /* This file should be included last. */
62 #include "target-def.h"
63
64 /* Processor costs */
65
66 struct processor_costs {
67 /* Integer load */
68 const int int_load;
69
70 /* Integer signed load */
71 const int int_sload;
72
73 /* Integer zeroed load */
74 const int int_zload;
75
76 /* Float load */
77 const int float_load;
78
79 /* fmov, fneg, fabs */
80 const int float_move;
81
82 /* fadd, fsub */
83 const int float_plusminus;
84
85 /* fcmp */
86 const int float_cmp;
87
88 /* fmov, fmovr */
89 const int float_cmove;
90
91 /* fmul */
92 const int float_mul;
93
94 /* fdivs */
95 const int float_div_sf;
96
97 /* fdivd */
98 const int float_div_df;
99
100 /* fsqrts */
101 const int float_sqrt_sf;
102
103 /* fsqrtd */
104 const int float_sqrt_df;
105
106 /* umul/smul */
107 const int int_mul;
108
109 /* mulX */
110 const int int_mulX;
111
112 /* integer multiply cost for each bit set past the most
113 significant 3, so the formula for multiply cost becomes:
114
115 if (rs1 < 0)
116 highest_bit = highest_clear_bit(rs1);
117 else
118 highest_bit = highest_set_bit(rs1);
119 if (highest_bit < 3)
120 highest_bit = 3;
121 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
122
123 A value of zero indicates that the multiply costs is fixed,
124 and not variable. */
125 const int int_mul_bit_factor;
126
127 /* udiv/sdiv */
128 const int int_div;
129
130 /* divX */
131 const int int_divX;
132
133 /* movcc, movr */
134 const int int_cmove;
135
136 /* penalty for shifts, due to scheduling rules etc. */
137 const int shift_penalty;
138 };
139
140 static const
141 struct processor_costs cypress_costs = {
142 COSTS_N_INSNS (2), /* int load */
143 COSTS_N_INSNS (2), /* int signed load */
144 COSTS_N_INSNS (2), /* int zeroed load */
145 COSTS_N_INSNS (2), /* float load */
146 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
147 COSTS_N_INSNS (5), /* fadd, fsub */
148 COSTS_N_INSNS (1), /* fcmp */
149 COSTS_N_INSNS (1), /* fmov, fmovr */
150 COSTS_N_INSNS (7), /* fmul */
151 COSTS_N_INSNS (37), /* fdivs */
152 COSTS_N_INSNS (37), /* fdivd */
153 COSTS_N_INSNS (63), /* fsqrts */
154 COSTS_N_INSNS (63), /* fsqrtd */
155 COSTS_N_INSNS (1), /* imul */
156 COSTS_N_INSNS (1), /* imulX */
157 0, /* imul bit factor */
158 COSTS_N_INSNS (1), /* idiv */
159 COSTS_N_INSNS (1), /* idivX */
160 COSTS_N_INSNS (1), /* movcc/movr */
161 0, /* shift penalty */
162 };
163
164 static const
165 struct processor_costs supersparc_costs = {
166 COSTS_N_INSNS (1), /* int load */
167 COSTS_N_INSNS (1), /* int signed load */
168 COSTS_N_INSNS (1), /* int zeroed load */
169 COSTS_N_INSNS (0), /* float load */
170 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
171 COSTS_N_INSNS (3), /* fadd, fsub */
172 COSTS_N_INSNS (3), /* fcmp */
173 COSTS_N_INSNS (1), /* fmov, fmovr */
174 COSTS_N_INSNS (3), /* fmul */
175 COSTS_N_INSNS (6), /* fdivs */
176 COSTS_N_INSNS (9), /* fdivd */
177 COSTS_N_INSNS (12), /* fsqrts */
178 COSTS_N_INSNS (12), /* fsqrtd */
179 COSTS_N_INSNS (4), /* imul */
180 COSTS_N_INSNS (4), /* imulX */
181 0, /* imul bit factor */
182 COSTS_N_INSNS (4), /* idiv */
183 COSTS_N_INSNS (4), /* idivX */
184 COSTS_N_INSNS (1), /* movcc/movr */
185 1, /* shift penalty */
186 };
187
188 static const
189 struct processor_costs hypersparc_costs = {
190 COSTS_N_INSNS (1), /* int load */
191 COSTS_N_INSNS (1), /* int signed load */
192 COSTS_N_INSNS (1), /* int zeroed load */
193 COSTS_N_INSNS (1), /* float load */
194 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
195 COSTS_N_INSNS (1), /* fadd, fsub */
196 COSTS_N_INSNS (1), /* fcmp */
197 COSTS_N_INSNS (1), /* fmov, fmovr */
198 COSTS_N_INSNS (1), /* fmul */
199 COSTS_N_INSNS (8), /* fdivs */
200 COSTS_N_INSNS (12), /* fdivd */
201 COSTS_N_INSNS (17), /* fsqrts */
202 COSTS_N_INSNS (17), /* fsqrtd */
203 COSTS_N_INSNS (17), /* imul */
204 COSTS_N_INSNS (17), /* imulX */
205 0, /* imul bit factor */
206 COSTS_N_INSNS (17), /* idiv */
207 COSTS_N_INSNS (17), /* idivX */
208 COSTS_N_INSNS (1), /* movcc/movr */
209 0, /* shift penalty */
210 };
211
212 static const
213 struct processor_costs leon_costs = {
214 COSTS_N_INSNS (1), /* int load */
215 COSTS_N_INSNS (1), /* int signed load */
216 COSTS_N_INSNS (1), /* int zeroed load */
217 COSTS_N_INSNS (1), /* float load */
218 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
219 COSTS_N_INSNS (1), /* fadd, fsub */
220 COSTS_N_INSNS (1), /* fcmp */
221 COSTS_N_INSNS (1), /* fmov, fmovr */
222 COSTS_N_INSNS (1), /* fmul */
223 COSTS_N_INSNS (15), /* fdivs */
224 COSTS_N_INSNS (15), /* fdivd */
225 COSTS_N_INSNS (23), /* fsqrts */
226 COSTS_N_INSNS (23), /* fsqrtd */
227 COSTS_N_INSNS (5), /* imul */
228 COSTS_N_INSNS (5), /* imulX */
229 0, /* imul bit factor */
230 COSTS_N_INSNS (5), /* idiv */
231 COSTS_N_INSNS (5), /* idivX */
232 COSTS_N_INSNS (1), /* movcc/movr */
233 0, /* shift penalty */
234 };
235
236 static const
237 struct processor_costs leon3_costs = {
238 COSTS_N_INSNS (1), /* int load */
239 COSTS_N_INSNS (1), /* int signed load */
240 COSTS_N_INSNS (1), /* int zeroed load */
241 COSTS_N_INSNS (1), /* float load */
242 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
243 COSTS_N_INSNS (1), /* fadd, fsub */
244 COSTS_N_INSNS (1), /* fcmp */
245 COSTS_N_INSNS (1), /* fmov, fmovr */
246 COSTS_N_INSNS (1), /* fmul */
247 COSTS_N_INSNS (14), /* fdivs */
248 COSTS_N_INSNS (15), /* fdivd */
249 COSTS_N_INSNS (22), /* fsqrts */
250 COSTS_N_INSNS (23), /* fsqrtd */
251 COSTS_N_INSNS (5), /* imul */
252 COSTS_N_INSNS (5), /* imulX */
253 0, /* imul bit factor */
254 COSTS_N_INSNS (35), /* idiv */
255 COSTS_N_INSNS (35), /* idivX */
256 COSTS_N_INSNS (1), /* movcc/movr */
257 0, /* shift penalty */
258 };
259
260 static const
261 struct processor_costs sparclet_costs = {
262 COSTS_N_INSNS (3), /* int load */
263 COSTS_N_INSNS (3), /* int signed load */
264 COSTS_N_INSNS (1), /* int zeroed load */
265 COSTS_N_INSNS (1), /* float load */
266 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
267 COSTS_N_INSNS (1), /* fadd, fsub */
268 COSTS_N_INSNS (1), /* fcmp */
269 COSTS_N_INSNS (1), /* fmov, fmovr */
270 COSTS_N_INSNS (1), /* fmul */
271 COSTS_N_INSNS (1), /* fdivs */
272 COSTS_N_INSNS (1), /* fdivd */
273 COSTS_N_INSNS (1), /* fsqrts */
274 COSTS_N_INSNS (1), /* fsqrtd */
275 COSTS_N_INSNS (5), /* imul */
276 COSTS_N_INSNS (5), /* imulX */
277 0, /* imul bit factor */
278 COSTS_N_INSNS (5), /* idiv */
279 COSTS_N_INSNS (5), /* idivX */
280 COSTS_N_INSNS (1), /* movcc/movr */
281 0, /* shift penalty */
282 };
283
284 static const
285 struct processor_costs ultrasparc_costs = {
286 COSTS_N_INSNS (2), /* int load */
287 COSTS_N_INSNS (3), /* int signed load */
288 COSTS_N_INSNS (2), /* int zeroed load */
289 COSTS_N_INSNS (2), /* float load */
290 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
291 COSTS_N_INSNS (4), /* fadd, fsub */
292 COSTS_N_INSNS (1), /* fcmp */
293 COSTS_N_INSNS (2), /* fmov, fmovr */
294 COSTS_N_INSNS (4), /* fmul */
295 COSTS_N_INSNS (13), /* fdivs */
296 COSTS_N_INSNS (23), /* fdivd */
297 COSTS_N_INSNS (13), /* fsqrts */
298 COSTS_N_INSNS (23), /* fsqrtd */
299 COSTS_N_INSNS (4), /* imul */
300 COSTS_N_INSNS (4), /* imulX */
301 2, /* imul bit factor */
302 COSTS_N_INSNS (37), /* idiv */
303 COSTS_N_INSNS (68), /* idivX */
304 COSTS_N_INSNS (2), /* movcc/movr */
305 2, /* shift penalty */
306 };
307
308 static const
309 struct processor_costs ultrasparc3_costs = {
310 COSTS_N_INSNS (2), /* int load */
311 COSTS_N_INSNS (3), /* int signed load */
312 COSTS_N_INSNS (3), /* int zeroed load */
313 COSTS_N_INSNS (2), /* float load */
314 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
315 COSTS_N_INSNS (4), /* fadd, fsub */
316 COSTS_N_INSNS (5), /* fcmp */
317 COSTS_N_INSNS (3), /* fmov, fmovr */
318 COSTS_N_INSNS (4), /* fmul */
319 COSTS_N_INSNS (17), /* fdivs */
320 COSTS_N_INSNS (20), /* fdivd */
321 COSTS_N_INSNS (20), /* fsqrts */
322 COSTS_N_INSNS (29), /* fsqrtd */
323 COSTS_N_INSNS (6), /* imul */
324 COSTS_N_INSNS (6), /* imulX */
325 0, /* imul bit factor */
326 COSTS_N_INSNS (40), /* idiv */
327 COSTS_N_INSNS (71), /* idivX */
328 COSTS_N_INSNS (2), /* movcc/movr */
329 0, /* shift penalty */
330 };
331
332 static const
333 struct processor_costs niagara_costs = {
334 COSTS_N_INSNS (3), /* int load */
335 COSTS_N_INSNS (3), /* int signed load */
336 COSTS_N_INSNS (3), /* int zeroed load */
337 COSTS_N_INSNS (9), /* float load */
338 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
339 COSTS_N_INSNS (8), /* fadd, fsub */
340 COSTS_N_INSNS (26), /* fcmp */
341 COSTS_N_INSNS (8), /* fmov, fmovr */
342 COSTS_N_INSNS (29), /* fmul */
343 COSTS_N_INSNS (54), /* fdivs */
344 COSTS_N_INSNS (83), /* fdivd */
345 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
346 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
347 COSTS_N_INSNS (11), /* imul */
348 COSTS_N_INSNS (11), /* imulX */
349 0, /* imul bit factor */
350 COSTS_N_INSNS (72), /* idiv */
351 COSTS_N_INSNS (72), /* idivX */
352 COSTS_N_INSNS (1), /* movcc/movr */
353 0, /* shift penalty */
354 };
355
356 static const
357 struct processor_costs niagara2_costs = {
358 COSTS_N_INSNS (3), /* int load */
359 COSTS_N_INSNS (3), /* int signed load */
360 COSTS_N_INSNS (3), /* int zeroed load */
361 COSTS_N_INSNS (3), /* float load */
362 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
363 COSTS_N_INSNS (6), /* fadd, fsub */
364 COSTS_N_INSNS (6), /* fcmp */
365 COSTS_N_INSNS (6), /* fmov, fmovr */
366 COSTS_N_INSNS (6), /* fmul */
367 COSTS_N_INSNS (19), /* fdivs */
368 COSTS_N_INSNS (33), /* fdivd */
369 COSTS_N_INSNS (19), /* fsqrts */
370 COSTS_N_INSNS (33), /* fsqrtd */
371 COSTS_N_INSNS (5), /* imul */
372 COSTS_N_INSNS (5), /* imulX */
373 0, /* imul bit factor */
374 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
376 COSTS_N_INSNS (1), /* movcc/movr */
377 0, /* shift penalty */
378 };
379
380 static const
381 struct processor_costs niagara3_costs = {
382 COSTS_N_INSNS (3), /* int load */
383 COSTS_N_INSNS (3), /* int signed load */
384 COSTS_N_INSNS (3), /* int zeroed load */
385 COSTS_N_INSNS (3), /* float load */
386 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
387 COSTS_N_INSNS (9), /* fadd, fsub */
388 COSTS_N_INSNS (9), /* fcmp */
389 COSTS_N_INSNS (9), /* fmov, fmovr */
390 COSTS_N_INSNS (9), /* fmul */
391 COSTS_N_INSNS (23), /* fdivs */
392 COSTS_N_INSNS (37), /* fdivd */
393 COSTS_N_INSNS (23), /* fsqrts */
394 COSTS_N_INSNS (37), /* fsqrtd */
395 COSTS_N_INSNS (9), /* imul */
396 COSTS_N_INSNS (9), /* imulX */
397 0, /* imul bit factor */
398 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
399 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
400 COSTS_N_INSNS (1), /* movcc/movr */
401 0, /* shift penalty */
402 };
403
404 static const
405 struct processor_costs niagara4_costs = {
406 COSTS_N_INSNS (5), /* int load */
407 COSTS_N_INSNS (5), /* int signed load */
408 COSTS_N_INSNS (5), /* int zeroed load */
409 COSTS_N_INSNS (5), /* float load */
410 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
411 COSTS_N_INSNS (11), /* fadd, fsub */
412 COSTS_N_INSNS (11), /* fcmp */
413 COSTS_N_INSNS (11), /* fmov, fmovr */
414 COSTS_N_INSNS (11), /* fmul */
415 COSTS_N_INSNS (24), /* fdivs */
416 COSTS_N_INSNS (37), /* fdivd */
417 COSTS_N_INSNS (24), /* fsqrts */
418 COSTS_N_INSNS (37), /* fsqrtd */
419 COSTS_N_INSNS (12), /* imul */
420 COSTS_N_INSNS (12), /* imulX */
421 0, /* imul bit factor */
422 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
423 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
424 COSTS_N_INSNS (1), /* movcc/movr */
425 0, /* shift penalty */
426 };
427
428 static const
429 struct processor_costs niagara7_costs = {
430 COSTS_N_INSNS (5), /* int load */
431 COSTS_N_INSNS (5), /* int signed load */
432 COSTS_N_INSNS (5), /* int zeroed load */
433 COSTS_N_INSNS (5), /* float load */
434 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
435 COSTS_N_INSNS (11), /* fadd, fsub */
436 COSTS_N_INSNS (11), /* fcmp */
437 COSTS_N_INSNS (11), /* fmov, fmovr */
438 COSTS_N_INSNS (11), /* fmul */
439 COSTS_N_INSNS (24), /* fdivs */
440 COSTS_N_INSNS (37), /* fdivd */
441 COSTS_N_INSNS (24), /* fsqrts */
442 COSTS_N_INSNS (37), /* fsqrtd */
443 COSTS_N_INSNS (12), /* imul */
444 COSTS_N_INSNS (12), /* imulX */
445 0, /* imul bit factor */
446 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
447 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
448 COSTS_N_INSNS (1), /* movcc/movr */
449 0, /* shift penalty */
450 };
451
452 static const
453 struct processor_costs m8_costs = {
454 COSTS_N_INSNS (3), /* int load */
455 COSTS_N_INSNS (3), /* int signed load */
456 COSTS_N_INSNS (3), /* int zeroed load */
457 COSTS_N_INSNS (3), /* float load */
458 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
459 COSTS_N_INSNS (9), /* fadd, fsub */
460 COSTS_N_INSNS (9), /* fcmp */
461 COSTS_N_INSNS (9), /* fmov, fmovr */
462 COSTS_N_INSNS (9), /* fmul */
463 COSTS_N_INSNS (26), /* fdivs */
464 COSTS_N_INSNS (30), /* fdivd */
465 COSTS_N_INSNS (33), /* fsqrts */
466 COSTS_N_INSNS (41), /* fsqrtd */
467 COSTS_N_INSNS (12), /* imul */
468 COSTS_N_INSNS (10), /* imulX */
469 0, /* imul bit factor */
470 COSTS_N_INSNS (57), /* udiv/sdiv */
471 COSTS_N_INSNS (30), /* udivx/sdivx */
472 COSTS_N_INSNS (1), /* movcc/movr */
473 0, /* shift penalty */
474 };
475
476 static const struct processor_costs *sparc_costs = &cypress_costs;
477
478 #ifdef HAVE_AS_RELAX_OPTION
479 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
480 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
481 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
482 somebody does not branch between the sethi and jmp. */
483 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
484 #else
485 #define LEAF_SIBCALL_SLOT_RESERVED_P \
486 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
487 #endif
488
489 /* Vector to say how input registers are mapped to output registers.
490 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
491 eliminate it. You must use -fomit-frame-pointer to get that. */
492 char leaf_reg_remap[] =
493 { 0, 1, 2, 3, 4, 5, 6, 7,
494 -1, -1, -1, -1, -1, -1, 14, -1,
495 -1, -1, -1, -1, -1, -1, -1, -1,
496 8, 9, 10, 11, 12, 13, -1, 15,
497
498 32, 33, 34, 35, 36, 37, 38, 39,
499 40, 41, 42, 43, 44, 45, 46, 47,
500 48, 49, 50, 51, 52, 53, 54, 55,
501 56, 57, 58, 59, 60, 61, 62, 63,
502 64, 65, 66, 67, 68, 69, 70, 71,
503 72, 73, 74, 75, 76, 77, 78, 79,
504 80, 81, 82, 83, 84, 85, 86, 87,
505 88, 89, 90, 91, 92, 93, 94, 95,
506 96, 97, 98, 99, 100, 101, 102};
507
508 /* Vector, indexed by hard register number, which contains 1
509 for a register that is allowable in a candidate for leaf
510 function treatment. */
511 char sparc_leaf_regs[] =
512 { 1, 1, 1, 1, 1, 1, 1, 1,
513 0, 0, 0, 0, 0, 0, 1, 0,
514 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 1, 1, 1, 1, 1, 0, 1,
516 1, 1, 1, 1, 1, 1, 1, 1,
517 1, 1, 1, 1, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1};
525
526 struct GTY(()) machine_function
527 {
528 /* Size of the frame of the function. */
529 HOST_WIDE_INT frame_size;
530
531 /* Size of the frame of the function minus the register window save area
532 and the outgoing argument area. */
533 HOST_WIDE_INT apparent_frame_size;
534
535 /* Register we pretend the frame pointer is allocated to. Normally, this
536 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
537 record "offset" separately as it may be too big for (reg + disp). */
538 rtx frame_base_reg;
539 HOST_WIDE_INT frame_base_offset;
540
541 /* Number of global or FP registers to be saved (as 4-byte quantities). */
542 int n_global_fp_regs;
543
544 /* True if the current function is leaf and uses only leaf regs,
545 so that the SPARC leaf function optimization can be applied.
546 Private version of crtl->uses_only_leaf_regs, see
547 sparc_expand_prologue for the rationale. */
548 int leaf_function_p;
549
550 /* True if the prologue saves local or in registers. */
551 bool save_local_in_regs_p;
552
553 /* True if the data calculated by sparc_expand_prologue are valid. */
554 bool prologue_data_valid_p;
555 };
556
557 #define sparc_frame_size cfun->machine->frame_size
558 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
559 #define sparc_frame_base_reg cfun->machine->frame_base_reg
560 #define sparc_frame_base_offset cfun->machine->frame_base_offset
561 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
562 #define sparc_leaf_function_p cfun->machine->leaf_function_p
563 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
564 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
565
566 /* 1 if the next opcode is to be specially indented. */
567 int sparc_indent_opcode = 0;
568
569 static void sparc_option_override (void);
570 static void sparc_init_modes (void);
571 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
572 const_tree, bool, bool, int *, int *);
573
574 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
575 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
576
577 static void sparc_emit_set_const32 (rtx, rtx);
578 static void sparc_emit_set_const64 (rtx, rtx);
579 static void sparc_output_addr_vec (rtx);
580 static void sparc_output_addr_diff_vec (rtx);
581 static void sparc_output_deferred_case_vectors (void);
582 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
583 static bool sparc_legitimate_constant_p (machine_mode, rtx);
584 static rtx sparc_builtin_saveregs (void);
585 static int epilogue_renumber (rtx *, int);
586 static bool sparc_assemble_integer (rtx, unsigned int, int);
587 static int set_extends (rtx_insn *);
588 static void sparc_asm_function_prologue (FILE *);
589 static void sparc_asm_function_epilogue (FILE *);
590 #ifdef TARGET_SOLARIS
591 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
592 tree) ATTRIBUTE_UNUSED;
593 #endif
594 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
595 static int sparc_issue_rate (void);
596 static void sparc_sched_init (FILE *, int, int);
597 static int sparc_use_sched_lookahead (void);
598
599 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
600 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
601 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
602 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
603 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
604
605 static bool sparc_function_ok_for_sibcall (tree, tree);
606 static void sparc_init_libfuncs (void);
607 static void sparc_init_builtins (void);
608 static void sparc_fpu_init_builtins (void);
609 static void sparc_vis_init_builtins (void);
610 static tree sparc_builtin_decl (unsigned, bool);
611 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
612 static tree sparc_fold_builtin (tree, int, tree *, bool);
613 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
614 HOST_WIDE_INT, tree);
615 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
616 HOST_WIDE_INT, const_tree);
617 static struct machine_function * sparc_init_machine_status (void);
618 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
619 static rtx sparc_tls_get_addr (void);
620 static rtx sparc_tls_got (void);
621 static int sparc_register_move_cost (machine_mode,
622 reg_class_t, reg_class_t);
623 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
624 static rtx sparc_function_value (const_tree, const_tree, bool);
625 static rtx sparc_libcall_value (machine_mode, const_rtx);
626 static bool sparc_function_value_regno_p (const unsigned int);
627 static rtx sparc_struct_value_rtx (tree, int);
628 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
629 int *, const_tree, int);
630 static bool sparc_return_in_memory (const_tree, const_tree);
631 static bool sparc_strict_argument_naming (cumulative_args_t);
632 static void sparc_va_start (tree, rtx);
633 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
634 static bool sparc_vector_mode_supported_p (machine_mode);
635 static bool sparc_tls_referenced_p (rtx);
636 static rtx sparc_legitimize_tls_address (rtx);
637 static rtx sparc_legitimize_pic_address (rtx, rtx);
638 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
639 static rtx sparc_delegitimize_address (rtx);
640 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
641 static bool sparc_pass_by_reference (cumulative_args_t,
642 machine_mode, const_tree, bool);
643 static void sparc_function_arg_advance (cumulative_args_t,
644 machine_mode, const_tree, bool);
645 static rtx sparc_function_arg_1 (cumulative_args_t,
646 machine_mode, const_tree, bool, bool);
647 static rtx sparc_function_arg (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_incoming_arg (cumulative_args_t,
650 machine_mode, const_tree, bool);
651 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
652 static unsigned int sparc_function_arg_boundary (machine_mode,
653 const_tree);
654 static int sparc_arg_partial_bytes (cumulative_args_t,
655 machine_mode, tree, bool);
656 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
657 static void sparc_file_end (void);
658 static bool sparc_frame_pointer_required (void);
659 static bool sparc_can_eliminate (const int, const int);
660 static rtx sparc_builtin_setjmp_frame_value (void);
661 static void sparc_conditional_register_usage (void);
662 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
663 static const char *sparc_mangle_type (const_tree);
664 #endif
665 static void sparc_trampoline_init (rtx, tree, rtx);
666 static machine_mode sparc_preferred_simd_mode (scalar_mode);
667 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
668 static bool sparc_lra_p (void);
669 static bool sparc_print_operand_punct_valid_p (unsigned char);
670 static void sparc_print_operand (FILE *, rtx, int);
671 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
672 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
673 machine_mode,
674 secondary_reload_info *);
675 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
676 reg_class_t);
677 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
678 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
679 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
680 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
681 static unsigned int sparc_min_arithmetic_precision (void);
682 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
683 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
684 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
685 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
686 reg_class_t);
687 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
688 \f
689 #ifdef SUBTARGET_ATTRIBUTE_TABLE
690 /* Table of valid machine attributes. */
691 static const struct attribute_spec sparc_attribute_table[] =
692 {
693 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
694 do_diagnostic } */
695 SUBTARGET_ATTRIBUTE_TABLE,
696 { NULL, 0, 0, false, false, false, NULL, false }
697 };
698 #endif
699 \f
700 /* Option handling. */
701
702 /* Parsed value. */
703 enum cmodel sparc_cmodel;
704
705 char sparc_hard_reg_printed[8];
706
707 /* Initialize the GCC target structure. */
708
709 /* The default is to use .half rather than .short for aligned HI objects. */
710 #undef TARGET_ASM_ALIGNED_HI_OP
711 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
712
713 #undef TARGET_ASM_UNALIGNED_HI_OP
714 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
715 #undef TARGET_ASM_UNALIGNED_SI_OP
716 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
717 #undef TARGET_ASM_UNALIGNED_DI_OP
718 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
719
720 /* The target hook has to handle DI-mode values. */
721 #undef TARGET_ASM_INTEGER
722 #define TARGET_ASM_INTEGER sparc_assemble_integer
723
724 #undef TARGET_ASM_FUNCTION_PROLOGUE
725 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
726 #undef TARGET_ASM_FUNCTION_EPILOGUE
727 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
728
729 #undef TARGET_SCHED_ADJUST_COST
730 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
731 #undef TARGET_SCHED_ISSUE_RATE
732 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
733 #undef TARGET_SCHED_INIT
734 #define TARGET_SCHED_INIT sparc_sched_init
735 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
736 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
737
738 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
739 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
740
741 #undef TARGET_INIT_LIBFUNCS
742 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
743
744 #undef TARGET_LEGITIMIZE_ADDRESS
745 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
746 #undef TARGET_DELEGITIMIZE_ADDRESS
747 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
748 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
749 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
750
751 #undef TARGET_INIT_BUILTINS
752 #define TARGET_INIT_BUILTINS sparc_init_builtins
753 #undef TARGET_BUILTIN_DECL
754 #define TARGET_BUILTIN_DECL sparc_builtin_decl
755 #undef TARGET_EXPAND_BUILTIN
756 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
757 #undef TARGET_FOLD_BUILTIN
758 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
759
760 #if TARGET_TLS
761 #undef TARGET_HAVE_TLS
762 #define TARGET_HAVE_TLS true
763 #endif
764
765 #undef TARGET_CANNOT_FORCE_CONST_MEM
766 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
767
768 #undef TARGET_ASM_OUTPUT_MI_THUNK
769 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
770 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
771 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
772
773 #undef TARGET_RTX_COSTS
774 #define TARGET_RTX_COSTS sparc_rtx_costs
775 #undef TARGET_ADDRESS_COST
776 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
777 #undef TARGET_REGISTER_MOVE_COST
778 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
779
780 #undef TARGET_PROMOTE_FUNCTION_MODE
781 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
782
783 #undef TARGET_FUNCTION_VALUE
784 #define TARGET_FUNCTION_VALUE sparc_function_value
785 #undef TARGET_LIBCALL_VALUE
786 #define TARGET_LIBCALL_VALUE sparc_libcall_value
787 #undef TARGET_FUNCTION_VALUE_REGNO_P
788 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
789
790 #undef TARGET_STRUCT_VALUE_RTX
791 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
792 #undef TARGET_RETURN_IN_MEMORY
793 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
794 #undef TARGET_MUST_PASS_IN_STACK
795 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
796 #undef TARGET_PASS_BY_REFERENCE
797 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
798 #undef TARGET_ARG_PARTIAL_BYTES
799 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
800 #undef TARGET_FUNCTION_ARG_ADVANCE
801 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
802 #undef TARGET_FUNCTION_ARG
803 #define TARGET_FUNCTION_ARG sparc_function_arg
804 #undef TARGET_FUNCTION_INCOMING_ARG
805 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
806 #undef TARGET_FUNCTION_ARG_PADDING
807 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
808 #undef TARGET_FUNCTION_ARG_BOUNDARY
809 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
810
811 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
812 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
813 #undef TARGET_STRICT_ARGUMENT_NAMING
814 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
815
816 #undef TARGET_EXPAND_BUILTIN_VA_START
817 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
818 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
819 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
820
821 #undef TARGET_VECTOR_MODE_SUPPORTED_P
822 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
823
824 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
825 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
826
827 #ifdef SUBTARGET_INSERT_ATTRIBUTES
828 #undef TARGET_INSERT_ATTRIBUTES
829 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
830 #endif
831
832 #ifdef SUBTARGET_ATTRIBUTE_TABLE
833 #undef TARGET_ATTRIBUTE_TABLE
834 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
835 #endif
836
837 #undef TARGET_OPTION_OVERRIDE
838 #define TARGET_OPTION_OVERRIDE sparc_option_override
839
840 #ifdef TARGET_THREAD_SSP_OFFSET
841 #undef TARGET_STACK_PROTECT_GUARD
842 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
843 #endif
844
845 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
846 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
847 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
848 #endif
849
850 #undef TARGET_ASM_FILE_END
851 #define TARGET_ASM_FILE_END sparc_file_end
852
853 #undef TARGET_FRAME_POINTER_REQUIRED
854 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
855
856 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
857 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
858
859 #undef TARGET_CAN_ELIMINATE
860 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
861
862 #undef TARGET_PREFERRED_RELOAD_CLASS
863 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
864
865 #undef TARGET_SECONDARY_RELOAD
866 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
867 #undef TARGET_SECONDARY_MEMORY_NEEDED
868 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
869 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
870 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
871
872 #undef TARGET_CONDITIONAL_REGISTER_USAGE
873 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
874
875 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
876 #undef TARGET_MANGLE_TYPE
877 #define TARGET_MANGLE_TYPE sparc_mangle_type
878 #endif
879
880 #undef TARGET_LRA_P
881 #define TARGET_LRA_P sparc_lra_p
882
883 #undef TARGET_LEGITIMATE_ADDRESS_P
884 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
885
886 #undef TARGET_LEGITIMATE_CONSTANT_P
887 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
888
889 #undef TARGET_TRAMPOLINE_INIT
890 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
891
892 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
893 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
894 #undef TARGET_PRINT_OPERAND
895 #define TARGET_PRINT_OPERAND sparc_print_operand
896 #undef TARGET_PRINT_OPERAND_ADDRESS
897 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
898
899 /* The value stored by LDSTUB. */
900 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
901 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
902
903 #undef TARGET_CSTORE_MODE
904 #define TARGET_CSTORE_MODE sparc_cstore_mode
905
906 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
907 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
908
909 #undef TARGET_FIXED_CONDITION_CODE_REGS
910 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
911
912 #undef TARGET_MIN_ARITHMETIC_PRECISION
913 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
914
915 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
916 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
917
918 #undef TARGET_HARD_REGNO_NREGS
919 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
920 #undef TARGET_HARD_REGNO_MODE_OK
921 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
922
923 #undef TARGET_MODES_TIEABLE_P
924 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
925
926 #undef TARGET_CAN_CHANGE_MODE_CLASS
927 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
928
929 #undef TARGET_CONSTANT_ALIGNMENT
930 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
931
932 struct gcc_target targetm = TARGET_INITIALIZER;
933
934 /* Return the memory reference contained in X if any, zero otherwise. */
935
936 static rtx
937 mem_ref (rtx x)
938 {
939 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
940 x = XEXP (x, 0);
941
942 if (MEM_P (x))
943 return x;
944
945 return NULL_RTX;
946 }
947
948 /* We use a machine specific pass to enable workarounds for errata.
949
950 We need to have the (essentially) final form of the insn stream in order
951 to properly detect the various hazards. Therefore, this machine specific
952 pass runs as late as possible. */
953
954 /* True if INSN is a md pattern or asm statement. */
955 #define USEFUL_INSN_P(INSN) \
956 (NONDEBUG_INSN_P (INSN) \
957 && GET_CODE (PATTERN (INSN)) != USE \
958 && GET_CODE (PATTERN (INSN)) != CLOBBER)
959
960 static unsigned int
961 sparc_do_work_around_errata (void)
962 {
963 rtx_insn *insn, *next;
964
965 /* Force all instructions to be split into their final form. */
966 split_all_insns_noflow ();
967
968 /* Now look for specific patterns in the insn stream. */
969 for (insn = get_insns (); insn; insn = next)
970 {
971 bool insert_nop = false;
972 rtx set;
973
974 /* Look into the instruction in a delay slot. */
975 if (NONJUMP_INSN_P (insn))
976 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
977 insn = seq->insn (1);
978
979 /* Look for either of these two sequences:
980
981 Sequence A:
982 1. store of word size or less (e.g. st / stb / sth / stf)
983 2. any single instruction that is not a load or store
984 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
985
986 Sequence B:
987 1. store of double word size (e.g. std / stdf)
988 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
989 if (sparc_fix_b2bst
990 && NONJUMP_INSN_P (insn)
991 && (set = single_set (insn)) != NULL_RTX
992 && MEM_P (SET_DEST (set)))
993 {
994 /* Sequence B begins with a double-word store. */
995 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
996 rtx_insn *after;
997 int i;
998
999 next = next_active_insn (insn);
1000 if (!next)
1001 break;
1002
1003 for (after = next, i = 0; i < 2; i++)
1004 {
1005 /* Skip empty assembly statements. */
1006 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1007 || (USEFUL_INSN_P (after)
1008 && (asm_noperands (PATTERN (after))>=0)
1009 && !strcmp (decode_asm_operands (PATTERN (after),
1010 NULL, NULL, NULL,
1011 NULL, NULL), "")))
1012 after = next_active_insn (after);
1013 if (!after)
1014 break;
1015
1016 /* If the insn is a branch, then it cannot be problematic. */
1017 if (!NONJUMP_INSN_P (after)
1018 || GET_CODE (PATTERN (after)) == SEQUENCE)
1019 break;
1020
1021 /* Sequence B is only two instructions long. */
1022 if (seq_b)
1023 {
1024 /* Add NOP if followed by a store. */
1025 if ((set = single_set (after)) != NULL_RTX
1026 && MEM_P (SET_DEST (set)))
1027 insert_nop = true;
1028
1029 /* Otherwise it is ok. */
1030 break;
1031 }
1032
1033 /* If the second instruction is a load or a store,
1034 then the sequence cannot be problematic. */
1035 if (i == 0)
1036 {
1037 if (((set = single_set (after)) != NULL_RTX)
1038 && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
1039 break;
1040
1041 after = next_active_insn (after);
1042 if (!after)
1043 break;
1044 }
1045
1046 /* Add NOP if third instruction is a store. */
1047 if (i == 1
1048 && ((set = single_set (after)) != NULL_RTX)
1049 && MEM_P (SET_DEST (set)))
1050 insert_nop = true;
1051 }
1052 }
1053 else
1054 /* Look for a single-word load into an odd-numbered FP register. */
1055 if (sparc_fix_at697f
1056 && NONJUMP_INSN_P (insn)
1057 && (set = single_set (insn)) != NULL_RTX
1058 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1059 && MEM_P (SET_SRC (set))
1060 && REG_P (SET_DEST (set))
1061 && REGNO (SET_DEST (set)) > 31
1062 && REGNO (SET_DEST (set)) % 2 != 0)
1063 {
1064 /* The wrong dependency is on the enclosing double register. */
1065 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1066 unsigned int src1, src2, dest;
1067 int code;
1068
1069 next = next_active_insn (insn);
1070 if (!next)
1071 break;
1072 /* If the insn is a branch, then it cannot be problematic. */
1073 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1074 continue;
1075
1076 extract_insn (next);
1077 code = INSN_CODE (next);
1078
1079 switch (code)
1080 {
1081 case CODE_FOR_adddf3:
1082 case CODE_FOR_subdf3:
1083 case CODE_FOR_muldf3:
1084 case CODE_FOR_divdf3:
1085 dest = REGNO (recog_data.operand[0]);
1086 src1 = REGNO (recog_data.operand[1]);
1087 src2 = REGNO (recog_data.operand[2]);
1088 if (src1 != src2)
1089 {
1090 /* Case [1-4]:
1091 ld [address], %fx+1
1092 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1093 if ((src1 == x || src2 == x)
1094 && (dest == src1 || dest == src2))
1095 insert_nop = true;
1096 }
1097 else
1098 {
1099 /* Case 5:
1100 ld [address], %fx+1
1101 FPOPd %fx, %fx, %fx */
1102 if (src1 == x
1103 && dest == src1
1104 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1105 insert_nop = true;
1106 }
1107 break;
1108
1109 case CODE_FOR_sqrtdf2:
1110 dest = REGNO (recog_data.operand[0]);
1111 src1 = REGNO (recog_data.operand[1]);
1112 /* Case 6:
1113 ld [address], %fx+1
1114 fsqrtd %fx, %fx */
1115 if (src1 == x && dest == src1)
1116 insert_nop = true;
1117 break;
1118
1119 default:
1120 break;
1121 }
1122 }
1123
1124 /* Look for a single-word load into an integer register. */
1125 else if (sparc_fix_ut699
1126 && NONJUMP_INSN_P (insn)
1127 && (set = single_set (insn)) != NULL_RTX
1128 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1129 && mem_ref (SET_SRC (set)) != NULL_RTX
1130 && REG_P (SET_DEST (set))
1131 && REGNO (SET_DEST (set)) < 32)
1132 {
1133 /* There is no problem if the second memory access has a data
1134 dependency on the first single-cycle load. */
1135 rtx x = SET_DEST (set);
1136
1137 next = next_active_insn (insn);
1138 if (!next)
1139 break;
1140 /* If the insn is a branch, then it cannot be problematic. */
1141 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1142 continue;
1143
1144 /* Look for a second memory access to/from an integer register. */
1145 if ((set = single_set (next)) != NULL_RTX)
1146 {
1147 rtx src = SET_SRC (set);
1148 rtx dest = SET_DEST (set);
1149 rtx mem;
1150
1151 /* LDD is affected. */
1152 if ((mem = mem_ref (src)) != NULL_RTX
1153 && REG_P (dest)
1154 && REGNO (dest) < 32
1155 && !reg_mentioned_p (x, XEXP (mem, 0)))
1156 insert_nop = true;
1157
1158 /* STD is *not* affected. */
1159 else if (MEM_P (dest)
1160 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1161 && (src == CONST0_RTX (GET_MODE (dest))
1162 || (REG_P (src)
1163 && REGNO (src) < 32
1164 && REGNO (src) != REGNO (x)))
1165 && !reg_mentioned_p (x, XEXP (dest, 0)))
1166 insert_nop = true;
1167 }
1168 }
1169
1170 /* Look for a single-word load/operation into an FP register. */
1171 else if (sparc_fix_ut699
1172 && NONJUMP_INSN_P (insn)
1173 && (set = single_set (insn)) != NULL_RTX
1174 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1175 && REG_P (SET_DEST (set))
1176 && REGNO (SET_DEST (set)) > 31)
1177 {
1178 /* Number of instructions in the problematic window. */
1179 const int n_insns = 4;
1180 /* The problematic combination is with the sibling FP register. */
1181 const unsigned int x = REGNO (SET_DEST (set));
1182 const unsigned int y = x ^ 1;
1183 rtx_insn *after;
1184 int i;
1185
1186 next = next_active_insn (insn);
1187 if (!next)
1188 break;
1189 /* If the insn is a branch, then it cannot be problematic. */
1190 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1191 continue;
1192
1193 /* Look for a second load/operation into the sibling FP register. */
1194 if (!((set = single_set (next)) != NULL_RTX
1195 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1196 && REG_P (SET_DEST (set))
1197 && REGNO (SET_DEST (set)) == y))
1198 continue;
1199
1200 /* Look for a (possible) store from the FP register in the next N
1201 instructions, but bail out if it is again modified or if there
1202 is a store from the sibling FP register before this store. */
1203 for (after = next, i = 0; i < n_insns; i++)
1204 {
1205 bool branch_p;
1206
1207 after = next_active_insn (after);
1208 if (!after)
1209 break;
1210
1211 /* This is a branch with an empty delay slot. */
1212 if (!NONJUMP_INSN_P (after))
1213 {
1214 if (++i == n_insns)
1215 break;
1216 branch_p = true;
1217 after = NULL;
1218 }
1219 /* This is a branch with a filled delay slot. */
1220 else if (rtx_sequence *seq =
1221 dyn_cast <rtx_sequence *> (PATTERN (after)))
1222 {
1223 if (++i == n_insns)
1224 break;
1225 branch_p = true;
1226 after = seq->insn (1);
1227 }
1228 /* This is a regular instruction. */
1229 else
1230 branch_p = false;
1231
1232 if (after && (set = single_set (after)) != NULL_RTX)
1233 {
1234 const rtx src = SET_SRC (set);
1235 const rtx dest = SET_DEST (set);
1236 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1237
1238 /* If the FP register is again modified before the store,
1239 then the store isn't affected. */
1240 if (REG_P (dest)
1241 && (REGNO (dest) == x
1242 || (REGNO (dest) == y && size == 8)))
1243 break;
1244
1245 if (MEM_P (dest) && REG_P (src))
1246 {
1247 /* If there is a store from the sibling FP register
1248 before the store, then the store is not affected. */
1249 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1250 break;
1251
1252 /* Otherwise, the store is affected. */
1253 if (REGNO (src) == x && size == 4)
1254 {
1255 insert_nop = true;
1256 break;
1257 }
1258 }
1259 }
1260
1261 /* If we have a branch in the first M instructions, then we
1262 cannot see the (M+2)th instruction so we play safe. */
1263 if (branch_p && i <= (n_insns - 2))
1264 {
1265 insert_nop = true;
1266 break;
1267 }
1268 }
1269 }
1270
1271 else
1272 next = NEXT_INSN (insn);
1273
1274 if (insert_nop)
1275 emit_insn_before (gen_nop (), next);
1276 }
1277
1278 return 0;
1279 }
1280
1281 namespace {
1282
1283 const pass_data pass_data_work_around_errata =
1284 {
1285 RTL_PASS, /* type */
1286 "errata", /* name */
1287 OPTGROUP_NONE, /* optinfo_flags */
1288 TV_MACH_DEP, /* tv_id */
1289 0, /* properties_required */
1290 0, /* properties_provided */
1291 0, /* properties_destroyed */
1292 0, /* todo_flags_start */
1293 0, /* todo_flags_finish */
1294 };
1295
1296 class pass_work_around_errata : public rtl_opt_pass
1297 {
1298 public:
1299 pass_work_around_errata(gcc::context *ctxt)
1300 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1301 {}
1302
1303 /* opt_pass methods: */
1304 virtual bool gate (function *)
1305 {
1306 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
1307 }
1308
1309 virtual unsigned int execute (function *)
1310 {
1311 return sparc_do_work_around_errata ();
1312 }
1313
1314 }; // class pass_work_around_errata
1315
1316 } // anon namespace
1317
1318 rtl_opt_pass *
1319 make_pass_work_around_errata (gcc::context *ctxt)
1320 {
1321 return new pass_work_around_errata (ctxt);
1322 }
1323
1324 /* Helpers for TARGET_DEBUG_OPTIONS. */
1325 static void
1326 dump_target_flag_bits (const int flags)
1327 {
1328 if (flags & MASK_64BIT)
1329 fprintf (stderr, "64BIT ");
1330 if (flags & MASK_APP_REGS)
1331 fprintf (stderr, "APP_REGS ");
1332 if (flags & MASK_FASTER_STRUCTS)
1333 fprintf (stderr, "FASTER_STRUCTS ");
1334 if (flags & MASK_FLAT)
1335 fprintf (stderr, "FLAT ");
1336 if (flags & MASK_FMAF)
1337 fprintf (stderr, "FMAF ");
1338 if (flags & MASK_FSMULD)
1339 fprintf (stderr, "FSMULD ");
1340 if (flags & MASK_FPU)
1341 fprintf (stderr, "FPU ");
1342 if (flags & MASK_HARD_QUAD)
1343 fprintf (stderr, "HARD_QUAD ");
1344 if (flags & MASK_POPC)
1345 fprintf (stderr, "POPC ");
1346 if (flags & MASK_PTR64)
1347 fprintf (stderr, "PTR64 ");
1348 if (flags & MASK_STACK_BIAS)
1349 fprintf (stderr, "STACK_BIAS ");
1350 if (flags & MASK_UNALIGNED_DOUBLES)
1351 fprintf (stderr, "UNALIGNED_DOUBLES ");
1352 if (flags & MASK_V8PLUS)
1353 fprintf (stderr, "V8PLUS ");
1354 if (flags & MASK_VIS)
1355 fprintf (stderr, "VIS ");
1356 if (flags & MASK_VIS2)
1357 fprintf (stderr, "VIS2 ");
1358 if (flags & MASK_VIS3)
1359 fprintf (stderr, "VIS3 ");
1360 if (flags & MASK_VIS4)
1361 fprintf (stderr, "VIS4 ");
1362 if (flags & MASK_VIS4B)
1363 fprintf (stderr, "VIS4B ");
1364 if (flags & MASK_CBCOND)
1365 fprintf (stderr, "CBCOND ");
1366 if (flags & MASK_DEPRECATED_V8_INSNS)
1367 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1368 if (flags & MASK_SPARCLET)
1369 fprintf (stderr, "SPARCLET ");
1370 if (flags & MASK_SPARCLITE)
1371 fprintf (stderr, "SPARCLITE ");
1372 if (flags & MASK_V8)
1373 fprintf (stderr, "V8 ");
1374 if (flags & MASK_V9)
1375 fprintf (stderr, "V9 ");
1376 }
1377
1378 static void
1379 dump_target_flags (const char *prefix, const int flags)
1380 {
1381 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1382 dump_target_flag_bits (flags);
1383 fprintf(stderr, "]\n");
1384 }
1385
1386 /* Validate and override various options, and do some machine dependent
1387 initialization. */
1388
1389 static void
1390 sparc_option_override (void)
1391 {
1392 static struct code_model {
1393 const char *const name;
1394 const enum cmodel value;
1395 } const cmodels[] = {
1396 { "32", CM_32 },
1397 { "medlow", CM_MEDLOW },
1398 { "medmid", CM_MEDMID },
1399 { "medany", CM_MEDANY },
1400 { "embmedany", CM_EMBMEDANY },
1401 { NULL, (enum cmodel) 0 }
1402 };
1403 const struct code_model *cmodel;
1404 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1405 static struct cpu_default {
1406 const int cpu;
1407 const enum processor_type processor;
1408 } const cpu_default[] = {
1409 /* There must be one entry here for each TARGET_CPU value. */
1410 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1411 { TARGET_CPU_v8, PROCESSOR_V8 },
1412 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1413 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1414 { TARGET_CPU_leon, PROCESSOR_LEON },
1415 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1416 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1417 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1418 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1419 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1420 { TARGET_CPU_v9, PROCESSOR_V9 },
1421 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1422 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1423 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1424 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1425 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1426 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1427 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1428 { TARGET_CPU_m8, PROCESSOR_M8 },
1429 { -1, PROCESSOR_V7 }
1430 };
1431 const struct cpu_default *def;
1432 /* Table of values for -m{cpu,tune}=. This must match the order of
1433 the enum processor_type in sparc-opts.h. */
1434 static struct cpu_table {
1435 const char *const name;
1436 const int disable;
1437 const int enable;
1438 } const cpu_table[] = {
1439 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1440 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1441 { "v8", MASK_ISA, MASK_V8 },
1442 /* TI TMS390Z55 supersparc */
1443 { "supersparc", MASK_ISA, MASK_V8 },
1444 { "hypersparc", MASK_ISA, MASK_V8 },
1445 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1446 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1447 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1448 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1449 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1450 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1451 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1452 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1453 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1454 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1455 /* TEMIC sparclet */
1456 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1457 { "v9", MASK_ISA, MASK_V9 },
1458 /* UltraSPARC I, II, IIi */
1459 { "ultrasparc", MASK_ISA,
1460 /* Although insns using %y are deprecated, it is a clear win. */
1461 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1462 /* UltraSPARC III */
1463 /* ??? Check if %y issue still holds true. */
1464 { "ultrasparc3", MASK_ISA,
1465 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1466 /* UltraSPARC T1 */
1467 { "niagara", MASK_ISA,
1468 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1469 /* UltraSPARC T2 */
1470 { "niagara2", MASK_ISA,
1471 MASK_V9|MASK_POPC|MASK_VIS2 },
1472 /* UltraSPARC T3 */
1473 { "niagara3", MASK_ISA,
1474 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1475 /* UltraSPARC T4 */
1476 { "niagara4", MASK_ISA,
1477 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1478 /* UltraSPARC M7 */
1479 { "niagara7", MASK_ISA,
1480 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1481 /* UltraSPARC M8 */
1482 { "m8", MASK_ISA,
1483 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1484 };
1485 const struct cpu_table *cpu;
1486 unsigned int i;
1487
1488 if (sparc_debug_string != NULL)
1489 {
1490 const char *q;
1491 char *p;
1492
1493 p = ASTRDUP (sparc_debug_string);
1494 while ((q = strtok (p, ",")) != NULL)
1495 {
1496 bool invert;
1497 int mask;
1498
1499 p = NULL;
1500 if (*q == '!')
1501 {
1502 invert = true;
1503 q++;
1504 }
1505 else
1506 invert = false;
1507
1508 if (! strcmp (q, "all"))
1509 mask = MASK_DEBUG_ALL;
1510 else if (! strcmp (q, "options"))
1511 mask = MASK_DEBUG_OPTIONS;
1512 else
1513 error ("unknown -mdebug-%s switch", q);
1514
1515 if (invert)
1516 sparc_debug &= ~mask;
1517 else
1518 sparc_debug |= mask;
1519 }
1520 }
1521
1522 /* Enable the FsMULd instruction by default if not explicitly specified by
1523 the user. It may be later disabled by the CPU (explicitly or not). */
1524 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1525 target_flags |= MASK_FSMULD;
1526
1527 if (TARGET_DEBUG_OPTIONS)
1528 {
1529 dump_target_flags("Initial target_flags", target_flags);
1530 dump_target_flags("target_flags_explicit", target_flags_explicit);
1531 }
1532
1533 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1534 SUBTARGET_OVERRIDE_OPTIONS;
1535 #endif
1536
1537 #ifndef SPARC_BI_ARCH
1538 /* Check for unsupported architecture size. */
1539 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1540 error ("%s is not supported by this configuration",
1541 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1542 #endif
1543
1544 /* We force all 64bit archs to use 128 bit long double */
1545 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1546 {
1547 error ("-mlong-double-64 not allowed with -m64");
1548 target_flags |= MASK_LONG_DOUBLE_128;
1549 }
1550
1551 /* Code model selection. */
1552 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1553
1554 #ifdef SPARC_BI_ARCH
1555 if (TARGET_ARCH32)
1556 sparc_cmodel = CM_32;
1557 #endif
1558
1559 if (sparc_cmodel_string != NULL)
1560 {
1561 if (TARGET_ARCH64)
1562 {
1563 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1564 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1565 break;
1566 if (cmodel->name == NULL)
1567 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1568 else
1569 sparc_cmodel = cmodel->value;
1570 }
1571 else
1572 error ("-mcmodel= is not supported on 32-bit systems");
1573 }
1574
1575 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1576 for (i = 8; i < 16; i++)
1577 if (!call_used_regs [i])
1578 {
1579 error ("-fcall-saved-REG is not supported for out registers");
1580 call_used_regs [i] = 1;
1581 }
1582
1583 /* Set the default CPU if no -mcpu option was specified. */
1584 if (!global_options_set.x_sparc_cpu_and_features)
1585 {
1586 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1587 if (def->cpu == TARGET_CPU_DEFAULT)
1588 break;
1589 gcc_assert (def->cpu != -1);
1590 sparc_cpu_and_features = def->processor;
1591 }
1592
1593 /* Set the default CPU if no -mtune option was specified. */
1594 if (!global_options_set.x_sparc_cpu)
1595 sparc_cpu = sparc_cpu_and_features;
1596
1597 cpu = &cpu_table[(int) sparc_cpu_and_features];
1598
1599 if (TARGET_DEBUG_OPTIONS)
1600 {
1601 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1602 dump_target_flags ("cpu->disable", cpu->disable);
1603 dump_target_flags ("cpu->enable", cpu->enable);
1604 }
1605
1606 target_flags &= ~cpu->disable;
1607 target_flags |= (cpu->enable
1608 #ifndef HAVE_AS_FMAF_HPC_VIS3
1609 & ~(MASK_FMAF | MASK_VIS3)
1610 #endif
1611 #ifndef HAVE_AS_SPARC4
1612 & ~MASK_CBCOND
1613 #endif
1614 #ifndef HAVE_AS_SPARC5_VIS4
1615 & ~(MASK_VIS4 | MASK_SUBXC)
1616 #endif
1617 #ifndef HAVE_AS_SPARC6
1618 & ~(MASK_VIS4B)
1619 #endif
1620 #ifndef HAVE_AS_LEON
1621 & ~(MASK_LEON | MASK_LEON3)
1622 #endif
1623 & ~(target_flags_explicit & MASK_FEATURES)
1624 );
1625
1626 /* -mvis2 implies -mvis. */
1627 if (TARGET_VIS2)
1628 target_flags |= MASK_VIS;
1629
1630 /* -mvis3 implies -mvis2 and -mvis. */
1631 if (TARGET_VIS3)
1632 target_flags |= MASK_VIS2 | MASK_VIS;
1633
1634 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1635 if (TARGET_VIS4)
1636 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1637
1638 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1639 if (TARGET_VIS4B)
1640 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1641
1642 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1643 FPU is disabled. */
1644 if (!TARGET_FPU)
1645 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1646 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1647
1648 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1649 are available; -m64 also implies v9. */
1650 if (TARGET_VIS || TARGET_ARCH64)
1651 {
1652 target_flags |= MASK_V9;
1653 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1654 }
1655
1656 /* -mvis also implies -mv8plus on 32-bit. */
1657 if (TARGET_VIS && !TARGET_ARCH64)
1658 target_flags |= MASK_V8PLUS;
1659
1660 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1661 if (TARGET_V9 && TARGET_ARCH32)
1662 target_flags |= MASK_DEPRECATED_V8_INSNS;
1663
1664 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1665 if (!TARGET_V9 || TARGET_ARCH64)
1666 target_flags &= ~MASK_V8PLUS;
1667
1668 /* Don't use stack biasing in 32-bit mode. */
1669 if (TARGET_ARCH32)
1670 target_flags &= ~MASK_STACK_BIAS;
1671
1672 /* Use LRA instead of reload, unless otherwise instructed. */
1673 if (!(target_flags_explicit & MASK_LRA))
1674 target_flags |= MASK_LRA;
1675
1676 /* Enable the back-to-back store errata workaround for LEON3FT. */
1677 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1678 sparc_fix_b2bst = 1;
1679
1680 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1681 if (sparc_fix_ut699)
1682 target_flags &= ~MASK_FSMULD;
1683
1684 /* Supply a default value for align_functions. */
1685 if (align_functions == 0)
1686 {
1687 if (sparc_cpu == PROCESSOR_ULTRASPARC
1688 || sparc_cpu == PROCESSOR_ULTRASPARC3
1689 || sparc_cpu == PROCESSOR_NIAGARA
1690 || sparc_cpu == PROCESSOR_NIAGARA2
1691 || sparc_cpu == PROCESSOR_NIAGARA3
1692 || sparc_cpu == PROCESSOR_NIAGARA4)
1693 align_functions = 32;
1694 else if (sparc_cpu == PROCESSOR_NIAGARA7
1695 || sparc_cpu == PROCESSOR_M8)
1696 align_functions = 64;
1697 }
1698
1699 /* Validate PCC_STRUCT_RETURN. */
1700 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1701 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1702
1703 /* Only use .uaxword when compiling for a 64-bit target. */
1704 if (!TARGET_ARCH64)
1705 targetm.asm_out.unaligned_op.di = NULL;
1706
1707 /* Do various machine dependent initializations. */
1708 sparc_init_modes ();
1709
1710 /* Set up function hooks. */
1711 init_machine_status = sparc_init_machine_status;
1712
1713 switch (sparc_cpu)
1714 {
1715 case PROCESSOR_V7:
1716 case PROCESSOR_CYPRESS:
1717 sparc_costs = &cypress_costs;
1718 break;
1719 case PROCESSOR_V8:
1720 case PROCESSOR_SPARCLITE:
1721 case PROCESSOR_SUPERSPARC:
1722 sparc_costs = &supersparc_costs;
1723 break;
1724 case PROCESSOR_F930:
1725 case PROCESSOR_F934:
1726 case PROCESSOR_HYPERSPARC:
1727 case PROCESSOR_SPARCLITE86X:
1728 sparc_costs = &hypersparc_costs;
1729 break;
1730 case PROCESSOR_LEON:
1731 sparc_costs = &leon_costs;
1732 break;
1733 case PROCESSOR_LEON3:
1734 case PROCESSOR_LEON3V7:
1735 sparc_costs = &leon3_costs;
1736 break;
1737 case PROCESSOR_SPARCLET:
1738 case PROCESSOR_TSC701:
1739 sparc_costs = &sparclet_costs;
1740 break;
1741 case PROCESSOR_V9:
1742 case PROCESSOR_ULTRASPARC:
1743 sparc_costs = &ultrasparc_costs;
1744 break;
1745 case PROCESSOR_ULTRASPARC3:
1746 sparc_costs = &ultrasparc3_costs;
1747 break;
1748 case PROCESSOR_NIAGARA:
1749 sparc_costs = &niagara_costs;
1750 break;
1751 case PROCESSOR_NIAGARA2:
1752 sparc_costs = &niagara2_costs;
1753 break;
1754 case PROCESSOR_NIAGARA3:
1755 sparc_costs = &niagara3_costs;
1756 break;
1757 case PROCESSOR_NIAGARA4:
1758 sparc_costs = &niagara4_costs;
1759 break;
1760 case PROCESSOR_NIAGARA7:
1761 sparc_costs = &niagara7_costs;
1762 break;
1763 case PROCESSOR_M8:
1764 sparc_costs = &m8_costs;
1765 break;
1766 case PROCESSOR_NATIVE:
1767 gcc_unreachable ();
1768 };
1769
1770 if (sparc_memory_model == SMM_DEFAULT)
1771 {
1772 /* Choose the memory model for the operating system. */
1773 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1774 if (os_default != SMM_DEFAULT)
1775 sparc_memory_model = os_default;
1776 /* Choose the most relaxed model for the processor. */
1777 else if (TARGET_V9)
1778 sparc_memory_model = SMM_RMO;
1779 else if (TARGET_LEON3)
1780 sparc_memory_model = SMM_TSO;
1781 else if (TARGET_LEON)
1782 sparc_memory_model = SMM_SC;
1783 else if (TARGET_V8)
1784 sparc_memory_model = SMM_PSO;
1785 else
1786 sparc_memory_model = SMM_SC;
1787 }
1788
1789 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1790 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1791 target_flags |= MASK_LONG_DOUBLE_128;
1792 #endif
1793
1794 if (TARGET_DEBUG_OPTIONS)
1795 dump_target_flags ("Final target_flags", target_flags);
1796
1797 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1798 can run at the same time. More important, it is the threshold
1799 defining when additional prefetches will be dropped by the
1800 hardware.
1801
1802 The UltraSPARC-III features a documented prefetch queue with a
1803 size of 8. Additional prefetches issued in the cpu are
1804 dropped.
1805
1806 Niagara processors are different. In these processors prefetches
1807 are handled much like regular loads. The L1 miss buffer is 32
1808 entries, but prefetches start getting affected when 30 entries
1809 become occupied. That occupation could be a mix of regular loads
1810 and prefetches though. And that buffer is shared by all threads.
1811 Once the threshold is reached, if the core is running a single
1812 thread the prefetch will retry. If more than one thread is
1813 running, the prefetch will be dropped.
1814
1815 All this makes it very difficult to determine how many
1816 simultaneous prefetches can be issued simultaneously, even in a
1817 single-threaded program. Experimental results show that setting
1818 this parameter to 32 works well when the number of threads is not
1819 high. */
1820 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1821 ((sparc_cpu == PROCESSOR_ULTRASPARC
1822 || sparc_cpu == PROCESSOR_NIAGARA
1823 || sparc_cpu == PROCESSOR_NIAGARA2
1824 || sparc_cpu == PROCESSOR_NIAGARA3
1825 || sparc_cpu == PROCESSOR_NIAGARA4)
1826 ? 2
1827 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1828 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1829 || sparc_cpu == PROCESSOR_M8)
1830 ? 32 : 3))),
1831 global_options.x_param_values,
1832 global_options_set.x_param_values);
1833
1834 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1835 bytes.
1836
1837 The Oracle SPARC Architecture (previously the UltraSPARC
1838 Architecture) specification states that when a PREFETCH[A]
1839 instruction is executed an implementation-specific amount of data
1840 is prefetched, and that it is at least 64 bytes long (aligned to
1841 at least 64 bytes).
1842
1843 However, this is not correct. The M7 (and implementations prior
1844 to that) does not guarantee a 64B prefetch into a cache if the
1845 line size is smaller. A single cache line is all that is ever
1846 prefetched. So for the M7, where the L1D$ has 32B lines and the
1847 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1848 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1849 is a read_n prefetch, which is the only type which allocates to
1850 the L1.) */
1851 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1852 (sparc_cpu == PROCESSOR_M8
1853 ? 64 : 32),
1854 global_options.x_param_values,
1855 global_options_set.x_param_values);
1856
1857 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1858 Hardvard level-1 caches) in kilobytes. Both UltraSPARC and
1859 Niagara processors feature a L1D$ of 16KB. */
1860 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1861 ((sparc_cpu == PROCESSOR_ULTRASPARC
1862 || sparc_cpu == PROCESSOR_ULTRASPARC3
1863 || sparc_cpu == PROCESSOR_NIAGARA
1864 || sparc_cpu == PROCESSOR_NIAGARA2
1865 || sparc_cpu == PROCESSOR_NIAGARA3
1866 || sparc_cpu == PROCESSOR_NIAGARA4
1867 || sparc_cpu == PROCESSOR_NIAGARA7
1868 || sparc_cpu == PROCESSOR_M8)
1869 ? 16 : 64),
1870 global_options.x_param_values,
1871 global_options_set.x_param_values);
1872
1873
1874 /* PARAM_L2_CACHE_SIZE is the size fo the L2 in kilobytes. Note
1875 that 512 is the default in params.def. */
1876 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1877 ((sparc_cpu == PROCESSOR_NIAGARA4
1878 || sparc_cpu == PROCESSOR_M8)
1879 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1880 ? 256 : 512)),
1881 global_options.x_param_values,
1882 global_options_set.x_param_values);
1883
1884
1885 /* Disable save slot sharing for call-clobbered registers by default.
1886 The IRA sharing algorithm works on single registers only and this
1887 pessimizes for double floating-point registers. */
1888 if (!global_options_set.x_flag_ira_share_save_slots)
1889 flag_ira_share_save_slots = 0;
1890
1891 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1892 redundant 32-to-64-bit extensions. */
1893 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1894 flag_ree = 0;
1895 }
1896 \f
1897 /* Miscellaneous utilities. */
1898
1899 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1900 or branch on register contents instructions. */
1901
1902 int
1903 v9_regcmp_p (enum rtx_code code)
1904 {
1905 return (code == EQ || code == NE || code == GE || code == LT
1906 || code == LE || code == GT);
1907 }
1908
1909 /* Nonzero if OP is a floating point constant which can
1910 be loaded into an integer register using a single
1911 sethi instruction. */
1912
1913 int
1914 fp_sethi_p (rtx op)
1915 {
1916 if (GET_CODE (op) == CONST_DOUBLE)
1917 {
1918 long i;
1919
1920 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1921 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1922 }
1923
1924 return 0;
1925 }
1926
1927 /* Nonzero if OP is a floating point constant which can
1928 be loaded into an integer register using a single
1929 mov instruction. */
1930
1931 int
1932 fp_mov_p (rtx op)
1933 {
1934 if (GET_CODE (op) == CONST_DOUBLE)
1935 {
1936 long i;
1937
1938 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1939 return SPARC_SIMM13_P (i);
1940 }
1941
1942 return 0;
1943 }
1944
1945 /* Nonzero if OP is a floating point constant which can
1946 be loaded into an integer register using a high/losum
1947 instruction sequence. */
1948
1949 int
1950 fp_high_losum_p (rtx op)
1951 {
1952 /* The constraints calling this should only be in
1953 SFmode move insns, so any constant which cannot
1954 be moved using a single insn will do. */
1955 if (GET_CODE (op) == CONST_DOUBLE)
1956 {
1957 long i;
1958
1959 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1960 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1961 }
1962
1963 return 0;
1964 }
1965
1966 /* Return true if the address of LABEL can be loaded by means of the
1967 mov{si,di}_pic_label_ref patterns in PIC mode. */
1968
1969 static bool
1970 can_use_mov_pic_label_ref (rtx label)
1971 {
1972 /* VxWorks does not impose a fixed gap between segments; the run-time
1973 gap can be different from the object-file gap. We therefore can't
1974 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1975 are absolutely sure that X is in the same segment as the GOT.
1976 Unfortunately, the flexibility of linker scripts means that we
1977 can't be sure of that in general, so assume that GOT-relative
1978 accesses are never valid on VxWorks. */
1979 if (TARGET_VXWORKS_RTP)
1980 return false;
1981
1982 /* Similarly, if the label is non-local, it might end up being placed
1983 in a different section than the current one; now mov_pic_label_ref
1984 requires the label and the code to be in the same section. */
1985 if (LABEL_REF_NONLOCAL_P (label))
1986 return false;
1987
1988 /* Finally, if we are reordering basic blocks and partition into hot
1989 and cold sections, this might happen for any label. */
1990 if (flag_reorder_blocks_and_partition)
1991 return false;
1992
1993 return true;
1994 }
1995
1996 /* Expand a move instruction. Return true if all work is done. */
1997
1998 bool
1999 sparc_expand_move (machine_mode mode, rtx *operands)
2000 {
2001 /* Handle sets of MEM first. */
2002 if (GET_CODE (operands[0]) == MEM)
2003 {
2004 /* 0 is a register (or a pair of registers) on SPARC. */
2005 if (register_or_zero_operand (operands[1], mode))
2006 return false;
2007
2008 if (!reload_in_progress)
2009 {
2010 operands[0] = validize_mem (operands[0]);
2011 operands[1] = force_reg (mode, operands[1]);
2012 }
2013 }
2014
2015 /* Fixup TLS cases. */
2016 if (TARGET_HAVE_TLS
2017 && CONSTANT_P (operands[1])
2018 && sparc_tls_referenced_p (operands [1]))
2019 {
2020 operands[1] = sparc_legitimize_tls_address (operands[1]);
2021 return false;
2022 }
2023
2024 /* Fixup PIC cases. */
2025 if (flag_pic && CONSTANT_P (operands[1]))
2026 {
2027 if (pic_address_needs_scratch (operands[1]))
2028 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2029
2030 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2031 if (GET_CODE (operands[1]) == LABEL_REF
2032 && can_use_mov_pic_label_ref (operands[1]))
2033 {
2034 if (mode == SImode)
2035 {
2036 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2037 return true;
2038 }
2039
2040 if (mode == DImode)
2041 {
2042 gcc_assert (TARGET_ARCH64);
2043 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2044 return true;
2045 }
2046 }
2047
2048 if (symbolic_operand (operands[1], mode))
2049 {
2050 operands[1]
2051 = sparc_legitimize_pic_address (operands[1],
2052 reload_in_progress
2053 ? operands[0] : NULL_RTX);
2054 return false;
2055 }
2056 }
2057
2058 /* If we are trying to toss an integer constant into FP registers,
2059 or loading a FP or vector constant, force it into memory. */
2060 if (CONSTANT_P (operands[1])
2061 && REG_P (operands[0])
2062 && (SPARC_FP_REG_P (REGNO (operands[0]))
2063 || SCALAR_FLOAT_MODE_P (mode)
2064 || VECTOR_MODE_P (mode)))
2065 {
2066 /* emit_group_store will send such bogosity to us when it is
2067 not storing directly into memory. So fix this up to avoid
2068 crashes in output_constant_pool. */
2069 if (operands [1] == const0_rtx)
2070 operands[1] = CONST0_RTX (mode);
2071
2072 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2073 always other regs. */
2074 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2075 && (const_zero_operand (operands[1], mode)
2076 || const_all_ones_operand (operands[1], mode)))
2077 return false;
2078
2079 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2080 /* We are able to build any SF constant in integer registers
2081 with at most 2 instructions. */
2082 && (mode == SFmode
2083 /* And any DF constant in integer registers if needed. */
2084 || (mode == DFmode && !can_create_pseudo_p ())))
2085 return false;
2086
2087 operands[1] = force_const_mem (mode, operands[1]);
2088 if (!reload_in_progress)
2089 operands[1] = validize_mem (operands[1]);
2090 return false;
2091 }
2092
2093 /* Accept non-constants and valid constants unmodified. */
2094 if (!CONSTANT_P (operands[1])
2095 || GET_CODE (operands[1]) == HIGH
2096 || input_operand (operands[1], mode))
2097 return false;
2098
2099 switch (mode)
2100 {
2101 case E_QImode:
2102 /* All QImode constants require only one insn, so proceed. */
2103 break;
2104
2105 case E_HImode:
2106 case E_SImode:
2107 sparc_emit_set_const32 (operands[0], operands[1]);
2108 return true;
2109
2110 case E_DImode:
2111 /* input_operand should have filtered out 32-bit mode. */
2112 sparc_emit_set_const64 (operands[0], operands[1]);
2113 return true;
2114
2115 case E_TImode:
2116 {
2117 rtx high, low;
2118 /* TImode isn't available in 32-bit mode. */
2119 split_double (operands[1], &high, &low);
2120 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2121 high));
2122 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2123 low));
2124 }
2125 return true;
2126
2127 default:
2128 gcc_unreachable ();
2129 }
2130
2131 return false;
2132 }
2133
2134 /* Load OP1, a 32-bit constant, into OP0, a register.
2135 We know it can't be done in one insn when we get
2136 here, the move expander guarantees this. */
2137
2138 static void
2139 sparc_emit_set_const32 (rtx op0, rtx op1)
2140 {
2141 machine_mode mode = GET_MODE (op0);
2142 rtx temp = op0;
2143
2144 if (can_create_pseudo_p ())
2145 temp = gen_reg_rtx (mode);
2146
2147 if (GET_CODE (op1) == CONST_INT)
2148 {
2149 gcc_assert (!small_int_operand (op1, mode)
2150 && !const_high_operand (op1, mode));
2151
2152 /* Emit them as real moves instead of a HIGH/LO_SUM,
2153 this way CSE can see everything and reuse intermediate
2154 values if it wants. */
2155 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2156 & ~(HOST_WIDE_INT) 0x3ff)));
2157
2158 emit_insn (gen_rtx_SET (op0,
2159 gen_rtx_IOR (mode, temp,
2160 GEN_INT (INTVAL (op1) & 0x3ff))));
2161 }
2162 else
2163 {
2164 /* A symbol, emit in the traditional way. */
2165 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2166 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2167 }
2168 }
2169
2170 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2171 If TEMP is nonzero, we are forbidden to use any other scratch
2172 registers. Otherwise, we are allowed to generate them as needed.
2173
2174 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2175 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2176
2177 void
2178 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2179 {
2180 rtx cst, temp1, temp2, temp3, temp4, temp5;
2181 rtx ti_temp = 0;
2182
2183 /* Deal with too large offsets. */
2184 if (GET_CODE (op1) == CONST
2185 && GET_CODE (XEXP (op1, 0)) == PLUS
2186 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2187 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2188 {
2189 gcc_assert (!temp);
2190 temp1 = gen_reg_rtx (DImode);
2191 temp2 = gen_reg_rtx (DImode);
2192 sparc_emit_set_const64 (temp2, cst);
2193 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2194 NULL_RTX);
2195 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2196 return;
2197 }
2198
2199 if (temp && GET_MODE (temp) == TImode)
2200 {
2201 ti_temp = temp;
2202 temp = gen_rtx_REG (DImode, REGNO (temp));
2203 }
2204
2205 /* SPARC-V9 code-model support. */
2206 switch (sparc_cmodel)
2207 {
2208 case CM_MEDLOW:
2209 /* The range spanned by all instructions in the object is less
2210 than 2^31 bytes (2GB) and the distance from any instruction
2211 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2212 than 2^31 bytes (2GB).
2213
2214 The executable must be in the low 4TB of the virtual address
2215 space.
2216
2217 sethi %hi(symbol), %temp1
2218 or %temp1, %lo(symbol), %reg */
2219 if (temp)
2220 temp1 = temp; /* op0 is allowed. */
2221 else
2222 temp1 = gen_reg_rtx (DImode);
2223
2224 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2225 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2226 break;
2227
2228 case CM_MEDMID:
2229 /* The range spanned by all instructions in the object is less
2230 than 2^31 bytes (2GB) and the distance from any instruction
2231 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2232 than 2^31 bytes (2GB).
2233
2234 The executable must be in the low 16TB of the virtual address
2235 space.
2236
2237 sethi %h44(symbol), %temp1
2238 or %temp1, %m44(symbol), %temp2
2239 sllx %temp2, 12, %temp3
2240 or %temp3, %l44(symbol), %reg */
2241 if (temp)
2242 {
2243 temp1 = op0;
2244 temp2 = op0;
2245 temp3 = temp; /* op0 is allowed. */
2246 }
2247 else
2248 {
2249 temp1 = gen_reg_rtx (DImode);
2250 temp2 = gen_reg_rtx (DImode);
2251 temp3 = gen_reg_rtx (DImode);
2252 }
2253
2254 emit_insn (gen_seth44 (temp1, op1));
2255 emit_insn (gen_setm44 (temp2, temp1, op1));
2256 emit_insn (gen_rtx_SET (temp3,
2257 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2258 emit_insn (gen_setl44 (op0, temp3, op1));
2259 break;
2260
2261 case CM_MEDANY:
2262 /* The range spanned by all instructions in the object is less
2263 than 2^31 bytes (2GB) and the distance from any instruction
2264 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2265 than 2^31 bytes (2GB).
2266
2267 The executable can be placed anywhere in the virtual address
2268 space.
2269
2270 sethi %hh(symbol), %temp1
2271 sethi %lm(symbol), %temp2
2272 or %temp1, %hm(symbol), %temp3
2273 sllx %temp3, 32, %temp4
2274 or %temp4, %temp2, %temp5
2275 or %temp5, %lo(symbol), %reg */
2276 if (temp)
2277 {
2278 /* It is possible that one of the registers we got for operands[2]
2279 might coincide with that of operands[0] (which is why we made
2280 it TImode). Pick the other one to use as our scratch. */
2281 if (rtx_equal_p (temp, op0))
2282 {
2283 gcc_assert (ti_temp);
2284 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2285 }
2286 temp1 = op0;
2287 temp2 = temp; /* op0 is _not_ allowed, see above. */
2288 temp3 = op0;
2289 temp4 = op0;
2290 temp5 = op0;
2291 }
2292 else
2293 {
2294 temp1 = gen_reg_rtx (DImode);
2295 temp2 = gen_reg_rtx (DImode);
2296 temp3 = gen_reg_rtx (DImode);
2297 temp4 = gen_reg_rtx (DImode);
2298 temp5 = gen_reg_rtx (DImode);
2299 }
2300
2301 emit_insn (gen_sethh (temp1, op1));
2302 emit_insn (gen_setlm (temp2, op1));
2303 emit_insn (gen_sethm (temp3, temp1, op1));
2304 emit_insn (gen_rtx_SET (temp4,
2305 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2306 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2307 emit_insn (gen_setlo (op0, temp5, op1));
2308 break;
2309
2310 case CM_EMBMEDANY:
2311 /* Old old old backwards compatibility kruft here.
2312 Essentially it is MEDLOW with a fixed 64-bit
2313 virtual base added to all data segment addresses.
2314 Text-segment stuff is computed like MEDANY, we can't
2315 reuse the code above because the relocation knobs
2316 look different.
2317
2318 Data segment: sethi %hi(symbol), %temp1
2319 add %temp1, EMBMEDANY_BASE_REG, %temp2
2320 or %temp2, %lo(symbol), %reg */
2321 if (data_segment_operand (op1, GET_MODE (op1)))
2322 {
2323 if (temp)
2324 {
2325 temp1 = temp; /* op0 is allowed. */
2326 temp2 = op0;
2327 }
2328 else
2329 {
2330 temp1 = gen_reg_rtx (DImode);
2331 temp2 = gen_reg_rtx (DImode);
2332 }
2333
2334 emit_insn (gen_embmedany_sethi (temp1, op1));
2335 emit_insn (gen_embmedany_brsum (temp2, temp1));
2336 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2337 }
2338
2339 /* Text segment: sethi %uhi(symbol), %temp1
2340 sethi %hi(symbol), %temp2
2341 or %temp1, %ulo(symbol), %temp3
2342 sllx %temp3, 32, %temp4
2343 or %temp4, %temp2, %temp5
2344 or %temp5, %lo(symbol), %reg */
2345 else
2346 {
2347 if (temp)
2348 {
2349 /* It is possible that one of the registers we got for operands[2]
2350 might coincide with that of operands[0] (which is why we made
2351 it TImode). Pick the other one to use as our scratch. */
2352 if (rtx_equal_p (temp, op0))
2353 {
2354 gcc_assert (ti_temp);
2355 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2356 }
2357 temp1 = op0;
2358 temp2 = temp; /* op0 is _not_ allowed, see above. */
2359 temp3 = op0;
2360 temp4 = op0;
2361 temp5 = op0;
2362 }
2363 else
2364 {
2365 temp1 = gen_reg_rtx (DImode);
2366 temp2 = gen_reg_rtx (DImode);
2367 temp3 = gen_reg_rtx (DImode);
2368 temp4 = gen_reg_rtx (DImode);
2369 temp5 = gen_reg_rtx (DImode);
2370 }
2371
2372 emit_insn (gen_embmedany_textuhi (temp1, op1));
2373 emit_insn (gen_embmedany_texthi (temp2, op1));
2374 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2375 emit_insn (gen_rtx_SET (temp4,
2376 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2377 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2378 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2379 }
2380 break;
2381
2382 default:
2383 gcc_unreachable ();
2384 }
2385 }
2386
2387 /* These avoid problems when cross compiling. If we do not
2388 go through all this hair then the optimizer will see
2389 invalid REG_EQUAL notes or in some cases none at all. */
2390 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2391 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2392 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2393 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2394
2395 /* The optimizer is not to assume anything about exactly
2396 which bits are set for a HIGH, they are unspecified.
2397 Unfortunately this leads to many missed optimizations
2398 during CSE. We mask out the non-HIGH bits, and matches
2399 a plain movdi, to alleviate this problem. */
2400 static rtx
2401 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2402 {
2403 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2404 }
2405
2406 static rtx
2407 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2408 {
2409 return gen_rtx_SET (dest, GEN_INT (val));
2410 }
2411
2412 static rtx
2413 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2414 {
2415 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2416 }
2417
2418 static rtx
2419 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2420 {
2421 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2422 }
2423
2424 /* Worker routines for 64-bit constant formation on arch64.
2425 One of the key things to be doing in these emissions is
2426 to create as many temp REGs as possible. This makes it
2427 possible for half-built constants to be used later when
2428 such values are similar to something required later on.
2429 Without doing this, the optimizer cannot see such
2430 opportunities. */
2431
2432 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2433 unsigned HOST_WIDE_INT, int);
2434
2435 static void
2436 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2437 unsigned HOST_WIDE_INT low_bits, int is_neg)
2438 {
2439 unsigned HOST_WIDE_INT high_bits;
2440
2441 if (is_neg)
2442 high_bits = (~low_bits) & 0xffffffff;
2443 else
2444 high_bits = low_bits;
2445
2446 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2447 if (!is_neg)
2448 {
2449 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2450 }
2451 else
2452 {
2453 /* If we are XOR'ing with -1, then we should emit a one's complement
2454 instead. This way the combiner will notice logical operations
2455 such as ANDN later on and substitute. */
2456 if ((low_bits & 0x3ff) == 0x3ff)
2457 {
2458 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2459 }
2460 else
2461 {
2462 emit_insn (gen_rtx_SET (op0,
2463 gen_safe_XOR64 (temp,
2464 (-(HOST_WIDE_INT)0x400
2465 | (low_bits & 0x3ff)))));
2466 }
2467 }
2468 }
2469
2470 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2471 unsigned HOST_WIDE_INT, int);
2472
2473 static void
2474 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2475 unsigned HOST_WIDE_INT high_bits,
2476 unsigned HOST_WIDE_INT low_immediate,
2477 int shift_count)
2478 {
2479 rtx temp2 = op0;
2480
2481 if ((high_bits & 0xfffffc00) != 0)
2482 {
2483 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2484 if ((high_bits & ~0xfffffc00) != 0)
2485 emit_insn (gen_rtx_SET (op0,
2486 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2487 else
2488 temp2 = temp;
2489 }
2490 else
2491 {
2492 emit_insn (gen_safe_SET64 (temp, high_bits));
2493 temp2 = temp;
2494 }
2495
2496 /* Now shift it up into place. */
2497 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2498 GEN_INT (shift_count))));
2499
2500 /* If there is a low immediate part piece, finish up by
2501 putting that in as well. */
2502 if (low_immediate != 0)
2503 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2504 }
2505
2506 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2507 unsigned HOST_WIDE_INT);
2508
2509 /* Full 64-bit constant decomposition. Even though this is the
2510 'worst' case, we still optimize a few things away. */
2511 static void
2512 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2513 unsigned HOST_WIDE_INT high_bits,
2514 unsigned HOST_WIDE_INT low_bits)
2515 {
2516 rtx sub_temp = op0;
2517
2518 if (can_create_pseudo_p ())
2519 sub_temp = gen_reg_rtx (DImode);
2520
2521 if ((high_bits & 0xfffffc00) != 0)
2522 {
2523 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2524 if ((high_bits & ~0xfffffc00) != 0)
2525 emit_insn (gen_rtx_SET (sub_temp,
2526 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2527 else
2528 sub_temp = temp;
2529 }
2530 else
2531 {
2532 emit_insn (gen_safe_SET64 (temp, high_bits));
2533 sub_temp = temp;
2534 }
2535
2536 if (can_create_pseudo_p ())
2537 {
2538 rtx temp2 = gen_reg_rtx (DImode);
2539 rtx temp3 = gen_reg_rtx (DImode);
2540 rtx temp4 = gen_reg_rtx (DImode);
2541
2542 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2543 GEN_INT (32))));
2544
2545 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2546 if ((low_bits & ~0xfffffc00) != 0)
2547 {
2548 emit_insn (gen_rtx_SET (temp3,
2549 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2550 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2551 }
2552 else
2553 {
2554 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2555 }
2556 }
2557 else
2558 {
2559 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2560 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2561 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2562 int to_shift = 12;
2563
2564 /* We are in the middle of reload, so this is really
2565 painful. However we do still make an attempt to
2566 avoid emitting truly stupid code. */
2567 if (low1 != const0_rtx)
2568 {
2569 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2570 GEN_INT (to_shift))));
2571 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2572 sub_temp = op0;
2573 to_shift = 12;
2574 }
2575 else
2576 {
2577 to_shift += 12;
2578 }
2579 if (low2 != const0_rtx)
2580 {
2581 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2582 GEN_INT (to_shift))));
2583 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2584 sub_temp = op0;
2585 to_shift = 8;
2586 }
2587 else
2588 {
2589 to_shift += 8;
2590 }
2591 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2592 GEN_INT (to_shift))));
2593 if (low3 != const0_rtx)
2594 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2595 /* phew... */
2596 }
2597 }
2598
2599 /* Analyze a 64-bit constant for certain properties. */
2600 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2601 unsigned HOST_WIDE_INT,
2602 int *, int *, int *);
2603
2604 static void
2605 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2606 unsigned HOST_WIDE_INT low_bits,
2607 int *hbsp, int *lbsp, int *abbasp)
2608 {
2609 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2610 int i;
2611
2612 lowest_bit_set = highest_bit_set = -1;
2613 i = 0;
2614 do
2615 {
2616 if ((lowest_bit_set == -1)
2617 && ((low_bits >> i) & 1))
2618 lowest_bit_set = i;
2619 if ((highest_bit_set == -1)
2620 && ((high_bits >> (32 - i - 1)) & 1))
2621 highest_bit_set = (64 - i - 1);
2622 }
2623 while (++i < 32
2624 && ((highest_bit_set == -1)
2625 || (lowest_bit_set == -1)));
2626 if (i == 32)
2627 {
2628 i = 0;
2629 do
2630 {
2631 if ((lowest_bit_set == -1)
2632 && ((high_bits >> i) & 1))
2633 lowest_bit_set = i + 32;
2634 if ((highest_bit_set == -1)
2635 && ((low_bits >> (32 - i - 1)) & 1))
2636 highest_bit_set = 32 - i - 1;
2637 }
2638 while (++i < 32
2639 && ((highest_bit_set == -1)
2640 || (lowest_bit_set == -1)));
2641 }
2642 /* If there are no bits set this should have gone out
2643 as one instruction! */
2644 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2645 all_bits_between_are_set = 1;
2646 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2647 {
2648 if (i < 32)
2649 {
2650 if ((low_bits & (1 << i)) != 0)
2651 continue;
2652 }
2653 else
2654 {
2655 if ((high_bits & (1 << (i - 32))) != 0)
2656 continue;
2657 }
2658 all_bits_between_are_set = 0;
2659 break;
2660 }
2661 *hbsp = highest_bit_set;
2662 *lbsp = lowest_bit_set;
2663 *abbasp = all_bits_between_are_set;
2664 }
2665
2666 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2667
2668 static int
2669 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2670 unsigned HOST_WIDE_INT low_bits)
2671 {
2672 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2673
2674 if (high_bits == 0
2675 || high_bits == 0xffffffff)
2676 return 1;
2677
2678 analyze_64bit_constant (high_bits, low_bits,
2679 &highest_bit_set, &lowest_bit_set,
2680 &all_bits_between_are_set);
2681
2682 if ((highest_bit_set == 63
2683 || lowest_bit_set == 0)
2684 && all_bits_between_are_set != 0)
2685 return 1;
2686
2687 if ((highest_bit_set - lowest_bit_set) < 21)
2688 return 1;
2689
2690 return 0;
2691 }
2692
2693 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2694 unsigned HOST_WIDE_INT,
2695 int, int);
2696
2697 static unsigned HOST_WIDE_INT
2698 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2699 unsigned HOST_WIDE_INT low_bits,
2700 int lowest_bit_set, int shift)
2701 {
2702 HOST_WIDE_INT hi, lo;
2703
2704 if (lowest_bit_set < 32)
2705 {
2706 lo = (low_bits >> lowest_bit_set) << shift;
2707 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2708 }
2709 else
2710 {
2711 lo = 0;
2712 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2713 }
2714 gcc_assert (! (hi & lo));
2715 return (hi | lo);
2716 }
2717
2718 /* Here we are sure to be arch64 and this is an integer constant
2719 being loaded into a register. Emit the most efficient
2720 insn sequence possible. Detection of all the 1-insn cases
2721 has been done already. */
2722 static void
2723 sparc_emit_set_const64 (rtx op0, rtx op1)
2724 {
2725 unsigned HOST_WIDE_INT high_bits, low_bits;
2726 int lowest_bit_set, highest_bit_set;
2727 int all_bits_between_are_set;
2728 rtx temp = 0;
2729
2730 /* Sanity check that we know what we are working with. */
2731 gcc_assert (TARGET_ARCH64
2732 && (GET_CODE (op0) == SUBREG
2733 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2734
2735 if (! can_create_pseudo_p ())
2736 temp = op0;
2737
2738 if (GET_CODE (op1) != CONST_INT)
2739 {
2740 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2741 return;
2742 }
2743
2744 if (! temp)
2745 temp = gen_reg_rtx (DImode);
2746
2747 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2748 low_bits = (INTVAL (op1) & 0xffffffff);
2749
2750 /* low_bits bits 0 --> 31
2751 high_bits bits 32 --> 63 */
2752
2753 analyze_64bit_constant (high_bits, low_bits,
2754 &highest_bit_set, &lowest_bit_set,
2755 &all_bits_between_are_set);
2756
2757 /* First try for a 2-insn sequence. */
2758
2759 /* These situations are preferred because the optimizer can
2760 * do more things with them:
2761 * 1) mov -1, %reg
2762 * sllx %reg, shift, %reg
2763 * 2) mov -1, %reg
2764 * srlx %reg, shift, %reg
2765 * 3) mov some_small_const, %reg
2766 * sllx %reg, shift, %reg
2767 */
2768 if (((highest_bit_set == 63
2769 || lowest_bit_set == 0)
2770 && all_bits_between_are_set != 0)
2771 || ((highest_bit_set - lowest_bit_set) < 12))
2772 {
2773 HOST_WIDE_INT the_const = -1;
2774 int shift = lowest_bit_set;
2775
2776 if ((highest_bit_set != 63
2777 && lowest_bit_set != 0)
2778 || all_bits_between_are_set == 0)
2779 {
2780 the_const =
2781 create_simple_focus_bits (high_bits, low_bits,
2782 lowest_bit_set, 0);
2783 }
2784 else if (lowest_bit_set == 0)
2785 shift = -(63 - highest_bit_set);
2786
2787 gcc_assert (SPARC_SIMM13_P (the_const));
2788 gcc_assert (shift != 0);
2789
2790 emit_insn (gen_safe_SET64 (temp, the_const));
2791 if (shift > 0)
2792 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2793 GEN_INT (shift))));
2794 else if (shift < 0)
2795 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2796 GEN_INT (-shift))));
2797 return;
2798 }
2799
2800 /* Now a range of 22 or less bits set somewhere.
2801 * 1) sethi %hi(focus_bits), %reg
2802 * sllx %reg, shift, %reg
2803 * 2) sethi %hi(focus_bits), %reg
2804 * srlx %reg, shift, %reg
2805 */
2806 if ((highest_bit_set - lowest_bit_set) < 21)
2807 {
2808 unsigned HOST_WIDE_INT focus_bits =
2809 create_simple_focus_bits (high_bits, low_bits,
2810 lowest_bit_set, 10);
2811
2812 gcc_assert (SPARC_SETHI_P (focus_bits));
2813 gcc_assert (lowest_bit_set != 10);
2814
2815 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2816
2817 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2818 if (lowest_bit_set < 10)
2819 emit_insn (gen_rtx_SET (op0,
2820 gen_rtx_LSHIFTRT (DImode, temp,
2821 GEN_INT (10 - lowest_bit_set))));
2822 else if (lowest_bit_set > 10)
2823 emit_insn (gen_rtx_SET (op0,
2824 gen_rtx_ASHIFT (DImode, temp,
2825 GEN_INT (lowest_bit_set - 10))));
2826 return;
2827 }
2828
2829 /* 1) sethi %hi(low_bits), %reg
2830 * or %reg, %lo(low_bits), %reg
2831 * 2) sethi %hi(~low_bits), %reg
2832 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2833 */
2834 if (high_bits == 0
2835 || high_bits == 0xffffffff)
2836 {
2837 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2838 (high_bits == 0xffffffff));
2839 return;
2840 }
2841
2842 /* Now, try 3-insn sequences. */
2843
2844 /* 1) sethi %hi(high_bits), %reg
2845 * or %reg, %lo(high_bits), %reg
2846 * sllx %reg, 32, %reg
2847 */
2848 if (low_bits == 0)
2849 {
2850 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2851 return;
2852 }
2853
2854 /* We may be able to do something quick
2855 when the constant is negated, so try that. */
2856 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2857 (~low_bits) & 0xfffffc00))
2858 {
2859 /* NOTE: The trailing bits get XOR'd so we need the
2860 non-negated bits, not the negated ones. */
2861 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2862
2863 if ((((~high_bits) & 0xffffffff) == 0
2864 && ((~low_bits) & 0x80000000) == 0)
2865 || (((~high_bits) & 0xffffffff) == 0xffffffff
2866 && ((~low_bits) & 0x80000000) != 0))
2867 {
2868 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2869
2870 if ((SPARC_SETHI_P (fast_int)
2871 && (~high_bits & 0xffffffff) == 0)
2872 || SPARC_SIMM13_P (fast_int))
2873 emit_insn (gen_safe_SET64 (temp, fast_int));
2874 else
2875 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2876 }
2877 else
2878 {
2879 rtx negated_const;
2880 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2881 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2882 sparc_emit_set_const64 (temp, negated_const);
2883 }
2884
2885 /* If we are XOR'ing with -1, then we should emit a one's complement
2886 instead. This way the combiner will notice logical operations
2887 such as ANDN later on and substitute. */
2888 if (trailing_bits == 0x3ff)
2889 {
2890 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2891 }
2892 else
2893 {
2894 emit_insn (gen_rtx_SET (op0,
2895 gen_safe_XOR64 (temp,
2896 (-0x400 | trailing_bits))));
2897 }
2898 return;
2899 }
2900
2901 /* 1) sethi %hi(xxx), %reg
2902 * or %reg, %lo(xxx), %reg
2903 * sllx %reg, yyy, %reg
2904 *
2905 * ??? This is just a generalized version of the low_bits==0
2906 * thing above, FIXME...
2907 */
2908 if ((highest_bit_set - lowest_bit_set) < 32)
2909 {
2910 unsigned HOST_WIDE_INT focus_bits =
2911 create_simple_focus_bits (high_bits, low_bits,
2912 lowest_bit_set, 0);
2913
2914 /* We can't get here in this state. */
2915 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2916
2917 /* So what we know is that the set bits straddle the
2918 middle of the 64-bit word. */
2919 sparc_emit_set_const64_quick2 (op0, temp,
2920 focus_bits, 0,
2921 lowest_bit_set);
2922 return;
2923 }
2924
2925 /* 1) sethi %hi(high_bits), %reg
2926 * or %reg, %lo(high_bits), %reg
2927 * sllx %reg, 32, %reg
2928 * or %reg, low_bits, %reg
2929 */
2930 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2931 {
2932 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2933 return;
2934 }
2935
2936 /* The easiest way when all else fails, is full decomposition. */
2937 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2938 }
2939
2940 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2941
2942 static bool
2943 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2944 {
2945 *p1 = SPARC_ICC_REG;
2946 *p2 = SPARC_FCC_REG;
2947 return true;
2948 }
2949
2950 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2951
2952 static unsigned int
2953 sparc_min_arithmetic_precision (void)
2954 {
2955 return 32;
2956 }
2957
2958 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2959 return the mode to be used for the comparison. For floating-point,
2960 CCFP[E]mode is used. CCNZmode should be used when the first operand
2961 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2962 processing is needed. */
2963
2964 machine_mode
2965 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2966 {
2967 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2968 {
2969 switch (op)
2970 {
2971 case EQ:
2972 case NE:
2973 case UNORDERED:
2974 case ORDERED:
2975 case UNLT:
2976 case UNLE:
2977 case UNGT:
2978 case UNGE:
2979 case UNEQ:
2980 case LTGT:
2981 return CCFPmode;
2982
2983 case LT:
2984 case LE:
2985 case GT:
2986 case GE:
2987 return CCFPEmode;
2988
2989 default:
2990 gcc_unreachable ();
2991 }
2992 }
2993 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2994 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2995 && y == const0_rtx)
2996 {
2997 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2998 return CCXNZmode;
2999 else
3000 return CCNZmode;
3001 }
3002 else
3003 {
3004 /* This is for the cmp<mode>_sne pattern. */
3005 if (GET_CODE (x) == NOT && y == constm1_rtx)
3006 {
3007 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3008 return CCXCmode;
3009 else
3010 return CCCmode;
3011 }
3012
3013 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3014 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3015 {
3016 if (GET_CODE (y) == UNSPEC
3017 && (XINT (y, 1) == UNSPEC_ADDV
3018 || XINT (y, 1) == UNSPEC_SUBV
3019 || XINT (y, 1) == UNSPEC_NEGV))
3020 return CCVmode;
3021 else
3022 return CCCmode;
3023 }
3024
3025 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3026 return CCXmode;
3027 else
3028 return CCmode;
3029 }
3030 }
3031
3032 /* Emit the compare insn and return the CC reg for a CODE comparison
3033 with operands X and Y. */
3034
3035 static rtx
3036 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3037 {
3038 machine_mode mode;
3039 rtx cc_reg;
3040
3041 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3042 return x;
3043
3044 mode = SELECT_CC_MODE (code, x, y);
3045
3046 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3047 fcc regs (cse can't tell they're really call clobbered regs and will
3048 remove a duplicate comparison even if there is an intervening function
3049 call - it will then try to reload the cc reg via an int reg which is why
3050 we need the movcc patterns). It is possible to provide the movcc
3051 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3052 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3053 to tell cse that CCFPE mode registers (even pseudos) are call
3054 clobbered. */
3055
3056 /* ??? This is an experiment. Rather than making changes to cse which may
3057 or may not be easy/clean, we do our own cse. This is possible because
3058 we will generate hard registers. Cse knows they're call clobbered (it
3059 doesn't know the same thing about pseudos). If we guess wrong, no big
3060 deal, but if we win, great! */
3061
3062 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3063 #if 1 /* experiment */
3064 {
3065 int reg;
3066 /* We cycle through the registers to ensure they're all exercised. */
3067 static int next_fcc_reg = 0;
3068 /* Previous x,y for each fcc reg. */
3069 static rtx prev_args[4][2];
3070
3071 /* Scan prev_args for x,y. */
3072 for (reg = 0; reg < 4; reg++)
3073 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3074 break;
3075 if (reg == 4)
3076 {
3077 reg = next_fcc_reg;
3078 prev_args[reg][0] = x;
3079 prev_args[reg][1] = y;
3080 next_fcc_reg = (next_fcc_reg + 1) & 3;
3081 }
3082 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3083 }
3084 #else
3085 cc_reg = gen_reg_rtx (mode);
3086 #endif /* ! experiment */
3087 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3088 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3089 else
3090 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3091
3092 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3093 will only result in an unrecognizable insn so no point in asserting. */
3094 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3095
3096 return cc_reg;
3097 }
3098
3099
3100 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3101
3102 rtx
3103 gen_compare_reg (rtx cmp)
3104 {
3105 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3106 }
3107
3108 /* This function is used for v9 only.
3109 DEST is the target of the Scc insn.
3110 CODE is the code for an Scc's comparison.
3111 X and Y are the values we compare.
3112
3113 This function is needed to turn
3114
3115 (set (reg:SI 110)
3116 (gt (reg:CCX 100 %icc)
3117 (const_int 0)))
3118 into
3119 (set (reg:SI 110)
3120 (gt:DI (reg:CCX 100 %icc)
3121 (const_int 0)))
3122
3123 IE: The instruction recognizer needs to see the mode of the comparison to
3124 find the right instruction. We could use "gt:DI" right in the
3125 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3126
3127 static int
3128 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3129 {
3130 if (! TARGET_ARCH64
3131 && (GET_MODE (x) == DImode
3132 || GET_MODE (dest) == DImode))
3133 return 0;
3134
3135 /* Try to use the movrCC insns. */
3136 if (TARGET_ARCH64
3137 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3138 && y == const0_rtx
3139 && v9_regcmp_p (compare_code))
3140 {
3141 rtx op0 = x;
3142 rtx temp;
3143
3144 /* Special case for op0 != 0. This can be done with one instruction if
3145 dest == x. */
3146
3147 if (compare_code == NE
3148 && GET_MODE (dest) == DImode
3149 && rtx_equal_p (op0, dest))
3150 {
3151 emit_insn (gen_rtx_SET (dest,
3152 gen_rtx_IF_THEN_ELSE (DImode,
3153 gen_rtx_fmt_ee (compare_code, DImode,
3154 op0, const0_rtx),
3155 const1_rtx,
3156 dest)));
3157 return 1;
3158 }
3159
3160 if (reg_overlap_mentioned_p (dest, op0))
3161 {
3162 /* Handle the case where dest == x.
3163 We "early clobber" the result. */
3164 op0 = gen_reg_rtx (GET_MODE (x));
3165 emit_move_insn (op0, x);
3166 }
3167
3168 emit_insn (gen_rtx_SET (dest, const0_rtx));
3169 if (GET_MODE (op0) != DImode)
3170 {
3171 temp = gen_reg_rtx (DImode);
3172 convert_move (temp, op0, 0);
3173 }
3174 else
3175 temp = op0;
3176 emit_insn (gen_rtx_SET (dest,
3177 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3178 gen_rtx_fmt_ee (compare_code, DImode,
3179 temp, const0_rtx),
3180 const1_rtx,
3181 dest)));
3182 return 1;
3183 }
3184 else
3185 {
3186 x = gen_compare_reg_1 (compare_code, x, y);
3187 y = const0_rtx;
3188
3189 emit_insn (gen_rtx_SET (dest, const0_rtx));
3190 emit_insn (gen_rtx_SET (dest,
3191 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3192 gen_rtx_fmt_ee (compare_code,
3193 GET_MODE (x), x, y),
3194 const1_rtx, dest)));
3195 return 1;
3196 }
3197 }
3198
3199
3200 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3201 without jumps using the addx/subx instructions. */
3202
3203 bool
3204 emit_scc_insn (rtx operands[])
3205 {
3206 rtx tem, x, y;
3207 enum rtx_code code;
3208 machine_mode mode;
3209
3210 /* The quad-word fp compare library routines all return nonzero to indicate
3211 true, which is different from the equivalent libgcc routines, so we must
3212 handle them specially here. */
3213 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3214 {
3215 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3216 GET_CODE (operands[1]));
3217 operands[2] = XEXP (operands[1], 0);
3218 operands[3] = XEXP (operands[1], 1);
3219 }
3220
3221 code = GET_CODE (operands[1]);
3222 x = operands[2];
3223 y = operands[3];
3224 mode = GET_MODE (x);
3225
3226 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3227 more applications). The exception to this is "reg != 0" which can
3228 be done in one instruction on v9 (so we do it). */
3229 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3230 {
3231 if (y != const0_rtx)
3232 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3233
3234 rtx pat = gen_rtx_SET (operands[0],
3235 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3236 x, const0_rtx));
3237
3238 /* If we can use addx/subx or addxc, add a clobber for CC. */
3239 if (mode == SImode || (code == NE && TARGET_VIS3))
3240 {
3241 rtx clobber
3242 = gen_rtx_CLOBBER (VOIDmode,
3243 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3244 SPARC_ICC_REG));
3245 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3246 }
3247
3248 emit_insn (pat);
3249 return true;
3250 }
3251
3252 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3253 if (TARGET_ARCH64
3254 && mode == DImode
3255 && !((code == LTU || code == GTU) && TARGET_VIS3)
3256 && gen_v9_scc (operands[0], code, x, y))
3257 return true;
3258
3259 /* We can do LTU and GEU using the addx/subx instructions too. And
3260 for GTU/LEU, if both operands are registers swap them and fall
3261 back to the easy case. */
3262 if (code == GTU || code == LEU)
3263 {
3264 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3265 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3266 {
3267 tem = x;
3268 x = y;
3269 y = tem;
3270 code = swap_condition (code);
3271 }
3272 }
3273
3274 if (code == LTU || code == GEU)
3275 {
3276 emit_insn (gen_rtx_SET (operands[0],
3277 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3278 gen_compare_reg_1 (code, x, y),
3279 const0_rtx)));
3280 return true;
3281 }
3282
3283 /* All the posibilities to use addx/subx based sequences has been
3284 exhausted, try for a 3 instruction sequence using v9 conditional
3285 moves. */
3286 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3287 return true;
3288
3289 /* Nope, do branches. */
3290 return false;
3291 }
3292
3293 /* Emit a conditional jump insn for the v9 architecture using comparison code
3294 CODE and jump target LABEL.
3295 This function exists to take advantage of the v9 brxx insns. */
3296
3297 static void
3298 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3299 {
3300 emit_jump_insn (gen_rtx_SET (pc_rtx,
3301 gen_rtx_IF_THEN_ELSE (VOIDmode,
3302 gen_rtx_fmt_ee (code, GET_MODE (op0),
3303 op0, const0_rtx),
3304 gen_rtx_LABEL_REF (VOIDmode, label),
3305 pc_rtx)));
3306 }
3307
3308 /* Emit a conditional jump insn for the UA2011 architecture using
3309 comparison code CODE and jump target LABEL. This function exists
3310 to take advantage of the UA2011 Compare and Branch insns. */
3311
3312 static void
3313 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3314 {
3315 rtx if_then_else;
3316
3317 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3318 gen_rtx_fmt_ee(code, GET_MODE(op0),
3319 op0, op1),
3320 gen_rtx_LABEL_REF (VOIDmode, label),
3321 pc_rtx);
3322
3323 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3324 }
3325
3326 void
3327 emit_conditional_branch_insn (rtx operands[])
3328 {
3329 /* The quad-word fp compare library routines all return nonzero to indicate
3330 true, which is different from the equivalent libgcc routines, so we must
3331 handle them specially here. */
3332 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3333 {
3334 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3335 GET_CODE (operands[0]));
3336 operands[1] = XEXP (operands[0], 0);
3337 operands[2] = XEXP (operands[0], 1);
3338 }
3339
3340 /* If we can tell early on that the comparison is against a constant
3341 that won't fit in the 5-bit signed immediate field of a cbcond,
3342 use one of the other v9 conditional branch sequences. */
3343 if (TARGET_CBCOND
3344 && GET_CODE (operands[1]) == REG
3345 && (GET_MODE (operands[1]) == SImode
3346 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3347 && (GET_CODE (operands[2]) != CONST_INT
3348 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3349 {
3350 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3351 return;
3352 }
3353
3354 if (TARGET_ARCH64 && operands[2] == const0_rtx
3355 && GET_CODE (operands[1]) == REG
3356 && GET_MODE (operands[1]) == DImode)
3357 {
3358 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3359 return;
3360 }
3361
3362 operands[1] = gen_compare_reg (operands[0]);
3363 operands[2] = const0_rtx;
3364 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3365 operands[1], operands[2]);
3366 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3367 operands[3]));
3368 }
3369
3370
3371 /* Generate a DFmode part of a hard TFmode register.
3372 REG is the TFmode hard register, LOW is 1 for the
3373 low 64bit of the register and 0 otherwise.
3374 */
3375 rtx
3376 gen_df_reg (rtx reg, int low)
3377 {
3378 int regno = REGNO (reg);
3379
3380 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3381 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3382 return gen_rtx_REG (DFmode, regno);
3383 }
3384 \f
3385 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3386 Unlike normal calls, TFmode operands are passed by reference. It is
3387 assumed that no more than 3 operands are required. */
3388
3389 static void
3390 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3391 {
3392 rtx ret_slot = NULL, arg[3], func_sym;
3393 int i;
3394
3395 /* We only expect to be called for conversions, unary, and binary ops. */
3396 gcc_assert (nargs == 2 || nargs == 3);
3397
3398 for (i = 0; i < nargs; ++i)
3399 {
3400 rtx this_arg = operands[i];
3401 rtx this_slot;
3402
3403 /* TFmode arguments and return values are passed by reference. */
3404 if (GET_MODE (this_arg) == TFmode)
3405 {
3406 int force_stack_temp;
3407
3408 force_stack_temp = 0;
3409 if (TARGET_BUGGY_QP_LIB && i == 0)
3410 force_stack_temp = 1;
3411
3412 if (GET_CODE (this_arg) == MEM
3413 && ! force_stack_temp)
3414 {
3415 tree expr = MEM_EXPR (this_arg);
3416 if (expr)
3417 mark_addressable (expr);
3418 this_arg = XEXP (this_arg, 0);
3419 }
3420 else if (CONSTANT_P (this_arg)
3421 && ! force_stack_temp)
3422 {
3423 this_slot = force_const_mem (TFmode, this_arg);
3424 this_arg = XEXP (this_slot, 0);
3425 }
3426 else
3427 {
3428 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3429
3430 /* Operand 0 is the return value. We'll copy it out later. */
3431 if (i > 0)
3432 emit_move_insn (this_slot, this_arg);
3433 else
3434 ret_slot = this_slot;
3435
3436 this_arg = XEXP (this_slot, 0);
3437 }
3438 }
3439
3440 arg[i] = this_arg;
3441 }
3442
3443 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3444
3445 if (GET_MODE (operands[0]) == TFmode)
3446 {
3447 if (nargs == 2)
3448 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3449 arg[0], GET_MODE (arg[0]),
3450 arg[1], GET_MODE (arg[1]));
3451 else
3452 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3453 arg[0], GET_MODE (arg[0]),
3454 arg[1], GET_MODE (arg[1]),
3455 arg[2], GET_MODE (arg[2]));
3456
3457 if (ret_slot)
3458 emit_move_insn (operands[0], ret_slot);
3459 }
3460 else
3461 {
3462 rtx ret;
3463
3464 gcc_assert (nargs == 2);
3465
3466 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3467 GET_MODE (operands[0]),
3468 arg[1], GET_MODE (arg[1]));
3469
3470 if (ret != operands[0])
3471 emit_move_insn (operands[0], ret);
3472 }
3473 }
3474
3475 /* Expand soft-float TFmode calls to sparc abi routines. */
3476
3477 static void
3478 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3479 {
3480 const char *func;
3481
3482 switch (code)
3483 {
3484 case PLUS:
3485 func = "_Qp_add";
3486 break;
3487 case MINUS:
3488 func = "_Qp_sub";
3489 break;
3490 case MULT:
3491 func = "_Qp_mul";
3492 break;
3493 case DIV:
3494 func = "_Qp_div";
3495 break;
3496 default:
3497 gcc_unreachable ();
3498 }
3499
3500 emit_soft_tfmode_libcall (func, 3, operands);
3501 }
3502
3503 static void
3504 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3505 {
3506 const char *func;
3507
3508 gcc_assert (code == SQRT);
3509 func = "_Qp_sqrt";
3510
3511 emit_soft_tfmode_libcall (func, 2, operands);
3512 }
3513
3514 static void
3515 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3516 {
3517 const char *func;
3518
3519 switch (code)
3520 {
3521 case FLOAT_EXTEND:
3522 switch (GET_MODE (operands[1]))
3523 {
3524 case E_SFmode:
3525 func = "_Qp_stoq";
3526 break;
3527 case E_DFmode:
3528 func = "_Qp_dtoq";
3529 break;
3530 default:
3531 gcc_unreachable ();
3532 }
3533 break;
3534
3535 case FLOAT_TRUNCATE:
3536 switch (GET_MODE (operands[0]))
3537 {
3538 case E_SFmode:
3539 func = "_Qp_qtos";
3540 break;
3541 case E_DFmode:
3542 func = "_Qp_qtod";
3543 break;
3544 default:
3545 gcc_unreachable ();
3546 }
3547 break;
3548
3549 case FLOAT:
3550 switch (GET_MODE (operands[1]))
3551 {
3552 case E_SImode:
3553 func = "_Qp_itoq";
3554 if (TARGET_ARCH64)
3555 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3556 break;
3557 case E_DImode:
3558 func = "_Qp_xtoq";
3559 break;
3560 default:
3561 gcc_unreachable ();
3562 }
3563 break;
3564
3565 case UNSIGNED_FLOAT:
3566 switch (GET_MODE (operands[1]))
3567 {
3568 case E_SImode:
3569 func = "_Qp_uitoq";
3570 if (TARGET_ARCH64)
3571 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3572 break;
3573 case E_DImode:
3574 func = "_Qp_uxtoq";
3575 break;
3576 default:
3577 gcc_unreachable ();
3578 }
3579 break;
3580
3581 case FIX:
3582 switch (GET_MODE (operands[0]))
3583 {
3584 case E_SImode:
3585 func = "_Qp_qtoi";
3586 break;
3587 case E_DImode:
3588 func = "_Qp_qtox";
3589 break;
3590 default:
3591 gcc_unreachable ();
3592 }
3593 break;
3594
3595 case UNSIGNED_FIX:
3596 switch (GET_MODE (operands[0]))
3597 {
3598 case E_SImode:
3599 func = "_Qp_qtoui";
3600 break;
3601 case E_DImode:
3602 func = "_Qp_qtoux";
3603 break;
3604 default:
3605 gcc_unreachable ();
3606 }
3607 break;
3608
3609 default:
3610 gcc_unreachable ();
3611 }
3612
3613 emit_soft_tfmode_libcall (func, 2, operands);
3614 }
3615
3616 /* Expand a hard-float tfmode operation. All arguments must be in
3617 registers. */
3618
3619 static void
3620 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3621 {
3622 rtx op, dest;
3623
3624 if (GET_RTX_CLASS (code) == RTX_UNARY)
3625 {
3626 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3627 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3628 }
3629 else
3630 {
3631 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3632 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3633 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3634 operands[1], operands[2]);
3635 }
3636
3637 if (register_operand (operands[0], VOIDmode))
3638 dest = operands[0];
3639 else
3640 dest = gen_reg_rtx (GET_MODE (operands[0]));
3641
3642 emit_insn (gen_rtx_SET (dest, op));
3643
3644 if (dest != operands[0])
3645 emit_move_insn (operands[0], dest);
3646 }
3647
3648 void
3649 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3650 {
3651 if (TARGET_HARD_QUAD)
3652 emit_hard_tfmode_operation (code, operands);
3653 else
3654 emit_soft_tfmode_binop (code, operands);
3655 }
3656
3657 void
3658 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3659 {
3660 if (TARGET_HARD_QUAD)
3661 emit_hard_tfmode_operation (code, operands);
3662 else
3663 emit_soft_tfmode_unop (code, operands);
3664 }
3665
3666 void
3667 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3668 {
3669 if (TARGET_HARD_QUAD)
3670 emit_hard_tfmode_operation (code, operands);
3671 else
3672 emit_soft_tfmode_cvt (code, operands);
3673 }
3674 \f
3675 /* Return nonzero if a branch/jump/call instruction will be emitting
3676 nop into its delay slot. */
3677
3678 int
3679 empty_delay_slot (rtx_insn *insn)
3680 {
3681 rtx seq;
3682
3683 /* If no previous instruction (should not happen), return true. */
3684 if (PREV_INSN (insn) == NULL)
3685 return 1;
3686
3687 seq = NEXT_INSN (PREV_INSN (insn));
3688 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3689 return 0;
3690
3691 return 1;
3692 }
3693
3694 /* Return nonzero if we should emit a nop after a cbcond instruction.
3695 The cbcond instruction does not have a delay slot, however there is
3696 a severe performance penalty if a control transfer appears right
3697 after a cbcond. Therefore we emit a nop when we detect this
3698 situation. */
3699
3700 int
3701 emit_cbcond_nop (rtx_insn *insn)
3702 {
3703 rtx next = next_active_insn (insn);
3704
3705 if (!next)
3706 return 1;
3707
3708 if (NONJUMP_INSN_P (next)
3709 && GET_CODE (PATTERN (next)) == SEQUENCE)
3710 next = XVECEXP (PATTERN (next), 0, 0);
3711 else if (CALL_P (next)
3712 && GET_CODE (PATTERN (next)) == PARALLEL)
3713 {
3714 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3715
3716 if (GET_CODE (delay) == RETURN)
3717 {
3718 /* It's a sibling call. Do not emit the nop if we're going
3719 to emit something other than the jump itself as the first
3720 instruction of the sibcall sequence. */
3721 if (sparc_leaf_function_p || TARGET_FLAT)
3722 return 0;
3723 }
3724 }
3725
3726 if (NONJUMP_INSN_P (next))
3727 return 0;
3728
3729 return 1;
3730 }
3731
3732 /* Return nonzero if TRIAL can go into the call delay slot. */
3733
3734 int
3735 eligible_for_call_delay (rtx_insn *trial)
3736 {
3737 rtx pat;
3738
3739 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3740 return 0;
3741
3742 /* Binutils allows
3743 call __tls_get_addr, %tgd_call (foo)
3744 add %l7, %o0, %o0, %tgd_add (foo)
3745 while Sun as/ld does not. */
3746 if (TARGET_GNU_TLS || !TARGET_TLS)
3747 return 1;
3748
3749 pat = PATTERN (trial);
3750
3751 /* We must reject tgd_add{32|64}, i.e.
3752 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3753 and tldm_add{32|64}, i.e.
3754 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3755 for Sun as/ld. */
3756 if (GET_CODE (pat) == SET
3757 && GET_CODE (SET_SRC (pat)) == PLUS)
3758 {
3759 rtx unspec = XEXP (SET_SRC (pat), 1);
3760
3761 if (GET_CODE (unspec) == UNSPEC
3762 && (XINT (unspec, 1) == UNSPEC_TLSGD
3763 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3764 return 0;
3765 }
3766
3767 return 1;
3768 }
3769
3770 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3771 instruction. RETURN_P is true if the v9 variant 'return' is to be
3772 considered in the test too.
3773
3774 TRIAL must be a SET whose destination is a REG appropriate for the
3775 'restore' instruction or, if RETURN_P is true, for the 'return'
3776 instruction. */
3777
3778 static int
3779 eligible_for_restore_insn (rtx trial, bool return_p)
3780 {
3781 rtx pat = PATTERN (trial);
3782 rtx src = SET_SRC (pat);
3783 bool src_is_freg = false;
3784 rtx src_reg;
3785
3786 /* Since we now can do moves between float and integer registers when
3787 VIS3 is enabled, we have to catch this case. We can allow such
3788 moves when doing a 'return' however. */
3789 src_reg = src;
3790 if (GET_CODE (src_reg) == SUBREG)
3791 src_reg = SUBREG_REG (src_reg);
3792 if (GET_CODE (src_reg) == REG
3793 && SPARC_FP_REG_P (REGNO (src_reg)))
3794 src_is_freg = true;
3795
3796 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3797 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3798 && arith_operand (src, GET_MODE (src))
3799 && ! src_is_freg)
3800 {
3801 if (TARGET_ARCH64)
3802 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3803 else
3804 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3805 }
3806
3807 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3808 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3809 && arith_double_operand (src, GET_MODE (src))
3810 && ! src_is_freg)
3811 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3812
3813 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3814 else if (! TARGET_FPU && register_operand (src, SFmode))
3815 return 1;
3816
3817 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3818 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3819 return 1;
3820
3821 /* If we have the 'return' instruction, anything that does not use
3822 local or output registers and can go into a delay slot wins. */
3823 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3824 return 1;
3825
3826 /* The 'restore src1,src2,dest' pattern for SImode. */
3827 else if (GET_CODE (src) == PLUS
3828 && register_operand (XEXP (src, 0), SImode)
3829 && arith_operand (XEXP (src, 1), SImode))
3830 return 1;
3831
3832 /* The 'restore src1,src2,dest' pattern for DImode. */
3833 else if (GET_CODE (src) == PLUS
3834 && register_operand (XEXP (src, 0), DImode)
3835 && arith_double_operand (XEXP (src, 1), DImode))
3836 return 1;
3837
3838 /* The 'restore src1,%lo(src2),dest' pattern. */
3839 else if (GET_CODE (src) == LO_SUM
3840 && ! TARGET_CM_MEDMID
3841 && ((register_operand (XEXP (src, 0), SImode)
3842 && immediate_operand (XEXP (src, 1), SImode))
3843 || (TARGET_ARCH64
3844 && register_operand (XEXP (src, 0), DImode)
3845 && immediate_operand (XEXP (src, 1), DImode))))
3846 return 1;
3847
3848 /* The 'restore src,src,dest' pattern. */
3849 else if (GET_CODE (src) == ASHIFT
3850 && (register_operand (XEXP (src, 0), SImode)
3851 || register_operand (XEXP (src, 0), DImode))
3852 && XEXP (src, 1) == const1_rtx)
3853 return 1;
3854
3855 return 0;
3856 }
3857
3858 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3859
3860 int
3861 eligible_for_return_delay (rtx_insn *trial)
3862 {
3863 int regno;
3864 rtx pat;
3865
3866 /* If the function uses __builtin_eh_return, the eh_return machinery
3867 occupies the delay slot. */
3868 if (crtl->calls_eh_return)
3869 return 0;
3870
3871 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3872 return 0;
3873
3874 /* In the case of a leaf or flat function, anything can go into the slot. */
3875 if (sparc_leaf_function_p || TARGET_FLAT)
3876 return 1;
3877
3878 if (!NONJUMP_INSN_P (trial))
3879 return 0;
3880
3881 pat = PATTERN (trial);
3882 if (GET_CODE (pat) == PARALLEL)
3883 {
3884 int i;
3885
3886 if (! TARGET_V9)
3887 return 0;
3888 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3889 {
3890 rtx expr = XVECEXP (pat, 0, i);
3891 if (GET_CODE (expr) != SET)
3892 return 0;
3893 if (GET_CODE (SET_DEST (expr)) != REG)
3894 return 0;
3895 regno = REGNO (SET_DEST (expr));
3896 if (regno >= 8 && regno < 24)
3897 return 0;
3898 }
3899 return !epilogue_renumber (&pat, 1);
3900 }
3901
3902 if (GET_CODE (pat) != SET)
3903 return 0;
3904
3905 if (GET_CODE (SET_DEST (pat)) != REG)
3906 return 0;
3907
3908 regno = REGNO (SET_DEST (pat));
3909
3910 /* Otherwise, only operations which can be done in tandem with
3911 a `restore' or `return' insn can go into the delay slot. */
3912 if (regno >= 8 && regno < 24)
3913 return 0;
3914
3915 /* If this instruction sets up floating point register and we have a return
3916 instruction, it can probably go in. But restore will not work
3917 with FP_REGS. */
3918 if (! SPARC_INT_REG_P (regno))
3919 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3920
3921 return eligible_for_restore_insn (trial, true);
3922 }
3923
3924 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3925
3926 int
3927 eligible_for_sibcall_delay (rtx_insn *trial)
3928 {
3929 rtx pat;
3930
3931 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3932 return 0;
3933
3934 if (!NONJUMP_INSN_P (trial))
3935 return 0;
3936
3937 pat = PATTERN (trial);
3938
3939 if (sparc_leaf_function_p || TARGET_FLAT)
3940 {
3941 /* If the tail call is done using the call instruction,
3942 we have to restore %o7 in the delay slot. */
3943 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3944 return 0;
3945
3946 /* %g1 is used to build the function address */
3947 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3948 return 0;
3949
3950 return 1;
3951 }
3952
3953 if (GET_CODE (pat) != SET)
3954 return 0;
3955
3956 /* Otherwise, only operations which can be done in tandem with
3957 a `restore' insn can go into the delay slot. */
3958 if (GET_CODE (SET_DEST (pat)) != REG
3959 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3960 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3961 return 0;
3962
3963 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3964 in most cases. */
3965 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3966 return 0;
3967
3968 return eligible_for_restore_insn (trial, false);
3969 }
3970 \f
3971 /* Determine if it's legal to put X into the constant pool. This
3972 is not possible if X contains the address of a symbol that is
3973 not constant (TLS) or not known at final link time (PIC). */
3974
3975 static bool
3976 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3977 {
3978 switch (GET_CODE (x))
3979 {
3980 case CONST_INT:
3981 case CONST_WIDE_INT:
3982 case CONST_DOUBLE:
3983 case CONST_VECTOR:
3984 /* Accept all non-symbolic constants. */
3985 return false;
3986
3987 case LABEL_REF:
3988 /* Labels are OK iff we are non-PIC. */
3989 return flag_pic != 0;
3990
3991 case SYMBOL_REF:
3992 /* 'Naked' TLS symbol references are never OK,
3993 non-TLS symbols are OK iff we are non-PIC. */
3994 if (SYMBOL_REF_TLS_MODEL (x))
3995 return true;
3996 else
3997 return flag_pic != 0;
3998
3999 case CONST:
4000 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4001 case PLUS:
4002 case MINUS:
4003 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4004 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4005 case UNSPEC:
4006 return true;
4007 default:
4008 gcc_unreachable ();
4009 }
4010 }
4011 \f
4012 /* Global Offset Table support. */
4013 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4014 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4015
4016 /* Return the SYMBOL_REF for the Global Offset Table. */
4017
4018 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4019
4020 static rtx
4021 sparc_got (void)
4022 {
4023 if (!sparc_got_symbol)
4024 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4025
4026 return sparc_got_symbol;
4027 }
4028
4029 /* Ensure that we are not using patterns that are not OK with PIC. */
4030
4031 int
4032 check_pic (int i)
4033 {
4034 rtx op;
4035
4036 switch (flag_pic)
4037 {
4038 case 1:
4039 op = recog_data.operand[i];
4040 gcc_assert (GET_CODE (op) != SYMBOL_REF
4041 && (GET_CODE (op) != CONST
4042 || (GET_CODE (XEXP (op, 0)) == MINUS
4043 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4044 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4045 /* fallthrough */
4046 case 2:
4047 default:
4048 return 1;
4049 }
4050 }
4051
4052 /* Return true if X is an address which needs a temporary register when
4053 reloaded while generating PIC code. */
4054
4055 int
4056 pic_address_needs_scratch (rtx x)
4057 {
4058 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4059 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4061 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4062 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4063 return 1;
4064
4065 return 0;
4066 }
4067
4068 /* Determine if a given RTX is a valid constant. We already know this
4069 satisfies CONSTANT_P. */
4070
4071 static bool
4072 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4073 {
4074 switch (GET_CODE (x))
4075 {
4076 case CONST:
4077 case SYMBOL_REF:
4078 if (sparc_tls_referenced_p (x))
4079 return false;
4080 break;
4081
4082 case CONST_DOUBLE:
4083 /* Floating point constants are generally not ok.
4084 The only exception is 0.0 and all-ones in VIS. */
4085 if (TARGET_VIS
4086 && SCALAR_FLOAT_MODE_P (mode)
4087 && (const_zero_operand (x, mode)
4088 || const_all_ones_operand (x, mode)))
4089 return true;
4090
4091 return false;
4092
4093 case CONST_VECTOR:
4094 /* Vector constants are generally not ok.
4095 The only exception is 0 or -1 in VIS. */
4096 if (TARGET_VIS
4097 && (const_zero_operand (x, mode)
4098 || const_all_ones_operand (x, mode)))
4099 return true;
4100
4101 return false;
4102
4103 default:
4104 break;
4105 }
4106
4107 return true;
4108 }
4109
4110 /* Determine if a given RTX is a valid constant address. */
4111
4112 bool
4113 constant_address_p (rtx x)
4114 {
4115 switch (GET_CODE (x))
4116 {
4117 case LABEL_REF:
4118 case CONST_INT:
4119 case HIGH:
4120 return true;
4121
4122 case CONST:
4123 if (flag_pic && pic_address_needs_scratch (x))
4124 return false;
4125 return sparc_legitimate_constant_p (Pmode, x);
4126
4127 case SYMBOL_REF:
4128 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4129
4130 default:
4131 return false;
4132 }
4133 }
4134
4135 /* Nonzero if the constant value X is a legitimate general operand
4136 when generating PIC code. It is given that flag_pic is on and
4137 that X satisfies CONSTANT_P. */
4138
4139 bool
4140 legitimate_pic_operand_p (rtx x)
4141 {
4142 if (pic_address_needs_scratch (x))
4143 return false;
4144 if (sparc_tls_referenced_p (x))
4145 return false;
4146 return true;
4147 }
4148
4149 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4150 (CONST_INT_P (X) \
4151 && INTVAL (X) >= -0x1000 \
4152 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4153
4154 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4155 (CONST_INT_P (X) \
4156 && INTVAL (X) >= -0x1000 \
4157 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4158
4159 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4160
4161 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4162 ordinarily. This changes a bit when generating PIC. */
4163
4164 static bool
4165 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4166 {
4167 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4168
4169 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4170 rs1 = addr;
4171 else if (GET_CODE (addr) == PLUS)
4172 {
4173 rs1 = XEXP (addr, 0);
4174 rs2 = XEXP (addr, 1);
4175
4176 /* Canonicalize. REG comes first, if there are no regs,
4177 LO_SUM comes first. */
4178 if (!REG_P (rs1)
4179 && GET_CODE (rs1) != SUBREG
4180 && (REG_P (rs2)
4181 || GET_CODE (rs2) == SUBREG
4182 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4183 {
4184 rs1 = XEXP (addr, 1);
4185 rs2 = XEXP (addr, 0);
4186 }
4187
4188 if ((flag_pic == 1
4189 && rs1 == pic_offset_table_rtx
4190 && !REG_P (rs2)
4191 && GET_CODE (rs2) != SUBREG
4192 && GET_CODE (rs2) != LO_SUM
4193 && GET_CODE (rs2) != MEM
4194 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4195 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4196 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4197 || ((REG_P (rs1)
4198 || GET_CODE (rs1) == SUBREG)
4199 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4200 {
4201 imm1 = rs2;
4202 rs2 = NULL;
4203 }
4204 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4205 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4206 {
4207 /* We prohibit REG + REG for TFmode when there are no quad move insns
4208 and we consequently need to split. We do this because REG+REG
4209 is not an offsettable address. If we get the situation in reload
4210 where source and destination of a movtf pattern are both MEMs with
4211 REG+REG address, then only one of them gets converted to an
4212 offsettable address. */
4213 if (mode == TFmode
4214 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4215 return 0;
4216
4217 /* Likewise for TImode, but in all cases. */
4218 if (mode == TImode)
4219 return 0;
4220
4221 /* We prohibit REG + REG on ARCH32 if not optimizing for
4222 DFmode/DImode because then mem_min_alignment is likely to be zero
4223 after reload and the forced split would lack a matching splitter
4224 pattern. */
4225 if (TARGET_ARCH32 && !optimize
4226 && (mode == DFmode || mode == DImode))
4227 return 0;
4228 }
4229 else if (USE_AS_OFFSETABLE_LO10
4230 && GET_CODE (rs1) == LO_SUM
4231 && TARGET_ARCH64
4232 && ! TARGET_CM_MEDMID
4233 && RTX_OK_FOR_OLO10_P (rs2, mode))
4234 {
4235 rs2 = NULL;
4236 imm1 = XEXP (rs1, 1);
4237 rs1 = XEXP (rs1, 0);
4238 if (!CONSTANT_P (imm1)
4239 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4240 return 0;
4241 }
4242 }
4243 else if (GET_CODE (addr) == LO_SUM)
4244 {
4245 rs1 = XEXP (addr, 0);
4246 imm1 = XEXP (addr, 1);
4247
4248 if (!CONSTANT_P (imm1)
4249 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4250 return 0;
4251
4252 /* We can't allow TFmode in 32-bit mode, because an offset greater
4253 than the alignment (8) may cause the LO_SUM to overflow. */
4254 if (mode == TFmode && TARGET_ARCH32)
4255 return 0;
4256
4257 /* During reload, accept the HIGH+LO_SUM construct generated by
4258 sparc_legitimize_reload_address. */
4259 if (reload_in_progress
4260 && GET_CODE (rs1) == HIGH
4261 && XEXP (rs1, 0) == imm1)
4262 return 1;
4263 }
4264 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4265 return 1;
4266 else
4267 return 0;
4268
4269 if (GET_CODE (rs1) == SUBREG)
4270 rs1 = SUBREG_REG (rs1);
4271 if (!REG_P (rs1))
4272 return 0;
4273
4274 if (rs2)
4275 {
4276 if (GET_CODE (rs2) == SUBREG)
4277 rs2 = SUBREG_REG (rs2);
4278 if (!REG_P (rs2))
4279 return 0;
4280 }
4281
4282 if (strict)
4283 {
4284 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4285 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4286 return 0;
4287 }
4288 else
4289 {
4290 if ((! SPARC_INT_REG_P (REGNO (rs1))
4291 && REGNO (rs1) != FRAME_POINTER_REGNUM
4292 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4293 || (rs2
4294 && (! SPARC_INT_REG_P (REGNO (rs2))
4295 && REGNO (rs2) != FRAME_POINTER_REGNUM
4296 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4297 return 0;
4298 }
4299 return 1;
4300 }
4301
4302 /* Return the SYMBOL_REF for the tls_get_addr function. */
4303
4304 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4305
4306 static rtx
4307 sparc_tls_get_addr (void)
4308 {
4309 if (!sparc_tls_symbol)
4310 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4311
4312 return sparc_tls_symbol;
4313 }
4314
4315 /* Return the Global Offset Table to be used in TLS mode. */
4316
4317 static rtx
4318 sparc_tls_got (void)
4319 {
4320 /* In PIC mode, this is just the PIC offset table. */
4321 if (flag_pic)
4322 {
4323 crtl->uses_pic_offset_table = 1;
4324 return pic_offset_table_rtx;
4325 }
4326
4327 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4328 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4329 if (TARGET_SUN_TLS && TARGET_ARCH32)
4330 {
4331 load_got_register ();
4332 return global_offset_table_rtx;
4333 }
4334
4335 /* In all other cases, we load a new pseudo with the GOT symbol. */
4336 return copy_to_reg (sparc_got ());
4337 }
4338
4339 /* Return true if X contains a thread-local symbol. */
4340
4341 static bool
4342 sparc_tls_referenced_p (rtx x)
4343 {
4344 if (!TARGET_HAVE_TLS)
4345 return false;
4346
4347 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4348 x = XEXP (XEXP (x, 0), 0);
4349
4350 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4351 return true;
4352
4353 /* That's all we handle in sparc_legitimize_tls_address for now. */
4354 return false;
4355 }
4356
4357 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4358 this (thread-local) address. */
4359
4360 static rtx
4361 sparc_legitimize_tls_address (rtx addr)
4362 {
4363 rtx temp1, temp2, temp3, ret, o0, got;
4364 rtx_insn *insn;
4365
4366 gcc_assert (can_create_pseudo_p ());
4367
4368 if (GET_CODE (addr) == SYMBOL_REF)
4369 switch (SYMBOL_REF_TLS_MODEL (addr))
4370 {
4371 case TLS_MODEL_GLOBAL_DYNAMIC:
4372 start_sequence ();
4373 temp1 = gen_reg_rtx (SImode);
4374 temp2 = gen_reg_rtx (SImode);
4375 ret = gen_reg_rtx (Pmode);
4376 o0 = gen_rtx_REG (Pmode, 8);
4377 got = sparc_tls_got ();
4378 emit_insn (gen_tgd_hi22 (temp1, addr));
4379 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4380 if (TARGET_ARCH32)
4381 {
4382 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4383 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4384 addr, const1_rtx));
4385 }
4386 else
4387 {
4388 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4389 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4390 addr, const1_rtx));
4391 }
4392 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4393 insn = get_insns ();
4394 end_sequence ();
4395 emit_libcall_block (insn, ret, o0, addr);
4396 break;
4397
4398 case TLS_MODEL_LOCAL_DYNAMIC:
4399 start_sequence ();
4400 temp1 = gen_reg_rtx (SImode);
4401 temp2 = gen_reg_rtx (SImode);
4402 temp3 = gen_reg_rtx (Pmode);
4403 ret = gen_reg_rtx (Pmode);
4404 o0 = gen_rtx_REG (Pmode, 8);
4405 got = sparc_tls_got ();
4406 emit_insn (gen_tldm_hi22 (temp1));
4407 emit_insn (gen_tldm_lo10 (temp2, temp1));
4408 if (TARGET_ARCH32)
4409 {
4410 emit_insn (gen_tldm_add32 (o0, got, temp2));
4411 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4412 const1_rtx));
4413 }
4414 else
4415 {
4416 emit_insn (gen_tldm_add64 (o0, got, temp2));
4417 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4418 const1_rtx));
4419 }
4420 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4421 insn = get_insns ();
4422 end_sequence ();
4423 emit_libcall_block (insn, temp3, o0,
4424 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4425 UNSPEC_TLSLD_BASE));
4426 temp1 = gen_reg_rtx (SImode);
4427 temp2 = gen_reg_rtx (SImode);
4428 emit_insn (gen_tldo_hix22 (temp1, addr));
4429 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4430 if (TARGET_ARCH32)
4431 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4432 else
4433 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4434 break;
4435
4436 case TLS_MODEL_INITIAL_EXEC:
4437 temp1 = gen_reg_rtx (SImode);
4438 temp2 = gen_reg_rtx (SImode);
4439 temp3 = gen_reg_rtx (Pmode);
4440 got = sparc_tls_got ();
4441 emit_insn (gen_tie_hi22 (temp1, addr));
4442 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4443 if (TARGET_ARCH32)
4444 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4445 else
4446 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4447 if (TARGET_SUN_TLS)
4448 {
4449 ret = gen_reg_rtx (Pmode);
4450 if (TARGET_ARCH32)
4451 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4452 temp3, addr));
4453 else
4454 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4455 temp3, addr));
4456 }
4457 else
4458 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4459 break;
4460
4461 case TLS_MODEL_LOCAL_EXEC:
4462 temp1 = gen_reg_rtx (Pmode);
4463 temp2 = gen_reg_rtx (Pmode);
4464 if (TARGET_ARCH32)
4465 {
4466 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4467 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4468 }
4469 else
4470 {
4471 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4472 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4473 }
4474 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4475 break;
4476
4477 default:
4478 gcc_unreachable ();
4479 }
4480
4481 else if (GET_CODE (addr) == CONST)
4482 {
4483 rtx base, offset;
4484
4485 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4486
4487 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4488 offset = XEXP (XEXP (addr, 0), 1);
4489
4490 base = force_operand (base, NULL_RTX);
4491 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4492 offset = force_reg (Pmode, offset);
4493 ret = gen_rtx_PLUS (Pmode, base, offset);
4494 }
4495
4496 else
4497 gcc_unreachable (); /* for now ... */
4498
4499 return ret;
4500 }
4501
4502 /* Legitimize PIC addresses. If the address is already position-independent,
4503 we return ORIG. Newly generated position-independent addresses go into a
4504 reg. This is REG if nonzero, otherwise we allocate register(s) as
4505 necessary. */
4506
4507 static rtx
4508 sparc_legitimize_pic_address (rtx orig, rtx reg)
4509 {
4510 bool gotdata_op = false;
4511
4512 if (GET_CODE (orig) == SYMBOL_REF
4513 /* See the comment in sparc_expand_move. */
4514 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4515 {
4516 rtx pic_ref, address;
4517 rtx_insn *insn;
4518
4519 if (reg == 0)
4520 {
4521 gcc_assert (can_create_pseudo_p ());
4522 reg = gen_reg_rtx (Pmode);
4523 }
4524
4525 if (flag_pic == 2)
4526 {
4527 /* If not during reload, allocate another temp reg here for loading
4528 in the address, so that these instructions can be optimized
4529 properly. */
4530 rtx temp_reg = (! can_create_pseudo_p ()
4531 ? reg : gen_reg_rtx (Pmode));
4532
4533 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4534 won't get confused into thinking that these two instructions
4535 are loading in the true address of the symbol. If in the
4536 future a PIC rtx exists, that should be used instead. */
4537 if (TARGET_ARCH64)
4538 {
4539 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4540 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4541 }
4542 else
4543 {
4544 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4545 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4546 }
4547 address = temp_reg;
4548 gotdata_op = true;
4549 }
4550 else
4551 address = orig;
4552
4553 crtl->uses_pic_offset_table = 1;
4554 if (gotdata_op)
4555 {
4556 if (TARGET_ARCH64)
4557 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4558 pic_offset_table_rtx,
4559 address, orig));
4560 else
4561 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4562 pic_offset_table_rtx,
4563 address, orig));
4564 }
4565 else
4566 {
4567 pic_ref
4568 = gen_const_mem (Pmode,
4569 gen_rtx_PLUS (Pmode,
4570 pic_offset_table_rtx, address));
4571 insn = emit_move_insn (reg, pic_ref);
4572 }
4573
4574 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4575 by loop. */
4576 set_unique_reg_note (insn, REG_EQUAL, orig);
4577 return reg;
4578 }
4579 else if (GET_CODE (orig) == CONST)
4580 {
4581 rtx base, offset;
4582
4583 if (GET_CODE (XEXP (orig, 0)) == PLUS
4584 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4585 return orig;
4586
4587 if (reg == 0)
4588 {
4589 gcc_assert (can_create_pseudo_p ());
4590 reg = gen_reg_rtx (Pmode);
4591 }
4592
4593 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4594 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4595 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4596 base == reg ? NULL_RTX : reg);
4597
4598 if (GET_CODE (offset) == CONST_INT)
4599 {
4600 if (SMALL_INT (offset))
4601 return plus_constant (Pmode, base, INTVAL (offset));
4602 else if (can_create_pseudo_p ())
4603 offset = force_reg (Pmode, offset);
4604 else
4605 /* If we reach here, then something is seriously wrong. */
4606 gcc_unreachable ();
4607 }
4608 return gen_rtx_PLUS (Pmode, base, offset);
4609 }
4610 else if (GET_CODE (orig) == LABEL_REF)
4611 /* ??? We ought to be checking that the register is live instead, in case
4612 it is eliminated. */
4613 crtl->uses_pic_offset_table = 1;
4614
4615 return orig;
4616 }
4617
4618 /* Try machine-dependent ways of modifying an illegitimate address X
4619 to be legitimate. If we find one, return the new, valid address.
4620
4621 OLDX is the address as it was before break_out_memory_refs was called.
4622 In some cases it is useful to look at this to decide what needs to be done.
4623
4624 MODE is the mode of the operand pointed to by X.
4625
4626 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4627
4628 static rtx
4629 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4630 machine_mode mode)
4631 {
4632 rtx orig_x = x;
4633
4634 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4635 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4636 force_operand (XEXP (x, 0), NULL_RTX));
4637 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4638 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4639 force_operand (XEXP (x, 1), NULL_RTX));
4640 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4641 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4642 XEXP (x, 1));
4643 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4644 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4645 force_operand (XEXP (x, 1), NULL_RTX));
4646
4647 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4648 return x;
4649
4650 if (sparc_tls_referenced_p (x))
4651 x = sparc_legitimize_tls_address (x);
4652 else if (flag_pic)
4653 x = sparc_legitimize_pic_address (x, NULL_RTX);
4654 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4655 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4656 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4657 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4658 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4659 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4660 else if (GET_CODE (x) == SYMBOL_REF
4661 || GET_CODE (x) == CONST
4662 || GET_CODE (x) == LABEL_REF)
4663 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4664
4665 return x;
4666 }
4667
4668 /* Delegitimize an address that was legitimized by the above function. */
4669
4670 static rtx
4671 sparc_delegitimize_address (rtx x)
4672 {
4673 x = delegitimize_mem_from_attrs (x);
4674
4675 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4676 switch (XINT (XEXP (x, 1), 1))
4677 {
4678 case UNSPEC_MOVE_PIC:
4679 case UNSPEC_TLSLE:
4680 x = XVECEXP (XEXP (x, 1), 0, 0);
4681 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4682 break;
4683 default:
4684 break;
4685 }
4686
4687 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4688 if (GET_CODE (x) == MINUS
4689 && REG_P (XEXP (x, 0))
4690 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4691 && GET_CODE (XEXP (x, 1)) == LO_SUM
4692 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4693 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4694 {
4695 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4696 gcc_assert (GET_CODE (x) == LABEL_REF);
4697 }
4698
4699 return x;
4700 }
4701
4702 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4703 replace the input X, or the original X if no replacement is called for.
4704 The output parameter *WIN is 1 if the calling macro should goto WIN,
4705 0 if it should not.
4706
4707 For SPARC, we wish to handle addresses by splitting them into
4708 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4709 This cuts the number of extra insns by one.
4710
4711 Do nothing when generating PIC code and the address is a symbolic
4712 operand or requires a scratch register. */
4713
4714 rtx
4715 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4716 int opnum, int type,
4717 int ind_levels ATTRIBUTE_UNUSED, int *win)
4718 {
4719 /* Decompose SImode constants into HIGH+LO_SUM. */
4720 if (CONSTANT_P (x)
4721 && (mode != TFmode || TARGET_ARCH64)
4722 && GET_MODE (x) == SImode
4723 && GET_CODE (x) != LO_SUM
4724 && GET_CODE (x) != HIGH
4725 && sparc_cmodel <= CM_MEDLOW
4726 && !(flag_pic
4727 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4728 {
4729 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4730 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4731 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4732 opnum, (enum reload_type)type);
4733 *win = 1;
4734 return x;
4735 }
4736
4737 /* We have to recognize what we have already generated above. */
4738 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4739 {
4740 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4741 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4742 opnum, (enum reload_type)type);
4743 *win = 1;
4744 return x;
4745 }
4746
4747 *win = 0;
4748 return x;
4749 }
4750
4751 /* Return true if ADDR (a legitimate address expression)
4752 has an effect that depends on the machine mode it is used for.
4753
4754 In PIC mode,
4755
4756 (mem:HI [%l7+a])
4757
4758 is not equivalent to
4759
4760 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4761
4762 because [%l7+a+1] is interpreted as the address of (a+1). */
4763
4764
4765 static bool
4766 sparc_mode_dependent_address_p (const_rtx addr,
4767 addr_space_t as ATTRIBUTE_UNUSED)
4768 {
4769 if (flag_pic && GET_CODE (addr) == PLUS)
4770 {
4771 rtx op0 = XEXP (addr, 0);
4772 rtx op1 = XEXP (addr, 1);
4773 if (op0 == pic_offset_table_rtx
4774 && symbolic_operand (op1, VOIDmode))
4775 return true;
4776 }
4777
4778 return false;
4779 }
4780
4781 #ifdef HAVE_GAS_HIDDEN
4782 # define USE_HIDDEN_LINKONCE 1
4783 #else
4784 # define USE_HIDDEN_LINKONCE 0
4785 #endif
4786
4787 static void
4788 get_pc_thunk_name (char name[32], unsigned int regno)
4789 {
4790 const char *reg_name = reg_names[regno];
4791
4792 /* Skip the leading '%' as that cannot be used in a
4793 symbol name. */
4794 reg_name += 1;
4795
4796 if (USE_HIDDEN_LINKONCE)
4797 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4798 else
4799 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4800 }
4801
4802 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4803
4804 static rtx
4805 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4806 {
4807 int orig_flag_pic = flag_pic;
4808 rtx insn;
4809
4810 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4811 flag_pic = 0;
4812 if (TARGET_ARCH64)
4813 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4814 else
4815 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4816 flag_pic = orig_flag_pic;
4817
4818 return insn;
4819 }
4820
4821 /* Emit code to load the GOT register. */
4822
4823 void
4824 load_got_register (void)
4825 {
4826 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4827 if (!global_offset_table_rtx)
4828 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4829
4830 if (TARGET_VXWORKS_RTP)
4831 emit_insn (gen_vxworks_load_got ());
4832 else
4833 {
4834 /* The GOT symbol is subject to a PC-relative relocation so we need a
4835 helper function to add the PC value and thus get the final value. */
4836 if (!got_helper_rtx)
4837 {
4838 char name[32];
4839 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4840 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4841 }
4842
4843 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4844 got_helper_rtx,
4845 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4846 }
4847
4848 /* Need to emit this whether or not we obey regdecls,
4849 since setjmp/longjmp can cause life info to screw up.
4850 ??? In the case where we don't obey regdecls, this is not sufficient
4851 since we may not fall out the bottom. */
4852 emit_use (global_offset_table_rtx);
4853 }
4854
4855 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4856 address of the call target. */
4857
4858 void
4859 sparc_emit_call_insn (rtx pat, rtx addr)
4860 {
4861 rtx_insn *insn;
4862
4863 insn = emit_call_insn (pat);
4864
4865 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4866 if (TARGET_VXWORKS_RTP
4867 && flag_pic
4868 && GET_CODE (addr) == SYMBOL_REF
4869 && (SYMBOL_REF_DECL (addr)
4870 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4871 : !SYMBOL_REF_LOCAL_P (addr)))
4872 {
4873 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4874 crtl->uses_pic_offset_table = 1;
4875 }
4876 }
4877 \f
4878 /* Return 1 if RTX is a MEM which is known to be aligned to at
4879 least a DESIRED byte boundary. */
4880
4881 int
4882 mem_min_alignment (rtx mem, int desired)
4883 {
4884 rtx addr, base, offset;
4885
4886 /* If it's not a MEM we can't accept it. */
4887 if (GET_CODE (mem) != MEM)
4888 return 0;
4889
4890 /* Obviously... */
4891 if (!TARGET_UNALIGNED_DOUBLES
4892 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4893 return 1;
4894
4895 /* ??? The rest of the function predates MEM_ALIGN so
4896 there is probably a bit of redundancy. */
4897 addr = XEXP (mem, 0);
4898 base = offset = NULL_RTX;
4899 if (GET_CODE (addr) == PLUS)
4900 {
4901 if (GET_CODE (XEXP (addr, 0)) == REG)
4902 {
4903 base = XEXP (addr, 0);
4904
4905 /* What we are saying here is that if the base
4906 REG is aligned properly, the compiler will make
4907 sure any REG based index upon it will be so
4908 as well. */
4909 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4910 offset = XEXP (addr, 1);
4911 else
4912 offset = const0_rtx;
4913 }
4914 }
4915 else if (GET_CODE (addr) == REG)
4916 {
4917 base = addr;
4918 offset = const0_rtx;
4919 }
4920
4921 if (base != NULL_RTX)
4922 {
4923 int regno = REGNO (base);
4924
4925 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4926 {
4927 /* Check if the compiler has recorded some information
4928 about the alignment of the base REG. If reload has
4929 completed, we already matched with proper alignments.
4930 If not running global_alloc, reload might give us
4931 unaligned pointer to local stack though. */
4932 if (((cfun != 0
4933 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4934 || (optimize && reload_completed))
4935 && (INTVAL (offset) & (desired - 1)) == 0)
4936 return 1;
4937 }
4938 else
4939 {
4940 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4941 return 1;
4942 }
4943 }
4944 else if (! TARGET_UNALIGNED_DOUBLES
4945 || CONSTANT_P (addr)
4946 || GET_CODE (addr) == LO_SUM)
4947 {
4948 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4949 is true, in which case we can only assume that an access is aligned if
4950 it is to a constant address, or the address involves a LO_SUM. */
4951 return 1;
4952 }
4953
4954 /* An obviously unaligned address. */
4955 return 0;
4956 }
4957
4958 \f
4959 /* Vectors to keep interesting information about registers where it can easily
4960 be got. We used to use the actual mode value as the bit number, but there
4961 are more than 32 modes now. Instead we use two tables: one indexed by
4962 hard register number, and one indexed by mode. */
4963
4964 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4965 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4966 mapped into one sparc_mode_class mode. */
4967
4968 enum sparc_mode_class {
4969 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4970 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4971 CC_MODE, CCFP_MODE
4972 };
4973
4974 /* Modes for single-word and smaller quantities. */
4975 #define S_MODES \
4976 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4977
4978 /* Modes for double-word and smaller quantities. */
4979 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4980
4981 /* Modes for quad-word and smaller quantities. */
4982 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4983
4984 /* Modes for 8-word and smaller quantities. */
4985 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4986
4987 /* Modes for single-float quantities. */
4988 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4989
4990 /* Modes for double-float and smaller quantities. */
4991 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4992
4993 /* Modes for quad-float and smaller quantities. */
4994 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4995
4996 /* Modes for quad-float pairs and smaller quantities. */
4997 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4998
4999 /* Modes for double-float only quantities. */
5000 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5001
5002 /* Modes for quad-float and double-float only quantities. */
5003 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5004
5005 /* Modes for quad-float pairs and double-float only quantities. */
5006 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5007
5008 /* Modes for condition codes. */
5009 #define CC_MODES (1 << (int) CC_MODE)
5010 #define CCFP_MODES (1 << (int) CCFP_MODE)
5011
5012 /* Value is 1 if register/mode pair is acceptable on sparc.
5013
5014 The funny mixture of D and T modes is because integer operations
5015 do not specially operate on tetra quantities, so non-quad-aligned
5016 registers can hold quadword quantities (except %o4 and %i4 because
5017 they cross fixed registers).
5018
5019 ??? Note that, despite the settings, non-double-aligned parameter
5020 registers can hold double-word quantities in 32-bit mode. */
5021
5022 /* This points to either the 32-bit or the 64-bit version. */
5023 static const int *hard_regno_mode_classes;
5024
5025 static const int hard_32bit_mode_classes[] = {
5026 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5027 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5028 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5029 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5030
5031 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5032 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5033 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5034 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5035
5036 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5037 and none can hold SFmode/SImode values. */
5038 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5039 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5040 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5041 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5042
5043 /* %fcc[0123] */
5044 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5045
5046 /* %icc, %sfp, %gsr */
5047 CC_MODES, 0, D_MODES
5048 };
5049
5050 static const int hard_64bit_mode_classes[] = {
5051 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5052 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5053 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5054 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5055
5056 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5057 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5058 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5059 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5060
5061 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5062 and none can hold SFmode/SImode values. */
5063 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5064 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5065 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5066 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5067
5068 /* %fcc[0123] */
5069 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5070
5071 /* %icc, %sfp, %gsr */
5072 CC_MODES, 0, D_MODES
5073 };
5074
5075 static int sparc_mode_class [NUM_MACHINE_MODES];
5076
5077 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5078
5079 static void
5080 sparc_init_modes (void)
5081 {
5082 int i;
5083
5084 for (i = 0; i < NUM_MACHINE_MODES; i++)
5085 {
5086 machine_mode m = (machine_mode) i;
5087 unsigned int size = GET_MODE_SIZE (m);
5088
5089 switch (GET_MODE_CLASS (m))
5090 {
5091 case MODE_INT:
5092 case MODE_PARTIAL_INT:
5093 case MODE_COMPLEX_INT:
5094 if (size < 4)
5095 sparc_mode_class[i] = 1 << (int) H_MODE;
5096 else if (size == 4)
5097 sparc_mode_class[i] = 1 << (int) S_MODE;
5098 else if (size == 8)
5099 sparc_mode_class[i] = 1 << (int) D_MODE;
5100 else if (size == 16)
5101 sparc_mode_class[i] = 1 << (int) T_MODE;
5102 else if (size == 32)
5103 sparc_mode_class[i] = 1 << (int) O_MODE;
5104 else
5105 sparc_mode_class[i] = 0;
5106 break;
5107 case MODE_VECTOR_INT:
5108 if (size == 4)
5109 sparc_mode_class[i] = 1 << (int) SF_MODE;
5110 else if (size == 8)
5111 sparc_mode_class[i] = 1 << (int) DF_MODE;
5112 else
5113 sparc_mode_class[i] = 0;
5114 break;
5115 case MODE_FLOAT:
5116 case MODE_COMPLEX_FLOAT:
5117 if (size == 4)
5118 sparc_mode_class[i] = 1 << (int) SF_MODE;
5119 else if (size == 8)
5120 sparc_mode_class[i] = 1 << (int) DF_MODE;
5121 else if (size == 16)
5122 sparc_mode_class[i] = 1 << (int) TF_MODE;
5123 else if (size == 32)
5124 sparc_mode_class[i] = 1 << (int) OF_MODE;
5125 else
5126 sparc_mode_class[i] = 0;
5127 break;
5128 case MODE_CC:
5129 if (m == CCFPmode || m == CCFPEmode)
5130 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5131 else
5132 sparc_mode_class[i] = 1 << (int) CC_MODE;
5133 break;
5134 default:
5135 sparc_mode_class[i] = 0;
5136 break;
5137 }
5138 }
5139
5140 if (TARGET_ARCH64)
5141 hard_regno_mode_classes = hard_64bit_mode_classes;
5142 else
5143 hard_regno_mode_classes = hard_32bit_mode_classes;
5144
5145 /* Initialize the array used by REGNO_REG_CLASS. */
5146 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5147 {
5148 if (i < 16 && TARGET_V8PLUS)
5149 sparc_regno_reg_class[i] = I64_REGS;
5150 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5151 sparc_regno_reg_class[i] = GENERAL_REGS;
5152 else if (i < 64)
5153 sparc_regno_reg_class[i] = FP_REGS;
5154 else if (i < 96)
5155 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5156 else if (i < 100)
5157 sparc_regno_reg_class[i] = FPCC_REGS;
5158 else
5159 sparc_regno_reg_class[i] = NO_REGS;
5160 }
5161 }
5162 \f
5163 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5164
5165 static inline bool
5166 save_global_or_fp_reg_p (unsigned int regno,
5167 int leaf_function ATTRIBUTE_UNUSED)
5168 {
5169 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5170 }
5171
5172 /* Return whether the return address register (%i7) is needed. */
5173
5174 static inline bool
5175 return_addr_reg_needed_p (int leaf_function)
5176 {
5177 /* If it is live, for example because of __builtin_return_address (0). */
5178 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5179 return true;
5180
5181 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5182 if (!leaf_function
5183 /* Loading the GOT register clobbers %o7. */
5184 || crtl->uses_pic_offset_table
5185 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5186 return true;
5187
5188 return false;
5189 }
5190
5191 /* Return whether REGNO, a local or in register, must be saved/restored. */
5192
5193 static bool
5194 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5195 {
5196 /* General case: call-saved registers live at some point. */
5197 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5198 return true;
5199
5200 /* Frame pointer register (%fp) if needed. */
5201 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5202 return true;
5203
5204 /* Return address register (%i7) if needed. */
5205 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5206 return true;
5207
5208 /* GOT register (%l7) if needed. */
5209 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5210 return true;
5211
5212 /* If the function accesses prior frames, the frame pointer and the return
5213 address of the previous frame must be saved on the stack. */
5214 if (crtl->accesses_prior_frames
5215 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5216 return true;
5217
5218 return false;
5219 }
5220
5221 /* Compute the frame size required by the function. This function is called
5222 during the reload pass and also by sparc_expand_prologue. */
5223
5224 HOST_WIDE_INT
5225 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5226 {
5227 HOST_WIDE_INT frame_size, apparent_frame_size;
5228 int args_size, n_global_fp_regs = 0;
5229 bool save_local_in_regs_p = false;
5230 unsigned int i;
5231
5232 /* If the function allocates dynamic stack space, the dynamic offset is
5233 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5234 if (leaf_function && !cfun->calls_alloca)
5235 args_size = 0;
5236 else
5237 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5238
5239 /* Calculate space needed for global registers. */
5240 if (TARGET_ARCH64)
5241 {
5242 for (i = 0; i < 8; i++)
5243 if (save_global_or_fp_reg_p (i, 0))
5244 n_global_fp_regs += 2;
5245 }
5246 else
5247 {
5248 for (i = 0; i < 8; i += 2)
5249 if (save_global_or_fp_reg_p (i, 0)
5250 || save_global_or_fp_reg_p (i + 1, 0))
5251 n_global_fp_regs += 2;
5252 }
5253
5254 /* In the flat window model, find out which local and in registers need to
5255 be saved. We don't reserve space in the current frame for them as they
5256 will be spilled into the register window save area of the caller's frame.
5257 However, as soon as we use this register window save area, we must create
5258 that of the current frame to make it the live one. */
5259 if (TARGET_FLAT)
5260 for (i = 16; i < 32; i++)
5261 if (save_local_or_in_reg_p (i, leaf_function))
5262 {
5263 save_local_in_regs_p = true;
5264 break;
5265 }
5266
5267 /* Calculate space needed for FP registers. */
5268 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5269 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5270 n_global_fp_regs += 2;
5271
5272 if (size == 0
5273 && n_global_fp_regs == 0
5274 && args_size == 0
5275 && !save_local_in_regs_p)
5276 frame_size = apparent_frame_size = 0;
5277 else
5278 {
5279 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5280 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5281 apparent_frame_size += n_global_fp_regs * 4;
5282
5283 /* We need to add the size of the outgoing argument area. */
5284 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5285
5286 /* And that of the register window save area. */
5287 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5288
5289 /* Finally, bump to the appropriate alignment. */
5290 frame_size = SPARC_STACK_ALIGN (frame_size);
5291 }
5292
5293 /* Set up values for use in prologue and epilogue. */
5294 sparc_frame_size = frame_size;
5295 sparc_apparent_frame_size = apparent_frame_size;
5296 sparc_n_global_fp_regs = n_global_fp_regs;
5297 sparc_save_local_in_regs_p = save_local_in_regs_p;
5298
5299 return frame_size;
5300 }
5301
5302 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5303
5304 int
5305 sparc_initial_elimination_offset (int to)
5306 {
5307 int offset;
5308
5309 if (to == STACK_POINTER_REGNUM)
5310 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5311 else
5312 offset = 0;
5313
5314 offset += SPARC_STACK_BIAS;
5315 return offset;
5316 }
5317
5318 /* Output any necessary .register pseudo-ops. */
5319
5320 void
5321 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5322 {
5323 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5324 int i;
5325
5326 if (TARGET_ARCH32)
5327 return;
5328
5329 /* Check if %g[2367] were used without
5330 .register being printed for them already. */
5331 for (i = 2; i < 8; i++)
5332 {
5333 if (df_regs_ever_live_p (i)
5334 && ! sparc_hard_reg_printed [i])
5335 {
5336 sparc_hard_reg_printed [i] = 1;
5337 /* %g7 is used as TLS base register, use #ignore
5338 for it instead of #scratch. */
5339 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5340 i == 7 ? "ignore" : "scratch");
5341 }
5342 if (i == 3) i = 5;
5343 }
5344 #endif
5345 }
5346
5347 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5348
5349 #if PROBE_INTERVAL > 4096
5350 #error Cannot use indexed addressing mode for stack probing
5351 #endif
5352
5353 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5354 inclusive. These are offsets from the current stack pointer.
5355
5356 Note that we don't use the REG+REG addressing mode for the probes because
5357 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5358 so the advantages of having a single code win here. */
5359
5360 static void
5361 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5362 {
5363 rtx g1 = gen_rtx_REG (Pmode, 1);
5364
5365 /* See if we have a constant small number of probes to generate. If so,
5366 that's the easy case. */
5367 if (size <= PROBE_INTERVAL)
5368 {
5369 emit_move_insn (g1, GEN_INT (first));
5370 emit_insn (gen_rtx_SET (g1,
5371 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5372 emit_stack_probe (plus_constant (Pmode, g1, -size));
5373 }
5374
5375 /* The run-time loop is made up of 9 insns in the generic case while the
5376 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5377 else if (size <= 4 * PROBE_INTERVAL)
5378 {
5379 HOST_WIDE_INT i;
5380
5381 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5382 emit_insn (gen_rtx_SET (g1,
5383 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5384 emit_stack_probe (g1);
5385
5386 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5387 it exceeds SIZE. If only two probes are needed, this will not
5388 generate any code. Then probe at FIRST + SIZE. */
5389 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5390 {
5391 emit_insn (gen_rtx_SET (g1,
5392 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5393 emit_stack_probe (g1);
5394 }
5395
5396 emit_stack_probe (plus_constant (Pmode, g1,
5397 (i - PROBE_INTERVAL) - size));
5398 }
5399
5400 /* Otherwise, do the same as above, but in a loop. Note that we must be
5401 extra careful with variables wrapping around because we might be at
5402 the very top (or the very bottom) of the address space and we have
5403 to be able to handle this case properly; in particular, we use an
5404 equality test for the loop condition. */
5405 else
5406 {
5407 HOST_WIDE_INT rounded_size;
5408 rtx g4 = gen_rtx_REG (Pmode, 4);
5409
5410 emit_move_insn (g1, GEN_INT (first));
5411
5412
5413 /* Step 1: round SIZE to the previous multiple of the interval. */
5414
5415 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5416 emit_move_insn (g4, GEN_INT (rounded_size));
5417
5418
5419 /* Step 2: compute initial and final value of the loop counter. */
5420
5421 /* TEST_ADDR = SP + FIRST. */
5422 emit_insn (gen_rtx_SET (g1,
5423 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5424
5425 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5426 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5427
5428
5429 /* Step 3: the loop
5430
5431 while (TEST_ADDR != LAST_ADDR)
5432 {
5433 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5434 probe at TEST_ADDR
5435 }
5436
5437 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5438 until it is equal to ROUNDED_SIZE. */
5439
5440 if (TARGET_ARCH64)
5441 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5442 else
5443 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5444
5445
5446 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5447 that SIZE is equal to ROUNDED_SIZE. */
5448
5449 if (size != rounded_size)
5450 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5451 }
5452
5453 /* Make sure nothing is scheduled before we are done. */
5454 emit_insn (gen_blockage ());
5455 }
5456
5457 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5458 absolute addresses. */
5459
5460 const char *
5461 output_probe_stack_range (rtx reg1, rtx reg2)
5462 {
5463 static int labelno = 0;
5464 char loop_lab[32];
5465 rtx xops[2];
5466
5467 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5468
5469 /* Loop. */
5470 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5471
5472 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5473 xops[0] = reg1;
5474 xops[1] = GEN_INT (-PROBE_INTERVAL);
5475 output_asm_insn ("add\t%0, %1, %0", xops);
5476
5477 /* Test if TEST_ADDR == LAST_ADDR. */
5478 xops[1] = reg2;
5479 output_asm_insn ("cmp\t%0, %1", xops);
5480
5481 /* Probe at TEST_ADDR and branch. */
5482 if (TARGET_ARCH64)
5483 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5484 else
5485 fputs ("\tbne\t", asm_out_file);
5486 assemble_name_raw (asm_out_file, loop_lab);
5487 fputc ('\n', asm_out_file);
5488 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5489 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5490
5491 return "";
5492 }
5493
5494 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5495 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5496 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5497 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5498 the action to be performed if it returns false. Return the new offset. */
5499
5500 typedef bool (*sorr_pred_t) (unsigned int, int);
5501 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5502
5503 static int
5504 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5505 int offset, int leaf_function, sorr_pred_t save_p,
5506 sorr_act_t action_true, sorr_act_t action_false)
5507 {
5508 unsigned int i;
5509 rtx mem;
5510 rtx_insn *insn;
5511
5512 if (TARGET_ARCH64 && high <= 32)
5513 {
5514 int fp_offset = -1;
5515
5516 for (i = low; i < high; i++)
5517 {
5518 if (save_p (i, leaf_function))
5519 {
5520 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5521 base, offset));
5522 if (action_true == SORR_SAVE)
5523 {
5524 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5525 RTX_FRAME_RELATED_P (insn) = 1;
5526 }
5527 else /* action_true == SORR_RESTORE */
5528 {
5529 /* The frame pointer must be restored last since its old
5530 value may be used as base address for the frame. This
5531 is problematic in 64-bit mode only because of the lack
5532 of double-word load instruction. */
5533 if (i == HARD_FRAME_POINTER_REGNUM)
5534 fp_offset = offset;
5535 else
5536 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5537 }
5538 offset += 8;
5539 }
5540 else if (action_false == SORR_ADVANCE)
5541 offset += 8;
5542 }
5543
5544 if (fp_offset >= 0)
5545 {
5546 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5547 emit_move_insn (hard_frame_pointer_rtx, mem);
5548 }
5549 }
5550 else
5551 {
5552 for (i = low; i < high; i += 2)
5553 {
5554 bool reg0 = save_p (i, leaf_function);
5555 bool reg1 = save_p (i + 1, leaf_function);
5556 machine_mode mode;
5557 int regno;
5558
5559 if (reg0 && reg1)
5560 {
5561 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5562 regno = i;
5563 }
5564 else if (reg0)
5565 {
5566 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5567 regno = i;
5568 }
5569 else if (reg1)
5570 {
5571 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5572 regno = i + 1;
5573 offset += 4;
5574 }
5575 else
5576 {
5577 if (action_false == SORR_ADVANCE)
5578 offset += 8;
5579 continue;
5580 }
5581
5582 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5583 if (action_true == SORR_SAVE)
5584 {
5585 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5586 RTX_FRAME_RELATED_P (insn) = 1;
5587 if (mode == DImode)
5588 {
5589 rtx set1, set2;
5590 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5591 offset));
5592 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5593 RTX_FRAME_RELATED_P (set1) = 1;
5594 mem
5595 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5596 offset + 4));
5597 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5598 RTX_FRAME_RELATED_P (set2) = 1;
5599 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5600 gen_rtx_PARALLEL (VOIDmode,
5601 gen_rtvec (2, set1, set2)));
5602 }
5603 }
5604 else /* action_true == SORR_RESTORE */
5605 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5606
5607 /* Bump and round down to double word
5608 in case we already bumped by 4. */
5609 offset = ROUND_DOWN (offset + 8, 8);
5610 }
5611 }
5612
5613 return offset;
5614 }
5615
5616 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5617
5618 static rtx
5619 emit_adjust_base_to_offset (rtx base, int offset)
5620 {
5621 /* ??? This might be optimized a little as %g1 might already have a
5622 value close enough that a single add insn will do. */
5623 /* ??? Although, all of this is probably only a temporary fix because
5624 if %g1 can hold a function result, then sparc_expand_epilogue will
5625 lose (the result will be clobbered). */
5626 rtx new_base = gen_rtx_REG (Pmode, 1);
5627 emit_move_insn (new_base, GEN_INT (offset));
5628 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5629 return new_base;
5630 }
5631
5632 /* Emit code to save/restore call-saved global and FP registers. */
5633
5634 static void
5635 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5636 {
5637 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5638 {
5639 base = emit_adjust_base_to_offset (base, offset);
5640 offset = 0;
5641 }
5642
5643 offset
5644 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5645 save_global_or_fp_reg_p, action, SORR_NONE);
5646 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5647 save_global_or_fp_reg_p, action, SORR_NONE);
5648 }
5649
5650 /* Emit code to save/restore call-saved local and in registers. */
5651
5652 static void
5653 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5654 {
5655 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5656 {
5657 base = emit_adjust_base_to_offset (base, offset);
5658 offset = 0;
5659 }
5660
5661 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5662 save_local_or_in_reg_p, action, SORR_ADVANCE);
5663 }
5664
5665 /* Emit a window_save insn. */
5666
5667 static rtx_insn *
5668 emit_window_save (rtx increment)
5669 {
5670 rtx_insn *insn = emit_insn (gen_window_save (increment));
5671 RTX_FRAME_RELATED_P (insn) = 1;
5672
5673 /* The incoming return address (%o7) is saved in %i7. */
5674 add_reg_note (insn, REG_CFA_REGISTER,
5675 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5676 gen_rtx_REG (Pmode,
5677 INCOMING_RETURN_ADDR_REGNUM)));
5678
5679 /* The window save event. */
5680 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5681
5682 /* The CFA is %fp, the hard frame pointer. */
5683 add_reg_note (insn, REG_CFA_DEF_CFA,
5684 plus_constant (Pmode, hard_frame_pointer_rtx,
5685 INCOMING_FRAME_SP_OFFSET));
5686
5687 return insn;
5688 }
5689
5690 /* Generate an increment for the stack pointer. */
5691
5692 static rtx
5693 gen_stack_pointer_inc (rtx increment)
5694 {
5695 return gen_rtx_SET (stack_pointer_rtx,
5696 gen_rtx_PLUS (Pmode,
5697 stack_pointer_rtx,
5698 increment));
5699 }
5700
5701 /* Expand the function prologue. The prologue is responsible for reserving
5702 storage for the frame, saving the call-saved registers and loading the
5703 GOT register if needed. */
5704
5705 void
5706 sparc_expand_prologue (void)
5707 {
5708 HOST_WIDE_INT size;
5709 rtx_insn *insn;
5710
5711 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5712 on the final value of the flag means deferring the prologue/epilogue
5713 expansion until just before the second scheduling pass, which is too
5714 late to emit multiple epilogues or return insns.
5715
5716 Of course we are making the assumption that the value of the flag
5717 will not change between now and its final value. Of the three parts
5718 of the formula, only the last one can reasonably vary. Let's take a
5719 closer look, after assuming that the first two ones are set to true
5720 (otherwise the last value is effectively silenced).
5721
5722 If only_leaf_regs_used returns false, the global predicate will also
5723 be false so the actual frame size calculated below will be positive.
5724 As a consequence, the save_register_window insn will be emitted in
5725 the instruction stream; now this insn explicitly references %fp
5726 which is not a leaf register so only_leaf_regs_used will always
5727 return false subsequently.
5728
5729 If only_leaf_regs_used returns true, we hope that the subsequent
5730 optimization passes won't cause non-leaf registers to pop up. For
5731 example, the regrename pass has special provisions to not rename to
5732 non-leaf registers in a leaf function. */
5733 sparc_leaf_function_p
5734 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5735
5736 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5737
5738 if (flag_stack_usage_info)
5739 current_function_static_stack_size = size;
5740
5741 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5742 || flag_stack_clash_protection)
5743 {
5744 if (crtl->is_leaf && !cfun->calls_alloca)
5745 {
5746 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5747 sparc_emit_probe_stack_range (get_stack_check_protect (),
5748 size - get_stack_check_protect ());
5749 }
5750 else if (size > 0)
5751 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5752 }
5753
5754 if (size == 0)
5755 ; /* do nothing. */
5756 else if (sparc_leaf_function_p)
5757 {
5758 rtx size_int_rtx = GEN_INT (-size);
5759
5760 if (size <= 4096)
5761 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5762 else if (size <= 8192)
5763 {
5764 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5765 RTX_FRAME_RELATED_P (insn) = 1;
5766
5767 /* %sp is still the CFA register. */
5768 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5769 }
5770 else
5771 {
5772 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5773 emit_move_insn (size_rtx, size_int_rtx);
5774 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5775 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5776 gen_stack_pointer_inc (size_int_rtx));
5777 }
5778
5779 RTX_FRAME_RELATED_P (insn) = 1;
5780 }
5781 else
5782 {
5783 rtx size_int_rtx = GEN_INT (-size);
5784
5785 if (size <= 4096)
5786 emit_window_save (size_int_rtx);
5787 else if (size <= 8192)
5788 {
5789 emit_window_save (GEN_INT (-4096));
5790
5791 /* %sp is not the CFA register anymore. */
5792 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5793
5794 /* Make sure no %fp-based store is issued until after the frame is
5795 established. The offset between the frame pointer and the stack
5796 pointer is calculated relative to the value of the stack pointer
5797 at the end of the function prologue, and moving instructions that
5798 access the stack via the frame pointer between the instructions
5799 that decrement the stack pointer could result in accessing the
5800 register window save area, which is volatile. */
5801 emit_insn (gen_frame_blockage ());
5802 }
5803 else
5804 {
5805 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5806 emit_move_insn (size_rtx, size_int_rtx);
5807 emit_window_save (size_rtx);
5808 }
5809 }
5810
5811 if (sparc_leaf_function_p)
5812 {
5813 sparc_frame_base_reg = stack_pointer_rtx;
5814 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5815 }
5816 else
5817 {
5818 sparc_frame_base_reg = hard_frame_pointer_rtx;
5819 sparc_frame_base_offset = SPARC_STACK_BIAS;
5820 }
5821
5822 if (sparc_n_global_fp_regs > 0)
5823 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5824 sparc_frame_base_offset
5825 - sparc_apparent_frame_size,
5826 SORR_SAVE);
5827
5828 /* Load the GOT register if needed. */
5829 if (crtl->uses_pic_offset_table)
5830 load_got_register ();
5831
5832 /* Advertise that the data calculated just above are now valid. */
5833 sparc_prologue_data_valid_p = true;
5834 }
5835
5836 /* Expand the function prologue. The prologue is responsible for reserving
5837 storage for the frame, saving the call-saved registers and loading the
5838 GOT register if needed. */
5839
5840 void
5841 sparc_flat_expand_prologue (void)
5842 {
5843 HOST_WIDE_INT size;
5844 rtx_insn *insn;
5845
5846 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5847
5848 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5849
5850 if (flag_stack_usage_info)
5851 current_function_static_stack_size = size;
5852
5853 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5854 || flag_stack_clash_protection)
5855 {
5856 if (crtl->is_leaf && !cfun->calls_alloca)
5857 {
5858 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5859 sparc_emit_probe_stack_range (get_stack_check_protect (),
5860 size - get_stack_check_protect ());
5861 }
5862 else if (size > 0)
5863 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5864 }
5865
5866 if (sparc_save_local_in_regs_p)
5867 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5868 SORR_SAVE);
5869
5870 if (size == 0)
5871 ; /* do nothing. */
5872 else
5873 {
5874 rtx size_int_rtx, size_rtx;
5875
5876 size_rtx = size_int_rtx = GEN_INT (-size);
5877
5878 /* We establish the frame (i.e. decrement the stack pointer) first, even
5879 if we use a frame pointer, because we cannot clobber any call-saved
5880 registers, including the frame pointer, if we haven't created a new
5881 register save area, for the sake of compatibility with the ABI. */
5882 if (size <= 4096)
5883 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5884 else if (size <= 8192 && !frame_pointer_needed)
5885 {
5886 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5887 RTX_FRAME_RELATED_P (insn) = 1;
5888 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5889 }
5890 else
5891 {
5892 size_rtx = gen_rtx_REG (Pmode, 1);
5893 emit_move_insn (size_rtx, size_int_rtx);
5894 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5895 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5896 gen_stack_pointer_inc (size_int_rtx));
5897 }
5898 RTX_FRAME_RELATED_P (insn) = 1;
5899
5900 /* Ensure nothing is scheduled until after the frame is established. */
5901 emit_insn (gen_blockage ());
5902
5903 if (frame_pointer_needed)
5904 {
5905 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5906 gen_rtx_MINUS (Pmode,
5907 stack_pointer_rtx,
5908 size_rtx)));
5909 RTX_FRAME_RELATED_P (insn) = 1;
5910
5911 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5912 gen_rtx_SET (hard_frame_pointer_rtx,
5913 plus_constant (Pmode, stack_pointer_rtx,
5914 size)));
5915 }
5916
5917 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5918 {
5919 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5920 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5921
5922 insn = emit_move_insn (i7, o7);
5923 RTX_FRAME_RELATED_P (insn) = 1;
5924
5925 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5926
5927 /* Prevent this instruction from ever being considered dead,
5928 even if this function has no epilogue. */
5929 emit_use (i7);
5930 }
5931 }
5932
5933 if (frame_pointer_needed)
5934 {
5935 sparc_frame_base_reg = hard_frame_pointer_rtx;
5936 sparc_frame_base_offset = SPARC_STACK_BIAS;
5937 }
5938 else
5939 {
5940 sparc_frame_base_reg = stack_pointer_rtx;
5941 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5942 }
5943
5944 if (sparc_n_global_fp_regs > 0)
5945 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5946 sparc_frame_base_offset
5947 - sparc_apparent_frame_size,
5948 SORR_SAVE);
5949
5950 /* Load the GOT register if needed. */
5951 if (crtl->uses_pic_offset_table)
5952 load_got_register ();
5953
5954 /* Advertise that the data calculated just above are now valid. */
5955 sparc_prologue_data_valid_p = true;
5956 }
5957
5958 /* This function generates the assembly code for function entry, which boils
5959 down to emitting the necessary .register directives. */
5960
5961 static void
5962 sparc_asm_function_prologue (FILE *file)
5963 {
5964 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5965 if (!TARGET_FLAT)
5966 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5967
5968 sparc_output_scratch_registers (file);
5969 }
5970
5971 /* Expand the function epilogue, either normal or part of a sibcall.
5972 We emit all the instructions except the return or the call. */
5973
5974 void
5975 sparc_expand_epilogue (bool for_eh)
5976 {
5977 HOST_WIDE_INT size = sparc_frame_size;
5978
5979 if (cfun->calls_alloca)
5980 emit_insn (gen_frame_blockage ());
5981
5982 if (sparc_n_global_fp_regs > 0)
5983 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5984 sparc_frame_base_offset
5985 - sparc_apparent_frame_size,
5986 SORR_RESTORE);
5987
5988 if (size == 0 || for_eh)
5989 ; /* do nothing. */
5990 else if (sparc_leaf_function_p)
5991 {
5992 if (size <= 4096)
5993 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5994 else if (size <= 8192)
5995 {
5996 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5997 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5998 }
5999 else
6000 {
6001 rtx reg = gen_rtx_REG (Pmode, 1);
6002 emit_move_insn (reg, GEN_INT (size));
6003 emit_insn (gen_stack_pointer_inc (reg));
6004 }
6005 }
6006 }
6007
6008 /* Expand the function epilogue, either normal or part of a sibcall.
6009 We emit all the instructions except the return or the call. */
6010
6011 void
6012 sparc_flat_expand_epilogue (bool for_eh)
6013 {
6014 HOST_WIDE_INT size = sparc_frame_size;
6015
6016 if (sparc_n_global_fp_regs > 0)
6017 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6018 sparc_frame_base_offset
6019 - sparc_apparent_frame_size,
6020 SORR_RESTORE);
6021
6022 /* If we have a frame pointer, we'll need both to restore it before the
6023 frame is destroyed and use its current value in destroying the frame.
6024 Since we don't have an atomic way to do that in the flat window model,
6025 we save the current value into a temporary register (%g1). */
6026 if (frame_pointer_needed && !for_eh)
6027 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6028
6029 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6030 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6031 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6032
6033 if (sparc_save_local_in_regs_p)
6034 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6035 sparc_frame_base_offset,
6036 SORR_RESTORE);
6037
6038 if (size == 0 || for_eh)
6039 ; /* do nothing. */
6040 else if (frame_pointer_needed)
6041 {
6042 /* Make sure the frame is destroyed after everything else is done. */
6043 emit_insn (gen_blockage ());
6044
6045 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6046 }
6047 else
6048 {
6049 /* Likewise. */
6050 emit_insn (gen_blockage ());
6051
6052 if (size <= 4096)
6053 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6054 else if (size <= 8192)
6055 {
6056 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6057 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6058 }
6059 else
6060 {
6061 rtx reg = gen_rtx_REG (Pmode, 1);
6062 emit_move_insn (reg, GEN_INT (size));
6063 emit_insn (gen_stack_pointer_inc (reg));
6064 }
6065 }
6066 }
6067
6068 /* Return true if it is appropriate to emit `return' instructions in the
6069 body of a function. */
6070
6071 bool
6072 sparc_can_use_return_insn_p (void)
6073 {
6074 return sparc_prologue_data_valid_p
6075 && sparc_n_global_fp_regs == 0
6076 && TARGET_FLAT
6077 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6078 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6079 }
6080
6081 /* This function generates the assembly code for function exit. */
6082
6083 static void
6084 sparc_asm_function_epilogue (FILE *file)
6085 {
6086 /* If the last two instructions of a function are "call foo; dslot;"
6087 the return address might point to the first instruction in the next
6088 function and we have to output a dummy nop for the sake of sane
6089 backtraces in such cases. This is pointless for sibling calls since
6090 the return address is explicitly adjusted. */
6091
6092 rtx_insn *insn = get_last_insn ();
6093
6094 rtx last_real_insn = prev_real_insn (insn);
6095 if (last_real_insn
6096 && NONJUMP_INSN_P (last_real_insn)
6097 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6098 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6099
6100 if (last_real_insn
6101 && CALL_P (last_real_insn)
6102 && !SIBLING_CALL_P (last_real_insn))
6103 fputs("\tnop\n", file);
6104
6105 sparc_output_deferred_case_vectors ();
6106 }
6107
6108 /* Output a 'restore' instruction. */
6109
6110 static void
6111 output_restore (rtx pat)
6112 {
6113 rtx operands[3];
6114
6115 if (! pat)
6116 {
6117 fputs ("\t restore\n", asm_out_file);
6118 return;
6119 }
6120
6121 gcc_assert (GET_CODE (pat) == SET);
6122
6123 operands[0] = SET_DEST (pat);
6124 pat = SET_SRC (pat);
6125
6126 switch (GET_CODE (pat))
6127 {
6128 case PLUS:
6129 operands[1] = XEXP (pat, 0);
6130 operands[2] = XEXP (pat, 1);
6131 output_asm_insn (" restore %r1, %2, %Y0", operands);
6132 break;
6133 case LO_SUM:
6134 operands[1] = XEXP (pat, 0);
6135 operands[2] = XEXP (pat, 1);
6136 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6137 break;
6138 case ASHIFT:
6139 operands[1] = XEXP (pat, 0);
6140 gcc_assert (XEXP (pat, 1) == const1_rtx);
6141 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6142 break;
6143 default:
6144 operands[1] = pat;
6145 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6146 break;
6147 }
6148 }
6149
6150 /* Output a return. */
6151
6152 const char *
6153 output_return (rtx_insn *insn)
6154 {
6155 if (crtl->calls_eh_return)
6156 {
6157 /* If the function uses __builtin_eh_return, the eh_return
6158 machinery occupies the delay slot. */
6159 gcc_assert (!final_sequence);
6160
6161 if (flag_delayed_branch)
6162 {
6163 if (!TARGET_FLAT && TARGET_V9)
6164 fputs ("\treturn\t%i7+8\n", asm_out_file);
6165 else
6166 {
6167 if (!TARGET_FLAT)
6168 fputs ("\trestore\n", asm_out_file);
6169
6170 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6171 }
6172
6173 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6174 }
6175 else
6176 {
6177 if (!TARGET_FLAT)
6178 fputs ("\trestore\n", asm_out_file);
6179
6180 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6181 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6182 }
6183 }
6184 else if (sparc_leaf_function_p || TARGET_FLAT)
6185 {
6186 /* This is a leaf or flat function so we don't have to bother restoring
6187 the register window, which frees us from dealing with the convoluted
6188 semantics of restore/return. We simply output the jump to the
6189 return address and the insn in the delay slot (if any). */
6190
6191 return "jmp\t%%o7+%)%#";
6192 }
6193 else
6194 {
6195 /* This is a regular function so we have to restore the register window.
6196 We may have a pending insn for the delay slot, which will be either
6197 combined with the 'restore' instruction or put in the delay slot of
6198 the 'return' instruction. */
6199
6200 if (final_sequence)
6201 {
6202 rtx_insn *delay;
6203 rtx pat;
6204 int seen;
6205
6206 delay = NEXT_INSN (insn);
6207 gcc_assert (delay);
6208
6209 pat = PATTERN (delay);
6210
6211 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6212 {
6213 epilogue_renumber (&pat, 0);
6214 return "return\t%%i7+%)%#";
6215 }
6216 else
6217 {
6218 output_asm_insn ("jmp\t%%i7+%)", NULL);
6219
6220 /* We're going to output the insn in the delay slot manually.
6221 Make sure to output its source location first. */
6222 PATTERN (delay) = gen_blockage ();
6223 INSN_CODE (delay) = -1;
6224 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6225 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6226
6227 output_restore (pat);
6228 }
6229 }
6230 else
6231 {
6232 /* The delay slot is empty. */
6233 if (TARGET_V9)
6234 return "return\t%%i7+%)\n\t nop";
6235 else if (flag_delayed_branch)
6236 return "jmp\t%%i7+%)\n\t restore";
6237 else
6238 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6239 }
6240 }
6241
6242 return "";
6243 }
6244
6245 /* Output a sibling call. */
6246
6247 const char *
6248 output_sibcall (rtx_insn *insn, rtx call_operand)
6249 {
6250 rtx operands[1];
6251
6252 gcc_assert (flag_delayed_branch);
6253
6254 operands[0] = call_operand;
6255
6256 if (sparc_leaf_function_p || TARGET_FLAT)
6257 {
6258 /* This is a leaf or flat function so we don't have to bother restoring
6259 the register window. We simply output the jump to the function and
6260 the insn in the delay slot (if any). */
6261
6262 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6263
6264 if (final_sequence)
6265 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6266 operands);
6267 else
6268 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6269 it into branch if possible. */
6270 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6271 operands);
6272 }
6273 else
6274 {
6275 /* This is a regular function so we have to restore the register window.
6276 We may have a pending insn for the delay slot, which will be combined
6277 with the 'restore' instruction. */
6278
6279 output_asm_insn ("call\t%a0, 0", operands);
6280
6281 if (final_sequence)
6282 {
6283 rtx_insn *delay;
6284 rtx pat;
6285 int seen;
6286
6287 delay = NEXT_INSN (insn);
6288 gcc_assert (delay);
6289
6290 pat = PATTERN (delay);
6291
6292 /* We're going to output the insn in the delay slot manually.
6293 Make sure to output its source location first. */
6294 PATTERN (delay) = gen_blockage ();
6295 INSN_CODE (delay) = -1;
6296 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6297 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6298
6299 output_restore (pat);
6300 }
6301 else
6302 output_restore (NULL_RTX);
6303 }
6304
6305 return "";
6306 }
6307 \f
6308 /* Functions for handling argument passing.
6309
6310 For 32-bit, the first 6 args are normally in registers and the rest are
6311 pushed. Any arg that starts within the first 6 words is at least
6312 partially passed in a register unless its data type forbids.
6313
6314 For 64-bit, the argument registers are laid out as an array of 16 elements
6315 and arguments are added sequentially. The first 6 int args and up to the
6316 first 16 fp args (depending on size) are passed in regs.
6317
6318 Slot Stack Integral Float Float in structure Double Long Double
6319 ---- ----- -------- ----- ------------------ ------ -----------
6320 15 [SP+248] %f31 %f30,%f31 %d30
6321 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6322 13 [SP+232] %f27 %f26,%f27 %d26
6323 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6324 11 [SP+216] %f23 %f22,%f23 %d22
6325 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6326 9 [SP+200] %f19 %f18,%f19 %d18
6327 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6328 7 [SP+184] %f15 %f14,%f15 %d14
6329 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6330 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6331 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6332 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6333 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6334 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6335 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6336
6337 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6338
6339 Integral arguments are always passed as 64-bit quantities appropriately
6340 extended.
6341
6342 Passing of floating point values is handled as follows.
6343 If a prototype is in scope:
6344 If the value is in a named argument (i.e. not a stdarg function or a
6345 value not part of the `...') then the value is passed in the appropriate
6346 fp reg.
6347 If the value is part of the `...' and is passed in one of the first 6
6348 slots then the value is passed in the appropriate int reg.
6349 If the value is part of the `...' and is not passed in one of the first 6
6350 slots then the value is passed in memory.
6351 If a prototype is not in scope:
6352 If the value is one of the first 6 arguments the value is passed in the
6353 appropriate integer reg and the appropriate fp reg.
6354 If the value is not one of the first 6 arguments the value is passed in
6355 the appropriate fp reg and in memory.
6356
6357
6358 Summary of the calling conventions implemented by GCC on the SPARC:
6359
6360 32-bit ABI:
6361 size argument return value
6362
6363 small integer <4 int. reg. int. reg.
6364 word 4 int. reg. int. reg.
6365 double word 8 int. reg. int. reg.
6366
6367 _Complex small integer <8 int. reg. int. reg.
6368 _Complex word 8 int. reg. int. reg.
6369 _Complex double word 16 memory int. reg.
6370
6371 vector integer <=8 int. reg. FP reg.
6372 vector integer >8 memory memory
6373
6374 float 4 int. reg. FP reg.
6375 double 8 int. reg. FP reg.
6376 long double 16 memory memory
6377
6378 _Complex float 8 memory FP reg.
6379 _Complex double 16 memory FP reg.
6380 _Complex long double 32 memory FP reg.
6381
6382 vector float any memory memory
6383
6384 aggregate any memory memory
6385
6386
6387
6388 64-bit ABI:
6389 size argument return value
6390
6391 small integer <8 int. reg. int. reg.
6392 word 8 int. reg. int. reg.
6393 double word 16 int. reg. int. reg.
6394
6395 _Complex small integer <16 int. reg. int. reg.
6396 _Complex word 16 int. reg. int. reg.
6397 _Complex double word 32 memory int. reg.
6398
6399 vector integer <=16 FP reg. FP reg.
6400 vector integer 16<s<=32 memory FP reg.
6401 vector integer >32 memory memory
6402
6403 float 4 FP reg. FP reg.
6404 double 8 FP reg. FP reg.
6405 long double 16 FP reg. FP reg.
6406
6407 _Complex float 8 FP reg. FP reg.
6408 _Complex double 16 FP reg. FP reg.
6409 _Complex long double 32 memory FP reg.
6410
6411 vector float <=16 FP reg. FP reg.
6412 vector float 16<s<=32 memory FP reg.
6413 vector float >32 memory memory
6414
6415 aggregate <=16 reg. reg.
6416 aggregate 16<s<=32 memory reg.
6417 aggregate >32 memory memory
6418
6419
6420
6421 Note #1: complex floating-point types follow the extended SPARC ABIs as
6422 implemented by the Sun compiler.
6423
6424 Note #2: integral vector types follow the scalar floating-point types
6425 conventions to match what is implemented by the Sun VIS SDK.
6426
6427 Note #3: floating-point vector types follow the aggregate types
6428 conventions. */
6429
6430
6431 /* Maximum number of int regs for args. */
6432 #define SPARC_INT_ARG_MAX 6
6433 /* Maximum number of fp regs for args. */
6434 #define SPARC_FP_ARG_MAX 16
6435 /* Number of words (partially) occupied for a given size in units. */
6436 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6437
6438 /* Handle the INIT_CUMULATIVE_ARGS macro.
6439 Initialize a variable CUM of type CUMULATIVE_ARGS
6440 for a call to a function whose data type is FNTYPE.
6441 For a library call, FNTYPE is 0. */
6442
6443 void
6444 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6445 {
6446 cum->words = 0;
6447 cum->prototype_p = fntype && prototype_p (fntype);
6448 cum->libcall_p = !fntype;
6449 }
6450
6451 /* Handle promotion of pointer and integer arguments. */
6452
6453 static machine_mode
6454 sparc_promote_function_mode (const_tree type, machine_mode mode,
6455 int *punsignedp, const_tree, int)
6456 {
6457 if (type && POINTER_TYPE_P (type))
6458 {
6459 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6460 return Pmode;
6461 }
6462
6463 /* Integral arguments are passed as full words, as per the ABI. */
6464 if (GET_MODE_CLASS (mode) == MODE_INT
6465 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6466 return word_mode;
6467
6468 return mode;
6469 }
6470
6471 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6472
6473 static bool
6474 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6475 {
6476 return TARGET_ARCH64 ? true : false;
6477 }
6478
6479 /* Traverse the record TYPE recursively and call FUNC on its fields.
6480 NAMED is true if this is for a named parameter. DATA is passed
6481 to FUNC for each field. OFFSET is the starting position and
6482 PACKED is true if we are inside a packed record. */
6483
6484 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6485 static void
6486 traverse_record_type (const_tree type, bool named, T *data,
6487 HOST_WIDE_INT offset = 0, bool packed = false)
6488 {
6489 /* The ABI obviously doesn't specify how packed structures are passed.
6490 These are passed in integer regs if possible, otherwise memory. */
6491 if (!packed)
6492 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6493 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6494 {
6495 packed = true;
6496 break;
6497 }
6498
6499 /* Walk the real fields, but skip those with no size or a zero size.
6500 ??? Fields with variable offset are handled as having zero offset. */
6501 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6502 if (TREE_CODE (field) == FIELD_DECL)
6503 {
6504 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6505 continue;
6506
6507 HOST_WIDE_INT bitpos = offset;
6508 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6509 bitpos += int_bit_position (field);
6510
6511 tree field_type = TREE_TYPE (field);
6512 if (TREE_CODE (field_type) == RECORD_TYPE)
6513 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6514 packed);
6515 else
6516 {
6517 const bool fp_type
6518 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6519 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6520 data);
6521 }
6522 }
6523 }
6524
6525 /* Handle recursive register classifying for structure layout. */
6526
6527 typedef struct
6528 {
6529 bool fp_regs; /* true if field eligible to FP registers. */
6530 bool fp_regs_in_first_word; /* true if such field in first word. */
6531 } classify_data_t;
6532
6533 /* A subroutine of function_arg_slotno. Classify the field. */
6534
6535 inline void
6536 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6537 classify_data_t *data)
6538 {
6539 if (fp)
6540 {
6541 data->fp_regs = true;
6542 if (bitpos < BITS_PER_WORD)
6543 data->fp_regs_in_first_word = true;
6544 }
6545 }
6546
6547 /* Compute the slot number to pass an argument in.
6548 Return the slot number or -1 if passing on the stack.
6549
6550 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6551 the preceding args and about the function being called.
6552 MODE is the argument's machine mode.
6553 TYPE is the data type of the argument (as a tree).
6554 This is null for libcalls where that information may
6555 not be available.
6556 NAMED is nonzero if this argument is a named parameter
6557 (otherwise it is an extra parameter matching an ellipsis).
6558 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6559 *PREGNO records the register number to use if scalar type.
6560 *PPADDING records the amount of padding needed in words. */
6561
6562 static int
6563 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6564 const_tree type, bool named, bool incoming,
6565 int *pregno, int *ppadding)
6566 {
6567 int regbase = (incoming
6568 ? SPARC_INCOMING_INT_ARG_FIRST
6569 : SPARC_OUTGOING_INT_ARG_FIRST);
6570 int slotno = cum->words;
6571 enum mode_class mclass;
6572 int regno;
6573
6574 *ppadding = 0;
6575
6576 if (type && TREE_ADDRESSABLE (type))
6577 return -1;
6578
6579 if (TARGET_ARCH32
6580 && mode == BLKmode
6581 && type
6582 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6583 return -1;
6584
6585 /* For SPARC64, objects requiring 16-byte alignment get it. */
6586 if (TARGET_ARCH64
6587 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6588 && (slotno & 1) != 0)
6589 slotno++, *ppadding = 1;
6590
6591 mclass = GET_MODE_CLASS (mode);
6592 if (type && TREE_CODE (type) == VECTOR_TYPE)
6593 {
6594 /* Vector types deserve special treatment because they are
6595 polymorphic wrt their mode, depending upon whether VIS
6596 instructions are enabled. */
6597 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6598 {
6599 /* The SPARC port defines no floating-point vector modes. */
6600 gcc_assert (mode == BLKmode);
6601 }
6602 else
6603 {
6604 /* Integral vector types should either have a vector
6605 mode or an integral mode, because we are guaranteed
6606 by pass_by_reference that their size is not greater
6607 than 16 bytes and TImode is 16-byte wide. */
6608 gcc_assert (mode != BLKmode);
6609
6610 /* Vector integers are handled like floats according to
6611 the Sun VIS SDK. */
6612 mclass = MODE_FLOAT;
6613 }
6614 }
6615
6616 switch (mclass)
6617 {
6618 case MODE_FLOAT:
6619 case MODE_COMPLEX_FLOAT:
6620 case MODE_VECTOR_INT:
6621 if (TARGET_ARCH64 && TARGET_FPU && named)
6622 {
6623 /* If all arg slots are filled, then must pass on stack. */
6624 if (slotno >= SPARC_FP_ARG_MAX)
6625 return -1;
6626
6627 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6628 /* Arguments filling only one single FP register are
6629 right-justified in the outer double FP register. */
6630 if (GET_MODE_SIZE (mode) <= 4)
6631 regno++;
6632 break;
6633 }
6634 /* fallthrough */
6635
6636 case MODE_INT:
6637 case MODE_COMPLEX_INT:
6638 /* If all arg slots are filled, then must pass on stack. */
6639 if (slotno >= SPARC_INT_ARG_MAX)
6640 return -1;
6641
6642 regno = regbase + slotno;
6643 break;
6644
6645 case MODE_RANDOM:
6646 if (mode == VOIDmode)
6647 /* MODE is VOIDmode when generating the actual call. */
6648 return -1;
6649
6650 gcc_assert (mode == BLKmode);
6651
6652 if (TARGET_ARCH32
6653 || !type
6654 || (TREE_CODE (type) != RECORD_TYPE
6655 && TREE_CODE (type) != VECTOR_TYPE))
6656 {
6657 /* If all arg slots are filled, then must pass on stack. */
6658 if (slotno >= SPARC_INT_ARG_MAX)
6659 return -1;
6660
6661 regno = regbase + slotno;
6662 }
6663 else /* TARGET_ARCH64 && type */
6664 {
6665 /* If all arg slots are filled, then must pass on stack. */
6666 if (slotno >= SPARC_FP_ARG_MAX)
6667 return -1;
6668
6669 if (TREE_CODE (type) == RECORD_TYPE)
6670 {
6671 classify_data_t data = { false, false };
6672 traverse_record_type<classify_data_t, classify_registers>
6673 (type, named, &data);
6674
6675 if (data.fp_regs)
6676 {
6677 /* If all FP slots are filled except for the last one and
6678 there is no FP field in the first word, then must pass
6679 on stack. */
6680 if (slotno >= SPARC_FP_ARG_MAX - 1
6681 && !data.fp_regs_in_first_word)
6682 return -1;
6683 }
6684 else
6685 {
6686 /* If all int slots are filled, then must pass on stack. */
6687 if (slotno >= SPARC_INT_ARG_MAX)
6688 return -1;
6689 }
6690 }
6691
6692 /* PREGNO isn't set since both int and FP regs can be used. */
6693 return slotno;
6694 }
6695 break;
6696
6697 default :
6698 gcc_unreachable ();
6699 }
6700
6701 *pregno = regno;
6702 return slotno;
6703 }
6704
6705 /* Handle recursive register counting/assigning for structure layout. */
6706
6707 typedef struct
6708 {
6709 int slotno; /* slot number of the argument. */
6710 int regbase; /* regno of the base register. */
6711 int intoffset; /* offset of the first pending integer field. */
6712 int nregs; /* number of words passed in registers. */
6713 bool stack; /* true if part of the argument is on the stack. */
6714 rtx ret; /* return expression being built. */
6715 } assign_data_t;
6716
6717 /* A subroutine of function_arg_record_value. Compute the number of integer
6718 registers to be assigned between PARMS->intoffset and BITPOS. Return
6719 true if at least one integer register is assigned or false otherwise. */
6720
6721 static bool
6722 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6723 {
6724 if (data->intoffset < 0)
6725 return false;
6726
6727 const int intoffset = data->intoffset;
6728 data->intoffset = -1;
6729
6730 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6731 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6732 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6733 int nregs = (endbit - startbit) / BITS_PER_WORD;
6734
6735 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6736 {
6737 nregs = SPARC_INT_ARG_MAX - this_slotno;
6738
6739 /* We need to pass this field (partly) on the stack. */
6740 data->stack = 1;
6741 }
6742
6743 if (nregs <= 0)
6744 return false;
6745
6746 *pnregs = nregs;
6747 return true;
6748 }
6749
6750 /* A subroutine of function_arg_record_value. Compute the number and the mode
6751 of the FP registers to be assigned for FIELD. Return true if at least one
6752 FP register is assigned or false otherwise. */
6753
6754 static bool
6755 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6756 assign_data_t *data,
6757 int *pnregs, machine_mode *pmode)
6758 {
6759 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6760 machine_mode mode = DECL_MODE (field);
6761 int nregs, nslots;
6762
6763 /* Slots are counted as words while regs are counted as having the size of
6764 the (inner) mode. */
6765 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6766 {
6767 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6768 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6769 }
6770 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6771 {
6772 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6773 nregs = 2;
6774 }
6775 else
6776 nregs = 1;
6777
6778 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6779
6780 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6781 {
6782 nslots = SPARC_FP_ARG_MAX - this_slotno;
6783 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6784
6785 /* We need to pass this field (partly) on the stack. */
6786 data->stack = 1;
6787
6788 if (nregs <= 0)
6789 return false;
6790 }
6791
6792 *pnregs = nregs;
6793 *pmode = mode;
6794 return true;
6795 }
6796
6797 /* A subroutine of function_arg_record_value. Count the number of registers
6798 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6799
6800 inline void
6801 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6802 assign_data_t *data)
6803 {
6804 if (fp)
6805 {
6806 int nregs;
6807 machine_mode mode;
6808
6809 if (compute_int_layout (bitpos, data, &nregs))
6810 data->nregs += nregs;
6811
6812 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6813 data->nregs += nregs;
6814 }
6815 else
6816 {
6817 if (data->intoffset < 0)
6818 data->intoffset = bitpos;
6819 }
6820 }
6821
6822 /* A subroutine of function_arg_record_value. Assign the bits of the
6823 structure between PARMS->intoffset and BITPOS to integer registers. */
6824
6825 static void
6826 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6827 {
6828 int intoffset = data->intoffset;
6829 machine_mode mode;
6830 int nregs;
6831
6832 if (!compute_int_layout (bitpos, data, &nregs))
6833 return;
6834
6835 /* If this is the trailing part of a word, only load that much into
6836 the register. Otherwise load the whole register. Note that in
6837 the latter case we may pick up unwanted bits. It's not a problem
6838 at the moment but may wish to revisit. */
6839 if (intoffset % BITS_PER_WORD != 0)
6840 mode = smallest_int_mode_for_size (BITS_PER_WORD
6841 - intoffset % BITS_PER_WORD);
6842 else
6843 mode = word_mode;
6844
6845 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6846 unsigned int regno = data->regbase + this_slotno;
6847 intoffset /= BITS_PER_UNIT;
6848
6849 do
6850 {
6851 rtx reg = gen_rtx_REG (mode, regno);
6852 XVECEXP (data->ret, 0, data->stack + data->nregs)
6853 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6854 data->nregs += 1;
6855 mode = word_mode;
6856 regno += 1;
6857 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6858 }
6859 while (--nregs > 0);
6860 }
6861
6862 /* A subroutine of function_arg_record_value. Assign FIELD at position
6863 BITPOS to FP registers. */
6864
6865 static void
6866 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6867 assign_data_t *data)
6868 {
6869 int nregs;
6870 machine_mode mode;
6871
6872 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6873 return;
6874
6875 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6876 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6877 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6878 regno++;
6879 int pos = bitpos / BITS_PER_UNIT;
6880
6881 do
6882 {
6883 rtx reg = gen_rtx_REG (mode, regno);
6884 XVECEXP (data->ret, 0, data->stack + data->nregs)
6885 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6886 data->nregs += 1;
6887 regno += GET_MODE_SIZE (mode) / 4;
6888 pos += GET_MODE_SIZE (mode);
6889 }
6890 while (--nregs > 0);
6891 }
6892
6893 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6894 the structure between PARMS->intoffset and BITPOS to registers. */
6895
6896 inline void
6897 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6898 assign_data_t *data)
6899 {
6900 if (fp)
6901 {
6902 assign_int_registers (bitpos, data);
6903
6904 assign_fp_registers (field, bitpos, data);
6905 }
6906 else
6907 {
6908 if (data->intoffset < 0)
6909 data->intoffset = bitpos;
6910 }
6911 }
6912
6913 /* Used by function_arg and sparc_function_value_1 to implement the complex
6914 conventions of the 64-bit ABI for passing and returning structures.
6915 Return an expression valid as a return value for the FUNCTION_ARG
6916 and TARGET_FUNCTION_VALUE.
6917
6918 TYPE is the data type of the argument (as a tree).
6919 This is null for libcalls where that information may
6920 not be available.
6921 MODE is the argument's machine mode.
6922 SLOTNO is the index number of the argument's slot in the parameter array.
6923 NAMED is true if this argument is a named parameter
6924 (otherwise it is an extra parameter matching an ellipsis).
6925 REGBASE is the regno of the base register for the parameter array. */
6926
6927 static rtx
6928 function_arg_record_value (const_tree type, machine_mode mode,
6929 int slotno, bool named, int regbase)
6930 {
6931 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6932 assign_data_t data;
6933 int nregs;
6934
6935 data.slotno = slotno;
6936 data.regbase = regbase;
6937
6938 /* Count how many registers we need. */
6939 data.nregs = 0;
6940 data.intoffset = 0;
6941 data.stack = false;
6942 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6943
6944 /* Take into account pending integer fields. */
6945 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6946 data.nregs += nregs;
6947
6948 /* Allocate the vector and handle some annoying special cases. */
6949 nregs = data.nregs;
6950
6951 if (nregs == 0)
6952 {
6953 /* ??? Empty structure has no value? Duh? */
6954 if (typesize <= 0)
6955 {
6956 /* Though there's nothing really to store, return a word register
6957 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6958 leads to breakage due to the fact that there are zero bytes to
6959 load. */
6960 return gen_rtx_REG (mode, regbase);
6961 }
6962
6963 /* ??? C++ has structures with no fields, and yet a size. Give up
6964 for now and pass everything back in integer registers. */
6965 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6966 if (nregs + slotno > SPARC_INT_ARG_MAX)
6967 nregs = SPARC_INT_ARG_MAX - slotno;
6968 }
6969
6970 gcc_assert (nregs > 0);
6971
6972 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6973
6974 /* If at least one field must be passed on the stack, generate
6975 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6976 also be passed on the stack. We can't do much better because the
6977 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6978 of structures for which the fields passed exclusively in registers
6979 are not at the beginning of the structure. */
6980 if (data.stack)
6981 XVECEXP (data.ret, 0, 0)
6982 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6983
6984 /* Assign the registers. */
6985 data.nregs = 0;
6986 data.intoffset = 0;
6987 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6988
6989 /* Assign pending integer fields. */
6990 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6991
6992 gcc_assert (data.nregs == nregs);
6993
6994 return data.ret;
6995 }
6996
6997 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6998 of the 64-bit ABI for passing and returning unions.
6999 Return an expression valid as a return value for the FUNCTION_ARG
7000 and TARGET_FUNCTION_VALUE.
7001
7002 SIZE is the size in bytes of the union.
7003 MODE is the argument's machine mode.
7004 REGNO is the hard register the union will be passed in. */
7005
7006 static rtx
7007 function_arg_union_value (int size, machine_mode mode, int slotno,
7008 int regno)
7009 {
7010 int nwords = CEIL_NWORDS (size), i;
7011 rtx regs;
7012
7013 /* See comment in previous function for empty structures. */
7014 if (nwords == 0)
7015 return gen_rtx_REG (mode, regno);
7016
7017 if (slotno == SPARC_INT_ARG_MAX - 1)
7018 nwords = 1;
7019
7020 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7021
7022 for (i = 0; i < nwords; i++)
7023 {
7024 /* Unions are passed left-justified. */
7025 XVECEXP (regs, 0, i)
7026 = gen_rtx_EXPR_LIST (VOIDmode,
7027 gen_rtx_REG (word_mode, regno),
7028 GEN_INT (UNITS_PER_WORD * i));
7029 regno++;
7030 }
7031
7032 return regs;
7033 }
7034
7035 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7036 for passing and returning BLKmode vectors.
7037 Return an expression valid as a return value for the FUNCTION_ARG
7038 and TARGET_FUNCTION_VALUE.
7039
7040 SIZE is the size in bytes of the vector.
7041 REGNO is the FP hard register the vector will be passed in. */
7042
7043 static rtx
7044 function_arg_vector_value (int size, int regno)
7045 {
7046 const int nregs = MAX (1, size / 8);
7047 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7048
7049 if (size < 8)
7050 XVECEXP (regs, 0, 0)
7051 = gen_rtx_EXPR_LIST (VOIDmode,
7052 gen_rtx_REG (SImode, regno),
7053 const0_rtx);
7054 else
7055 for (int i = 0; i < nregs; i++)
7056 XVECEXP (regs, 0, i)
7057 = gen_rtx_EXPR_LIST (VOIDmode,
7058 gen_rtx_REG (DImode, regno + 2*i),
7059 GEN_INT (i*8));
7060
7061 return regs;
7062 }
7063
7064 /* Determine where to put an argument to a function.
7065 Value is zero to push the argument on the stack,
7066 or a hard register in which to store the argument.
7067
7068 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7069 the preceding args and about the function being called.
7070 MODE is the argument's machine mode.
7071 TYPE is the data type of the argument (as a tree).
7072 This is null for libcalls where that information may
7073 not be available.
7074 NAMED is true if this argument is a named parameter
7075 (otherwise it is an extra parameter matching an ellipsis).
7076 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7077 TARGET_FUNCTION_INCOMING_ARG. */
7078
7079 static rtx
7080 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7081 const_tree type, bool named, bool incoming)
7082 {
7083 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7084
7085 int regbase = (incoming
7086 ? SPARC_INCOMING_INT_ARG_FIRST
7087 : SPARC_OUTGOING_INT_ARG_FIRST);
7088 int slotno, regno, padding;
7089 enum mode_class mclass = GET_MODE_CLASS (mode);
7090
7091 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7092 &regno, &padding);
7093 if (slotno == -1)
7094 return 0;
7095
7096 /* Vector types deserve special treatment because they are polymorphic wrt
7097 their mode, depending upon whether VIS instructions are enabled. */
7098 if (type && TREE_CODE (type) == VECTOR_TYPE)
7099 {
7100 HOST_WIDE_INT size = int_size_in_bytes (type);
7101 gcc_assert ((TARGET_ARCH32 && size <= 8)
7102 || (TARGET_ARCH64 && size <= 16));
7103
7104 if (mode == BLKmode)
7105 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7106
7107 mclass = MODE_FLOAT;
7108 }
7109
7110 if (TARGET_ARCH32)
7111 return gen_rtx_REG (mode, regno);
7112
7113 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7114 and are promoted to registers if possible. */
7115 if (type && TREE_CODE (type) == RECORD_TYPE)
7116 {
7117 HOST_WIDE_INT size = int_size_in_bytes (type);
7118 gcc_assert (size <= 16);
7119
7120 return function_arg_record_value (type, mode, slotno, named, regbase);
7121 }
7122
7123 /* Unions up to 16 bytes in size are passed in integer registers. */
7124 else if (type && TREE_CODE (type) == UNION_TYPE)
7125 {
7126 HOST_WIDE_INT size = int_size_in_bytes (type);
7127 gcc_assert (size <= 16);
7128
7129 return function_arg_union_value (size, mode, slotno, regno);
7130 }
7131
7132 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7133 but also have the slot allocated for them.
7134 If no prototype is in scope fp values in register slots get passed
7135 in two places, either fp regs and int regs or fp regs and memory. */
7136 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7137 && SPARC_FP_REG_P (regno))
7138 {
7139 rtx reg = gen_rtx_REG (mode, regno);
7140 if (cum->prototype_p || cum->libcall_p)
7141 return reg;
7142 else
7143 {
7144 rtx v0, v1;
7145
7146 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7147 {
7148 int intreg;
7149
7150 /* On incoming, we don't need to know that the value
7151 is passed in %f0 and %i0, and it confuses other parts
7152 causing needless spillage even on the simplest cases. */
7153 if (incoming)
7154 return reg;
7155
7156 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7157 + (regno - SPARC_FP_ARG_FIRST) / 2);
7158
7159 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7160 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7161 const0_rtx);
7162 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7163 }
7164 else
7165 {
7166 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7167 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7168 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7169 }
7170 }
7171 }
7172
7173 /* All other aggregate types are passed in an integer register in a mode
7174 corresponding to the size of the type. */
7175 else if (type && AGGREGATE_TYPE_P (type))
7176 {
7177 HOST_WIDE_INT size = int_size_in_bytes (type);
7178 gcc_assert (size <= 16);
7179
7180 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7181 }
7182
7183 return gen_rtx_REG (mode, regno);
7184 }
7185
7186 /* Handle the TARGET_FUNCTION_ARG target hook. */
7187
7188 static rtx
7189 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7190 const_tree type, bool named)
7191 {
7192 return sparc_function_arg_1 (cum, mode, type, named, false);
7193 }
7194
7195 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7196
7197 static rtx
7198 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7199 const_tree type, bool named)
7200 {
7201 return sparc_function_arg_1 (cum, mode, type, named, true);
7202 }
7203
7204 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7205
7206 static unsigned int
7207 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7208 {
7209 return ((TARGET_ARCH64
7210 && (GET_MODE_ALIGNMENT (mode) == 128
7211 || (type && TYPE_ALIGN (type) == 128)))
7212 ? 128
7213 : PARM_BOUNDARY);
7214 }
7215
7216 /* For an arg passed partly in registers and partly in memory,
7217 this is the number of bytes of registers used.
7218 For args passed entirely in registers or entirely in memory, zero.
7219
7220 Any arg that starts in the first 6 regs but won't entirely fit in them
7221 needs partial registers on v8. On v9, structures with integer
7222 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7223 values that begin in the last fp reg [where "last fp reg" varies with the
7224 mode] will be split between that reg and memory. */
7225
7226 static int
7227 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7228 tree type, bool named)
7229 {
7230 int slotno, regno, padding;
7231
7232 /* We pass false for incoming here, it doesn't matter. */
7233 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7234 false, &regno, &padding);
7235
7236 if (slotno == -1)
7237 return 0;
7238
7239 if (TARGET_ARCH32)
7240 {
7241 if ((slotno + (mode == BLKmode
7242 ? CEIL_NWORDS (int_size_in_bytes (type))
7243 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7244 > SPARC_INT_ARG_MAX)
7245 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7246 }
7247 else
7248 {
7249 /* We are guaranteed by pass_by_reference that the size of the
7250 argument is not greater than 16 bytes, so we only need to return
7251 one word if the argument is partially passed in registers. */
7252
7253 if (type && AGGREGATE_TYPE_P (type))
7254 {
7255 int size = int_size_in_bytes (type);
7256
7257 if (size > UNITS_PER_WORD
7258 && (slotno == SPARC_INT_ARG_MAX - 1
7259 || slotno == SPARC_FP_ARG_MAX - 1))
7260 return UNITS_PER_WORD;
7261 }
7262 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7263 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7264 && ! (TARGET_FPU && named)))
7265 {
7266 /* The complex types are passed as packed types. */
7267 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7268 && slotno == SPARC_INT_ARG_MAX - 1)
7269 return UNITS_PER_WORD;
7270 }
7271 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7272 {
7273 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7274 > SPARC_FP_ARG_MAX)
7275 return UNITS_PER_WORD;
7276 }
7277 }
7278
7279 return 0;
7280 }
7281
7282 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7283 Specify whether to pass the argument by reference. */
7284
7285 static bool
7286 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7287 machine_mode mode, const_tree type,
7288 bool named ATTRIBUTE_UNUSED)
7289 {
7290 if (TARGET_ARCH32)
7291 /* Original SPARC 32-bit ABI says that structures and unions,
7292 and quad-precision floats are passed by reference. For Pascal,
7293 also pass arrays by reference. All other base types are passed
7294 in registers.
7295
7296 Extended ABI (as implemented by the Sun compiler) says that all
7297 complex floats are passed by reference. Pass complex integers
7298 in registers up to 8 bytes. More generally, enforce the 2-word
7299 cap for passing arguments in registers.
7300
7301 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7302 integers are passed like floats of the same size, that is in
7303 registers up to 8 bytes. Pass all vector floats by reference
7304 like structure and unions. */
7305 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7306 || mode == SCmode
7307 /* Catch CDImode, TFmode, DCmode and TCmode. */
7308 || GET_MODE_SIZE (mode) > 8
7309 || (type
7310 && TREE_CODE (type) == VECTOR_TYPE
7311 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7312 else
7313 /* Original SPARC 64-bit ABI says that structures and unions
7314 smaller than 16 bytes are passed in registers, as well as
7315 all other base types.
7316
7317 Extended ABI (as implemented by the Sun compiler) says that
7318 complex floats are passed in registers up to 16 bytes. Pass
7319 all complex integers in registers up to 16 bytes. More generally,
7320 enforce the 2-word cap for passing arguments in registers.
7321
7322 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7323 integers are passed like floats of the same size, that is in
7324 registers (up to 16 bytes). Pass all vector floats like structure
7325 and unions. */
7326 return ((type
7327 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7328 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7329 /* Catch CTImode and TCmode. */
7330 || GET_MODE_SIZE (mode) > 16);
7331 }
7332
7333 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7334 Update the data in CUM to advance over an argument
7335 of mode MODE and data type TYPE.
7336 TYPE is null for libcalls where that information may not be available. */
7337
7338 static void
7339 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7340 const_tree type, bool named)
7341 {
7342 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7343 int regno, padding;
7344
7345 /* We pass false for incoming here, it doesn't matter. */
7346 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7347
7348 /* If argument requires leading padding, add it. */
7349 cum->words += padding;
7350
7351 if (TARGET_ARCH32)
7352 cum->words += (mode == BLKmode
7353 ? CEIL_NWORDS (int_size_in_bytes (type))
7354 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7355 else
7356 {
7357 if (type && AGGREGATE_TYPE_P (type))
7358 {
7359 int size = int_size_in_bytes (type);
7360
7361 if (size <= 8)
7362 ++cum->words;
7363 else if (size <= 16)
7364 cum->words += 2;
7365 else /* passed by reference */
7366 ++cum->words;
7367 }
7368 else
7369 cum->words += (mode == BLKmode
7370 ? CEIL_NWORDS (int_size_in_bytes (type))
7371 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7372 }
7373 }
7374
7375 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7376 are always stored left shifted in their argument slot. */
7377
7378 static pad_direction
7379 sparc_function_arg_padding (machine_mode mode, const_tree type)
7380 {
7381 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7382 return PAD_UPWARD;
7383
7384 /* Fall back to the default. */
7385 return default_function_arg_padding (mode, type);
7386 }
7387
7388 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7389 Specify whether to return the return value in memory. */
7390
7391 static bool
7392 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7393 {
7394 if (TARGET_ARCH32)
7395 /* Original SPARC 32-bit ABI says that structures and unions,
7396 and quad-precision floats are returned in memory. All other
7397 base types are returned in registers.
7398
7399 Extended ABI (as implemented by the Sun compiler) says that
7400 all complex floats are returned in registers (8 FP registers
7401 at most for '_Complex long double'). Return all complex integers
7402 in registers (4 at most for '_Complex long long').
7403
7404 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7405 integers are returned like floats of the same size, that is in
7406 registers up to 8 bytes and in memory otherwise. Return all
7407 vector floats in memory like structure and unions; note that
7408 they always have BLKmode like the latter. */
7409 return (TYPE_MODE (type) == BLKmode
7410 || TYPE_MODE (type) == TFmode
7411 || (TREE_CODE (type) == VECTOR_TYPE
7412 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7413 else
7414 /* Original SPARC 64-bit ABI says that structures and unions
7415 smaller than 32 bytes are returned in registers, as well as
7416 all other base types.
7417
7418 Extended ABI (as implemented by the Sun compiler) says that all
7419 complex floats are returned in registers (8 FP registers at most
7420 for '_Complex long double'). Return all complex integers in
7421 registers (4 at most for '_Complex TItype').
7422
7423 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7424 integers are returned like floats of the same size, that is in
7425 registers. Return all vector floats like structure and unions;
7426 note that they always have BLKmode like the latter. */
7427 return (TYPE_MODE (type) == BLKmode
7428 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7429 }
7430
7431 /* Handle the TARGET_STRUCT_VALUE target hook.
7432 Return where to find the structure return value address. */
7433
7434 static rtx
7435 sparc_struct_value_rtx (tree fndecl, int incoming)
7436 {
7437 if (TARGET_ARCH64)
7438 return 0;
7439 else
7440 {
7441 rtx mem;
7442
7443 if (incoming)
7444 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7445 STRUCT_VALUE_OFFSET));
7446 else
7447 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7448 STRUCT_VALUE_OFFSET));
7449
7450 /* Only follow the SPARC ABI for fixed-size structure returns.
7451 Variable size structure returns are handled per the normal
7452 procedures in GCC. This is enabled by -mstd-struct-return */
7453 if (incoming == 2
7454 && sparc_std_struct_return
7455 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7456 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7457 {
7458 /* We must check and adjust the return address, as it is optional
7459 as to whether the return object is really provided. */
7460 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7461 rtx scratch = gen_reg_rtx (SImode);
7462 rtx_code_label *endlab = gen_label_rtx ();
7463
7464 /* Calculate the return object size. */
7465 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7466 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7467 /* Construct a temporary return value. */
7468 rtx temp_val
7469 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7470
7471 /* Implement SPARC 32-bit psABI callee return struct checking:
7472
7473 Fetch the instruction where we will return to and see if
7474 it's an unimp instruction (the most significant 10 bits
7475 will be zero). */
7476 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7477 plus_constant (Pmode,
7478 ret_reg, 8)));
7479 /* Assume the size is valid and pre-adjust. */
7480 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7481 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7482 0, endlab);
7483 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7484 /* Write the address of the memory pointed to by temp_val into
7485 the memory pointed to by mem. */
7486 emit_move_insn (mem, XEXP (temp_val, 0));
7487 emit_label (endlab);
7488 }
7489
7490 return mem;
7491 }
7492 }
7493
7494 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7495 For v9, function return values are subject to the same rules as arguments,
7496 except that up to 32 bytes may be returned in registers. */
7497
7498 static rtx
7499 sparc_function_value_1 (const_tree type, machine_mode mode,
7500 bool outgoing)
7501 {
7502 /* Beware that the two values are swapped here wrt function_arg. */
7503 int regbase = (outgoing
7504 ? SPARC_INCOMING_INT_ARG_FIRST
7505 : SPARC_OUTGOING_INT_ARG_FIRST);
7506 enum mode_class mclass = GET_MODE_CLASS (mode);
7507 int regno;
7508
7509 /* Vector types deserve special treatment because they are polymorphic wrt
7510 their mode, depending upon whether VIS instructions are enabled. */
7511 if (type && TREE_CODE (type) == VECTOR_TYPE)
7512 {
7513 HOST_WIDE_INT size = int_size_in_bytes (type);
7514 gcc_assert ((TARGET_ARCH32 && size <= 8)
7515 || (TARGET_ARCH64 && size <= 32));
7516
7517 if (mode == BLKmode)
7518 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7519
7520 mclass = MODE_FLOAT;
7521 }
7522
7523 if (TARGET_ARCH64 && type)
7524 {
7525 /* Structures up to 32 bytes in size are returned in registers. */
7526 if (TREE_CODE (type) == RECORD_TYPE)
7527 {
7528 HOST_WIDE_INT size = int_size_in_bytes (type);
7529 gcc_assert (size <= 32);
7530
7531 return function_arg_record_value (type, mode, 0, 1, regbase);
7532 }
7533
7534 /* Unions up to 32 bytes in size are returned in integer registers. */
7535 else if (TREE_CODE (type) == UNION_TYPE)
7536 {
7537 HOST_WIDE_INT size = int_size_in_bytes (type);
7538 gcc_assert (size <= 32);
7539
7540 return function_arg_union_value (size, mode, 0, regbase);
7541 }
7542
7543 /* Objects that require it are returned in FP registers. */
7544 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7545 ;
7546
7547 /* All other aggregate types are returned in an integer register in a
7548 mode corresponding to the size of the type. */
7549 else if (AGGREGATE_TYPE_P (type))
7550 {
7551 /* All other aggregate types are passed in an integer register
7552 in a mode corresponding to the size of the type. */
7553 HOST_WIDE_INT size = int_size_in_bytes (type);
7554 gcc_assert (size <= 32);
7555
7556 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7557
7558 /* ??? We probably should have made the same ABI change in
7559 3.4.0 as the one we made for unions. The latter was
7560 required by the SCD though, while the former is not
7561 specified, so we favored compatibility and efficiency.
7562
7563 Now we're stuck for aggregates larger than 16 bytes,
7564 because OImode vanished in the meantime. Let's not
7565 try to be unduly clever, and simply follow the ABI
7566 for unions in that case. */
7567 if (mode == BLKmode)
7568 return function_arg_union_value (size, mode, 0, regbase);
7569 else
7570 mclass = MODE_INT;
7571 }
7572
7573 /* We should only have pointer and integer types at this point. This
7574 must match sparc_promote_function_mode. */
7575 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7576 mode = word_mode;
7577 }
7578
7579 /* We should only have pointer and integer types at this point, except with
7580 -freg-struct-return. This must match sparc_promote_function_mode. */
7581 else if (TARGET_ARCH32
7582 && !(type && AGGREGATE_TYPE_P (type))
7583 && mclass == MODE_INT
7584 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7585 mode = word_mode;
7586
7587 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7588 regno = SPARC_FP_ARG_FIRST;
7589 else
7590 regno = regbase;
7591
7592 return gen_rtx_REG (mode, regno);
7593 }
7594
7595 /* Handle TARGET_FUNCTION_VALUE.
7596 On the SPARC, the value is found in the first "output" register, but the
7597 called function leaves it in the first "input" register. */
7598
7599 static rtx
7600 sparc_function_value (const_tree valtype,
7601 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7602 bool outgoing)
7603 {
7604 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7605 }
7606
7607 /* Handle TARGET_LIBCALL_VALUE. */
7608
7609 static rtx
7610 sparc_libcall_value (machine_mode mode,
7611 const_rtx fun ATTRIBUTE_UNUSED)
7612 {
7613 return sparc_function_value_1 (NULL_TREE, mode, false);
7614 }
7615
7616 /* Handle FUNCTION_VALUE_REGNO_P.
7617 On the SPARC, the first "output" reg is used for integer values, and the
7618 first floating point register is used for floating point values. */
7619
7620 static bool
7621 sparc_function_value_regno_p (const unsigned int regno)
7622 {
7623 return (regno == 8 || (TARGET_FPU && regno == 32));
7624 }
7625
7626 /* Do what is necessary for `va_start'. We look at the current function
7627 to determine if stdarg or varargs is used and return the address of
7628 the first unnamed parameter. */
7629
7630 static rtx
7631 sparc_builtin_saveregs (void)
7632 {
7633 int first_reg = crtl->args.info.words;
7634 rtx address;
7635 int regno;
7636
7637 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7638 emit_move_insn (gen_rtx_MEM (word_mode,
7639 gen_rtx_PLUS (Pmode,
7640 frame_pointer_rtx,
7641 GEN_INT (FIRST_PARM_OFFSET (0)
7642 + (UNITS_PER_WORD
7643 * regno)))),
7644 gen_rtx_REG (word_mode,
7645 SPARC_INCOMING_INT_ARG_FIRST + regno));
7646
7647 address = gen_rtx_PLUS (Pmode,
7648 frame_pointer_rtx,
7649 GEN_INT (FIRST_PARM_OFFSET (0)
7650 + UNITS_PER_WORD * first_reg));
7651
7652 return address;
7653 }
7654
7655 /* Implement `va_start' for stdarg. */
7656
7657 static void
7658 sparc_va_start (tree valist, rtx nextarg)
7659 {
7660 nextarg = expand_builtin_saveregs ();
7661 std_expand_builtin_va_start (valist, nextarg);
7662 }
7663
7664 /* Implement `va_arg' for stdarg. */
7665
7666 static tree
7667 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7668 gimple_seq *post_p)
7669 {
7670 HOST_WIDE_INT size, rsize, align;
7671 tree addr, incr;
7672 bool indirect;
7673 tree ptrtype = build_pointer_type (type);
7674
7675 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7676 {
7677 indirect = true;
7678 size = rsize = UNITS_PER_WORD;
7679 align = 0;
7680 }
7681 else
7682 {
7683 indirect = false;
7684 size = int_size_in_bytes (type);
7685 rsize = ROUND_UP (size, UNITS_PER_WORD);
7686 align = 0;
7687
7688 if (TARGET_ARCH64)
7689 {
7690 /* For SPARC64, objects requiring 16-byte alignment get it. */
7691 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7692 align = 2 * UNITS_PER_WORD;
7693
7694 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7695 are left-justified in their slots. */
7696 if (AGGREGATE_TYPE_P (type))
7697 {
7698 if (size == 0)
7699 size = rsize = UNITS_PER_WORD;
7700 else
7701 size = rsize;
7702 }
7703 }
7704 }
7705
7706 incr = valist;
7707 if (align)
7708 {
7709 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7710 incr = fold_convert (sizetype, incr);
7711 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7712 size_int (-align));
7713 incr = fold_convert (ptr_type_node, incr);
7714 }
7715
7716 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7717 addr = incr;
7718
7719 if (BYTES_BIG_ENDIAN && size < rsize)
7720 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7721
7722 if (indirect)
7723 {
7724 addr = fold_convert (build_pointer_type (ptrtype), addr);
7725 addr = build_va_arg_indirect_ref (addr);
7726 }
7727
7728 /* If the address isn't aligned properly for the type, we need a temporary.
7729 FIXME: This is inefficient, usually we can do this in registers. */
7730 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7731 {
7732 tree tmp = create_tmp_var (type, "va_arg_tmp");
7733 tree dest_addr = build_fold_addr_expr (tmp);
7734 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7735 3, dest_addr, addr, size_int (rsize));
7736 TREE_ADDRESSABLE (tmp) = 1;
7737 gimplify_and_add (copy, pre_p);
7738 addr = dest_addr;
7739 }
7740
7741 else
7742 addr = fold_convert (ptrtype, addr);
7743
7744 incr = fold_build_pointer_plus_hwi (incr, rsize);
7745 gimplify_assign (valist, incr, post_p);
7746
7747 return build_va_arg_indirect_ref (addr);
7748 }
7749 \f
7750 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7751 Specify whether the vector mode is supported by the hardware. */
7752
7753 static bool
7754 sparc_vector_mode_supported_p (machine_mode mode)
7755 {
7756 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7757 }
7758 \f
7759 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7760
7761 static machine_mode
7762 sparc_preferred_simd_mode (scalar_mode mode)
7763 {
7764 if (TARGET_VIS)
7765 switch (mode)
7766 {
7767 case E_SImode:
7768 return V2SImode;
7769 case E_HImode:
7770 return V4HImode;
7771 case E_QImode:
7772 return V8QImode;
7773
7774 default:;
7775 }
7776
7777 return word_mode;
7778 }
7779 \f
7780 /* Return the string to output an unconditional branch to LABEL, which is
7781 the operand number of the label.
7782
7783 DEST is the destination insn (i.e. the label), INSN is the source. */
7784
7785 const char *
7786 output_ubranch (rtx dest, rtx_insn *insn)
7787 {
7788 static char string[64];
7789 bool v9_form = false;
7790 int delta;
7791 char *p;
7792
7793 /* Even if we are trying to use cbcond for this, evaluate
7794 whether we can use V9 branches as our backup plan. */
7795
7796 delta = 5000000;
7797 if (INSN_ADDRESSES_SET_P ())
7798 delta = (INSN_ADDRESSES (INSN_UID (dest))
7799 - INSN_ADDRESSES (INSN_UID (insn)));
7800
7801 /* Leave some instructions for "slop". */
7802 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7803 v9_form = true;
7804
7805 if (TARGET_CBCOND)
7806 {
7807 bool emit_nop = emit_cbcond_nop (insn);
7808 bool far = false;
7809 const char *rval;
7810
7811 if (delta < -500 || delta > 500)
7812 far = true;
7813
7814 if (far)
7815 {
7816 if (v9_form)
7817 rval = "ba,a,pt\t%%xcc, %l0";
7818 else
7819 rval = "b,a\t%l0";
7820 }
7821 else
7822 {
7823 if (emit_nop)
7824 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7825 else
7826 rval = "cwbe\t%%g0, %%g0, %l0";
7827 }
7828 return rval;
7829 }
7830
7831 if (v9_form)
7832 strcpy (string, "ba%*,pt\t%%xcc, ");
7833 else
7834 strcpy (string, "b%*\t");
7835
7836 p = strchr (string, '\0');
7837 *p++ = '%';
7838 *p++ = 'l';
7839 *p++ = '0';
7840 *p++ = '%';
7841 *p++ = '(';
7842 *p = '\0';
7843
7844 return string;
7845 }
7846
7847 /* Return the string to output a conditional branch to LABEL, which is
7848 the operand number of the label. OP is the conditional expression.
7849 XEXP (OP, 0) is assumed to be a condition code register (integer or
7850 floating point) and its mode specifies what kind of comparison we made.
7851
7852 DEST is the destination insn (i.e. the label), INSN is the source.
7853
7854 REVERSED is nonzero if we should reverse the sense of the comparison.
7855
7856 ANNUL is nonzero if we should generate an annulling branch. */
7857
7858 const char *
7859 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7860 rtx_insn *insn)
7861 {
7862 static char string[64];
7863 enum rtx_code code = GET_CODE (op);
7864 rtx cc_reg = XEXP (op, 0);
7865 machine_mode mode = GET_MODE (cc_reg);
7866 const char *labelno, *branch;
7867 int spaces = 8, far;
7868 char *p;
7869
7870 /* v9 branches are limited to +-1MB. If it is too far away,
7871 change
7872
7873 bne,pt %xcc, .LC30
7874
7875 to
7876
7877 be,pn %xcc, .+12
7878 nop
7879 ba .LC30
7880
7881 and
7882
7883 fbne,a,pn %fcc2, .LC29
7884
7885 to
7886
7887 fbe,pt %fcc2, .+16
7888 nop
7889 ba .LC29 */
7890
7891 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7892 if (reversed ^ far)
7893 {
7894 /* Reversal of FP compares takes care -- an ordered compare
7895 becomes an unordered compare and vice versa. */
7896 if (mode == CCFPmode || mode == CCFPEmode)
7897 code = reverse_condition_maybe_unordered (code);
7898 else
7899 code = reverse_condition (code);
7900 }
7901
7902 /* Start by writing the branch condition. */
7903 if (mode == CCFPmode || mode == CCFPEmode)
7904 {
7905 switch (code)
7906 {
7907 case NE:
7908 branch = "fbne";
7909 break;
7910 case EQ:
7911 branch = "fbe";
7912 break;
7913 case GE:
7914 branch = "fbge";
7915 break;
7916 case GT:
7917 branch = "fbg";
7918 break;
7919 case LE:
7920 branch = "fble";
7921 break;
7922 case LT:
7923 branch = "fbl";
7924 break;
7925 case UNORDERED:
7926 branch = "fbu";
7927 break;
7928 case ORDERED:
7929 branch = "fbo";
7930 break;
7931 case UNGT:
7932 branch = "fbug";
7933 break;
7934 case UNLT:
7935 branch = "fbul";
7936 break;
7937 case UNEQ:
7938 branch = "fbue";
7939 break;
7940 case UNGE:
7941 branch = "fbuge";
7942 break;
7943 case UNLE:
7944 branch = "fbule";
7945 break;
7946 case LTGT:
7947 branch = "fblg";
7948 break;
7949 default:
7950 gcc_unreachable ();
7951 }
7952
7953 /* ??? !v9: FP branches cannot be preceded by another floating point
7954 insn. Because there is currently no concept of pre-delay slots,
7955 we can fix this only by always emitting a nop before a floating
7956 point branch. */
7957
7958 string[0] = '\0';
7959 if (! TARGET_V9)
7960 strcpy (string, "nop\n\t");
7961 strcat (string, branch);
7962 }
7963 else
7964 {
7965 switch (code)
7966 {
7967 case NE:
7968 if (mode == CCVmode || mode == CCXVmode)
7969 branch = "bvs";
7970 else
7971 branch = "bne";
7972 break;
7973 case EQ:
7974 if (mode == CCVmode || mode == CCXVmode)
7975 branch = "bvc";
7976 else
7977 branch = "be";
7978 break;
7979 case GE:
7980 if (mode == CCNZmode || mode == CCXNZmode)
7981 branch = "bpos";
7982 else
7983 branch = "bge";
7984 break;
7985 case GT:
7986 branch = "bg";
7987 break;
7988 case LE:
7989 branch = "ble";
7990 break;
7991 case LT:
7992 if (mode == CCNZmode || mode == CCXNZmode)
7993 branch = "bneg";
7994 else
7995 branch = "bl";
7996 break;
7997 case GEU:
7998 branch = "bgeu";
7999 break;
8000 case GTU:
8001 branch = "bgu";
8002 break;
8003 case LEU:
8004 branch = "bleu";
8005 break;
8006 case LTU:
8007 branch = "blu";
8008 break;
8009 default:
8010 gcc_unreachable ();
8011 }
8012 strcpy (string, branch);
8013 }
8014 spaces -= strlen (branch);
8015 p = strchr (string, '\0');
8016
8017 /* Now add the annulling, the label, and a possible noop. */
8018 if (annul && ! far)
8019 {
8020 strcpy (p, ",a");
8021 p += 2;
8022 spaces -= 2;
8023 }
8024
8025 if (TARGET_V9)
8026 {
8027 rtx note;
8028 int v8 = 0;
8029
8030 if (! far && insn && INSN_ADDRESSES_SET_P ())
8031 {
8032 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8033 - INSN_ADDRESSES (INSN_UID (insn)));
8034 /* Leave some instructions for "slop". */
8035 if (delta < -260000 || delta >= 260000)
8036 v8 = 1;
8037 }
8038
8039 switch (mode)
8040 {
8041 case E_CCmode:
8042 case E_CCNZmode:
8043 case E_CCCmode:
8044 case E_CCVmode:
8045 labelno = "%%icc, ";
8046 if (v8)
8047 labelno = "";
8048 break;
8049 case E_CCXmode:
8050 case E_CCXNZmode:
8051 case E_CCXCmode:
8052 case E_CCXVmode:
8053 labelno = "%%xcc, ";
8054 gcc_assert (!v8);
8055 break;
8056 case E_CCFPmode:
8057 case E_CCFPEmode:
8058 {
8059 static char v9_fcc_labelno[] = "%%fccX, ";
8060 /* Set the char indicating the number of the fcc reg to use. */
8061 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8062 labelno = v9_fcc_labelno;
8063 if (v8)
8064 {
8065 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8066 labelno = "";
8067 }
8068 }
8069 break;
8070 default:
8071 gcc_unreachable ();
8072 }
8073
8074 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8075 {
8076 strcpy (p,
8077 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8078 >= profile_probability::even ()) ^ far)
8079 ? ",pt" : ",pn");
8080 p += 3;
8081 spaces -= 3;
8082 }
8083 }
8084 else
8085 labelno = "";
8086
8087 if (spaces > 0)
8088 *p++ = '\t';
8089 else
8090 *p++ = ' ';
8091 strcpy (p, labelno);
8092 p = strchr (p, '\0');
8093 if (far)
8094 {
8095 strcpy (p, ".+12\n\t nop\n\tb\t");
8096 /* Skip the next insn if requested or
8097 if we know that it will be a nop. */
8098 if (annul || ! final_sequence)
8099 p[3] = '6';
8100 p += 14;
8101 }
8102 *p++ = '%';
8103 *p++ = 'l';
8104 *p++ = label + '0';
8105 *p++ = '%';
8106 *p++ = '#';
8107 *p = '\0';
8108
8109 return string;
8110 }
8111
8112 /* Emit a library call comparison between floating point X and Y.
8113 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8114 Return the new operator to be used in the comparison sequence.
8115
8116 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8117 values as arguments instead of the TFmode registers themselves,
8118 that's why we cannot call emit_float_lib_cmp. */
8119
8120 rtx
8121 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8122 {
8123 const char *qpfunc;
8124 rtx slot0, slot1, result, tem, tem2, libfunc;
8125 machine_mode mode;
8126 enum rtx_code new_comparison;
8127
8128 switch (comparison)
8129 {
8130 case EQ:
8131 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8132 break;
8133
8134 case NE:
8135 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8136 break;
8137
8138 case GT:
8139 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8140 break;
8141
8142 case GE:
8143 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8144 break;
8145
8146 case LT:
8147 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8148 break;
8149
8150 case LE:
8151 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8152 break;
8153
8154 case ORDERED:
8155 case UNORDERED:
8156 case UNGT:
8157 case UNLT:
8158 case UNEQ:
8159 case UNGE:
8160 case UNLE:
8161 case LTGT:
8162 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8163 break;
8164
8165 default:
8166 gcc_unreachable ();
8167 }
8168
8169 if (TARGET_ARCH64)
8170 {
8171 if (MEM_P (x))
8172 {
8173 tree expr = MEM_EXPR (x);
8174 if (expr)
8175 mark_addressable (expr);
8176 slot0 = x;
8177 }
8178 else
8179 {
8180 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8181 emit_move_insn (slot0, x);
8182 }
8183
8184 if (MEM_P (y))
8185 {
8186 tree expr = MEM_EXPR (y);
8187 if (expr)
8188 mark_addressable (expr);
8189 slot1 = y;
8190 }
8191 else
8192 {
8193 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8194 emit_move_insn (slot1, y);
8195 }
8196
8197 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8198 emit_library_call (libfunc, LCT_NORMAL,
8199 DImode,
8200 XEXP (slot0, 0), Pmode,
8201 XEXP (slot1, 0), Pmode);
8202 mode = DImode;
8203 }
8204 else
8205 {
8206 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8207 emit_library_call (libfunc, LCT_NORMAL,
8208 SImode,
8209 x, TFmode, y, TFmode);
8210 mode = SImode;
8211 }
8212
8213
8214 /* Immediately move the result of the libcall into a pseudo
8215 register so reload doesn't clobber the value if it needs
8216 the return register for a spill reg. */
8217 result = gen_reg_rtx (mode);
8218 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8219
8220 switch (comparison)
8221 {
8222 default:
8223 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8224 case ORDERED:
8225 case UNORDERED:
8226 new_comparison = (comparison == UNORDERED ? EQ : NE);
8227 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8228 case UNGT:
8229 case UNGE:
8230 new_comparison = (comparison == UNGT ? GT : NE);
8231 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8232 case UNLE:
8233 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8234 case UNLT:
8235 tem = gen_reg_rtx (mode);
8236 if (TARGET_ARCH32)
8237 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8238 else
8239 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8240 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8241 case UNEQ:
8242 case LTGT:
8243 tem = gen_reg_rtx (mode);
8244 if (TARGET_ARCH32)
8245 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8246 else
8247 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8248 tem2 = gen_reg_rtx (mode);
8249 if (TARGET_ARCH32)
8250 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8251 else
8252 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8253 new_comparison = (comparison == UNEQ ? EQ : NE);
8254 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8255 }
8256
8257 gcc_unreachable ();
8258 }
8259
8260 /* Generate an unsigned DImode to FP conversion. This is the same code
8261 optabs would emit if we didn't have TFmode patterns. */
8262
8263 void
8264 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8265 {
8266 rtx i0, i1, f0, in, out;
8267
8268 out = operands[0];
8269 in = force_reg (DImode, operands[1]);
8270 rtx_code_label *neglab = gen_label_rtx ();
8271 rtx_code_label *donelab = gen_label_rtx ();
8272 i0 = gen_reg_rtx (DImode);
8273 i1 = gen_reg_rtx (DImode);
8274 f0 = gen_reg_rtx (mode);
8275
8276 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8277
8278 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8279 emit_jump_insn (gen_jump (donelab));
8280 emit_barrier ();
8281
8282 emit_label (neglab);
8283
8284 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8285 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8286 emit_insn (gen_iordi3 (i0, i0, i1));
8287 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8288 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8289
8290 emit_label (donelab);
8291 }
8292
8293 /* Generate an FP to unsigned DImode conversion. This is the same code
8294 optabs would emit if we didn't have TFmode patterns. */
8295
8296 void
8297 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8298 {
8299 rtx i0, i1, f0, in, out, limit;
8300
8301 out = operands[0];
8302 in = force_reg (mode, operands[1]);
8303 rtx_code_label *neglab = gen_label_rtx ();
8304 rtx_code_label *donelab = gen_label_rtx ();
8305 i0 = gen_reg_rtx (DImode);
8306 i1 = gen_reg_rtx (DImode);
8307 limit = gen_reg_rtx (mode);
8308 f0 = gen_reg_rtx (mode);
8309
8310 emit_move_insn (limit,
8311 const_double_from_real_value (
8312 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8313 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8314
8315 emit_insn (gen_rtx_SET (out,
8316 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8317 emit_jump_insn (gen_jump (donelab));
8318 emit_barrier ();
8319
8320 emit_label (neglab);
8321
8322 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8323 emit_insn (gen_rtx_SET (i0,
8324 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8325 emit_insn (gen_movdi (i1, const1_rtx));
8326 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8327 emit_insn (gen_xordi3 (out, i0, i1));
8328
8329 emit_label (donelab);
8330 }
8331
8332 /* Return the string to output a compare and branch instruction to DEST.
8333 DEST is the destination insn (i.e. the label), INSN is the source,
8334 and OP is the conditional expression. */
8335
8336 const char *
8337 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8338 {
8339 machine_mode mode = GET_MODE (XEXP (op, 0));
8340 enum rtx_code code = GET_CODE (op);
8341 const char *cond_str, *tmpl;
8342 int far, emit_nop, len;
8343 static char string[64];
8344 char size_char;
8345
8346 /* Compare and Branch is limited to +-2KB. If it is too far away,
8347 change
8348
8349 cxbne X, Y, .LC30
8350
8351 to
8352
8353 cxbe X, Y, .+16
8354 nop
8355 ba,pt xcc, .LC30
8356 nop */
8357
8358 len = get_attr_length (insn);
8359
8360 far = len == 4;
8361 emit_nop = len == 2;
8362
8363 if (far)
8364 code = reverse_condition (code);
8365
8366 size_char = ((mode == SImode) ? 'w' : 'x');
8367
8368 switch (code)
8369 {
8370 case NE:
8371 cond_str = "ne";
8372 break;
8373
8374 case EQ:
8375 cond_str = "e";
8376 break;
8377
8378 case GE:
8379 cond_str = "ge";
8380 break;
8381
8382 case GT:
8383 cond_str = "g";
8384 break;
8385
8386 case LE:
8387 cond_str = "le";
8388 break;
8389
8390 case LT:
8391 cond_str = "l";
8392 break;
8393
8394 case GEU:
8395 cond_str = "cc";
8396 break;
8397
8398 case GTU:
8399 cond_str = "gu";
8400 break;
8401
8402 case LEU:
8403 cond_str = "leu";
8404 break;
8405
8406 case LTU:
8407 cond_str = "cs";
8408 break;
8409
8410 default:
8411 gcc_unreachable ();
8412 }
8413
8414 if (far)
8415 {
8416 int veryfar = 1, delta;
8417
8418 if (INSN_ADDRESSES_SET_P ())
8419 {
8420 delta = (INSN_ADDRESSES (INSN_UID (dest))
8421 - INSN_ADDRESSES (INSN_UID (insn)));
8422 /* Leave some instructions for "slop". */
8423 if (delta >= -260000 && delta < 260000)
8424 veryfar = 0;
8425 }
8426
8427 if (veryfar)
8428 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8429 else
8430 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8431 }
8432 else
8433 {
8434 if (emit_nop)
8435 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8436 else
8437 tmpl = "c%cb%s\t%%1, %%2, %%3";
8438 }
8439
8440 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8441
8442 return string;
8443 }
8444
8445 /* Return the string to output a conditional branch to LABEL, testing
8446 register REG. LABEL is the operand number of the label; REG is the
8447 operand number of the reg. OP is the conditional expression. The mode
8448 of REG says what kind of comparison we made.
8449
8450 DEST is the destination insn (i.e. the label), INSN is the source.
8451
8452 REVERSED is nonzero if we should reverse the sense of the comparison.
8453
8454 ANNUL is nonzero if we should generate an annulling branch. */
8455
8456 const char *
8457 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8458 int annul, rtx_insn *insn)
8459 {
8460 static char string[64];
8461 enum rtx_code code = GET_CODE (op);
8462 machine_mode mode = GET_MODE (XEXP (op, 0));
8463 rtx note;
8464 int far;
8465 char *p;
8466
8467 /* branch on register are limited to +-128KB. If it is too far away,
8468 change
8469
8470 brnz,pt %g1, .LC30
8471
8472 to
8473
8474 brz,pn %g1, .+12
8475 nop
8476 ba,pt %xcc, .LC30
8477
8478 and
8479
8480 brgez,a,pn %o1, .LC29
8481
8482 to
8483
8484 brlz,pt %o1, .+16
8485 nop
8486 ba,pt %xcc, .LC29 */
8487
8488 far = get_attr_length (insn) >= 3;
8489
8490 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8491 if (reversed ^ far)
8492 code = reverse_condition (code);
8493
8494 /* Only 64-bit versions of these instructions exist. */
8495 gcc_assert (mode == DImode);
8496
8497 /* Start by writing the branch condition. */
8498
8499 switch (code)
8500 {
8501 case NE:
8502 strcpy (string, "brnz");
8503 break;
8504
8505 case EQ:
8506 strcpy (string, "brz");
8507 break;
8508
8509 case GE:
8510 strcpy (string, "brgez");
8511 break;
8512
8513 case LT:
8514 strcpy (string, "brlz");
8515 break;
8516
8517 case LE:
8518 strcpy (string, "brlez");
8519 break;
8520
8521 case GT:
8522 strcpy (string, "brgz");
8523 break;
8524
8525 default:
8526 gcc_unreachable ();
8527 }
8528
8529 p = strchr (string, '\0');
8530
8531 /* Now add the annulling, reg, label, and nop. */
8532 if (annul && ! far)
8533 {
8534 strcpy (p, ",a");
8535 p += 2;
8536 }
8537
8538 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8539 {
8540 strcpy (p,
8541 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8542 >= profile_probability::even ()) ^ far)
8543 ? ",pt" : ",pn");
8544 p += 3;
8545 }
8546
8547 *p = p < string + 8 ? '\t' : ' ';
8548 p++;
8549 *p++ = '%';
8550 *p++ = '0' + reg;
8551 *p++ = ',';
8552 *p++ = ' ';
8553 if (far)
8554 {
8555 int veryfar = 1, delta;
8556
8557 if (INSN_ADDRESSES_SET_P ())
8558 {
8559 delta = (INSN_ADDRESSES (INSN_UID (dest))
8560 - INSN_ADDRESSES (INSN_UID (insn)));
8561 /* Leave some instructions for "slop". */
8562 if (delta >= -260000 && delta < 260000)
8563 veryfar = 0;
8564 }
8565
8566 strcpy (p, ".+12\n\t nop\n\t");
8567 /* Skip the next insn if requested or
8568 if we know that it will be a nop. */
8569 if (annul || ! final_sequence)
8570 p[3] = '6';
8571 p += 12;
8572 if (veryfar)
8573 {
8574 strcpy (p, "b\t");
8575 p += 2;
8576 }
8577 else
8578 {
8579 strcpy (p, "ba,pt\t%%xcc, ");
8580 p += 13;
8581 }
8582 }
8583 *p++ = '%';
8584 *p++ = 'l';
8585 *p++ = '0' + label;
8586 *p++ = '%';
8587 *p++ = '#';
8588 *p = '\0';
8589
8590 return string;
8591 }
8592
8593 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8594 Such instructions cannot be used in the delay slot of return insn on v9.
8595 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8596 */
8597
8598 static int
8599 epilogue_renumber (register rtx *where, int test)
8600 {
8601 register const char *fmt;
8602 register int i;
8603 register enum rtx_code code;
8604
8605 if (*where == 0)
8606 return 0;
8607
8608 code = GET_CODE (*where);
8609
8610 switch (code)
8611 {
8612 case REG:
8613 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8614 return 1;
8615 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8616 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8617 /* fallthrough */
8618 case SCRATCH:
8619 case CC0:
8620 case PC:
8621 case CONST_INT:
8622 case CONST_WIDE_INT:
8623 case CONST_DOUBLE:
8624 return 0;
8625
8626 /* Do not replace the frame pointer with the stack pointer because
8627 it can cause the delayed instruction to load below the stack.
8628 This occurs when instructions like:
8629
8630 (set (reg/i:SI 24 %i0)
8631 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8632 (const_int -20 [0xffffffec])) 0))
8633
8634 are in the return delayed slot. */
8635 case PLUS:
8636 if (GET_CODE (XEXP (*where, 0)) == REG
8637 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8638 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8639 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8640 return 1;
8641 break;
8642
8643 case MEM:
8644 if (SPARC_STACK_BIAS
8645 && GET_CODE (XEXP (*where, 0)) == REG
8646 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8647 return 1;
8648 break;
8649
8650 default:
8651 break;
8652 }
8653
8654 fmt = GET_RTX_FORMAT (code);
8655
8656 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8657 {
8658 if (fmt[i] == 'E')
8659 {
8660 register int j;
8661 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8662 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8663 return 1;
8664 }
8665 else if (fmt[i] == 'e'
8666 && epilogue_renumber (&(XEXP (*where, i)), test))
8667 return 1;
8668 }
8669 return 0;
8670 }
8671 \f
8672 /* Leaf functions and non-leaf functions have different needs. */
8673
8674 static const int
8675 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8676
8677 static const int
8678 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8679
8680 static const int *const reg_alloc_orders[] = {
8681 reg_leaf_alloc_order,
8682 reg_nonleaf_alloc_order};
8683
8684 void
8685 order_regs_for_local_alloc (void)
8686 {
8687 static int last_order_nonleaf = 1;
8688
8689 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8690 {
8691 last_order_nonleaf = !last_order_nonleaf;
8692 memcpy ((char *) reg_alloc_order,
8693 (const char *) reg_alloc_orders[last_order_nonleaf],
8694 FIRST_PSEUDO_REGISTER * sizeof (int));
8695 }
8696 }
8697 \f
8698 /* Return 1 if REG and MEM are legitimate enough to allow the various
8699 MEM<-->REG splits to be run. */
8700
8701 int
8702 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8703 {
8704 /* Punt if we are here by mistake. */
8705 gcc_assert (reload_completed);
8706
8707 /* We must have an offsettable memory reference. */
8708 if (!offsettable_memref_p (mem))
8709 return 0;
8710
8711 /* If we have legitimate args for ldd/std, we do not want
8712 the split to happen. */
8713 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8714 return 0;
8715
8716 /* Success. */
8717 return 1;
8718 }
8719
8720 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8721
8722 void
8723 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8724 {
8725 rtx high_part = gen_highpart (mode, dest);
8726 rtx low_part = gen_lowpart (mode, dest);
8727 rtx word0 = adjust_address (src, mode, 0);
8728 rtx word1 = adjust_address (src, mode, 4);
8729
8730 if (reg_overlap_mentioned_p (high_part, word1))
8731 {
8732 emit_move_insn_1 (low_part, word1);
8733 emit_move_insn_1 (high_part, word0);
8734 }
8735 else
8736 {
8737 emit_move_insn_1 (high_part, word0);
8738 emit_move_insn_1 (low_part, word1);
8739 }
8740 }
8741
8742 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8743
8744 void
8745 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8746 {
8747 rtx word0 = adjust_address (dest, mode, 0);
8748 rtx word1 = adjust_address (dest, mode, 4);
8749 rtx high_part = gen_highpart (mode, src);
8750 rtx low_part = gen_lowpart (mode, src);
8751
8752 emit_move_insn_1 (word0, high_part);
8753 emit_move_insn_1 (word1, low_part);
8754 }
8755
8756 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8757
8758 int
8759 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8760 {
8761 /* Punt if we are here by mistake. */
8762 gcc_assert (reload_completed);
8763
8764 if (GET_CODE (reg1) == SUBREG)
8765 reg1 = SUBREG_REG (reg1);
8766 if (GET_CODE (reg1) != REG)
8767 return 0;
8768 const int regno1 = REGNO (reg1);
8769
8770 if (GET_CODE (reg2) == SUBREG)
8771 reg2 = SUBREG_REG (reg2);
8772 if (GET_CODE (reg2) != REG)
8773 return 0;
8774 const int regno2 = REGNO (reg2);
8775
8776 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8777 return 1;
8778
8779 if (TARGET_VIS3)
8780 {
8781 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8782 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8783 return 1;
8784 }
8785
8786 return 0;
8787 }
8788
8789 /* Split a REG <--> REG move into a pair of moves in MODE. */
8790
8791 void
8792 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8793 {
8794 rtx dest1 = gen_highpart (mode, dest);
8795 rtx dest2 = gen_lowpart (mode, dest);
8796 rtx src1 = gen_highpart (mode, src);
8797 rtx src2 = gen_lowpart (mode, src);
8798
8799 /* Now emit using the real source and destination we found, swapping
8800 the order if we detect overlap. */
8801 if (reg_overlap_mentioned_p (dest1, src2))
8802 {
8803 emit_move_insn_1 (dest2, src2);
8804 emit_move_insn_1 (dest1, src1);
8805 }
8806 else
8807 {
8808 emit_move_insn_1 (dest1, src1);
8809 emit_move_insn_1 (dest2, src2);
8810 }
8811 }
8812
8813 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8814 This makes them candidates for using ldd and std insns.
8815
8816 Note reg1 and reg2 *must* be hard registers. */
8817
8818 int
8819 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8820 {
8821 /* We might have been passed a SUBREG. */
8822 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8823 return 0;
8824
8825 if (REGNO (reg1) % 2 != 0)
8826 return 0;
8827
8828 /* Integer ldd is deprecated in SPARC V9 */
8829 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8830 return 0;
8831
8832 return (REGNO (reg1) == REGNO (reg2) - 1);
8833 }
8834
8835 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8836 an ldd or std insn.
8837
8838 This can only happen when addr1 and addr2, the addresses in mem1
8839 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8840 addr1 must also be aligned on a 64-bit boundary.
8841
8842 Also iff dependent_reg_rtx is not null it should not be used to
8843 compute the address for mem1, i.e. we cannot optimize a sequence
8844 like:
8845 ld [%o0], %o0
8846 ld [%o0 + 4], %o1
8847 to
8848 ldd [%o0], %o0
8849 nor:
8850 ld [%g3 + 4], %g3
8851 ld [%g3], %g2
8852 to
8853 ldd [%g3], %g2
8854
8855 But, note that the transformation from:
8856 ld [%g2 + 4], %g3
8857 ld [%g2], %g2
8858 to
8859 ldd [%g2], %g2
8860 is perfectly fine. Thus, the peephole2 patterns always pass us
8861 the destination register of the first load, never the second one.
8862
8863 For stores we don't have a similar problem, so dependent_reg_rtx is
8864 NULL_RTX. */
8865
8866 int
8867 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8868 {
8869 rtx addr1, addr2;
8870 unsigned int reg1;
8871 HOST_WIDE_INT offset1;
8872
8873 /* The mems cannot be volatile. */
8874 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8875 return 0;
8876
8877 /* MEM1 should be aligned on a 64-bit boundary. */
8878 if (MEM_ALIGN (mem1) < 64)
8879 return 0;
8880
8881 addr1 = XEXP (mem1, 0);
8882 addr2 = XEXP (mem2, 0);
8883
8884 /* Extract a register number and offset (if used) from the first addr. */
8885 if (GET_CODE (addr1) == PLUS)
8886 {
8887 /* If not a REG, return zero. */
8888 if (GET_CODE (XEXP (addr1, 0)) != REG)
8889 return 0;
8890 else
8891 {
8892 reg1 = REGNO (XEXP (addr1, 0));
8893 /* The offset must be constant! */
8894 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8895 return 0;
8896 offset1 = INTVAL (XEXP (addr1, 1));
8897 }
8898 }
8899 else if (GET_CODE (addr1) != REG)
8900 return 0;
8901 else
8902 {
8903 reg1 = REGNO (addr1);
8904 /* This was a simple (mem (reg)) expression. Offset is 0. */
8905 offset1 = 0;
8906 }
8907
8908 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8909 if (GET_CODE (addr2) != PLUS)
8910 return 0;
8911
8912 if (GET_CODE (XEXP (addr2, 0)) != REG
8913 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8914 return 0;
8915
8916 if (reg1 != REGNO (XEXP (addr2, 0)))
8917 return 0;
8918
8919 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8920 return 0;
8921
8922 /* The first offset must be evenly divisible by 8 to ensure the
8923 address is 64-bit aligned. */
8924 if (offset1 % 8 != 0)
8925 return 0;
8926
8927 /* The offset for the second addr must be 4 more than the first addr. */
8928 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8929 return 0;
8930
8931 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8932 instructions. */
8933 return 1;
8934 }
8935
8936 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8937
8938 rtx
8939 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8940 {
8941 rtx x = widen_memory_access (mem1, mode, 0);
8942 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8943 return x;
8944 }
8945
8946 /* Return 1 if reg is a pseudo, or is the first register in
8947 a hard register pair. This makes it suitable for use in
8948 ldd and std insns. */
8949
8950 int
8951 register_ok_for_ldd (rtx reg)
8952 {
8953 /* We might have been passed a SUBREG. */
8954 if (!REG_P (reg))
8955 return 0;
8956
8957 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8958 return (REGNO (reg) % 2 == 0);
8959
8960 return 1;
8961 }
8962
8963 /* Return 1 if OP, a MEM, has an address which is known to be
8964 aligned to an 8-byte boundary. */
8965
8966 int
8967 memory_ok_for_ldd (rtx op)
8968 {
8969 /* In 64-bit mode, we assume that the address is word-aligned. */
8970 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8971 return 0;
8972
8973 if (! can_create_pseudo_p ()
8974 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8975 return 0;
8976
8977 return 1;
8978 }
8979 \f
8980 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8981
8982 static bool
8983 sparc_print_operand_punct_valid_p (unsigned char code)
8984 {
8985 if (code == '#'
8986 || code == '*'
8987 || code == '('
8988 || code == ')'
8989 || code == '_'
8990 || code == '&')
8991 return true;
8992
8993 return false;
8994 }
8995
8996 /* Implement TARGET_PRINT_OPERAND.
8997 Print operand X (an rtx) in assembler syntax to file FILE.
8998 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8999 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9000
9001 static void
9002 sparc_print_operand (FILE *file, rtx x, int code)
9003 {
9004 const char *s;
9005
9006 switch (code)
9007 {
9008 case '#':
9009 /* Output an insn in a delay slot. */
9010 if (final_sequence)
9011 sparc_indent_opcode = 1;
9012 else
9013 fputs ("\n\t nop", file);
9014 return;
9015 case '*':
9016 /* Output an annul flag if there's nothing for the delay slot and we
9017 are optimizing. This is always used with '(' below.
9018 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9019 this is a dbx bug. So, we only do this when optimizing.
9020 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9021 Always emit a nop in case the next instruction is a branch. */
9022 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9023 fputs (",a", file);
9024 return;
9025 case '(':
9026 /* Output a 'nop' if there's nothing for the delay slot and we are
9027 not optimizing. This is always used with '*' above. */
9028 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9029 fputs ("\n\t nop", file);
9030 else if (final_sequence)
9031 sparc_indent_opcode = 1;
9032 return;
9033 case ')':
9034 /* Output the right displacement from the saved PC on function return.
9035 The caller may have placed an "unimp" insn immediately after the call
9036 so we have to account for it. This insn is used in the 32-bit ABI
9037 when calling a function that returns a non zero-sized structure. The
9038 64-bit ABI doesn't have it. Be careful to have this test be the same
9039 as that for the call. The exception is when sparc_std_struct_return
9040 is enabled, the psABI is followed exactly and the adjustment is made
9041 by the code in sparc_struct_value_rtx. The call emitted is the same
9042 when sparc_std_struct_return is enabled. */
9043 if (!TARGET_ARCH64
9044 && cfun->returns_struct
9045 && !sparc_std_struct_return
9046 && DECL_SIZE (DECL_RESULT (current_function_decl))
9047 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9048 == INTEGER_CST
9049 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9050 fputs ("12", file);
9051 else
9052 fputc ('8', file);
9053 return;
9054 case '_':
9055 /* Output the Embedded Medium/Anywhere code model base register. */
9056 fputs (EMBMEDANY_BASE_REG, file);
9057 return;
9058 case '&':
9059 /* Print some local dynamic TLS name. */
9060 if (const char *name = get_some_local_dynamic_name ())
9061 assemble_name (file, name);
9062 else
9063 output_operand_lossage ("'%%&' used without any "
9064 "local dynamic TLS references");
9065 return;
9066
9067 case 'Y':
9068 /* Adjust the operand to take into account a RESTORE operation. */
9069 if (GET_CODE (x) == CONST_INT)
9070 break;
9071 else if (GET_CODE (x) != REG)
9072 output_operand_lossage ("invalid %%Y operand");
9073 else if (REGNO (x) < 8)
9074 fputs (reg_names[REGNO (x)], file);
9075 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9076 fputs (reg_names[REGNO (x)-16], file);
9077 else
9078 output_operand_lossage ("invalid %%Y operand");
9079 return;
9080 case 'L':
9081 /* Print out the low order register name of a register pair. */
9082 if (WORDS_BIG_ENDIAN)
9083 fputs (reg_names[REGNO (x)+1], file);
9084 else
9085 fputs (reg_names[REGNO (x)], file);
9086 return;
9087 case 'H':
9088 /* Print out the high order register name of a register pair. */
9089 if (WORDS_BIG_ENDIAN)
9090 fputs (reg_names[REGNO (x)], file);
9091 else
9092 fputs (reg_names[REGNO (x)+1], file);
9093 return;
9094 case 'R':
9095 /* Print out the second register name of a register pair or quad.
9096 I.e., R (%o0) => %o1. */
9097 fputs (reg_names[REGNO (x)+1], file);
9098 return;
9099 case 'S':
9100 /* Print out the third register name of a register quad.
9101 I.e., S (%o0) => %o2. */
9102 fputs (reg_names[REGNO (x)+2], file);
9103 return;
9104 case 'T':
9105 /* Print out the fourth register name of a register quad.
9106 I.e., T (%o0) => %o3. */
9107 fputs (reg_names[REGNO (x)+3], file);
9108 return;
9109 case 'x':
9110 /* Print a condition code register. */
9111 if (REGNO (x) == SPARC_ICC_REG)
9112 {
9113 switch (GET_MODE (x))
9114 {
9115 case E_CCmode:
9116 case E_CCNZmode:
9117 case E_CCCmode:
9118 case E_CCVmode:
9119 s = "%icc";
9120 break;
9121 case E_CCXmode:
9122 case E_CCXNZmode:
9123 case E_CCXCmode:
9124 case E_CCXVmode:
9125 s = "%xcc";
9126 break;
9127 default:
9128 gcc_unreachable ();
9129 }
9130 fputs (s, file);
9131 }
9132 else
9133 /* %fccN register */
9134 fputs (reg_names[REGNO (x)], file);
9135 return;
9136 case 'm':
9137 /* Print the operand's address only. */
9138 output_address (GET_MODE (x), XEXP (x, 0));
9139 return;
9140 case 'r':
9141 /* In this case we need a register. Use %g0 if the
9142 operand is const0_rtx. */
9143 if (x == const0_rtx
9144 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9145 {
9146 fputs ("%g0", file);
9147 return;
9148 }
9149 else
9150 break;
9151
9152 case 'A':
9153 switch (GET_CODE (x))
9154 {
9155 case IOR:
9156 s = "or";
9157 break;
9158 case AND:
9159 s = "and";
9160 break;
9161 case XOR:
9162 s = "xor";
9163 break;
9164 default:
9165 output_operand_lossage ("invalid %%A operand");
9166 s = "";
9167 break;
9168 }
9169 fputs (s, file);
9170 return;
9171
9172 case 'B':
9173 switch (GET_CODE (x))
9174 {
9175 case IOR:
9176 s = "orn";
9177 break;
9178 case AND:
9179 s = "andn";
9180 break;
9181 case XOR:
9182 s = "xnor";
9183 break;
9184 default:
9185 output_operand_lossage ("invalid %%B operand");
9186 s = "";
9187 break;
9188 }
9189 fputs (s, file);
9190 return;
9191
9192 /* This is used by the conditional move instructions. */
9193 case 'C':
9194 {
9195 machine_mode mode = GET_MODE (XEXP (x, 0));
9196 switch (GET_CODE (x))
9197 {
9198 case NE:
9199 if (mode == CCVmode || mode == CCXVmode)
9200 s = "vs";
9201 else
9202 s = "ne";
9203 break;
9204 case EQ:
9205 if (mode == CCVmode || mode == CCXVmode)
9206 s = "vc";
9207 else
9208 s = "e";
9209 break;
9210 case GE:
9211 if (mode == CCNZmode || mode == CCXNZmode)
9212 s = "pos";
9213 else
9214 s = "ge";
9215 break;
9216 case GT:
9217 s = "g";
9218 break;
9219 case LE:
9220 s = "le";
9221 break;
9222 case LT:
9223 if (mode == CCNZmode || mode == CCXNZmode)
9224 s = "neg";
9225 else
9226 s = "l";
9227 break;
9228 case GEU:
9229 s = "geu";
9230 break;
9231 case GTU:
9232 s = "gu";
9233 break;
9234 case LEU:
9235 s = "leu";
9236 break;
9237 case LTU:
9238 s = "lu";
9239 break;
9240 case LTGT:
9241 s = "lg";
9242 break;
9243 case UNORDERED:
9244 s = "u";
9245 break;
9246 case ORDERED:
9247 s = "o";
9248 break;
9249 case UNLT:
9250 s = "ul";
9251 break;
9252 case UNLE:
9253 s = "ule";
9254 break;
9255 case UNGT:
9256 s = "ug";
9257 break;
9258 case UNGE:
9259 s = "uge"
9260 ; break;
9261 case UNEQ:
9262 s = "ue";
9263 break;
9264 default:
9265 output_operand_lossage ("invalid %%C operand");
9266 s = "";
9267 break;
9268 }
9269 fputs (s, file);
9270 return;
9271 }
9272
9273 /* This are used by the movr instruction pattern. */
9274 case 'D':
9275 {
9276 switch (GET_CODE (x))
9277 {
9278 case NE:
9279 s = "ne";
9280 break;
9281 case EQ:
9282 s = "e";
9283 break;
9284 case GE:
9285 s = "gez";
9286 break;
9287 case LT:
9288 s = "lz";
9289 break;
9290 case LE:
9291 s = "lez";
9292 break;
9293 case GT:
9294 s = "gz";
9295 break;
9296 default:
9297 output_operand_lossage ("invalid %%D operand");
9298 s = "";
9299 break;
9300 }
9301 fputs (s, file);
9302 return;
9303 }
9304
9305 case 'b':
9306 {
9307 /* Print a sign-extended character. */
9308 int i = trunc_int_for_mode (INTVAL (x), QImode);
9309 fprintf (file, "%d", i);
9310 return;
9311 }
9312
9313 case 'f':
9314 /* Operand must be a MEM; write its address. */
9315 if (GET_CODE (x) != MEM)
9316 output_operand_lossage ("invalid %%f operand");
9317 output_address (GET_MODE (x), XEXP (x, 0));
9318 return;
9319
9320 case 's':
9321 {
9322 /* Print a sign-extended 32-bit value. */
9323 HOST_WIDE_INT i;
9324 if (GET_CODE(x) == CONST_INT)
9325 i = INTVAL (x);
9326 else
9327 {
9328 output_operand_lossage ("invalid %%s operand");
9329 return;
9330 }
9331 i = trunc_int_for_mode (i, SImode);
9332 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9333 return;
9334 }
9335
9336 case 0:
9337 /* Do nothing special. */
9338 break;
9339
9340 default:
9341 /* Undocumented flag. */
9342 output_operand_lossage ("invalid operand output code");
9343 }
9344
9345 if (GET_CODE (x) == REG)
9346 fputs (reg_names[REGNO (x)], file);
9347 else if (GET_CODE (x) == MEM)
9348 {
9349 fputc ('[', file);
9350 /* Poor Sun assembler doesn't understand absolute addressing. */
9351 if (CONSTANT_P (XEXP (x, 0)))
9352 fputs ("%g0+", file);
9353 output_address (GET_MODE (x), XEXP (x, 0));
9354 fputc (']', file);
9355 }
9356 else if (GET_CODE (x) == HIGH)
9357 {
9358 fputs ("%hi(", file);
9359 output_addr_const (file, XEXP (x, 0));
9360 fputc (')', file);
9361 }
9362 else if (GET_CODE (x) == LO_SUM)
9363 {
9364 sparc_print_operand (file, XEXP (x, 0), 0);
9365 if (TARGET_CM_MEDMID)
9366 fputs ("+%l44(", file);
9367 else
9368 fputs ("+%lo(", file);
9369 output_addr_const (file, XEXP (x, 1));
9370 fputc (')', file);
9371 }
9372 else if (GET_CODE (x) == CONST_DOUBLE)
9373 output_operand_lossage ("floating-point constant not a valid immediate operand");
9374 else
9375 output_addr_const (file, x);
9376 }
9377
9378 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9379
9380 static void
9381 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9382 {
9383 register rtx base, index = 0;
9384 int offset = 0;
9385 register rtx addr = x;
9386
9387 if (REG_P (addr))
9388 fputs (reg_names[REGNO (addr)], file);
9389 else if (GET_CODE (addr) == PLUS)
9390 {
9391 if (CONST_INT_P (XEXP (addr, 0)))
9392 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9393 else if (CONST_INT_P (XEXP (addr, 1)))
9394 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9395 else
9396 base = XEXP (addr, 0), index = XEXP (addr, 1);
9397 if (GET_CODE (base) == LO_SUM)
9398 {
9399 gcc_assert (USE_AS_OFFSETABLE_LO10
9400 && TARGET_ARCH64
9401 && ! TARGET_CM_MEDMID);
9402 output_operand (XEXP (base, 0), 0);
9403 fputs ("+%lo(", file);
9404 output_address (VOIDmode, XEXP (base, 1));
9405 fprintf (file, ")+%d", offset);
9406 }
9407 else
9408 {
9409 fputs (reg_names[REGNO (base)], file);
9410 if (index == 0)
9411 fprintf (file, "%+d", offset);
9412 else if (REG_P (index))
9413 fprintf (file, "+%s", reg_names[REGNO (index)]);
9414 else if (GET_CODE (index) == SYMBOL_REF
9415 || GET_CODE (index) == LABEL_REF
9416 || GET_CODE (index) == CONST)
9417 fputc ('+', file), output_addr_const (file, index);
9418 else gcc_unreachable ();
9419 }
9420 }
9421 else if (GET_CODE (addr) == MINUS
9422 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9423 {
9424 output_addr_const (file, XEXP (addr, 0));
9425 fputs ("-(", file);
9426 output_addr_const (file, XEXP (addr, 1));
9427 fputs ("-.)", file);
9428 }
9429 else if (GET_CODE (addr) == LO_SUM)
9430 {
9431 output_operand (XEXP (addr, 0), 0);
9432 if (TARGET_CM_MEDMID)
9433 fputs ("+%l44(", file);
9434 else
9435 fputs ("+%lo(", file);
9436 output_address (VOIDmode, XEXP (addr, 1));
9437 fputc (')', file);
9438 }
9439 else if (flag_pic
9440 && GET_CODE (addr) == CONST
9441 && GET_CODE (XEXP (addr, 0)) == MINUS
9442 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9443 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9444 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9445 {
9446 addr = XEXP (addr, 0);
9447 output_addr_const (file, XEXP (addr, 0));
9448 /* Group the args of the second CONST in parenthesis. */
9449 fputs ("-(", file);
9450 /* Skip past the second CONST--it does nothing for us. */
9451 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9452 /* Close the parenthesis. */
9453 fputc (')', file);
9454 }
9455 else
9456 {
9457 output_addr_const (file, addr);
9458 }
9459 }
9460 \f
9461 /* Target hook for assembling integer objects. The sparc version has
9462 special handling for aligned DI-mode objects. */
9463
9464 static bool
9465 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9466 {
9467 /* ??? We only output .xword's for symbols and only then in environments
9468 where the assembler can handle them. */
9469 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9470 {
9471 if (TARGET_V9)
9472 {
9473 assemble_integer_with_op ("\t.xword\t", x);
9474 return true;
9475 }
9476 else
9477 {
9478 assemble_aligned_integer (4, const0_rtx);
9479 assemble_aligned_integer (4, x);
9480 return true;
9481 }
9482 }
9483 return default_assemble_integer (x, size, aligned_p);
9484 }
9485 \f
9486 /* Return the value of a code used in the .proc pseudo-op that says
9487 what kind of result this function returns. For non-C types, we pick
9488 the closest C type. */
9489
9490 #ifndef SHORT_TYPE_SIZE
9491 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9492 #endif
9493
9494 #ifndef INT_TYPE_SIZE
9495 #define INT_TYPE_SIZE BITS_PER_WORD
9496 #endif
9497
9498 #ifndef LONG_TYPE_SIZE
9499 #define LONG_TYPE_SIZE BITS_PER_WORD
9500 #endif
9501
9502 #ifndef LONG_LONG_TYPE_SIZE
9503 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9504 #endif
9505
9506 #ifndef FLOAT_TYPE_SIZE
9507 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9508 #endif
9509
9510 #ifndef DOUBLE_TYPE_SIZE
9511 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9512 #endif
9513
9514 #ifndef LONG_DOUBLE_TYPE_SIZE
9515 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9516 #endif
9517
9518 unsigned long
9519 sparc_type_code (register tree type)
9520 {
9521 register unsigned long qualifiers = 0;
9522 register unsigned shift;
9523
9524 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9525 setting more, since some assemblers will give an error for this. Also,
9526 we must be careful to avoid shifts of 32 bits or more to avoid getting
9527 unpredictable results. */
9528
9529 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9530 {
9531 switch (TREE_CODE (type))
9532 {
9533 case ERROR_MARK:
9534 return qualifiers;
9535
9536 case ARRAY_TYPE:
9537 qualifiers |= (3 << shift);
9538 break;
9539
9540 case FUNCTION_TYPE:
9541 case METHOD_TYPE:
9542 qualifiers |= (2 << shift);
9543 break;
9544
9545 case POINTER_TYPE:
9546 case REFERENCE_TYPE:
9547 case OFFSET_TYPE:
9548 qualifiers |= (1 << shift);
9549 break;
9550
9551 case RECORD_TYPE:
9552 return (qualifiers | 8);
9553
9554 case UNION_TYPE:
9555 case QUAL_UNION_TYPE:
9556 return (qualifiers | 9);
9557
9558 case ENUMERAL_TYPE:
9559 return (qualifiers | 10);
9560
9561 case VOID_TYPE:
9562 return (qualifiers | 16);
9563
9564 case INTEGER_TYPE:
9565 /* If this is a range type, consider it to be the underlying
9566 type. */
9567 if (TREE_TYPE (type) != 0)
9568 break;
9569
9570 /* Carefully distinguish all the standard types of C,
9571 without messing up if the language is not C. We do this by
9572 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9573 look at both the names and the above fields, but that's redundant.
9574 Any type whose size is between two C types will be considered
9575 to be the wider of the two types. Also, we do not have a
9576 special code to use for "long long", so anything wider than
9577 long is treated the same. Note that we can't distinguish
9578 between "int" and "long" in this code if they are the same
9579 size, but that's fine, since neither can the assembler. */
9580
9581 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9582 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9583
9584 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9585 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9586
9587 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9588 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9589
9590 else
9591 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9592
9593 case REAL_TYPE:
9594 /* If this is a range type, consider it to be the underlying
9595 type. */
9596 if (TREE_TYPE (type) != 0)
9597 break;
9598
9599 /* Carefully distinguish all the standard types of C,
9600 without messing up if the language is not C. */
9601
9602 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9603 return (qualifiers | 6);
9604
9605 else
9606 return (qualifiers | 7);
9607
9608 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9609 /* ??? We need to distinguish between double and float complex types,
9610 but I don't know how yet because I can't reach this code from
9611 existing front-ends. */
9612 return (qualifiers | 7); /* Who knows? */
9613
9614 case VECTOR_TYPE:
9615 case BOOLEAN_TYPE: /* Boolean truth value type. */
9616 case LANG_TYPE:
9617 case NULLPTR_TYPE:
9618 return qualifiers;
9619
9620 default:
9621 gcc_unreachable (); /* Not a type! */
9622 }
9623 }
9624
9625 return qualifiers;
9626 }
9627 \f
9628 /* Nested function support. */
9629
9630 /* Emit RTL insns to initialize the variable parts of a trampoline.
9631 FNADDR is an RTX for the address of the function's pure code.
9632 CXT is an RTX for the static chain value for the function.
9633
9634 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9635 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9636 (to store insns). This is a bit excessive. Perhaps a different
9637 mechanism would be better here.
9638
9639 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9640
9641 static void
9642 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9643 {
9644 /* SPARC 32-bit trampoline:
9645
9646 sethi %hi(fn), %g1
9647 sethi %hi(static), %g2
9648 jmp %g1+%lo(fn)
9649 or %g2, %lo(static), %g2
9650
9651 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9652 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9653 */
9654
9655 emit_move_insn
9656 (adjust_address (m_tramp, SImode, 0),
9657 expand_binop (SImode, ior_optab,
9658 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9659 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9660 NULL_RTX, 1, OPTAB_DIRECT));
9661
9662 emit_move_insn
9663 (adjust_address (m_tramp, SImode, 4),
9664 expand_binop (SImode, ior_optab,
9665 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9666 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9667 NULL_RTX, 1, OPTAB_DIRECT));
9668
9669 emit_move_insn
9670 (adjust_address (m_tramp, SImode, 8),
9671 expand_binop (SImode, ior_optab,
9672 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9673 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9674 NULL_RTX, 1, OPTAB_DIRECT));
9675
9676 emit_move_insn
9677 (adjust_address (m_tramp, SImode, 12),
9678 expand_binop (SImode, ior_optab,
9679 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9680 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9681 NULL_RTX, 1, OPTAB_DIRECT));
9682
9683 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9684 aligned on a 16 byte boundary so one flush clears it all. */
9685 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9686 if (sparc_cpu != PROCESSOR_ULTRASPARC
9687 && sparc_cpu != PROCESSOR_ULTRASPARC3
9688 && sparc_cpu != PROCESSOR_NIAGARA
9689 && sparc_cpu != PROCESSOR_NIAGARA2
9690 && sparc_cpu != PROCESSOR_NIAGARA3
9691 && sparc_cpu != PROCESSOR_NIAGARA4
9692 && sparc_cpu != PROCESSOR_NIAGARA7
9693 && sparc_cpu != PROCESSOR_M8)
9694 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9695
9696 /* Call __enable_execute_stack after writing onto the stack to make sure
9697 the stack address is accessible. */
9698 #ifdef HAVE_ENABLE_EXECUTE_STACK
9699 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9700 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9701 #endif
9702
9703 }
9704
9705 /* The 64-bit version is simpler because it makes more sense to load the
9706 values as "immediate" data out of the trampoline. It's also easier since
9707 we can read the PC without clobbering a register. */
9708
9709 static void
9710 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9711 {
9712 /* SPARC 64-bit trampoline:
9713
9714 rd %pc, %g1
9715 ldx [%g1+24], %g5
9716 jmp %g5
9717 ldx [%g1+16], %g5
9718 +16 bytes data
9719 */
9720
9721 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9722 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9723 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9724 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9725 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9726 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9727 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9728 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9729 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9730 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9731 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9732
9733 if (sparc_cpu != PROCESSOR_ULTRASPARC
9734 && sparc_cpu != PROCESSOR_ULTRASPARC3
9735 && sparc_cpu != PROCESSOR_NIAGARA
9736 && sparc_cpu != PROCESSOR_NIAGARA2
9737 && sparc_cpu != PROCESSOR_NIAGARA3
9738 && sparc_cpu != PROCESSOR_NIAGARA4
9739 && sparc_cpu != PROCESSOR_NIAGARA7
9740 && sparc_cpu != PROCESSOR_M8)
9741 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9742
9743 /* Call __enable_execute_stack after writing onto the stack to make sure
9744 the stack address is accessible. */
9745 #ifdef HAVE_ENABLE_EXECUTE_STACK
9746 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9747 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9748 #endif
9749 }
9750
9751 /* Worker for TARGET_TRAMPOLINE_INIT. */
9752
9753 static void
9754 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9755 {
9756 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9757 cxt = force_reg (Pmode, cxt);
9758 if (TARGET_ARCH64)
9759 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9760 else
9761 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9762 }
9763 \f
9764 /* Adjust the cost of a scheduling dependency. Return the new cost of
9765 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9766
9767 static int
9768 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9769 int cost)
9770 {
9771 enum attr_type insn_type;
9772
9773 if (recog_memoized (insn) < 0)
9774 return cost;
9775
9776 insn_type = get_attr_type (insn);
9777
9778 if (dep_type == 0)
9779 {
9780 /* Data dependency; DEP_INSN writes a register that INSN reads some
9781 cycles later. */
9782
9783 /* if a load, then the dependence must be on the memory address;
9784 add an extra "cycle". Note that the cost could be two cycles
9785 if the reg was written late in an instruction group; we ca not tell
9786 here. */
9787 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9788 return cost + 3;
9789
9790 /* Get the delay only if the address of the store is the dependence. */
9791 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9792 {
9793 rtx pat = PATTERN(insn);
9794 rtx dep_pat = PATTERN (dep_insn);
9795
9796 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9797 return cost; /* This should not happen! */
9798
9799 /* The dependency between the two instructions was on the data that
9800 is being stored. Assume that this implies that the address of the
9801 store is not dependent. */
9802 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9803 return cost;
9804
9805 return cost + 3; /* An approximation. */
9806 }
9807
9808 /* A shift instruction cannot receive its data from an instruction
9809 in the same cycle; add a one cycle penalty. */
9810 if (insn_type == TYPE_SHIFT)
9811 return cost + 3; /* Split before cascade into shift. */
9812 }
9813 else
9814 {
9815 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9816 INSN writes some cycles later. */
9817
9818 /* These are only significant for the fpu unit; writing a fp reg before
9819 the fpu has finished with it stalls the processor. */
9820
9821 /* Reusing an integer register causes no problems. */
9822 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9823 return 0;
9824 }
9825
9826 return cost;
9827 }
9828
9829 static int
9830 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9831 int cost)
9832 {
9833 enum attr_type insn_type, dep_type;
9834 rtx pat = PATTERN(insn);
9835 rtx dep_pat = PATTERN (dep_insn);
9836
9837 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9838 return cost;
9839
9840 insn_type = get_attr_type (insn);
9841 dep_type = get_attr_type (dep_insn);
9842
9843 switch (dtype)
9844 {
9845 case 0:
9846 /* Data dependency; DEP_INSN writes a register that INSN reads some
9847 cycles later. */
9848
9849 switch (insn_type)
9850 {
9851 case TYPE_STORE:
9852 case TYPE_FPSTORE:
9853 /* Get the delay iff the address of the store is the dependence. */
9854 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9855 return cost;
9856
9857 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9858 return cost;
9859 return cost + 3;
9860
9861 case TYPE_LOAD:
9862 case TYPE_SLOAD:
9863 case TYPE_FPLOAD:
9864 /* If a load, then the dependence must be on the memory address. If
9865 the addresses aren't equal, then it might be a false dependency */
9866 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9867 {
9868 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9869 || GET_CODE (SET_DEST (dep_pat)) != MEM
9870 || GET_CODE (SET_SRC (pat)) != MEM
9871 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9872 XEXP (SET_SRC (pat), 0)))
9873 return cost + 2;
9874
9875 return cost + 8;
9876 }
9877 break;
9878
9879 case TYPE_BRANCH:
9880 /* Compare to branch latency is 0. There is no benefit from
9881 separating compare and branch. */
9882 if (dep_type == TYPE_COMPARE)
9883 return 0;
9884 /* Floating point compare to branch latency is less than
9885 compare to conditional move. */
9886 if (dep_type == TYPE_FPCMP)
9887 return cost - 1;
9888 break;
9889 default:
9890 break;
9891 }
9892 break;
9893
9894 case REG_DEP_ANTI:
9895 /* Anti-dependencies only penalize the fpu unit. */
9896 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9897 return 0;
9898 break;
9899
9900 default:
9901 break;
9902 }
9903
9904 return cost;
9905 }
9906
9907 static int
9908 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9909 unsigned int)
9910 {
9911 switch (sparc_cpu)
9912 {
9913 case PROCESSOR_SUPERSPARC:
9914 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9915 break;
9916 case PROCESSOR_HYPERSPARC:
9917 case PROCESSOR_SPARCLITE86X:
9918 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9919 break;
9920 default:
9921 break;
9922 }
9923 return cost;
9924 }
9925
9926 static void
9927 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9928 int sched_verbose ATTRIBUTE_UNUSED,
9929 int max_ready ATTRIBUTE_UNUSED)
9930 {}
9931
9932 static int
9933 sparc_use_sched_lookahead (void)
9934 {
9935 if (sparc_cpu == PROCESSOR_NIAGARA
9936 || sparc_cpu == PROCESSOR_NIAGARA2
9937 || sparc_cpu == PROCESSOR_NIAGARA3)
9938 return 0;
9939 if (sparc_cpu == PROCESSOR_NIAGARA4
9940 || sparc_cpu == PROCESSOR_NIAGARA7
9941 || sparc_cpu == PROCESSOR_M8)
9942 return 2;
9943 if (sparc_cpu == PROCESSOR_ULTRASPARC
9944 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9945 return 4;
9946 if ((1 << sparc_cpu) &
9947 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9948 (1 << PROCESSOR_SPARCLITE86X)))
9949 return 3;
9950 return 0;
9951 }
9952
9953 static int
9954 sparc_issue_rate (void)
9955 {
9956 switch (sparc_cpu)
9957 {
9958 case PROCESSOR_NIAGARA:
9959 case PROCESSOR_NIAGARA2:
9960 case PROCESSOR_NIAGARA3:
9961 default:
9962 return 1;
9963 case PROCESSOR_NIAGARA4:
9964 case PROCESSOR_NIAGARA7:
9965 case PROCESSOR_V9:
9966 /* Assume V9 processors are capable of at least dual-issue. */
9967 return 2;
9968 case PROCESSOR_SUPERSPARC:
9969 return 3;
9970 case PROCESSOR_HYPERSPARC:
9971 case PROCESSOR_SPARCLITE86X:
9972 return 2;
9973 case PROCESSOR_ULTRASPARC:
9974 case PROCESSOR_ULTRASPARC3:
9975 case PROCESSOR_M8:
9976 return 4;
9977 }
9978 }
9979
9980 static int
9981 set_extends (rtx_insn *insn)
9982 {
9983 register rtx pat = PATTERN (insn);
9984
9985 switch (GET_CODE (SET_SRC (pat)))
9986 {
9987 /* Load and some shift instructions zero extend. */
9988 case MEM:
9989 case ZERO_EXTEND:
9990 /* sethi clears the high bits */
9991 case HIGH:
9992 /* LO_SUM is used with sethi. sethi cleared the high
9993 bits and the values used with lo_sum are positive */
9994 case LO_SUM:
9995 /* Store flag stores 0 or 1 */
9996 case LT: case LTU:
9997 case GT: case GTU:
9998 case LE: case LEU:
9999 case GE: case GEU:
10000 case EQ:
10001 case NE:
10002 return 1;
10003 case AND:
10004 {
10005 rtx op0 = XEXP (SET_SRC (pat), 0);
10006 rtx op1 = XEXP (SET_SRC (pat), 1);
10007 if (GET_CODE (op1) == CONST_INT)
10008 return INTVAL (op1) >= 0;
10009 if (GET_CODE (op0) != REG)
10010 return 0;
10011 if (sparc_check_64 (op0, insn) == 1)
10012 return 1;
10013 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10014 }
10015 case IOR:
10016 case XOR:
10017 {
10018 rtx op0 = XEXP (SET_SRC (pat), 0);
10019 rtx op1 = XEXP (SET_SRC (pat), 1);
10020 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10021 return 0;
10022 if (GET_CODE (op1) == CONST_INT)
10023 return INTVAL (op1) >= 0;
10024 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10025 }
10026 case LSHIFTRT:
10027 return GET_MODE (SET_SRC (pat)) == SImode;
10028 /* Positive integers leave the high bits zero. */
10029 case CONST_INT:
10030 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10031 case ASHIFTRT:
10032 case SIGN_EXTEND:
10033 return - (GET_MODE (SET_SRC (pat)) == SImode);
10034 case REG:
10035 return sparc_check_64 (SET_SRC (pat), insn);
10036 default:
10037 return 0;
10038 }
10039 }
10040
10041 /* We _ought_ to have only one kind per function, but... */
10042 static GTY(()) rtx sparc_addr_diff_list;
10043 static GTY(()) rtx sparc_addr_list;
10044
10045 void
10046 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10047 {
10048 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10049 if (diff)
10050 sparc_addr_diff_list
10051 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10052 else
10053 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10054 }
10055
10056 static void
10057 sparc_output_addr_vec (rtx vec)
10058 {
10059 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10060 int idx, vlen = XVECLEN (body, 0);
10061
10062 #ifdef ASM_OUTPUT_ADDR_VEC_START
10063 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10064 #endif
10065
10066 #ifdef ASM_OUTPUT_CASE_LABEL
10067 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10068 NEXT_INSN (lab));
10069 #else
10070 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10071 #endif
10072
10073 for (idx = 0; idx < vlen; idx++)
10074 {
10075 ASM_OUTPUT_ADDR_VEC_ELT
10076 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10077 }
10078
10079 #ifdef ASM_OUTPUT_ADDR_VEC_END
10080 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10081 #endif
10082 }
10083
10084 static void
10085 sparc_output_addr_diff_vec (rtx vec)
10086 {
10087 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10088 rtx base = XEXP (XEXP (body, 0), 0);
10089 int idx, vlen = XVECLEN (body, 1);
10090
10091 #ifdef ASM_OUTPUT_ADDR_VEC_START
10092 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10093 #endif
10094
10095 #ifdef ASM_OUTPUT_CASE_LABEL
10096 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10097 NEXT_INSN (lab));
10098 #else
10099 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10100 #endif
10101
10102 for (idx = 0; idx < vlen; idx++)
10103 {
10104 ASM_OUTPUT_ADDR_DIFF_ELT
10105 (asm_out_file,
10106 body,
10107 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10108 CODE_LABEL_NUMBER (base));
10109 }
10110
10111 #ifdef ASM_OUTPUT_ADDR_VEC_END
10112 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10113 #endif
10114 }
10115
10116 static void
10117 sparc_output_deferred_case_vectors (void)
10118 {
10119 rtx t;
10120 int align;
10121
10122 if (sparc_addr_list == NULL_RTX
10123 && sparc_addr_diff_list == NULL_RTX)
10124 return;
10125
10126 /* Align to cache line in the function's code section. */
10127 switch_to_section (current_function_section ());
10128
10129 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10130 if (align > 0)
10131 ASM_OUTPUT_ALIGN (asm_out_file, align);
10132
10133 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10134 sparc_output_addr_vec (XEXP (t, 0));
10135 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10136 sparc_output_addr_diff_vec (XEXP (t, 0));
10137
10138 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10139 }
10140
10141 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10142 unknown. Return 1 if the high bits are zero, -1 if the register is
10143 sign extended. */
10144 int
10145 sparc_check_64 (rtx x, rtx_insn *insn)
10146 {
10147 /* If a register is set only once it is safe to ignore insns this
10148 code does not know how to handle. The loop will either recognize
10149 the single set and return the correct value or fail to recognize
10150 it and return 0. */
10151 int set_once = 0;
10152 rtx y = x;
10153
10154 gcc_assert (GET_CODE (x) == REG);
10155
10156 if (GET_MODE (x) == DImode)
10157 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10158
10159 if (flag_expensive_optimizations
10160 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10161 set_once = 1;
10162
10163 if (insn == 0)
10164 {
10165 if (set_once)
10166 insn = get_last_insn_anywhere ();
10167 else
10168 return 0;
10169 }
10170
10171 while ((insn = PREV_INSN (insn)))
10172 {
10173 switch (GET_CODE (insn))
10174 {
10175 case JUMP_INSN:
10176 case NOTE:
10177 break;
10178 case CODE_LABEL:
10179 case CALL_INSN:
10180 default:
10181 if (! set_once)
10182 return 0;
10183 break;
10184 case INSN:
10185 {
10186 rtx pat = PATTERN (insn);
10187 if (GET_CODE (pat) != SET)
10188 return 0;
10189 if (rtx_equal_p (x, SET_DEST (pat)))
10190 return set_extends (insn);
10191 if (y && rtx_equal_p (y, SET_DEST (pat)))
10192 return set_extends (insn);
10193 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10194 return 0;
10195 }
10196 }
10197 }
10198 return 0;
10199 }
10200
10201 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10202 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10203
10204 const char *
10205 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10206 {
10207 static char asm_code[60];
10208
10209 /* The scratch register is only required when the destination
10210 register is not a 64-bit global or out register. */
10211 if (which_alternative != 2)
10212 operands[3] = operands[0];
10213
10214 /* We can only shift by constants <= 63. */
10215 if (GET_CODE (operands[2]) == CONST_INT)
10216 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10217
10218 if (GET_CODE (operands[1]) == CONST_INT)
10219 {
10220 output_asm_insn ("mov\t%1, %3", operands);
10221 }
10222 else
10223 {
10224 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10225 if (sparc_check_64 (operands[1], insn) <= 0)
10226 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10227 output_asm_insn ("or\t%L1, %3, %3", operands);
10228 }
10229
10230 strcpy (asm_code, opcode);
10231
10232 if (which_alternative != 2)
10233 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10234 else
10235 return
10236 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10237 }
10238 \f
10239 /* Output rtl to increment the profiler label LABELNO
10240 for profiling a function entry. */
10241
10242 void
10243 sparc_profile_hook (int labelno)
10244 {
10245 char buf[32];
10246 rtx lab, fun;
10247
10248 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10249 if (NO_PROFILE_COUNTERS)
10250 {
10251 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10252 }
10253 else
10254 {
10255 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10256 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10257 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10258 }
10259 }
10260 \f
10261 #ifdef TARGET_SOLARIS
10262 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10263
10264 static void
10265 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10266 tree decl ATTRIBUTE_UNUSED)
10267 {
10268 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10269 {
10270 solaris_elf_asm_comdat_section (name, flags, decl);
10271 return;
10272 }
10273
10274 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10275
10276 if (!(flags & SECTION_DEBUG))
10277 fputs (",#alloc", asm_out_file);
10278 if (flags & SECTION_WRITE)
10279 fputs (",#write", asm_out_file);
10280 if (flags & SECTION_TLS)
10281 fputs (",#tls", asm_out_file);
10282 if (flags & SECTION_CODE)
10283 fputs (",#execinstr", asm_out_file);
10284
10285 if (flags & SECTION_NOTYPE)
10286 ;
10287 else if (flags & SECTION_BSS)
10288 fputs (",#nobits", asm_out_file);
10289 else
10290 fputs (",#progbits", asm_out_file);
10291
10292 fputc ('\n', asm_out_file);
10293 }
10294 #endif /* TARGET_SOLARIS */
10295
10296 /* We do not allow indirect calls to be optimized into sibling calls.
10297
10298 We cannot use sibling calls when delayed branches are disabled
10299 because they will likely require the call delay slot to be filled.
10300
10301 Also, on SPARC 32-bit we cannot emit a sibling call when the
10302 current function returns a structure. This is because the "unimp
10303 after call" convention would cause the callee to return to the
10304 wrong place. The generic code already disallows cases where the
10305 function being called returns a structure.
10306
10307 It may seem strange how this last case could occur. Usually there
10308 is code after the call which jumps to epilogue code which dumps the
10309 return value into the struct return area. That ought to invalidate
10310 the sibling call right? Well, in the C++ case we can end up passing
10311 the pointer to the struct return area to a constructor (which returns
10312 void) and then nothing else happens. Such a sibling call would look
10313 valid without the added check here.
10314
10315 VxWorks PIC PLT entries require the global pointer to be initialized
10316 on entry. We therefore can't emit sibling calls to them. */
10317 static bool
10318 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10319 {
10320 return (decl
10321 && flag_delayed_branch
10322 && (TARGET_ARCH64 || ! cfun->returns_struct)
10323 && !(TARGET_VXWORKS_RTP
10324 && flag_pic
10325 && !targetm.binds_local_p (decl)));
10326 }
10327 \f
10328 /* libfunc renaming. */
10329
10330 static void
10331 sparc_init_libfuncs (void)
10332 {
10333 if (TARGET_ARCH32)
10334 {
10335 /* Use the subroutines that Sun's library provides for integer
10336 multiply and divide. The `*' prevents an underscore from
10337 being prepended by the compiler. .umul is a little faster
10338 than .mul. */
10339 set_optab_libfunc (smul_optab, SImode, "*.umul");
10340 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10341 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10342 set_optab_libfunc (smod_optab, SImode, "*.rem");
10343 set_optab_libfunc (umod_optab, SImode, "*.urem");
10344
10345 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10346 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10347 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10348 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10349 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10350 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10351
10352 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10353 is because with soft-float, the SFmode and DFmode sqrt
10354 instructions will be absent, and the compiler will notice and
10355 try to use the TFmode sqrt instruction for calls to the
10356 builtin function sqrt, but this fails. */
10357 if (TARGET_FPU)
10358 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10359
10360 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10361 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10362 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10363 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10364 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10365 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10366
10367 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10368 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10369 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10370 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10371
10372 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10373 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10374 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10375 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10376
10377 if (DITF_CONVERSION_LIBFUNCS)
10378 {
10379 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10380 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10381 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10382 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10383 }
10384
10385 if (SUN_CONVERSION_LIBFUNCS)
10386 {
10387 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10388 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10389 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10390 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10391 }
10392 }
10393 if (TARGET_ARCH64)
10394 {
10395 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10396 do not exist in the library. Make sure the compiler does not
10397 emit calls to them by accident. (It should always use the
10398 hardware instructions.) */
10399 set_optab_libfunc (smul_optab, SImode, 0);
10400 set_optab_libfunc (sdiv_optab, SImode, 0);
10401 set_optab_libfunc (udiv_optab, SImode, 0);
10402 set_optab_libfunc (smod_optab, SImode, 0);
10403 set_optab_libfunc (umod_optab, SImode, 0);
10404
10405 if (SUN_INTEGER_MULTIPLY_64)
10406 {
10407 set_optab_libfunc (smul_optab, DImode, "__mul64");
10408 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10409 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10410 set_optab_libfunc (smod_optab, DImode, "__rem64");
10411 set_optab_libfunc (umod_optab, DImode, "__urem64");
10412 }
10413
10414 if (SUN_CONVERSION_LIBFUNCS)
10415 {
10416 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10417 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10418 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10419 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10420 }
10421 }
10422 }
10423 \f
10424 /* SPARC builtins. */
10425 enum sparc_builtins
10426 {
10427 /* FPU builtins. */
10428 SPARC_BUILTIN_LDFSR,
10429 SPARC_BUILTIN_STFSR,
10430
10431 /* VIS 1.0 builtins. */
10432 SPARC_BUILTIN_FPACK16,
10433 SPARC_BUILTIN_FPACK32,
10434 SPARC_BUILTIN_FPACKFIX,
10435 SPARC_BUILTIN_FEXPAND,
10436 SPARC_BUILTIN_FPMERGE,
10437 SPARC_BUILTIN_FMUL8X16,
10438 SPARC_BUILTIN_FMUL8X16AU,
10439 SPARC_BUILTIN_FMUL8X16AL,
10440 SPARC_BUILTIN_FMUL8SUX16,
10441 SPARC_BUILTIN_FMUL8ULX16,
10442 SPARC_BUILTIN_FMULD8SUX16,
10443 SPARC_BUILTIN_FMULD8ULX16,
10444 SPARC_BUILTIN_FALIGNDATAV4HI,
10445 SPARC_BUILTIN_FALIGNDATAV8QI,
10446 SPARC_BUILTIN_FALIGNDATAV2SI,
10447 SPARC_BUILTIN_FALIGNDATADI,
10448 SPARC_BUILTIN_WRGSR,
10449 SPARC_BUILTIN_RDGSR,
10450 SPARC_BUILTIN_ALIGNADDR,
10451 SPARC_BUILTIN_ALIGNADDRL,
10452 SPARC_BUILTIN_PDIST,
10453 SPARC_BUILTIN_EDGE8,
10454 SPARC_BUILTIN_EDGE8L,
10455 SPARC_BUILTIN_EDGE16,
10456 SPARC_BUILTIN_EDGE16L,
10457 SPARC_BUILTIN_EDGE32,
10458 SPARC_BUILTIN_EDGE32L,
10459 SPARC_BUILTIN_FCMPLE16,
10460 SPARC_BUILTIN_FCMPLE32,
10461 SPARC_BUILTIN_FCMPNE16,
10462 SPARC_BUILTIN_FCMPNE32,
10463 SPARC_BUILTIN_FCMPGT16,
10464 SPARC_BUILTIN_FCMPGT32,
10465 SPARC_BUILTIN_FCMPEQ16,
10466 SPARC_BUILTIN_FCMPEQ32,
10467 SPARC_BUILTIN_FPADD16,
10468 SPARC_BUILTIN_FPADD16S,
10469 SPARC_BUILTIN_FPADD32,
10470 SPARC_BUILTIN_FPADD32S,
10471 SPARC_BUILTIN_FPSUB16,
10472 SPARC_BUILTIN_FPSUB16S,
10473 SPARC_BUILTIN_FPSUB32,
10474 SPARC_BUILTIN_FPSUB32S,
10475 SPARC_BUILTIN_ARRAY8,
10476 SPARC_BUILTIN_ARRAY16,
10477 SPARC_BUILTIN_ARRAY32,
10478
10479 /* VIS 2.0 builtins. */
10480 SPARC_BUILTIN_EDGE8N,
10481 SPARC_BUILTIN_EDGE8LN,
10482 SPARC_BUILTIN_EDGE16N,
10483 SPARC_BUILTIN_EDGE16LN,
10484 SPARC_BUILTIN_EDGE32N,
10485 SPARC_BUILTIN_EDGE32LN,
10486 SPARC_BUILTIN_BMASK,
10487 SPARC_BUILTIN_BSHUFFLEV4HI,
10488 SPARC_BUILTIN_BSHUFFLEV8QI,
10489 SPARC_BUILTIN_BSHUFFLEV2SI,
10490 SPARC_BUILTIN_BSHUFFLEDI,
10491
10492 /* VIS 3.0 builtins. */
10493 SPARC_BUILTIN_CMASK8,
10494 SPARC_BUILTIN_CMASK16,
10495 SPARC_BUILTIN_CMASK32,
10496 SPARC_BUILTIN_FCHKSM16,
10497 SPARC_BUILTIN_FSLL16,
10498 SPARC_BUILTIN_FSLAS16,
10499 SPARC_BUILTIN_FSRL16,
10500 SPARC_BUILTIN_FSRA16,
10501 SPARC_BUILTIN_FSLL32,
10502 SPARC_BUILTIN_FSLAS32,
10503 SPARC_BUILTIN_FSRL32,
10504 SPARC_BUILTIN_FSRA32,
10505 SPARC_BUILTIN_PDISTN,
10506 SPARC_BUILTIN_FMEAN16,
10507 SPARC_BUILTIN_FPADD64,
10508 SPARC_BUILTIN_FPSUB64,
10509 SPARC_BUILTIN_FPADDS16,
10510 SPARC_BUILTIN_FPADDS16S,
10511 SPARC_BUILTIN_FPSUBS16,
10512 SPARC_BUILTIN_FPSUBS16S,
10513 SPARC_BUILTIN_FPADDS32,
10514 SPARC_BUILTIN_FPADDS32S,
10515 SPARC_BUILTIN_FPSUBS32,
10516 SPARC_BUILTIN_FPSUBS32S,
10517 SPARC_BUILTIN_FUCMPLE8,
10518 SPARC_BUILTIN_FUCMPNE8,
10519 SPARC_BUILTIN_FUCMPGT8,
10520 SPARC_BUILTIN_FUCMPEQ8,
10521 SPARC_BUILTIN_FHADDS,
10522 SPARC_BUILTIN_FHADDD,
10523 SPARC_BUILTIN_FHSUBS,
10524 SPARC_BUILTIN_FHSUBD,
10525 SPARC_BUILTIN_FNHADDS,
10526 SPARC_BUILTIN_FNHADDD,
10527 SPARC_BUILTIN_UMULXHI,
10528 SPARC_BUILTIN_XMULX,
10529 SPARC_BUILTIN_XMULXHI,
10530
10531 /* VIS 4.0 builtins. */
10532 SPARC_BUILTIN_FPADD8,
10533 SPARC_BUILTIN_FPADDS8,
10534 SPARC_BUILTIN_FPADDUS8,
10535 SPARC_BUILTIN_FPADDUS16,
10536 SPARC_BUILTIN_FPCMPLE8,
10537 SPARC_BUILTIN_FPCMPGT8,
10538 SPARC_BUILTIN_FPCMPULE16,
10539 SPARC_BUILTIN_FPCMPUGT16,
10540 SPARC_BUILTIN_FPCMPULE32,
10541 SPARC_BUILTIN_FPCMPUGT32,
10542 SPARC_BUILTIN_FPMAX8,
10543 SPARC_BUILTIN_FPMAX16,
10544 SPARC_BUILTIN_FPMAX32,
10545 SPARC_BUILTIN_FPMAXU8,
10546 SPARC_BUILTIN_FPMAXU16,
10547 SPARC_BUILTIN_FPMAXU32,
10548 SPARC_BUILTIN_FPMIN8,
10549 SPARC_BUILTIN_FPMIN16,
10550 SPARC_BUILTIN_FPMIN32,
10551 SPARC_BUILTIN_FPMINU8,
10552 SPARC_BUILTIN_FPMINU16,
10553 SPARC_BUILTIN_FPMINU32,
10554 SPARC_BUILTIN_FPSUB8,
10555 SPARC_BUILTIN_FPSUBS8,
10556 SPARC_BUILTIN_FPSUBUS8,
10557 SPARC_BUILTIN_FPSUBUS16,
10558
10559 /* VIS 4.0B builtins. */
10560
10561 /* Note that all the DICTUNPACK* entries should be kept
10562 contiguous. */
10563 SPARC_BUILTIN_FIRST_DICTUNPACK,
10564 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10565 SPARC_BUILTIN_DICTUNPACK16,
10566 SPARC_BUILTIN_DICTUNPACK32,
10567 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10568
10569 /* Note that all the FPCMP*SHL entries should be kept
10570 contiguous. */
10571 SPARC_BUILTIN_FIRST_FPCMPSHL,
10572 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10573 SPARC_BUILTIN_FPCMPGT8SHL,
10574 SPARC_BUILTIN_FPCMPEQ8SHL,
10575 SPARC_BUILTIN_FPCMPNE8SHL,
10576 SPARC_BUILTIN_FPCMPLE16SHL,
10577 SPARC_BUILTIN_FPCMPGT16SHL,
10578 SPARC_BUILTIN_FPCMPEQ16SHL,
10579 SPARC_BUILTIN_FPCMPNE16SHL,
10580 SPARC_BUILTIN_FPCMPLE32SHL,
10581 SPARC_BUILTIN_FPCMPGT32SHL,
10582 SPARC_BUILTIN_FPCMPEQ32SHL,
10583 SPARC_BUILTIN_FPCMPNE32SHL,
10584 SPARC_BUILTIN_FPCMPULE8SHL,
10585 SPARC_BUILTIN_FPCMPUGT8SHL,
10586 SPARC_BUILTIN_FPCMPULE16SHL,
10587 SPARC_BUILTIN_FPCMPUGT16SHL,
10588 SPARC_BUILTIN_FPCMPULE32SHL,
10589 SPARC_BUILTIN_FPCMPUGT32SHL,
10590 SPARC_BUILTIN_FPCMPDE8SHL,
10591 SPARC_BUILTIN_FPCMPDE16SHL,
10592 SPARC_BUILTIN_FPCMPDE32SHL,
10593 SPARC_BUILTIN_FPCMPUR8SHL,
10594 SPARC_BUILTIN_FPCMPUR16SHL,
10595 SPARC_BUILTIN_FPCMPUR32SHL,
10596 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10597
10598 SPARC_BUILTIN_MAX
10599 };
10600
10601 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10602 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10603
10604 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10605 The instruction should require a constant operand of some sort. The
10606 function prints an error if OPVAL is not valid. */
10607
10608 static int
10609 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10610 {
10611 if (GET_CODE (opval) != CONST_INT)
10612 {
10613 error ("%qs expects a constant argument", insn_data[icode].name);
10614 return false;
10615 }
10616
10617 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10618 {
10619 error ("constant argument out of range for %qs", insn_data[icode].name);
10620 return false;
10621 }
10622 return true;
10623 }
10624
10625 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10626 function decl or NULL_TREE if the builtin was not added. */
10627
10628 static tree
10629 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10630 tree type)
10631 {
10632 tree t
10633 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10634
10635 if (t)
10636 {
10637 sparc_builtins[code] = t;
10638 sparc_builtins_icode[code] = icode;
10639 }
10640
10641 return t;
10642 }
10643
10644 /* Likewise, but also marks the function as "const". */
10645
10646 static tree
10647 def_builtin_const (const char *name, enum insn_code icode,
10648 enum sparc_builtins code, tree type)
10649 {
10650 tree t = def_builtin (name, icode, code, type);
10651
10652 if (t)
10653 TREE_READONLY (t) = 1;
10654
10655 return t;
10656 }
10657
10658 /* Implement the TARGET_INIT_BUILTINS target hook.
10659 Create builtin functions for special SPARC instructions. */
10660
10661 static void
10662 sparc_init_builtins (void)
10663 {
10664 if (TARGET_FPU)
10665 sparc_fpu_init_builtins ();
10666
10667 if (TARGET_VIS)
10668 sparc_vis_init_builtins ();
10669 }
10670
10671 /* Create builtin functions for FPU instructions. */
10672
10673 static void
10674 sparc_fpu_init_builtins (void)
10675 {
10676 tree ftype
10677 = build_function_type_list (void_type_node,
10678 build_pointer_type (unsigned_type_node), 0);
10679 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10680 SPARC_BUILTIN_LDFSR, ftype);
10681 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10682 SPARC_BUILTIN_STFSR, ftype);
10683 }
10684
10685 /* Create builtin functions for VIS instructions. */
10686
10687 static void
10688 sparc_vis_init_builtins (void)
10689 {
10690 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10691 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10692 tree v4hi = build_vector_type (intHI_type_node, 4);
10693 tree v2hi = build_vector_type (intHI_type_node, 2);
10694 tree v2si = build_vector_type (intSI_type_node, 2);
10695 tree v1si = build_vector_type (intSI_type_node, 1);
10696
10697 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10698 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10699 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10700 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10701 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10702 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10703 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10704 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10705 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10706 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10707 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10708 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10709 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10710 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10711 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10712 v8qi, v8qi,
10713 intDI_type_node, 0);
10714 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10715 v8qi, v8qi, 0);
10716 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10717 v8qi, v8qi, 0);
10718 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10719 intSI_type_node, 0);
10720 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10721 intSI_type_node, 0);
10722 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10723 intDI_type_node, 0);
10724 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10725 intDI_type_node,
10726 intDI_type_node, 0);
10727 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10728 intSI_type_node,
10729 intSI_type_node, 0);
10730 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10731 ptr_type_node,
10732 intSI_type_node, 0);
10733 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10734 ptr_type_node,
10735 intDI_type_node, 0);
10736 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10737 ptr_type_node,
10738 ptr_type_node, 0);
10739 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10740 ptr_type_node,
10741 ptr_type_node, 0);
10742 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10743 v4hi, v4hi, 0);
10744 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10745 v2si, v2si, 0);
10746 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10747 v4hi, v4hi, 0);
10748 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10749 v2si, v2si, 0);
10750 tree void_ftype_di = build_function_type_list (void_type_node,
10751 intDI_type_node, 0);
10752 tree di_ftype_void = build_function_type_list (intDI_type_node,
10753 void_type_node, 0);
10754 tree void_ftype_si = build_function_type_list (void_type_node,
10755 intSI_type_node, 0);
10756 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10757 float_type_node,
10758 float_type_node, 0);
10759 tree df_ftype_df_df = build_function_type_list (double_type_node,
10760 double_type_node,
10761 double_type_node, 0);
10762
10763 /* Packing and expanding vectors. */
10764 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10765 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10766 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10767 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10768 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10769 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10770 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10771 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10772 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10773 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10774
10775 /* Multiplications. */
10776 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10777 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10778 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10779 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10780 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10781 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10782 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10783 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10784 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10785 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10786 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10787 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10788 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10789 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10790
10791 /* Data aligning. */
10792 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10793 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10794 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10795 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10796 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10797 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10798 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10799 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10800
10801 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10802 SPARC_BUILTIN_WRGSR, void_ftype_di);
10803 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10804 SPARC_BUILTIN_RDGSR, di_ftype_void);
10805
10806 if (TARGET_ARCH64)
10807 {
10808 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10809 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10810 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10811 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10812 }
10813 else
10814 {
10815 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10816 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10817 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10818 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10819 }
10820
10821 /* Pixel distance. */
10822 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10823 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10824
10825 /* Edge handling. */
10826 if (TARGET_ARCH64)
10827 {
10828 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10829 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10830 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10831 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10832 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10833 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10834 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10835 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10836 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10837 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10838 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10839 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10840 }
10841 else
10842 {
10843 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10844 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10845 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10846 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10847 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10848 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10849 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10850 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10851 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10852 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10853 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10854 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10855 }
10856
10857 /* Pixel compare. */
10858 if (TARGET_ARCH64)
10859 {
10860 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10861 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10862 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10863 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10864 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10865 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10866 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10867 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10868 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10869 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10870 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10871 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10872 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10873 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10874 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10875 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10876 }
10877 else
10878 {
10879 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10880 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10881 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10882 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10883 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10884 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10885 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10886 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10887 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10888 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10889 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10890 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10891 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10892 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10893 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10894 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10895 }
10896
10897 /* Addition and subtraction. */
10898 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10899 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10900 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10901 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10902 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10903 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10904 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10905 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10906 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10907 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10908 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10909 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10910 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10911 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10912 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10913 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10914
10915 /* Three-dimensional array addressing. */
10916 if (TARGET_ARCH64)
10917 {
10918 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10919 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10920 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10921 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10922 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10923 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10924 }
10925 else
10926 {
10927 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10928 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10929 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10930 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10931 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10932 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10933 }
10934
10935 if (TARGET_VIS2)
10936 {
10937 /* Edge handling. */
10938 if (TARGET_ARCH64)
10939 {
10940 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10941 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10942 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10943 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10944 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10945 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10946 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10947 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10948 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10949 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10950 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10951 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10952 }
10953 else
10954 {
10955 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10956 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10957 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10958 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10959 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10960 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10961 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10962 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10963 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10964 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10965 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10966 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10967 }
10968
10969 /* Byte mask and shuffle. */
10970 if (TARGET_ARCH64)
10971 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10972 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10973 else
10974 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10975 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10976 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10977 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10978 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10979 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10980 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10981 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10982 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10983 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10984 }
10985
10986 if (TARGET_VIS3)
10987 {
10988 if (TARGET_ARCH64)
10989 {
10990 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10991 SPARC_BUILTIN_CMASK8, void_ftype_di);
10992 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10993 SPARC_BUILTIN_CMASK16, void_ftype_di);
10994 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10995 SPARC_BUILTIN_CMASK32, void_ftype_di);
10996 }
10997 else
10998 {
10999 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11000 SPARC_BUILTIN_CMASK8, void_ftype_si);
11001 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11002 SPARC_BUILTIN_CMASK16, void_ftype_si);
11003 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11004 SPARC_BUILTIN_CMASK32, void_ftype_si);
11005 }
11006
11007 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11008 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11009
11010 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11011 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11012 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11013 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11014 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11015 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11016 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11017 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11018 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11019 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11020 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11021 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11022 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11023 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11024 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11025 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11026
11027 if (TARGET_ARCH64)
11028 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11029 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11030 else
11031 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11032 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11033
11034 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11035 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11036 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11037 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11038 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11039 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11040
11041 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11042 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11043 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11044 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11045 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11046 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11047 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11048 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11049 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11050 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11051 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11052 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11053 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11054 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11055 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11056 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11057
11058 if (TARGET_ARCH64)
11059 {
11060 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11061 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11062 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11063 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11064 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11065 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11066 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11067 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11068 }
11069 else
11070 {
11071 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11072 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11073 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11074 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11075 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11076 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11077 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11078 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11079 }
11080
11081 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11082 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11083 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11084 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11085 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11086 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11087 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11088 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11089 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11090 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11091 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11092 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11093
11094 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11095 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11096 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11097 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11098 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11099 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11100 }
11101
11102 if (TARGET_VIS4)
11103 {
11104 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11105 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11106 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11107 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11108 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11109 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11110 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11111 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11112
11113
11114 if (TARGET_ARCH64)
11115 {
11116 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11117 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11118 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11119 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11120 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11121 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11122 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11123 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11124 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11125 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11126 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11127 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11128 }
11129 else
11130 {
11131 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11132 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11133 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11134 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11135 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11136 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11137 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11138 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11139 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11140 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11141 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11142 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11143 }
11144
11145 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11146 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11147 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11148 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11149 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11150 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11151 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11152 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11153 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11154 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11155 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11156 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11157 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11158 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11159 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11160 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11161 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11162 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11163 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11164 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11165 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11166 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11167 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11168 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11169 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11170 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11171 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11172 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11173 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11174 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11175 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11176 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11177 }
11178
11179 if (TARGET_VIS4B)
11180 {
11181 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11182 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11183 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11184 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11185 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11186 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11187
11188 if (TARGET_ARCH64)
11189 {
11190 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11191 v8qi, v8qi,
11192 intSI_type_node, 0);
11193 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11194 v4hi, v4hi,
11195 intSI_type_node, 0);
11196 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11197 v2si, v2si,
11198 intSI_type_node, 0);
11199
11200 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11201 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11202 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11203 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11204 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11205 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11206 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11207 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11208
11209 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11210 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11211 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11212 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11213 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11214 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11215 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11216 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11217
11218 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11219 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11220 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11221 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11222 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11223 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11224 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11225 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11226
11227
11228 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11229 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11230 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11231 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11232
11233 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11234 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11235 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11236 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11237
11238 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11239 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11240 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11241 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11242
11243 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11244 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11245 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11246 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11247 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11248 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11249
11250 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11251 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11252 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11253 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11254 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11255 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11256
11257 }
11258 else
11259 {
11260 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11261 v8qi, v8qi,
11262 intSI_type_node, 0);
11263 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11264 v4hi, v4hi,
11265 intSI_type_node, 0);
11266 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11267 v2si, v2si,
11268 intSI_type_node, 0);
11269
11270 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11271 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11272 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11273 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11274 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11275 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11276 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11277 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11278
11279 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11280 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11281 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11282 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11283 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11284 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11285 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11286 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11287
11288 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11289 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11290 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11291 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11292 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11293 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11294 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11295 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11296
11297
11298 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11299 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11300 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11301 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11302
11303 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11304 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11305 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11306 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11307
11308 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11309 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11310 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11311 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11312
11313 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11314 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11315 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11316 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11317 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11318 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11319
11320 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11321 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11322 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11323 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11324 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11325 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11326 }
11327 }
11328 }
11329
11330 /* Implement TARGET_BUILTIN_DECL hook. */
11331
11332 static tree
11333 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11334 {
11335 if (code >= SPARC_BUILTIN_MAX)
11336 return error_mark_node;
11337
11338 return sparc_builtins[code];
11339 }
11340
11341 /* Implemented TARGET_EXPAND_BUILTIN hook. */
11342
11343 static rtx
11344 sparc_expand_builtin (tree exp, rtx target,
11345 rtx subtarget ATTRIBUTE_UNUSED,
11346 machine_mode tmode ATTRIBUTE_UNUSED,
11347 int ignore ATTRIBUTE_UNUSED)
11348 {
11349 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11350 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11351 enum insn_code icode = sparc_builtins_icode[code];
11352 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11353 call_expr_arg_iterator iter;
11354 int arg_count = 0;
11355 rtx pat, op[4];
11356 tree arg;
11357
11358 if (nonvoid)
11359 {
11360 machine_mode tmode = insn_data[icode].operand[0].mode;
11361 if (!target
11362 || GET_MODE (target) != tmode
11363 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11364 op[0] = gen_reg_rtx (tmode);
11365 else
11366 op[0] = target;
11367 }
11368
11369 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11370 {
11371 const struct insn_operand_data *insn_op;
11372 int idx;
11373
11374 if (arg == error_mark_node)
11375 return NULL_RTX;
11376
11377 arg_count++;
11378 idx = arg_count - !nonvoid;
11379 insn_op = &insn_data[icode].operand[idx];
11380 op[arg_count] = expand_normal (arg);
11381
11382 /* Some of the builtins require constant arguments. We check
11383 for this here. */
11384 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11385 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11386 && arg_count == 3)
11387 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11388 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11389 && arg_count == 2))
11390 {
11391 if (!check_constant_argument (icode, idx, op[arg_count]))
11392 return const0_rtx;
11393 }
11394
11395 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11396 {
11397 if (!address_operand (op[arg_count], SImode))
11398 {
11399 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11400 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11401 }
11402 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11403 }
11404
11405 else if (insn_op->mode == V1DImode
11406 && GET_MODE (op[arg_count]) == DImode)
11407 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11408
11409 else if (insn_op->mode == V1SImode
11410 && GET_MODE (op[arg_count]) == SImode)
11411 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11412
11413 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11414 insn_op->mode))
11415 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11416 }
11417
11418 switch (arg_count)
11419 {
11420 case 0:
11421 pat = GEN_FCN (icode) (op[0]);
11422 break;
11423 case 1:
11424 if (nonvoid)
11425 pat = GEN_FCN (icode) (op[0], op[1]);
11426 else
11427 pat = GEN_FCN (icode) (op[1]);
11428 break;
11429 case 2:
11430 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11431 break;
11432 case 3:
11433 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11434 break;
11435 default:
11436 gcc_unreachable ();
11437 }
11438
11439 if (!pat)
11440 return NULL_RTX;
11441
11442 emit_insn (pat);
11443
11444 return (nonvoid ? op[0] : const0_rtx);
11445 }
11446
11447 /* Return the upper 16 bits of the 8x16 multiplication. */
11448
11449 static int
11450 sparc_vis_mul8x16 (int e8, int e16)
11451 {
11452 return (e8 * e16 + 128) / 256;
11453 }
11454
11455 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11456 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11457
11458 static void
11459 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11460 tree inner_type, tree cst0, tree cst1)
11461 {
11462 unsigned i, num = VECTOR_CST_NELTS (cst0);
11463 int scale;
11464
11465 switch (fncode)
11466 {
11467 case SPARC_BUILTIN_FMUL8X16:
11468 for (i = 0; i < num; ++i)
11469 {
11470 int val
11471 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11472 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11473 n_elts->quick_push (build_int_cst (inner_type, val));
11474 }
11475 break;
11476
11477 case SPARC_BUILTIN_FMUL8X16AU:
11478 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11479
11480 for (i = 0; i < num; ++i)
11481 {
11482 int val
11483 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11484 scale);
11485 n_elts->quick_push (build_int_cst (inner_type, val));
11486 }
11487 break;
11488
11489 case SPARC_BUILTIN_FMUL8X16AL:
11490 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11491
11492 for (i = 0; i < num; ++i)
11493 {
11494 int val
11495 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11496 scale);
11497 n_elts->quick_push (build_int_cst (inner_type, val));
11498 }
11499 break;
11500
11501 default:
11502 gcc_unreachable ();
11503 }
11504 }
11505
11506 /* Implement TARGET_FOLD_BUILTIN hook.
11507
11508 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11509 result of the function call is ignored. NULL_TREE is returned if the
11510 function could not be folded. */
11511
11512 static tree
11513 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11514 tree *args, bool ignore)
11515 {
11516 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11517 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11518 tree arg0, arg1, arg2;
11519
11520 if (ignore)
11521 switch (code)
11522 {
11523 case SPARC_BUILTIN_LDFSR:
11524 case SPARC_BUILTIN_STFSR:
11525 case SPARC_BUILTIN_ALIGNADDR:
11526 case SPARC_BUILTIN_WRGSR:
11527 case SPARC_BUILTIN_BMASK:
11528 case SPARC_BUILTIN_CMASK8:
11529 case SPARC_BUILTIN_CMASK16:
11530 case SPARC_BUILTIN_CMASK32:
11531 break;
11532
11533 default:
11534 return build_zero_cst (rtype);
11535 }
11536
11537 switch (code)
11538 {
11539 case SPARC_BUILTIN_FEXPAND:
11540 arg0 = args[0];
11541 STRIP_NOPS (arg0);
11542
11543 if (TREE_CODE (arg0) == VECTOR_CST)
11544 {
11545 tree inner_type = TREE_TYPE (rtype);
11546 unsigned i;
11547
11548 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11549 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11550 {
11551 unsigned HOST_WIDE_INT val
11552 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11553 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11554 }
11555 return build_vector (rtype, n_elts);
11556 }
11557 break;
11558
11559 case SPARC_BUILTIN_FMUL8X16:
11560 case SPARC_BUILTIN_FMUL8X16AU:
11561 case SPARC_BUILTIN_FMUL8X16AL:
11562 arg0 = args[0];
11563 arg1 = args[1];
11564 STRIP_NOPS (arg0);
11565 STRIP_NOPS (arg1);
11566
11567 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11568 {
11569 tree inner_type = TREE_TYPE (rtype);
11570 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11571 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11572 return build_vector (rtype, n_elts);
11573 }
11574 break;
11575
11576 case SPARC_BUILTIN_FPMERGE:
11577 arg0 = args[0];
11578 arg1 = args[1];
11579 STRIP_NOPS (arg0);
11580 STRIP_NOPS (arg1);
11581
11582 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11583 {
11584 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11585 unsigned i;
11586 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11587 {
11588 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11589 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11590 }
11591
11592 return build_vector (rtype, n_elts);
11593 }
11594 break;
11595
11596 case SPARC_BUILTIN_PDIST:
11597 case SPARC_BUILTIN_PDISTN:
11598 arg0 = args[0];
11599 arg1 = args[1];
11600 STRIP_NOPS (arg0);
11601 STRIP_NOPS (arg1);
11602 if (code == SPARC_BUILTIN_PDIST)
11603 {
11604 arg2 = args[2];
11605 STRIP_NOPS (arg2);
11606 }
11607 else
11608 arg2 = integer_zero_node;
11609
11610 if (TREE_CODE (arg0) == VECTOR_CST
11611 && TREE_CODE (arg1) == VECTOR_CST
11612 && TREE_CODE (arg2) == INTEGER_CST)
11613 {
11614 bool overflow = false;
11615 widest_int result = wi::to_widest (arg2);
11616 widest_int tmp;
11617 unsigned i;
11618
11619 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11620 {
11621 tree e0 = VECTOR_CST_ELT (arg0, i);
11622 tree e1 = VECTOR_CST_ELT (arg1, i);
11623
11624 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11625
11626 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11627 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11628 if (wi::neg_p (tmp))
11629 tmp = wi::neg (tmp, &neg2_ovf);
11630 else
11631 neg2_ovf = false;
11632 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11633 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11634 }
11635
11636 gcc_assert (!overflow);
11637
11638 return wide_int_to_tree (rtype, result);
11639 }
11640
11641 default:
11642 break;
11643 }
11644
11645 return NULL_TREE;
11646 }
11647 \f
11648 /* ??? This duplicates information provided to the compiler by the
11649 ??? scheduler description. Some day, teach genautomata to output
11650 ??? the latencies and then CSE will just use that. */
11651
11652 static bool
11653 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11654 int opno ATTRIBUTE_UNUSED,
11655 int *total, bool speed ATTRIBUTE_UNUSED)
11656 {
11657 int code = GET_CODE (x);
11658 bool float_mode_p = FLOAT_MODE_P (mode);
11659
11660 switch (code)
11661 {
11662 case CONST_INT:
11663 if (SMALL_INT (x))
11664 *total = 0;
11665 else
11666 *total = 2;
11667 return true;
11668
11669 case CONST_WIDE_INT:
11670 *total = 0;
11671 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11672 *total += 2;
11673 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11674 *total += 2;
11675 return true;
11676
11677 case HIGH:
11678 *total = 2;
11679 return true;
11680
11681 case CONST:
11682 case LABEL_REF:
11683 case SYMBOL_REF:
11684 *total = 4;
11685 return true;
11686
11687 case CONST_DOUBLE:
11688 *total = 8;
11689 return true;
11690
11691 case MEM:
11692 /* If outer-code was a sign or zero extension, a cost
11693 of COSTS_N_INSNS (1) was already added in. This is
11694 why we are subtracting it back out. */
11695 if (outer_code == ZERO_EXTEND)
11696 {
11697 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11698 }
11699 else if (outer_code == SIGN_EXTEND)
11700 {
11701 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11702 }
11703 else if (float_mode_p)
11704 {
11705 *total = sparc_costs->float_load;
11706 }
11707 else
11708 {
11709 *total = sparc_costs->int_load;
11710 }
11711
11712 return true;
11713
11714 case PLUS:
11715 case MINUS:
11716 if (float_mode_p)
11717 *total = sparc_costs->float_plusminus;
11718 else
11719 *total = COSTS_N_INSNS (1);
11720 return false;
11721
11722 case FMA:
11723 {
11724 rtx sub;
11725
11726 gcc_assert (float_mode_p);
11727 *total = sparc_costs->float_mul;
11728
11729 sub = XEXP (x, 0);
11730 if (GET_CODE (sub) == NEG)
11731 sub = XEXP (sub, 0);
11732 *total += rtx_cost (sub, mode, FMA, 0, speed);
11733
11734 sub = XEXP (x, 2);
11735 if (GET_CODE (sub) == NEG)
11736 sub = XEXP (sub, 0);
11737 *total += rtx_cost (sub, mode, FMA, 2, speed);
11738 return true;
11739 }
11740
11741 case MULT:
11742 if (float_mode_p)
11743 *total = sparc_costs->float_mul;
11744 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11745 *total = COSTS_N_INSNS (25);
11746 else
11747 {
11748 int bit_cost;
11749
11750 bit_cost = 0;
11751 if (sparc_costs->int_mul_bit_factor)
11752 {
11753 int nbits;
11754
11755 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11756 {
11757 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11758 for (nbits = 0; value != 0; value &= value - 1)
11759 nbits++;
11760 }
11761 else
11762 nbits = 7;
11763
11764 if (nbits < 3)
11765 nbits = 3;
11766 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11767 bit_cost = COSTS_N_INSNS (bit_cost);
11768 }
11769
11770 if (mode == DImode || !TARGET_HARD_MUL)
11771 *total = sparc_costs->int_mulX + bit_cost;
11772 else
11773 *total = sparc_costs->int_mul + bit_cost;
11774 }
11775 return false;
11776
11777 case ASHIFT:
11778 case ASHIFTRT:
11779 case LSHIFTRT:
11780 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11781 return false;
11782
11783 case DIV:
11784 case UDIV:
11785 case MOD:
11786 case UMOD:
11787 if (float_mode_p)
11788 {
11789 if (mode == DFmode)
11790 *total = sparc_costs->float_div_df;
11791 else
11792 *total = sparc_costs->float_div_sf;
11793 }
11794 else
11795 {
11796 if (mode == DImode)
11797 *total = sparc_costs->int_divX;
11798 else
11799 *total = sparc_costs->int_div;
11800 }
11801 return false;
11802
11803 case NEG:
11804 if (! float_mode_p)
11805 {
11806 *total = COSTS_N_INSNS (1);
11807 return false;
11808 }
11809 /* FALLTHRU */
11810
11811 case ABS:
11812 case FLOAT:
11813 case UNSIGNED_FLOAT:
11814 case FIX:
11815 case UNSIGNED_FIX:
11816 case FLOAT_EXTEND:
11817 case FLOAT_TRUNCATE:
11818 *total = sparc_costs->float_move;
11819 return false;
11820
11821 case SQRT:
11822 if (mode == DFmode)
11823 *total = sparc_costs->float_sqrt_df;
11824 else
11825 *total = sparc_costs->float_sqrt_sf;
11826 return false;
11827
11828 case COMPARE:
11829 if (float_mode_p)
11830 *total = sparc_costs->float_cmp;
11831 else
11832 *total = COSTS_N_INSNS (1);
11833 return false;
11834
11835 case IF_THEN_ELSE:
11836 if (float_mode_p)
11837 *total = sparc_costs->float_cmove;
11838 else
11839 *total = sparc_costs->int_cmove;
11840 return false;
11841
11842 case IOR:
11843 /* Handle the NAND vector patterns. */
11844 if (sparc_vector_mode_supported_p (mode)
11845 && GET_CODE (XEXP (x, 0)) == NOT
11846 && GET_CODE (XEXP (x, 1)) == NOT)
11847 {
11848 *total = COSTS_N_INSNS (1);
11849 return true;
11850 }
11851 else
11852 return false;
11853
11854 default:
11855 return false;
11856 }
11857 }
11858
11859 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11860
11861 static inline bool
11862 general_or_i64_p (reg_class_t rclass)
11863 {
11864 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11865 }
11866
11867 /* Implement TARGET_REGISTER_MOVE_COST. */
11868
11869 static int
11870 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11871 reg_class_t from, reg_class_t to)
11872 {
11873 bool need_memory = false;
11874
11875 /* This helps postreload CSE to eliminate redundant comparisons. */
11876 if (from == NO_REGS || to == NO_REGS)
11877 return 100;
11878
11879 if (from == FPCC_REGS || to == FPCC_REGS)
11880 need_memory = true;
11881 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11882 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11883 {
11884 if (TARGET_VIS3)
11885 {
11886 int size = GET_MODE_SIZE (mode);
11887 if (size == 8 || size == 4)
11888 {
11889 if (! TARGET_ARCH32 || size == 4)
11890 return 4;
11891 else
11892 return 6;
11893 }
11894 }
11895 need_memory = true;
11896 }
11897
11898 if (need_memory)
11899 {
11900 if (sparc_cpu == PROCESSOR_ULTRASPARC
11901 || sparc_cpu == PROCESSOR_ULTRASPARC3
11902 || sparc_cpu == PROCESSOR_NIAGARA
11903 || sparc_cpu == PROCESSOR_NIAGARA2
11904 || sparc_cpu == PROCESSOR_NIAGARA3
11905 || sparc_cpu == PROCESSOR_NIAGARA4
11906 || sparc_cpu == PROCESSOR_NIAGARA7
11907 || sparc_cpu == PROCESSOR_M8)
11908 return 12;
11909
11910 return 6;
11911 }
11912
11913 return 2;
11914 }
11915
11916 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11917 This is achieved by means of a manual dynamic stack space allocation in
11918 the current frame. We make the assumption that SEQ doesn't contain any
11919 function calls, with the possible exception of calls to the GOT helper. */
11920
11921 static void
11922 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11923 {
11924 /* We must preserve the lowest 16 words for the register save area. */
11925 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11926 /* We really need only 2 words of fresh stack space. */
11927 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11928
11929 rtx slot
11930 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11931 SPARC_STACK_BIAS + offset));
11932
11933 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11934 emit_insn (gen_rtx_SET (slot, reg));
11935 if (reg2)
11936 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11937 reg2));
11938 emit_insn (seq);
11939 if (reg2)
11940 emit_insn (gen_rtx_SET (reg2,
11941 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11942 emit_insn (gen_rtx_SET (reg, slot));
11943 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11944 }
11945
11946 /* Output the assembler code for a thunk function. THUNK_DECL is the
11947 declaration for the thunk function itself, FUNCTION is the decl for
11948 the target function. DELTA is an immediate constant offset to be
11949 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11950 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11951
11952 static void
11953 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11954 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11955 tree function)
11956 {
11957 rtx this_rtx, funexp;
11958 rtx_insn *insn;
11959 unsigned int int_arg_first;
11960
11961 reload_completed = 1;
11962 epilogue_completed = 1;
11963
11964 emit_note (NOTE_INSN_PROLOGUE_END);
11965
11966 if (TARGET_FLAT)
11967 {
11968 sparc_leaf_function_p = 1;
11969
11970 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11971 }
11972 else if (flag_delayed_branch)
11973 {
11974 /* We will emit a regular sibcall below, so we need to instruct
11975 output_sibcall that we are in a leaf function. */
11976 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11977
11978 /* This will cause final.c to invoke leaf_renumber_regs so we
11979 must behave as if we were in a not-yet-leafified function. */
11980 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11981 }
11982 else
11983 {
11984 /* We will emit the sibcall manually below, so we will need to
11985 manually spill non-leaf registers. */
11986 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11987
11988 /* We really are in a leaf function. */
11989 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11990 }
11991
11992 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11993 returns a structure, the structure return pointer is there instead. */
11994 if (TARGET_ARCH64
11995 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11996 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11997 else
11998 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11999
12000 /* Add DELTA. When possible use a plain add, otherwise load it into
12001 a register first. */
12002 if (delta)
12003 {
12004 rtx delta_rtx = GEN_INT (delta);
12005
12006 if (! SPARC_SIMM13_P (delta))
12007 {
12008 rtx scratch = gen_rtx_REG (Pmode, 1);
12009 emit_move_insn (scratch, delta_rtx);
12010 delta_rtx = scratch;
12011 }
12012
12013 /* THIS_RTX += DELTA. */
12014 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12015 }
12016
12017 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12018 if (vcall_offset)
12019 {
12020 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12021 rtx scratch = gen_rtx_REG (Pmode, 1);
12022
12023 gcc_assert (vcall_offset < 0);
12024
12025 /* SCRATCH = *THIS_RTX. */
12026 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12027
12028 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12029 may not have any available scratch register at this point. */
12030 if (SPARC_SIMM13_P (vcall_offset))
12031 ;
12032 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12033 else if (! fixed_regs[5]
12034 /* The below sequence is made up of at least 2 insns,
12035 while the default method may need only one. */
12036 && vcall_offset < -8192)
12037 {
12038 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12039 emit_move_insn (scratch2, vcall_offset_rtx);
12040 vcall_offset_rtx = scratch2;
12041 }
12042 else
12043 {
12044 rtx increment = GEN_INT (-4096);
12045
12046 /* VCALL_OFFSET is a negative number whose typical range can be
12047 estimated as -32768..0 in 32-bit mode. In almost all cases
12048 it is therefore cheaper to emit multiple add insns than
12049 spilling and loading the constant into a register (at least
12050 6 insns). */
12051 while (! SPARC_SIMM13_P (vcall_offset))
12052 {
12053 emit_insn (gen_add2_insn (scratch, increment));
12054 vcall_offset += 4096;
12055 }
12056 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12057 }
12058
12059 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12060 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12061 gen_rtx_PLUS (Pmode,
12062 scratch,
12063 vcall_offset_rtx)));
12064
12065 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12066 emit_insn (gen_add2_insn (this_rtx, scratch));
12067 }
12068
12069 /* Generate a tail call to the target function. */
12070 if (! TREE_USED (function))
12071 {
12072 assemble_external (function);
12073 TREE_USED (function) = 1;
12074 }
12075 funexp = XEXP (DECL_RTL (function), 0);
12076
12077 if (flag_delayed_branch)
12078 {
12079 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12080 insn = emit_call_insn (gen_sibcall (funexp));
12081 SIBLING_CALL_P (insn) = 1;
12082 }
12083 else
12084 {
12085 /* The hoops we have to jump through in order to generate a sibcall
12086 without using delay slots... */
12087 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12088
12089 if (flag_pic)
12090 {
12091 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12092 start_sequence ();
12093 load_got_register (); /* clobbers %o7 */
12094 scratch = sparc_legitimize_pic_address (funexp, scratch);
12095 seq = get_insns ();
12096 end_sequence ();
12097 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12098 }
12099 else if (TARGET_ARCH32)
12100 {
12101 emit_insn (gen_rtx_SET (scratch,
12102 gen_rtx_HIGH (SImode, funexp)));
12103 emit_insn (gen_rtx_SET (scratch,
12104 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12105 }
12106 else /* TARGET_ARCH64 */
12107 {
12108 switch (sparc_cmodel)
12109 {
12110 case CM_MEDLOW:
12111 case CM_MEDMID:
12112 /* The destination can serve as a temporary. */
12113 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12114 break;
12115
12116 case CM_MEDANY:
12117 case CM_EMBMEDANY:
12118 /* The destination cannot serve as a temporary. */
12119 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12120 start_sequence ();
12121 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12122 seq = get_insns ();
12123 end_sequence ();
12124 emit_and_preserve (seq, spill_reg, 0);
12125 break;
12126
12127 default:
12128 gcc_unreachable ();
12129 }
12130 }
12131
12132 emit_jump_insn (gen_indirect_jump (scratch));
12133 }
12134
12135 emit_barrier ();
12136
12137 /* Run just enough of rest_of_compilation to get the insns emitted.
12138 There's not really enough bulk here to make other passes such as
12139 instruction scheduling worth while. Note that use_thunk calls
12140 assemble_start_function and assemble_end_function. */
12141 insn = get_insns ();
12142 shorten_branches (insn);
12143 final_start_function (insn, file, 1);
12144 final (insn, file, 1);
12145 final_end_function ();
12146
12147 reload_completed = 0;
12148 epilogue_completed = 0;
12149 }
12150
12151 /* Return true if sparc_output_mi_thunk would be able to output the
12152 assembler code for the thunk function specified by the arguments
12153 it is passed, and false otherwise. */
12154 static bool
12155 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12156 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12157 HOST_WIDE_INT vcall_offset,
12158 const_tree function ATTRIBUTE_UNUSED)
12159 {
12160 /* Bound the loop used in the default method above. */
12161 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12162 }
12163
12164 /* How to allocate a 'struct machine_function'. */
12165
12166 static struct machine_function *
12167 sparc_init_machine_status (void)
12168 {
12169 return ggc_cleared_alloc<machine_function> ();
12170 }
12171
12172 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12173 We need to emit DTP-relative relocations. */
12174
12175 static void
12176 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12177 {
12178 switch (size)
12179 {
12180 case 4:
12181 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12182 break;
12183 case 8:
12184 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12185 break;
12186 default:
12187 gcc_unreachable ();
12188 }
12189 output_addr_const (file, x);
12190 fputs (")", file);
12191 }
12192
12193 /* Do whatever processing is required at the end of a file. */
12194
12195 static void
12196 sparc_file_end (void)
12197 {
12198 /* If we need to emit the special GOT helper function, do so now. */
12199 if (got_helper_rtx)
12200 {
12201 const char *name = XSTR (got_helper_rtx, 0);
12202 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12203 #ifdef DWARF2_UNWIND_INFO
12204 bool do_cfi;
12205 #endif
12206
12207 if (USE_HIDDEN_LINKONCE)
12208 {
12209 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12210 get_identifier (name),
12211 build_function_type_list (void_type_node,
12212 NULL_TREE));
12213 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12214 NULL_TREE, void_type_node);
12215 TREE_PUBLIC (decl) = 1;
12216 TREE_STATIC (decl) = 1;
12217 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12218 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12219 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12220 resolve_unique_section (decl, 0, flag_function_sections);
12221 allocate_struct_function (decl, true);
12222 cfun->is_thunk = 1;
12223 current_function_decl = decl;
12224 init_varasm_status ();
12225 assemble_start_function (decl, name);
12226 }
12227 else
12228 {
12229 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12230 switch_to_section (text_section);
12231 if (align > 0)
12232 ASM_OUTPUT_ALIGN (asm_out_file, align);
12233 ASM_OUTPUT_LABEL (asm_out_file, name);
12234 }
12235
12236 #ifdef DWARF2_UNWIND_INFO
12237 do_cfi = dwarf2out_do_cfi_asm ();
12238 if (do_cfi)
12239 fprintf (asm_out_file, "\t.cfi_startproc\n");
12240 #endif
12241 if (flag_delayed_branch)
12242 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12243 reg_name, reg_name);
12244 else
12245 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12246 reg_name, reg_name);
12247 #ifdef DWARF2_UNWIND_INFO
12248 if (do_cfi)
12249 fprintf (asm_out_file, "\t.cfi_endproc\n");
12250 #endif
12251 }
12252
12253 if (NEED_INDICATE_EXEC_STACK)
12254 file_end_indicate_exec_stack ();
12255
12256 #ifdef TARGET_SOLARIS
12257 solaris_file_end ();
12258 #endif
12259 }
12260
12261 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12262 /* Implement TARGET_MANGLE_TYPE. */
12263
12264 static const char *
12265 sparc_mangle_type (const_tree type)
12266 {
12267 if (TARGET_ARCH32
12268 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12269 && TARGET_LONG_DOUBLE_128)
12270 return "g";
12271
12272 /* For all other types, use normal C++ mangling. */
12273 return NULL;
12274 }
12275 #endif
12276
12277 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12278 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
12279 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12280
12281 void
12282 sparc_emit_membar_for_model (enum memmodel model,
12283 int load_store, int before_after)
12284 {
12285 /* Bits for the MEMBAR mmask field. */
12286 const int LoadLoad = 1;
12287 const int StoreLoad = 2;
12288 const int LoadStore = 4;
12289 const int StoreStore = 8;
12290
12291 int mm = 0, implied = 0;
12292
12293 switch (sparc_memory_model)
12294 {
12295 case SMM_SC:
12296 /* Sequential Consistency. All memory transactions are immediately
12297 visible in sequential execution order. No barriers needed. */
12298 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12299 break;
12300
12301 case SMM_TSO:
12302 /* Total Store Ordering: all memory transactions with store semantics
12303 are followed by an implied StoreStore. */
12304 implied |= StoreStore;
12305
12306 /* If we're not looking for a raw barrer (before+after), then atomic
12307 operations get the benefit of being both load and store. */
12308 if (load_store == 3 && before_after == 1)
12309 implied |= StoreLoad;
12310 /* FALLTHRU */
12311
12312 case SMM_PSO:
12313 /* Partial Store Ordering: all memory transactions with load semantics
12314 are followed by an implied LoadLoad | LoadStore. */
12315 implied |= LoadLoad | LoadStore;
12316
12317 /* If we're not looking for a raw barrer (before+after), then atomic
12318 operations get the benefit of being both load and store. */
12319 if (load_store == 3 && before_after == 2)
12320 implied |= StoreLoad | StoreStore;
12321 /* FALLTHRU */
12322
12323 case SMM_RMO:
12324 /* Relaxed Memory Ordering: no implicit bits. */
12325 break;
12326
12327 default:
12328 gcc_unreachable ();
12329 }
12330
12331 if (before_after & 1)
12332 {
12333 if (is_mm_release (model) || is_mm_acq_rel (model)
12334 || is_mm_seq_cst (model))
12335 {
12336 if (load_store & 1)
12337 mm |= LoadLoad | StoreLoad;
12338 if (load_store & 2)
12339 mm |= LoadStore | StoreStore;
12340 }
12341 }
12342 if (before_after & 2)
12343 {
12344 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12345 || is_mm_seq_cst (model))
12346 {
12347 if (load_store & 1)
12348 mm |= LoadLoad | LoadStore;
12349 if (load_store & 2)
12350 mm |= StoreLoad | StoreStore;
12351 }
12352 }
12353
12354 /* Remove the bits implied by the system memory model. */
12355 mm &= ~implied;
12356
12357 /* For raw barriers (before+after), always emit a barrier.
12358 This will become a compile-time barrier if needed. */
12359 if (mm || before_after == 3)
12360 emit_insn (gen_membar (GEN_INT (mm)));
12361 }
12362
12363 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
12364 compare and swap on the word containing the byte or half-word. */
12365
12366 static void
12367 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12368 rtx oldval, rtx newval)
12369 {
12370 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12371 rtx addr = gen_reg_rtx (Pmode);
12372 rtx off = gen_reg_rtx (SImode);
12373 rtx oldv = gen_reg_rtx (SImode);
12374 rtx newv = gen_reg_rtx (SImode);
12375 rtx oldvalue = gen_reg_rtx (SImode);
12376 rtx newvalue = gen_reg_rtx (SImode);
12377 rtx res = gen_reg_rtx (SImode);
12378 rtx resv = gen_reg_rtx (SImode);
12379 rtx memsi, val, mask, cc;
12380
12381 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12382
12383 if (Pmode != SImode)
12384 addr1 = gen_lowpart (SImode, addr1);
12385 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12386
12387 memsi = gen_rtx_MEM (SImode, addr);
12388 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12389 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12390
12391 val = copy_to_reg (memsi);
12392
12393 emit_insn (gen_rtx_SET (off,
12394 gen_rtx_XOR (SImode, off,
12395 GEN_INT (GET_MODE (mem) == QImode
12396 ? 3 : 2))));
12397
12398 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12399
12400 if (GET_MODE (mem) == QImode)
12401 mask = force_reg (SImode, GEN_INT (0xff));
12402 else
12403 mask = force_reg (SImode, GEN_INT (0xffff));
12404
12405 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12406
12407 emit_insn (gen_rtx_SET (val,
12408 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12409 val)));
12410
12411 oldval = gen_lowpart (SImode, oldval);
12412 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12413
12414 newval = gen_lowpart_common (SImode, newval);
12415 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12416
12417 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12418
12419 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12420
12421 rtx_code_label *end_label = gen_label_rtx ();
12422 rtx_code_label *loop_label = gen_label_rtx ();
12423 emit_label (loop_label);
12424
12425 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12426
12427 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12428
12429 emit_move_insn (bool_result, const1_rtx);
12430
12431 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12432
12433 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12434
12435 emit_insn (gen_rtx_SET (resv,
12436 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12437 res)));
12438
12439 emit_move_insn (bool_result, const0_rtx);
12440
12441 cc = gen_compare_reg_1 (NE, resv, val);
12442 emit_insn (gen_rtx_SET (val, resv));
12443
12444 /* Use cbranchcc4 to separate the compare and branch! */
12445 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12446 cc, const0_rtx, loop_label));
12447
12448 emit_label (end_label);
12449
12450 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12451
12452 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12453
12454 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12455 }
12456
12457 /* Expand code to perform a compare-and-swap. */
12458
12459 void
12460 sparc_expand_compare_and_swap (rtx operands[])
12461 {
12462 rtx bval, retval, mem, oldval, newval;
12463 machine_mode mode;
12464 enum memmodel model;
12465
12466 bval = operands[0];
12467 retval = operands[1];
12468 mem = operands[2];
12469 oldval = operands[3];
12470 newval = operands[4];
12471 model = (enum memmodel) INTVAL (operands[6]);
12472 mode = GET_MODE (mem);
12473
12474 sparc_emit_membar_for_model (model, 3, 1);
12475
12476 if (reg_overlap_mentioned_p (retval, oldval))
12477 oldval = copy_to_reg (oldval);
12478
12479 if (mode == QImode || mode == HImode)
12480 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12481 else
12482 {
12483 rtx (*gen) (rtx, rtx, rtx, rtx);
12484 rtx x;
12485
12486 if (mode == SImode)
12487 gen = gen_atomic_compare_and_swapsi_1;
12488 else
12489 gen = gen_atomic_compare_and_swapdi_1;
12490 emit_insn (gen (retval, mem, oldval, newval));
12491
12492 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12493 if (x != bval)
12494 convert_move (bval, x, 1);
12495 }
12496
12497 sparc_emit_membar_for_model (model, 3, 2);
12498 }
12499
12500 void
12501 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12502 {
12503 rtx t_1, t_2, t_3;
12504
12505 sel = gen_lowpart (DImode, sel);
12506 switch (vmode)
12507 {
12508 case E_V2SImode:
12509 /* inp = xxxxxxxAxxxxxxxB */
12510 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12511 NULL_RTX, 1, OPTAB_DIRECT);
12512 /* t_1 = ....xxxxxxxAxxx. */
12513 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12514 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12515 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12516 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12517 /* sel = .......B */
12518 /* t_1 = ...A.... */
12519 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12520 /* sel = ...A...B */
12521 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12522 /* sel = AAAABBBB * 4 */
12523 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12524 /* sel = { A*4, A*4+1, A*4+2, ... } */
12525 break;
12526
12527 case E_V4HImode:
12528 /* inp = xxxAxxxBxxxCxxxD */
12529 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12530 NULL_RTX, 1, OPTAB_DIRECT);
12531 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12532 NULL_RTX, 1, OPTAB_DIRECT);
12533 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12534 NULL_RTX, 1, OPTAB_DIRECT);
12535 /* t_1 = ..xxxAxxxBxxxCxx */
12536 /* t_2 = ....xxxAxxxBxxxC */
12537 /* t_3 = ......xxxAxxxBxx */
12538 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12539 GEN_INT (0x07),
12540 NULL_RTX, 1, OPTAB_DIRECT);
12541 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12542 GEN_INT (0x0700),
12543 NULL_RTX, 1, OPTAB_DIRECT);
12544 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12545 GEN_INT (0x070000),
12546 NULL_RTX, 1, OPTAB_DIRECT);
12547 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12548 GEN_INT (0x07000000),
12549 NULL_RTX, 1, OPTAB_DIRECT);
12550 /* sel = .......D */
12551 /* t_1 = .....C.. */
12552 /* t_2 = ...B.... */
12553 /* t_3 = .A...... */
12554 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12555 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12556 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12557 /* sel = .A.B.C.D */
12558 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12559 /* sel = AABBCCDD * 2 */
12560 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12561 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12562 break;
12563
12564 case E_V8QImode:
12565 /* input = xAxBxCxDxExFxGxH */
12566 sel = expand_simple_binop (DImode, AND, sel,
12567 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12568 | 0x0f0f0f0f),
12569 NULL_RTX, 1, OPTAB_DIRECT);
12570 /* sel = .A.B.C.D.E.F.G.H */
12571 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12572 NULL_RTX, 1, OPTAB_DIRECT);
12573 /* t_1 = ..A.B.C.D.E.F.G. */
12574 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12575 NULL_RTX, 1, OPTAB_DIRECT);
12576 /* sel = .AABBCCDDEEFFGGH */
12577 sel = expand_simple_binop (DImode, AND, sel,
12578 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12579 | 0xff00ff),
12580 NULL_RTX, 1, OPTAB_DIRECT);
12581 /* sel = ..AB..CD..EF..GH */
12582 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12583 NULL_RTX, 1, OPTAB_DIRECT);
12584 /* t_1 = ....AB..CD..EF.. */
12585 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12586 NULL_RTX, 1, OPTAB_DIRECT);
12587 /* sel = ..ABABCDCDEFEFGH */
12588 sel = expand_simple_binop (DImode, AND, sel,
12589 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12590 NULL_RTX, 1, OPTAB_DIRECT);
12591 /* sel = ....ABCD....EFGH */
12592 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12593 NULL_RTX, 1, OPTAB_DIRECT);
12594 /* t_1 = ........ABCD.... */
12595 sel = gen_lowpart (SImode, sel);
12596 t_1 = gen_lowpart (SImode, t_1);
12597 break;
12598
12599 default:
12600 gcc_unreachable ();
12601 }
12602
12603 /* Always perform the final addition/merge within the bmask insn. */
12604 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12605 }
12606
12607 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12608
12609 static bool
12610 sparc_frame_pointer_required (void)
12611 {
12612 /* If the stack pointer is dynamically modified in the function, it cannot
12613 serve as the frame pointer. */
12614 if (cfun->calls_alloca)
12615 return true;
12616
12617 /* If the function receives nonlocal gotos, it needs to save the frame
12618 pointer in the nonlocal_goto_save_area object. */
12619 if (cfun->has_nonlocal_label)
12620 return true;
12621
12622 /* In flat mode, that's it. */
12623 if (TARGET_FLAT)
12624 return false;
12625
12626 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12627 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12628 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12629 }
12630
12631 /* The way this is structured, we can't eliminate SFP in favor of SP
12632 if the frame pointer is required: we want to use the SFP->HFP elimination
12633 in that case. But the test in update_eliminables doesn't know we are
12634 assuming below that we only do the former elimination. */
12635
12636 static bool
12637 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12638 {
12639 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12640 }
12641
12642 /* Return the hard frame pointer directly to bypass the stack bias. */
12643
12644 static rtx
12645 sparc_builtin_setjmp_frame_value (void)
12646 {
12647 return hard_frame_pointer_rtx;
12648 }
12649
12650 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12651 they won't be allocated. */
12652
12653 static void
12654 sparc_conditional_register_usage (void)
12655 {
12656 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12657 {
12658 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12659 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12660 }
12661 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
12662 /* then honor it. */
12663 if (TARGET_ARCH32 && fixed_regs[5])
12664 fixed_regs[5] = 1;
12665 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12666 fixed_regs[5] = 0;
12667 if (! TARGET_V9)
12668 {
12669 int regno;
12670 for (regno = SPARC_FIRST_V9_FP_REG;
12671 regno <= SPARC_LAST_V9_FP_REG;
12672 regno++)
12673 fixed_regs[regno] = 1;
12674 /* %fcc0 is used by v8 and v9. */
12675 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12676 regno <= SPARC_LAST_V9_FCC_REG;
12677 regno++)
12678 fixed_regs[regno] = 1;
12679 }
12680 if (! TARGET_FPU)
12681 {
12682 int regno;
12683 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12684 fixed_regs[regno] = 1;
12685 }
12686 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
12687 /* then honor it. Likewise with g3 and g4. */
12688 if (fixed_regs[2] == 2)
12689 fixed_regs[2] = ! TARGET_APP_REGS;
12690 if (fixed_regs[3] == 2)
12691 fixed_regs[3] = ! TARGET_APP_REGS;
12692 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12693 fixed_regs[4] = ! TARGET_APP_REGS;
12694 else if (TARGET_CM_EMBMEDANY)
12695 fixed_regs[4] = 1;
12696 else if (fixed_regs[4] == 2)
12697 fixed_regs[4] = 0;
12698 if (TARGET_FLAT)
12699 {
12700 int regno;
12701 /* Disable leaf functions. */
12702 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12704 leaf_reg_remap [regno] = regno;
12705 }
12706 if (TARGET_VIS)
12707 global_regs[SPARC_GSR_REG] = 1;
12708 }
12709
12710 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12711
12712 - We can't load constants into FP registers.
12713 - We can't load FP constants into integer registers when soft-float,
12714 because there is no soft-float pattern with a r/F constraint.
12715 - We can't load FP constants into integer registers for TFmode unless
12716 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12717 - Try and reload integer constants (symbolic or otherwise) back into
12718 registers directly, rather than having them dumped to memory. */
12719
12720 static reg_class_t
12721 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12722 {
12723 machine_mode mode = GET_MODE (x);
12724 if (CONSTANT_P (x))
12725 {
12726 if (FP_REG_CLASS_P (rclass)
12727 || rclass == GENERAL_OR_FP_REGS
12728 || rclass == GENERAL_OR_EXTRA_FP_REGS
12729 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12730 || (mode == TFmode && ! const_zero_operand (x, mode)))
12731 return NO_REGS;
12732
12733 if (GET_MODE_CLASS (mode) == MODE_INT)
12734 return GENERAL_REGS;
12735
12736 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12737 {
12738 if (! FP_REG_CLASS_P (rclass)
12739 || !(const_zero_operand (x, mode)
12740 || const_all_ones_operand (x, mode)))
12741 return NO_REGS;
12742 }
12743 }
12744
12745 if (TARGET_VIS3
12746 && ! TARGET_ARCH64
12747 && (rclass == EXTRA_FP_REGS
12748 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12749 {
12750 int regno = true_regnum (x);
12751
12752 if (SPARC_INT_REG_P (regno))
12753 return (rclass == EXTRA_FP_REGS
12754 ? FP_REGS : GENERAL_OR_FP_REGS);
12755 }
12756
12757 return rclass;
12758 }
12759
12760 /* Return true if we use LRA instead of reload pass. */
12761
12762 static bool
12763 sparc_lra_p (void)
12764 {
12765 return TARGET_LRA;
12766 }
12767
12768 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12769 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12770
12771 const char *
12772 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12773 {
12774 char mulstr[32];
12775
12776 gcc_assert (! TARGET_ARCH64);
12777
12778 if (sparc_check_64 (operands[1], insn) <= 0)
12779 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12780 if (which_alternative == 1)
12781 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12782 if (GET_CODE (operands[2]) == CONST_INT)
12783 {
12784 if (which_alternative == 1)
12785 {
12786 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12787 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12788 output_asm_insn (mulstr, operands);
12789 return "srlx\t%L0, 32, %H0";
12790 }
12791 else
12792 {
12793 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12794 output_asm_insn ("or\t%L1, %3, %3", operands);
12795 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12796 output_asm_insn (mulstr, operands);
12797 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12798 return "mov\t%3, %L0";
12799 }
12800 }
12801 else if (rtx_equal_p (operands[1], operands[2]))
12802 {
12803 if (which_alternative == 1)
12804 {
12805 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12806 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12807 output_asm_insn (mulstr, operands);
12808 return "srlx\t%L0, 32, %H0";
12809 }
12810 else
12811 {
12812 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12813 output_asm_insn ("or\t%L1, %3, %3", operands);
12814 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12815 output_asm_insn (mulstr, operands);
12816 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12817 return "mov\t%3, %L0";
12818 }
12819 }
12820 if (sparc_check_64 (operands[2], insn) <= 0)
12821 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12822 if (which_alternative == 1)
12823 {
12824 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12825 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12826 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12827 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12828 output_asm_insn (mulstr, operands);
12829 return "srlx\t%L0, 32, %H0";
12830 }
12831 else
12832 {
12833 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12834 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12835 output_asm_insn ("or\t%L1, %3, %3", operands);
12836 output_asm_insn ("or\t%L2, %4, %4", operands);
12837 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12838 output_asm_insn (mulstr, operands);
12839 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12840 return "mov\t%3, %L0";
12841 }
12842 }
12843
12844 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12845 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12846 and INNER_MODE are the modes describing TARGET. */
12847
12848 static void
12849 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12850 machine_mode inner_mode)
12851 {
12852 rtx t1, final_insn, sel;
12853 int bmask;
12854
12855 t1 = gen_reg_rtx (mode);
12856
12857 elt = convert_modes (SImode, inner_mode, elt, true);
12858 emit_move_insn (gen_lowpart(SImode, t1), elt);
12859
12860 switch (mode)
12861 {
12862 case E_V2SImode:
12863 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12864 bmask = 0x45674567;
12865 break;
12866 case E_V4HImode:
12867 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12868 bmask = 0x67676767;
12869 break;
12870 case E_V8QImode:
12871 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12872 bmask = 0x77777777;
12873 break;
12874 default:
12875 gcc_unreachable ();
12876 }
12877
12878 sel = force_reg (SImode, GEN_INT (bmask));
12879 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12880 emit_insn (final_insn);
12881 }
12882
12883 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12884 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12885
12886 static void
12887 vector_init_fpmerge (rtx target, rtx elt)
12888 {
12889 rtx t1, t2, t2_low, t3, t3_low;
12890
12891 t1 = gen_reg_rtx (V4QImode);
12892 elt = convert_modes (SImode, QImode, elt, true);
12893 emit_move_insn (gen_lowpart (SImode, t1), elt);
12894
12895 t2 = gen_reg_rtx (V8QImode);
12896 t2_low = gen_lowpart (V4QImode, t2);
12897 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12898
12899 t3 = gen_reg_rtx (V8QImode);
12900 t3_low = gen_lowpart (V4QImode, t3);
12901 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12902
12903 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12904 }
12905
12906 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12907 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12908
12909 static void
12910 vector_init_faligndata (rtx target, rtx elt)
12911 {
12912 rtx t1 = gen_reg_rtx (V4HImode);
12913 int i;
12914
12915 elt = convert_modes (SImode, HImode, elt, true);
12916 emit_move_insn (gen_lowpart (SImode, t1), elt);
12917
12918 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12919 force_reg (SImode, GEN_INT (6)),
12920 const0_rtx));
12921
12922 for (i = 0; i < 4; i++)
12923 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12924 }
12925
12926 /* Emit code to initialize TARGET to values for individual fields VALS. */
12927
12928 void
12929 sparc_expand_vector_init (rtx target, rtx vals)
12930 {
12931 const machine_mode mode = GET_MODE (target);
12932 const machine_mode inner_mode = GET_MODE_INNER (mode);
12933 const int n_elts = GET_MODE_NUNITS (mode);
12934 int i, n_var = 0;
12935 bool all_same = true;
12936 rtx mem;
12937
12938 for (i = 0; i < n_elts; i++)
12939 {
12940 rtx x = XVECEXP (vals, 0, i);
12941 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12942 n_var++;
12943
12944 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12945 all_same = false;
12946 }
12947
12948 if (n_var == 0)
12949 {
12950 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12951 return;
12952 }
12953
12954 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12955 {
12956 if (GET_MODE_SIZE (inner_mode) == 4)
12957 {
12958 emit_move_insn (gen_lowpart (SImode, target),
12959 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12960 return;
12961 }
12962 else if (GET_MODE_SIZE (inner_mode) == 8)
12963 {
12964 emit_move_insn (gen_lowpart (DImode, target),
12965 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12966 return;
12967 }
12968 }
12969 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12970 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12971 {
12972 emit_move_insn (gen_highpart (word_mode, target),
12973 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12974 emit_move_insn (gen_lowpart (word_mode, target),
12975 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12976 return;
12977 }
12978
12979 if (all_same && GET_MODE_SIZE (mode) == 8)
12980 {
12981 if (TARGET_VIS2)
12982 {
12983 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12984 return;
12985 }
12986 if (mode == V8QImode)
12987 {
12988 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12989 return;
12990 }
12991 if (mode == V4HImode)
12992 {
12993 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12994 return;
12995 }
12996 }
12997
12998 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12999 for (i = 0; i < n_elts; i++)
13000 emit_move_insn (adjust_address_nv (mem, inner_mode,
13001 i * GET_MODE_SIZE (inner_mode)),
13002 XVECEXP (vals, 0, i));
13003 emit_move_insn (target, mem);
13004 }
13005
13006 /* Implement TARGET_SECONDARY_RELOAD. */
13007
13008 static reg_class_t
13009 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13010 machine_mode mode, secondary_reload_info *sri)
13011 {
13012 enum reg_class rclass = (enum reg_class) rclass_i;
13013
13014 sri->icode = CODE_FOR_nothing;
13015 sri->extra_cost = 0;
13016
13017 /* We need a temporary when loading/storing a HImode/QImode value
13018 between memory and the FPU registers. This can happen when combine puts
13019 a paradoxical subreg in a float/fix conversion insn. */
13020 if (FP_REG_CLASS_P (rclass)
13021 && (mode == HImode || mode == QImode)
13022 && (GET_CODE (x) == MEM
13023 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13024 && true_regnum (x) == -1)))
13025 return GENERAL_REGS;
13026
13027 /* On 32-bit we need a temporary when loading/storing a DFmode value
13028 between unaligned memory and the upper FPU registers. */
13029 if (TARGET_ARCH32
13030 && rclass == EXTRA_FP_REGS
13031 && mode == DFmode
13032 && GET_CODE (x) == MEM
13033 && ! mem_min_alignment (x, 8))
13034 return FP_REGS;
13035
13036 if (((TARGET_CM_MEDANY
13037 && symbolic_operand (x, mode))
13038 || (TARGET_CM_EMBMEDANY
13039 && text_segment_operand (x, mode)))
13040 && ! flag_pic)
13041 {
13042 if (in_p)
13043 sri->icode = direct_optab_handler (reload_in_optab, mode);
13044 else
13045 sri->icode = direct_optab_handler (reload_out_optab, mode);
13046 return NO_REGS;
13047 }
13048
13049 if (TARGET_VIS3 && TARGET_ARCH32)
13050 {
13051 int regno = true_regnum (x);
13052
13053 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13054 to move 8-byte values in 4-byte pieces. This only works via
13055 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13056 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13057 an FP_REGS intermediate move. */
13058 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13059 || ((general_or_i64_p (rclass)
13060 || rclass == GENERAL_OR_FP_REGS)
13061 && SPARC_FP_REG_P (regno)))
13062 {
13063 sri->extra_cost = 2;
13064 return FP_REGS;
13065 }
13066 }
13067
13068 return NO_REGS;
13069 }
13070
13071 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13072
13073 On SPARC when not VIS3 it is not possible to directly move data
13074 between GENERAL_REGS and FP_REGS. */
13075
13076 static bool
13077 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13078 reg_class_t class2)
13079 {
13080 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13081 && (! TARGET_VIS3
13082 || GET_MODE_SIZE (mode) > 8
13083 || GET_MODE_SIZE (mode) < 4));
13084 }
13085
13086 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13087
13088 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13089 because the movsi and movsf patterns don't handle r/f moves.
13090 For v8 we copy the default definition. */
13091
13092 static machine_mode
13093 sparc_secondary_memory_needed_mode (machine_mode mode)
13094 {
13095 if (TARGET_ARCH64)
13096 {
13097 if (GET_MODE_BITSIZE (mode) < 32)
13098 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13099 return mode;
13100 }
13101 else
13102 {
13103 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13104 return mode_for_size (BITS_PER_WORD,
13105 GET_MODE_CLASS (mode), 0).require ();
13106 return mode;
13107 }
13108 }
13109
13110 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13111 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13112
13113 bool
13114 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13115 {
13116 enum rtx_code rc = GET_CODE (operands[1]);
13117 machine_mode cmp_mode;
13118 rtx cc_reg, dst, cmp;
13119
13120 cmp = operands[1];
13121 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13122 return false;
13123
13124 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13125 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13126
13127 cmp_mode = GET_MODE (XEXP (cmp, 0));
13128 rc = GET_CODE (cmp);
13129
13130 dst = operands[0];
13131 if (! rtx_equal_p (operands[2], dst)
13132 && ! rtx_equal_p (operands[3], dst))
13133 {
13134 if (reg_overlap_mentioned_p (dst, cmp))
13135 dst = gen_reg_rtx (mode);
13136
13137 emit_move_insn (dst, operands[3]);
13138 }
13139 else if (operands[2] == dst)
13140 {
13141 operands[2] = operands[3];
13142
13143 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13144 rc = reverse_condition_maybe_unordered (rc);
13145 else
13146 rc = reverse_condition (rc);
13147 }
13148
13149 if (XEXP (cmp, 1) == const0_rtx
13150 && GET_CODE (XEXP (cmp, 0)) == REG
13151 && cmp_mode == DImode
13152 && v9_regcmp_p (rc))
13153 cc_reg = XEXP (cmp, 0);
13154 else
13155 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13156
13157 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13158
13159 emit_insn (gen_rtx_SET (dst,
13160 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13161
13162 if (dst != operands[0])
13163 emit_move_insn (operands[0], dst);
13164
13165 return true;
13166 }
13167
13168 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13169 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13170 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13171 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13172 code to be used for the condition mask. */
13173
13174 void
13175 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13176 {
13177 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13178 enum rtx_code code = GET_CODE (operands[3]);
13179
13180 mask = gen_reg_rtx (Pmode);
13181 cop0 = operands[4];
13182 cop1 = operands[5];
13183 if (code == LT || code == GE)
13184 {
13185 rtx t;
13186
13187 code = swap_condition (code);
13188 t = cop0; cop0 = cop1; cop1 = t;
13189 }
13190
13191 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13192
13193 fcmp = gen_rtx_UNSPEC (Pmode,
13194 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13195 fcode);
13196
13197 cmask = gen_rtx_UNSPEC (DImode,
13198 gen_rtvec (2, mask, gsr),
13199 ccode);
13200
13201 bshuf = gen_rtx_UNSPEC (mode,
13202 gen_rtvec (3, operands[1], operands[2], gsr),
13203 UNSPEC_BSHUFFLE);
13204
13205 emit_insn (gen_rtx_SET (mask, fcmp));
13206 emit_insn (gen_rtx_SET (gsr, cmask));
13207
13208 emit_insn (gen_rtx_SET (operands[0], bshuf));
13209 }
13210
13211 /* On sparc, any mode which naturally allocates into the float
13212 registers should return 4 here. */
13213
13214 unsigned int
13215 sparc_regmode_natural_size (machine_mode mode)
13216 {
13217 int size = UNITS_PER_WORD;
13218
13219 if (TARGET_ARCH64)
13220 {
13221 enum mode_class mclass = GET_MODE_CLASS (mode);
13222
13223 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13224 size = 4;
13225 }
13226
13227 return size;
13228 }
13229
13230 /* Implement TARGET_HARD_REGNO_NREGS.
13231
13232 On SPARC, ordinary registers hold 32 bits worth; this means both
13233 integer and floating point registers. On v9, integer regs hold 64
13234 bits worth; floating point regs hold 32 bits worth (this includes the
13235 new fp regs as even the odd ones are included in the hard register
13236 count). */
13237
13238 static unsigned int
13239 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13240 {
13241 if (regno == SPARC_GSR_REG)
13242 return 1;
13243 if (TARGET_ARCH64)
13244 {
13245 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13246 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13247 return CEIL (GET_MODE_SIZE (mode), 4);
13248 }
13249 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13250 }
13251
13252 /* Implement TARGET_HARD_REGNO_MODE_OK.
13253
13254 ??? Because of the funny way we pass parameters we should allow certain
13255 ??? types of float/complex values to be in integer registers during
13256 ??? RTL generation. This only matters on arch32. */
13257
13258 static bool
13259 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13260 {
13261 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13262 }
13263
13264 /* Implement TARGET_MODES_TIEABLE_P.
13265
13266 For V9 we have to deal with the fact that only the lower 32 floating
13267 point registers are 32-bit addressable. */
13268
13269 static bool
13270 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13271 {
13272 enum mode_class mclass1, mclass2;
13273 unsigned short size1, size2;
13274
13275 if (mode1 == mode2)
13276 return true;
13277
13278 mclass1 = GET_MODE_CLASS (mode1);
13279 mclass2 = GET_MODE_CLASS (mode2);
13280 if (mclass1 != mclass2)
13281 return false;
13282
13283 if (! TARGET_V9)
13284 return true;
13285
13286 /* Classes are the same and we are V9 so we have to deal with upper
13287 vs. lower floating point registers. If one of the modes is a
13288 4-byte mode, and the other is not, we have to mark them as not
13289 tieable because only the lower 32 floating point register are
13290 addressable 32-bits at a time.
13291
13292 We can't just test explicitly for SFmode, otherwise we won't
13293 cover the vector mode cases properly. */
13294
13295 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13296 return true;
13297
13298 size1 = GET_MODE_SIZE (mode1);
13299 size2 = GET_MODE_SIZE (mode2);
13300 if ((size1 > 4 && size2 == 4)
13301 || (size2 > 4 && size1 == 4))
13302 return false;
13303
13304 return true;
13305 }
13306
13307 /* Implement TARGET_CSTORE_MODE. */
13308
13309 static scalar_int_mode
13310 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13311 {
13312 return (TARGET_ARCH64 ? DImode : SImode);
13313 }
13314
13315 /* Return the compound expression made of T1 and T2. */
13316
13317 static inline tree
13318 compound_expr (tree t1, tree t2)
13319 {
13320 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13321 }
13322
13323 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13324
13325 static void
13326 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13327 {
13328 if (!TARGET_FPU)
13329 return;
13330
13331 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13332 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13333
13334 /* We generate the equivalent of feholdexcept (&fenv_var):
13335
13336 unsigned int fenv_var;
13337 __builtin_store_fsr (&fenv_var);
13338
13339 unsigned int tmp1_var;
13340 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13341
13342 __builtin_load_fsr (&tmp1_var); */
13343
13344 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13345 TREE_ADDRESSABLE (fenv_var) = 1;
13346 tree fenv_addr = build_fold_addr_expr (fenv_var);
13347 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13348 tree hold_stfsr
13349 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13350 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13351
13352 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13353 TREE_ADDRESSABLE (tmp1_var) = 1;
13354 tree masked_fenv_var
13355 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13356 build_int_cst (unsigned_type_node,
13357 ~(accrued_exception_mask | trap_enable_mask)));
13358 tree hold_mask
13359 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13360 NULL_TREE, NULL_TREE);
13361
13362 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13363 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13364 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13365
13366 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13367
13368 /* We reload the value of tmp1_var to clear the exceptions:
13369
13370 __builtin_load_fsr (&tmp1_var); */
13371
13372 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13373
13374 /* We generate the equivalent of feupdateenv (&fenv_var):
13375
13376 unsigned int tmp2_var;
13377 __builtin_store_fsr (&tmp2_var);
13378
13379 __builtin_load_fsr (&fenv_var);
13380
13381 if (SPARC_LOW_FE_EXCEPT_VALUES)
13382 tmp2_var >>= 5;
13383 __atomic_feraiseexcept ((int) tmp2_var); */
13384
13385 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13386 TREE_ADDRESSABLE (tmp2_var) = 1;
13387 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13388 tree update_stfsr
13389 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13390 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13391
13392 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13393
13394 tree atomic_feraiseexcept
13395 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13396 tree update_call
13397 = build_call_expr (atomic_feraiseexcept, 1,
13398 fold_convert (integer_type_node, tmp2_var));
13399
13400 if (SPARC_LOW_FE_EXCEPT_VALUES)
13401 {
13402 tree shifted_tmp2_var
13403 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13404 build_int_cst (unsigned_type_node, 5));
13405 tree update_shift
13406 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13407 update_call = compound_expr (update_shift, update_call);
13408 }
13409
13410 *update
13411 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13412 }
13413
13414 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13415
13416 SImode loads to floating-point registers are not zero-extended.
13417 The definition for LOAD_EXTEND_OP specifies that integer loads
13418 narrower than BITS_PER_WORD will be zero-extended. As a result,
13419 we inhibit changes from SImode unless they are to a mode that is
13420 identical in size.
13421
13422 Likewise for SFmode, since word-mode paradoxical subregs are
13423 problematic on big-endian architectures. */
13424
13425 static bool
13426 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13427 reg_class_t rclass)
13428 {
13429 if (TARGET_ARCH64
13430 && GET_MODE_SIZE (from) == 4
13431 && GET_MODE_SIZE (to) != 4)
13432 return !reg_classes_intersect_p (rclass, FP_REGS);
13433 return true;
13434 }
13435
13436 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13437
13438 static HOST_WIDE_INT
13439 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13440 {
13441 if (TREE_CODE (exp) == STRING_CST)
13442 return MAX (align, FASTEST_ALIGNMENT);
13443 return align;
13444 }
13445
13446 #include "gt-sparc.h"