gcc/config/sparc/sparc.c
1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "debug.h"
52 #include "common/common-target.h"
53 #include "gimplify.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "params.h"
57 #include "tree-pass.h"
58 #include "context.h"
59 #include "builtins.h"
60
61 /* This file should be included last. */
62 #include "target-def.h"
63
64 /* Processor costs */
65
66 struct processor_costs {
67 /* Integer load */
68 const int int_load;
69
70 /* Integer signed load */
71 const int int_sload;
72
73 /* Integer zeroed load */
74 const int int_zload;
75
76 /* Float load */
77 const int float_load;
78
79 /* fmov, fneg, fabs */
80 const int float_move;
81
82 /* fadd, fsub */
83 const int float_plusminus;
84
85 /* fcmp */
86 const int float_cmp;
87
88 /* fmov, fmovr */
89 const int float_cmove;
90
91 /* fmul */
92 const int float_mul;
93
94 /* fdivs */
95 const int float_div_sf;
96
97 /* fdivd */
98 const int float_div_df;
99
100 /* fsqrts */
101 const int float_sqrt_sf;
102
103 /* fsqrtd */
104 const int float_sqrt_df;
105
106 /* umul/smul */
107 const int int_mul;
108
109 /* mulX */
110 const int int_mulX;
111
112 /* integer multiply cost for each bit set past the most
113 significant 3, so the formula for multiply cost becomes:
114
115 if (rs1 < 0)
116 highest_bit = highest_clear_bit(rs1);
117 else
118 highest_bit = highest_set_bit(rs1);
119 if (highest_bit < 3)
120 highest_bit = 3;
121 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
122
123 A value of zero indicates that the multiply cost is fixed
124 and not variable. */
125 const int int_mul_bit_factor;
126
127 /* udiv/sdiv */
128 const int int_div;
129
130 /* divX */
131 const int int_divX;
132
133 /* movcc, movr */
134 const int int_cmove;
135
136 /* penalty for shifts, due to scheduling rules etc. */
137 const int shift_penalty;
138 };
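/* A worked illustration of the int_mul_bit_factor formula above, using
   hypothetical operand values: with the ultrasparc_costs entries further
   below (int_mul = COSTS_N_INSNS (4), int_mul_bit_factor = 2), a multiply
   whose rs1 operand has its highest set bit at position 11 is costed as
   COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. the base cost plus 4.  */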
139
140 static const
141 struct processor_costs cypress_costs = {
142 COSTS_N_INSNS (2), /* int load */
143 COSTS_N_INSNS (2), /* int signed load */
144 COSTS_N_INSNS (2), /* int zeroed load */
145 COSTS_N_INSNS (2), /* float load */
146 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
147 COSTS_N_INSNS (5), /* fadd, fsub */
148 COSTS_N_INSNS (1), /* fcmp */
149 COSTS_N_INSNS (1), /* fmov, fmovr */
150 COSTS_N_INSNS (7), /* fmul */
151 COSTS_N_INSNS (37), /* fdivs */
152 COSTS_N_INSNS (37), /* fdivd */
153 COSTS_N_INSNS (63), /* fsqrts */
154 COSTS_N_INSNS (63), /* fsqrtd */
155 COSTS_N_INSNS (1), /* imul */
156 COSTS_N_INSNS (1), /* imulX */
157 0, /* imul bit factor */
158 COSTS_N_INSNS (1), /* idiv */
159 COSTS_N_INSNS (1), /* idivX */
160 COSTS_N_INSNS (1), /* movcc/movr */
161 0, /* shift penalty */
162 };
163
164 static const
165 struct processor_costs supersparc_costs = {
166 COSTS_N_INSNS (1), /* int load */
167 COSTS_N_INSNS (1), /* int signed load */
168 COSTS_N_INSNS (1), /* int zeroed load */
169 COSTS_N_INSNS (0), /* float load */
170 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
171 COSTS_N_INSNS (3), /* fadd, fsub */
172 COSTS_N_INSNS (3), /* fcmp */
173 COSTS_N_INSNS (1), /* fmov, fmovr */
174 COSTS_N_INSNS (3), /* fmul */
175 COSTS_N_INSNS (6), /* fdivs */
176 COSTS_N_INSNS (9), /* fdivd */
177 COSTS_N_INSNS (12), /* fsqrts */
178 COSTS_N_INSNS (12), /* fsqrtd */
179 COSTS_N_INSNS (4), /* imul */
180 COSTS_N_INSNS (4), /* imulX */
181 0, /* imul bit factor */
182 COSTS_N_INSNS (4), /* idiv */
183 COSTS_N_INSNS (4), /* idivX */
184 COSTS_N_INSNS (1), /* movcc/movr */
185 1, /* shift penalty */
186 };
187
188 static const
189 struct processor_costs hypersparc_costs = {
190 COSTS_N_INSNS (1), /* int load */
191 COSTS_N_INSNS (1), /* int signed load */
192 COSTS_N_INSNS (1), /* int zeroed load */
193 COSTS_N_INSNS (1), /* float load */
194 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
195 COSTS_N_INSNS (1), /* fadd, fsub */
196 COSTS_N_INSNS (1), /* fcmp */
197 COSTS_N_INSNS (1), /* fmov, fmovr */
198 COSTS_N_INSNS (1), /* fmul */
199 COSTS_N_INSNS (8), /* fdivs */
200 COSTS_N_INSNS (12), /* fdivd */
201 COSTS_N_INSNS (17), /* fsqrts */
202 COSTS_N_INSNS (17), /* fsqrtd */
203 COSTS_N_INSNS (17), /* imul */
204 COSTS_N_INSNS (17), /* imulX */
205 0, /* imul bit factor */
206 COSTS_N_INSNS (17), /* idiv */
207 COSTS_N_INSNS (17), /* idivX */
208 COSTS_N_INSNS (1), /* movcc/movr */
209 0, /* shift penalty */
210 };
211
212 static const
213 struct processor_costs leon_costs = {
214 COSTS_N_INSNS (1), /* int load */
215 COSTS_N_INSNS (1), /* int signed load */
216 COSTS_N_INSNS (1), /* int zeroed load */
217 COSTS_N_INSNS (1), /* float load */
218 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
219 COSTS_N_INSNS (1), /* fadd, fsub */
220 COSTS_N_INSNS (1), /* fcmp */
221 COSTS_N_INSNS (1), /* fmov, fmovr */
222 COSTS_N_INSNS (1), /* fmul */
223 COSTS_N_INSNS (15), /* fdivs */
224 COSTS_N_INSNS (15), /* fdivd */
225 COSTS_N_INSNS (23), /* fsqrts */
226 COSTS_N_INSNS (23), /* fsqrtd */
227 COSTS_N_INSNS (5), /* imul */
228 COSTS_N_INSNS (5), /* imulX */
229 0, /* imul bit factor */
230 COSTS_N_INSNS (5), /* idiv */
231 COSTS_N_INSNS (5), /* idivX */
232 COSTS_N_INSNS (1), /* movcc/movr */
233 0, /* shift penalty */
234 };
235
236 static const
237 struct processor_costs leon3_costs = {
238 COSTS_N_INSNS (1), /* int load */
239 COSTS_N_INSNS (1), /* int signed load */
240 COSTS_N_INSNS (1), /* int zeroed load */
241 COSTS_N_INSNS (1), /* float load */
242 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
243 COSTS_N_INSNS (1), /* fadd, fsub */
244 COSTS_N_INSNS (1), /* fcmp */
245 COSTS_N_INSNS (1), /* fmov, fmovr */
246 COSTS_N_INSNS (1), /* fmul */
247 COSTS_N_INSNS (14), /* fdivs */
248 COSTS_N_INSNS (15), /* fdivd */
249 COSTS_N_INSNS (22), /* fsqrts */
250 COSTS_N_INSNS (23), /* fsqrtd */
251 COSTS_N_INSNS (5), /* imul */
252 COSTS_N_INSNS (5), /* imulX */
253 0, /* imul bit factor */
254 COSTS_N_INSNS (35), /* idiv */
255 COSTS_N_INSNS (35), /* idivX */
256 COSTS_N_INSNS (1), /* movcc/movr */
257 0, /* shift penalty */
258 };
259
260 static const
261 struct processor_costs sparclet_costs = {
262 COSTS_N_INSNS (3), /* int load */
263 COSTS_N_INSNS (3), /* int signed load */
264 COSTS_N_INSNS (1), /* int zeroed load */
265 COSTS_N_INSNS (1), /* float load */
266 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
267 COSTS_N_INSNS (1), /* fadd, fsub */
268 COSTS_N_INSNS (1), /* fcmp */
269 COSTS_N_INSNS (1), /* fmov, fmovr */
270 COSTS_N_INSNS (1), /* fmul */
271 COSTS_N_INSNS (1), /* fdivs */
272 COSTS_N_INSNS (1), /* fdivd */
273 COSTS_N_INSNS (1), /* fsqrts */
274 COSTS_N_INSNS (1), /* fsqrtd */
275 COSTS_N_INSNS (5), /* imul */
276 COSTS_N_INSNS (5), /* imulX */
277 0, /* imul bit factor */
278 COSTS_N_INSNS (5), /* idiv */
279 COSTS_N_INSNS (5), /* idivX */
280 COSTS_N_INSNS (1), /* movcc/movr */
281 0, /* shift penalty */
282 };
283
284 static const
285 struct processor_costs ultrasparc_costs = {
286 COSTS_N_INSNS (2), /* int load */
287 COSTS_N_INSNS (3), /* int signed load */
288 COSTS_N_INSNS (2), /* int zeroed load */
289 COSTS_N_INSNS (2), /* float load */
290 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
291 COSTS_N_INSNS (4), /* fadd, fsub */
292 COSTS_N_INSNS (1), /* fcmp */
293 COSTS_N_INSNS (2), /* fmov, fmovr */
294 COSTS_N_INSNS (4), /* fmul */
295 COSTS_N_INSNS (13), /* fdivs */
296 COSTS_N_INSNS (23), /* fdivd */
297 COSTS_N_INSNS (13), /* fsqrts */
298 COSTS_N_INSNS (23), /* fsqrtd */
299 COSTS_N_INSNS (4), /* imul */
300 COSTS_N_INSNS (4), /* imulX */
301 2, /* imul bit factor */
302 COSTS_N_INSNS (37), /* idiv */
303 COSTS_N_INSNS (68), /* idivX */
304 COSTS_N_INSNS (2), /* movcc/movr */
305 2, /* shift penalty */
306 };
307
308 static const
309 struct processor_costs ultrasparc3_costs = {
310 COSTS_N_INSNS (2), /* int load */
311 COSTS_N_INSNS (3), /* int signed load */
312 COSTS_N_INSNS (3), /* int zeroed load */
313 COSTS_N_INSNS (2), /* float load */
314 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
315 COSTS_N_INSNS (4), /* fadd, fsub */
316 COSTS_N_INSNS (5), /* fcmp */
317 COSTS_N_INSNS (3), /* fmov, fmovr */
318 COSTS_N_INSNS (4), /* fmul */
319 COSTS_N_INSNS (17), /* fdivs */
320 COSTS_N_INSNS (20), /* fdivd */
321 COSTS_N_INSNS (20), /* fsqrts */
322 COSTS_N_INSNS (29), /* fsqrtd */
323 COSTS_N_INSNS (6), /* imul */
324 COSTS_N_INSNS (6), /* imulX */
325 0, /* imul bit factor */
326 COSTS_N_INSNS (40), /* idiv */
327 COSTS_N_INSNS (71), /* idivX */
328 COSTS_N_INSNS (2), /* movcc/movr */
329 0, /* shift penalty */
330 };
331
332 static const
333 struct processor_costs niagara_costs = {
334 COSTS_N_INSNS (3), /* int load */
335 COSTS_N_INSNS (3), /* int signed load */
336 COSTS_N_INSNS (3), /* int zeroed load */
337 COSTS_N_INSNS (9), /* float load */
338 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
339 COSTS_N_INSNS (8), /* fadd, fsub */
340 COSTS_N_INSNS (26), /* fcmp */
341 COSTS_N_INSNS (8), /* fmov, fmovr */
342 COSTS_N_INSNS (29), /* fmul */
343 COSTS_N_INSNS (54), /* fdivs */
344 COSTS_N_INSNS (83), /* fdivd */
345 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
346 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
347 COSTS_N_INSNS (11), /* imul */
348 COSTS_N_INSNS (11), /* imulX */
349 0, /* imul bit factor */
350 COSTS_N_INSNS (72), /* idiv */
351 COSTS_N_INSNS (72), /* idivX */
352 COSTS_N_INSNS (1), /* movcc/movr */
353 0, /* shift penalty */
354 };
355
356 static const
357 struct processor_costs niagara2_costs = {
358 COSTS_N_INSNS (3), /* int load */
359 COSTS_N_INSNS (3), /* int signed load */
360 COSTS_N_INSNS (3), /* int zeroed load */
361 COSTS_N_INSNS (3), /* float load */
362 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
363 COSTS_N_INSNS (6), /* fadd, fsub */
364 COSTS_N_INSNS (6), /* fcmp */
365 COSTS_N_INSNS (6), /* fmov, fmovr */
366 COSTS_N_INSNS (6), /* fmul */
367 COSTS_N_INSNS (19), /* fdivs */
368 COSTS_N_INSNS (33), /* fdivd */
369 COSTS_N_INSNS (19), /* fsqrts */
370 COSTS_N_INSNS (33), /* fsqrtd */
371 COSTS_N_INSNS (5), /* imul */
372 COSTS_N_INSNS (5), /* imulX */
373 0, /* imul bit factor */
374 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
376 COSTS_N_INSNS (1), /* movcc/movr */
377 0, /* shift penalty */
378 };
379
380 static const
381 struct processor_costs niagara3_costs = {
382 COSTS_N_INSNS (3), /* int load */
383 COSTS_N_INSNS (3), /* int signed load */
384 COSTS_N_INSNS (3), /* int zeroed load */
385 COSTS_N_INSNS (3), /* float load */
386 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
387 COSTS_N_INSNS (9), /* fadd, fsub */
388 COSTS_N_INSNS (9), /* fcmp */
389 COSTS_N_INSNS (9), /* fmov, fmovr */
390 COSTS_N_INSNS (9), /* fmul */
391 COSTS_N_INSNS (23), /* fdivs */
392 COSTS_N_INSNS (37), /* fdivd */
393 COSTS_N_INSNS (23), /* fsqrts */
394 COSTS_N_INSNS (37), /* fsqrtd */
395 COSTS_N_INSNS (9), /* imul */
396 COSTS_N_INSNS (9), /* imulX */
397 0, /* imul bit factor */
398 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
399 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
400 COSTS_N_INSNS (1), /* movcc/movr */
401 0, /* shift penalty */
402 };
403
404 static const
405 struct processor_costs niagara4_costs = {
406 COSTS_N_INSNS (5), /* int load */
407 COSTS_N_INSNS (5), /* int signed load */
408 COSTS_N_INSNS (5), /* int zeroed load */
409 COSTS_N_INSNS (5), /* float load */
410 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
411 COSTS_N_INSNS (11), /* fadd, fsub */
412 COSTS_N_INSNS (11), /* fcmp */
413 COSTS_N_INSNS (11), /* fmov, fmovr */
414 COSTS_N_INSNS (11), /* fmul */
415 COSTS_N_INSNS (24), /* fdivs */
416 COSTS_N_INSNS (37), /* fdivd */
417 COSTS_N_INSNS (24), /* fsqrts */
418 COSTS_N_INSNS (37), /* fsqrtd */
419 COSTS_N_INSNS (12), /* imul */
420 COSTS_N_INSNS (12), /* imulX */
421 0, /* imul bit factor */
422 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
423 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
424 COSTS_N_INSNS (1), /* movcc/movr */
425 0, /* shift penalty */
426 };
427
428 static const
429 struct processor_costs niagara7_costs = {
430 COSTS_N_INSNS (5), /* int load */
431 COSTS_N_INSNS (5), /* int signed load */
432 COSTS_N_INSNS (5), /* int zeroed load */
433 COSTS_N_INSNS (5), /* float load */
434 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
435 COSTS_N_INSNS (11), /* fadd, fsub */
436 COSTS_N_INSNS (11), /* fcmp */
437 COSTS_N_INSNS (11), /* fmov, fmovr */
438 COSTS_N_INSNS (11), /* fmul */
439 COSTS_N_INSNS (24), /* fdivs */
440 COSTS_N_INSNS (37), /* fdivd */
441 COSTS_N_INSNS (24), /* fsqrts */
442 COSTS_N_INSNS (37), /* fsqrtd */
443 COSTS_N_INSNS (12), /* imul */
444 COSTS_N_INSNS (12), /* imulX */
445 0, /* imul bit factor */
446 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
447 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
448 COSTS_N_INSNS (1), /* movcc/movr */
449 0, /* shift penalty */
450 };
451
452 static const
453 struct processor_costs m8_costs = {
454 COSTS_N_INSNS (3), /* int load */
455 COSTS_N_INSNS (3), /* int signed load */
456 COSTS_N_INSNS (3), /* int zeroed load */
457 COSTS_N_INSNS (3), /* float load */
458 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
459 COSTS_N_INSNS (9), /* fadd, fsub */
460 COSTS_N_INSNS (9), /* fcmp */
461 COSTS_N_INSNS (9), /* fmov, fmovr */
462 COSTS_N_INSNS (9), /* fmul */
463 COSTS_N_INSNS (26), /* fdivs */
464 COSTS_N_INSNS (30), /* fdivd */
465 COSTS_N_INSNS (33), /* fsqrts */
466 COSTS_N_INSNS (41), /* fsqrtd */
467 COSTS_N_INSNS (12), /* imul */
468 COSTS_N_INSNS (10), /* imulX */
469 0, /* imul bit factor */
470 COSTS_N_INSNS (57), /* udiv/sdiv */
471 COSTS_N_INSNS (30), /* udivx/sdivx */
472 COSTS_N_INSNS (1), /* movcc/movr */
473 0, /* shift penalty */
474 };
475
476 static const struct processor_costs *sparc_costs = &cypress_costs;
477
478 #ifdef HAVE_AS_RELAX_OPTION
479 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
480 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
481 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
482 anything branches between the sethi and the jmp. */
483 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
484 #else
485 #define LEAF_SIBCALL_SLOT_RESERVED_P \
486 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
487 #endif
488
489 /* Vector to say how input registers are mapped to output registers.
490 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
491 eliminate it. You must use -fomit-frame-pointer to get that. */
492 char leaf_reg_remap[] =
493 { 0, 1, 2, 3, 4, 5, 6, 7,
494 -1, -1, -1, -1, -1, -1, 14, -1,
495 -1, -1, -1, -1, -1, -1, -1, -1,
496 8, 9, 10, 11, 12, 13, -1, 15,
497
498 32, 33, 34, 35, 36, 37, 38, 39,
499 40, 41, 42, 43, 44, 45, 46, 47,
500 48, 49, 50, 51, 52, 53, 54, 55,
501 56, 57, 58, 59, 60, 61, 62, 63,
502 64, 65, 66, 67, 68, 69, 70, 71,
503 72, 73, 74, 75, 76, 77, 78, 79,
504 80, 81, 82, 83, 84, 85, 86, 87,
505 88, 89, 90, 91, 92, 93, 94, 95,
506 96, 97, 98, 99, 100, 101, 102};
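/* For instance, reading the table above: in a leaf function the incoming
   argument register %i0 (hard register 24) is used directly as %o0 (hard
   register 8), so leaf_reg_remap[24] is 8, and %i7 (31) maps to %o7 (15),
   while the frame pointer %i6 (30) is left unmapped (-1).  */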
507
508 /* Vector, indexed by hard register number, which contains 1
509 for a register that is allowable in a candidate for leaf
510 function treatment. */
511 char sparc_leaf_regs[] =
512 { 1, 1, 1, 1, 1, 1, 1, 1,
513 0, 0, 0, 0, 0, 0, 1, 0,
514 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 1, 1, 1, 1, 1, 0, 1,
516 1, 1, 1, 1, 1, 1, 1, 1,
517 1, 1, 1, 1, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1};
525
526 struct GTY(()) machine_function
527 {
528 /* Size of the frame of the function. */
529 HOST_WIDE_INT frame_size;
530
531 /* Size of the frame of the function minus the register window save area
532 and the outgoing argument area. */
533 HOST_WIDE_INT apparent_frame_size;
534
535 /* Register we pretend the frame pointer is allocated to. Normally, this
536 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
537 record "offset" separately as it may be too big for (reg + disp). */
538 rtx frame_base_reg;
539 HOST_WIDE_INT frame_base_offset;
540
541 /* Number of global or FP registers to be saved (as 4-byte quantities). */
542 int n_global_fp_regs;
543
544 /* True if the current function is leaf and uses only leaf regs,
545 so that the SPARC leaf function optimization can be applied.
546 Private version of crtl->uses_only_leaf_regs, see
547 sparc_expand_prologue for the rationale. */
548 int leaf_function_p;
549
550 /* True if the prologue saves local or in registers. */
551 bool save_local_in_regs_p;
552
553 /* True if the data calculated by sparc_expand_prologue are valid. */
554 bool prologue_data_valid_p;
555 };
556
557 #define sparc_frame_size cfun->machine->frame_size
558 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
559 #define sparc_frame_base_reg cfun->machine->frame_base_reg
560 #define sparc_frame_base_offset cfun->machine->frame_base_offset
561 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
562 #define sparc_leaf_function_p cfun->machine->leaf_function_p
563 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
564 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
565
566 /* 1 if the next opcode is to be specially indented. */
567 int sparc_indent_opcode = 0;
568
569 static void sparc_option_override (void);
570 static void sparc_init_modes (void);
571 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
572 const_tree, bool, bool, int *, int *);
573
574 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
575 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
576
577 static void sparc_emit_set_const32 (rtx, rtx);
578 static void sparc_emit_set_const64 (rtx, rtx);
579 static void sparc_output_addr_vec (rtx);
580 static void sparc_output_addr_diff_vec (rtx);
581 static void sparc_output_deferred_case_vectors (void);
582 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
583 static bool sparc_legitimate_constant_p (machine_mode, rtx);
584 static rtx sparc_builtin_saveregs (void);
585 static int epilogue_renumber (rtx *, int);
586 static bool sparc_assemble_integer (rtx, unsigned int, int);
587 static int set_extends (rtx_insn *);
588 static void sparc_asm_function_prologue (FILE *);
589 static void sparc_asm_function_epilogue (FILE *);
590 #ifdef TARGET_SOLARIS
591 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
592 tree) ATTRIBUTE_UNUSED;
593 #endif
594 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
595 static int sparc_issue_rate (void);
596 static void sparc_sched_init (FILE *, int, int);
597 static int sparc_use_sched_lookahead (void);
598
599 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
600 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
601 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
602 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
603 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
604
605 static bool sparc_function_ok_for_sibcall (tree, tree);
606 static void sparc_init_libfuncs (void);
607 static void sparc_init_builtins (void);
608 static void sparc_fpu_init_builtins (void);
609 static void sparc_vis_init_builtins (void);
610 static tree sparc_builtin_decl (unsigned, bool);
611 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
612 static tree sparc_fold_builtin (tree, int, tree *, bool);
613 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
614 HOST_WIDE_INT, tree);
615 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
616 HOST_WIDE_INT, const_tree);
617 static struct machine_function * sparc_init_machine_status (void);
618 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
619 static rtx sparc_tls_get_addr (void);
620 static rtx sparc_tls_got (void);
621 static int sparc_register_move_cost (machine_mode,
622 reg_class_t, reg_class_t);
623 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
624 static rtx sparc_function_value (const_tree, const_tree, bool);
625 static rtx sparc_libcall_value (machine_mode, const_rtx);
626 static bool sparc_function_value_regno_p (const unsigned int);
627 static rtx sparc_struct_value_rtx (tree, int);
628 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
629 int *, const_tree, int);
630 static bool sparc_return_in_memory (const_tree, const_tree);
631 static bool sparc_strict_argument_naming (cumulative_args_t);
632 static void sparc_va_start (tree, rtx);
633 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
634 static bool sparc_vector_mode_supported_p (machine_mode);
635 static bool sparc_tls_referenced_p (rtx);
636 static rtx sparc_legitimize_tls_address (rtx);
637 static rtx sparc_legitimize_pic_address (rtx, rtx);
638 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
639 static rtx sparc_delegitimize_address (rtx);
640 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
641 static bool sparc_pass_by_reference (cumulative_args_t,
642 machine_mode, const_tree, bool);
643 static void sparc_function_arg_advance (cumulative_args_t,
644 machine_mode, const_tree, bool);
645 static rtx sparc_function_arg_1 (cumulative_args_t,
646 machine_mode, const_tree, bool, bool);
647 static rtx sparc_function_arg (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_incoming_arg (cumulative_args_t,
650 machine_mode, const_tree, bool);
651 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
652 static unsigned int sparc_function_arg_boundary (machine_mode,
653 const_tree);
654 static int sparc_arg_partial_bytes (cumulative_args_t,
655 machine_mode, tree, bool);
656 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
657 static void sparc_file_end (void);
658 static bool sparc_frame_pointer_required (void);
659 static bool sparc_can_eliminate (const int, const int);
660 static rtx sparc_builtin_setjmp_frame_value (void);
661 static void sparc_conditional_register_usage (void);
662 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
663 static const char *sparc_mangle_type (const_tree);
664 #endif
665 static void sparc_trampoline_init (rtx, tree, rtx);
666 static machine_mode sparc_preferred_simd_mode (scalar_mode);
667 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
668 static bool sparc_lra_p (void);
669 static bool sparc_print_operand_punct_valid_p (unsigned char);
670 static void sparc_print_operand (FILE *, rtx, int);
671 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
672 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
673 machine_mode,
674 secondary_reload_info *);
675 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
676 reg_class_t);
677 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
678 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
679 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
680 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
681 static unsigned int sparc_min_arithmetic_precision (void);
682 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
683 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
684 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
685 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
686 reg_class_t);
687 \f
688 #ifdef SUBTARGET_ATTRIBUTE_TABLE
689 /* Table of valid machine attributes. */
690 static const struct attribute_spec sparc_attribute_table[] =
691 {
692 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
693 do_diagnostic } */
694 SUBTARGET_ATTRIBUTE_TABLE,
695 { NULL, 0, 0, false, false, false, NULL, false }
696 };
697 #endif
698 \f
699 /* Option handling. */
700
701 /* Parsed value. */
702 enum cmodel sparc_cmodel;
703
704 char sparc_hard_reg_printed[8];
705
706 /* Initialize the GCC target structure. */
707
708 /* The default is to use .half rather than .short for aligned HI objects. */
709 #undef TARGET_ASM_ALIGNED_HI_OP
710 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
711
712 #undef TARGET_ASM_UNALIGNED_HI_OP
713 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
714 #undef TARGET_ASM_UNALIGNED_SI_OP
715 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
716 #undef TARGET_ASM_UNALIGNED_DI_OP
717 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
718
719 /* The target hook has to handle DI-mode values. */
720 #undef TARGET_ASM_INTEGER
721 #define TARGET_ASM_INTEGER sparc_assemble_integer
722
723 #undef TARGET_ASM_FUNCTION_PROLOGUE
724 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
725 #undef TARGET_ASM_FUNCTION_EPILOGUE
726 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
727
728 #undef TARGET_SCHED_ADJUST_COST
729 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
730 #undef TARGET_SCHED_ISSUE_RATE
731 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
732 #undef TARGET_SCHED_INIT
733 #define TARGET_SCHED_INIT sparc_sched_init
734 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
735 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
736
737 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
738 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
739
740 #undef TARGET_INIT_LIBFUNCS
741 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
742
743 #undef TARGET_LEGITIMIZE_ADDRESS
744 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
745 #undef TARGET_DELEGITIMIZE_ADDRESS
746 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
747 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
748 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
749
750 #undef TARGET_INIT_BUILTINS
751 #define TARGET_INIT_BUILTINS sparc_init_builtins
752 #undef TARGET_BUILTIN_DECL
753 #define TARGET_BUILTIN_DECL sparc_builtin_decl
754 #undef TARGET_EXPAND_BUILTIN
755 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
756 #undef TARGET_FOLD_BUILTIN
757 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
758
759 #if TARGET_TLS
760 #undef TARGET_HAVE_TLS
761 #define TARGET_HAVE_TLS true
762 #endif
763
764 #undef TARGET_CANNOT_FORCE_CONST_MEM
765 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
766
767 #undef TARGET_ASM_OUTPUT_MI_THUNK
768 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
769 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
770 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
771
772 #undef TARGET_RTX_COSTS
773 #define TARGET_RTX_COSTS sparc_rtx_costs
774 #undef TARGET_ADDRESS_COST
775 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
776 #undef TARGET_REGISTER_MOVE_COST
777 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
778
779 #undef TARGET_PROMOTE_FUNCTION_MODE
780 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
781
782 #undef TARGET_FUNCTION_VALUE
783 #define TARGET_FUNCTION_VALUE sparc_function_value
784 #undef TARGET_LIBCALL_VALUE
785 #define TARGET_LIBCALL_VALUE sparc_libcall_value
786 #undef TARGET_FUNCTION_VALUE_REGNO_P
787 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
788
789 #undef TARGET_STRUCT_VALUE_RTX
790 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
791 #undef TARGET_RETURN_IN_MEMORY
792 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
793 #undef TARGET_MUST_PASS_IN_STACK
794 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
795 #undef TARGET_PASS_BY_REFERENCE
796 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
797 #undef TARGET_ARG_PARTIAL_BYTES
798 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
799 #undef TARGET_FUNCTION_ARG_ADVANCE
800 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
801 #undef TARGET_FUNCTION_ARG
802 #define TARGET_FUNCTION_ARG sparc_function_arg
803 #undef TARGET_FUNCTION_INCOMING_ARG
804 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
805 #undef TARGET_FUNCTION_ARG_PADDING
806 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
807 #undef TARGET_FUNCTION_ARG_BOUNDARY
808 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
809
810 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
811 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
812 #undef TARGET_STRICT_ARGUMENT_NAMING
813 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
814
815 #undef TARGET_EXPAND_BUILTIN_VA_START
816 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
817 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
818 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
819
820 #undef TARGET_VECTOR_MODE_SUPPORTED_P
821 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
822
823 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
824 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
825
826 #ifdef SUBTARGET_INSERT_ATTRIBUTES
827 #undef TARGET_INSERT_ATTRIBUTES
828 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
829 #endif
830
831 #ifdef SUBTARGET_ATTRIBUTE_TABLE
832 #undef TARGET_ATTRIBUTE_TABLE
833 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
834 #endif
835
836 #undef TARGET_OPTION_OVERRIDE
837 #define TARGET_OPTION_OVERRIDE sparc_option_override
838
839 #ifdef TARGET_THREAD_SSP_OFFSET
840 #undef TARGET_STACK_PROTECT_GUARD
841 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
842 #endif
843
844 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
845 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
846 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
847 #endif
848
849 #undef TARGET_ASM_FILE_END
850 #define TARGET_ASM_FILE_END sparc_file_end
851
852 #undef TARGET_FRAME_POINTER_REQUIRED
853 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
854
855 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
856 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
857
858 #undef TARGET_CAN_ELIMINATE
859 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
860
861 #undef TARGET_PREFERRED_RELOAD_CLASS
862 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
863
864 #undef TARGET_SECONDARY_RELOAD
865 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
866 #undef TARGET_SECONDARY_MEMORY_NEEDED
867 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
868 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
869 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
870
871 #undef TARGET_CONDITIONAL_REGISTER_USAGE
872 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
873
874 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
875 #undef TARGET_MANGLE_TYPE
876 #define TARGET_MANGLE_TYPE sparc_mangle_type
877 #endif
878
879 #undef TARGET_LRA_P
880 #define TARGET_LRA_P sparc_lra_p
881
882 #undef TARGET_LEGITIMATE_ADDRESS_P
883 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
884
885 #undef TARGET_LEGITIMATE_CONSTANT_P
886 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
887
888 #undef TARGET_TRAMPOLINE_INIT
889 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
890
891 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
892 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
893 #undef TARGET_PRINT_OPERAND
894 #define TARGET_PRINT_OPERAND sparc_print_operand
895 #undef TARGET_PRINT_OPERAND_ADDRESS
896 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
897
898 /* The value stored by LDSTUB. */
899 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
900 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
901
902 #undef TARGET_CSTORE_MODE
903 #define TARGET_CSTORE_MODE sparc_cstore_mode
904
905 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
906 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
907
908 #undef TARGET_FIXED_CONDITION_CODE_REGS
909 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
910
911 #undef TARGET_MIN_ARITHMETIC_PRECISION
912 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
913
914 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
915 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
916
917 #undef TARGET_HARD_REGNO_NREGS
918 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
919 #undef TARGET_HARD_REGNO_MODE_OK
920 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
921
922 #undef TARGET_MODES_TIEABLE_P
923 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
924
925 #undef TARGET_CAN_CHANGE_MODE_CLASS
926 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
927
928 struct gcc_target targetm = TARGET_INITIALIZER;
929
930 /* Return the memory reference contained in X if any, zero otherwise. */
931
932 static rtx
933 mem_ref (rtx x)
934 {
935 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
936 x = XEXP (x, 0);
937
938 if (MEM_P (x))
939 return x;
940
941 return NULL_RTX;
942 }
943
944 /* We use a machine specific pass to enable workarounds for errata.
945
946 We need to have the (essentially) final form of the insn stream in order
947 to properly detect the various hazards. Therefore, this machine specific
948 pass runs as late as possible. */
949
950 /* True if INSN is a md pattern or asm statement. */
951 #define USEFUL_INSN_P(INSN) \
952 (NONDEBUG_INSN_P (INSN) \
953 && GET_CODE (PATTERN (INSN)) != USE \
954 && GET_CODE (PATTERN (INSN)) != CLOBBER)
955
956 static unsigned int
957 sparc_do_work_around_errata (void)
958 {
959 rtx_insn *insn, *next;
960
961 /* Force all instructions to be split into their final form. */
962 split_all_insns_noflow ();
963
964 /* Now look for specific patterns in the insn stream. */
965 for (insn = get_insns (); insn; insn = next)
966 {
967 bool insert_nop = false;
968 rtx set;
969
970 /* Look into the instruction in a delay slot. */
971 if (NONJUMP_INSN_P (insn))
972 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
973 insn = seq->insn (1);
974
975 /* Look for either of these two sequences:
976
977 Sequence A:
978 1. store of word size or less (e.g. st / stb / sth / stf)
979 2. any single instruction that is not a load or store
980 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
981
982 Sequence B:
983 1. store of double word size (e.g. std / stdf)
984 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
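      /* A hypothetical instance of Sequence B and the fix applied below:

	   std  %o0, [%sp+96]     ! 1. double-word store
	   nop                    !    inserted by this pass
	   st   %g1, [%sp+104]    ! 2. back-to-back store, now separated  */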
985 if (sparc_fix_b2bst
986 && NONJUMP_INSN_P (insn)
987 && (set = single_set (insn)) != NULL_RTX
988 && MEM_P (SET_DEST (set)))
989 {
990 /* Sequence B begins with a double-word store. */
991 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
992 rtx_insn *after;
993 int i;
994
995 next = next_active_insn (insn);
996 if (!next)
997 break;
998
999 for (after = next, i = 0; i < 2; i++)
1000 {
1001 /* Skip empty assembly statements. */
1002 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1003 || (USEFUL_INSN_P (after)
1004 && (asm_noperands (PATTERN (after)) >= 0)
1005 && !strcmp (decode_asm_operands (PATTERN (after),
1006 NULL, NULL, NULL,
1007 NULL, NULL), "")))
1008 after = next_active_insn (after);
1009 if (!after)
1010 break;
1011
1012 /* If the insn is a branch, then it cannot be problematic. */
1013 if (!NONJUMP_INSN_P (after)
1014 || GET_CODE (PATTERN (after)) == SEQUENCE)
1015 break;
1016
1017 /* Sequence B is only two instructions long. */
1018 if (seq_b)
1019 {
1020 /* Add NOP if followed by a store. */
1021 if ((set = single_set (after)) != NULL_RTX
1022 && MEM_P (SET_DEST (set)))
1023 insert_nop = true;
1024
1025 /* Otherwise it is ok. */
1026 break;
1027 }
1028
1029 /* If the second instruction is a load or a store,
1030 then the sequence cannot be problematic. */
1031 if (i == 0)
1032 {
1033 if (((set = single_set (after)) != NULL_RTX)
1034 && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
1035 break;
1036
1037 after = next_active_insn (after);
1038 if (!after)
1039 break;
1040 }
1041
1042 /* Add NOP if third instruction is a store. */
1043 if (i == 1
1044 && ((set = single_set (after)) != NULL_RTX)
1045 && MEM_P (SET_DEST (set)))
1046 insert_nop = true;
1047 }
1048 }
1049 else
1050 /* Look for a single-word load into an odd-numbered FP register. */
1051 if (sparc_fix_at697f
1052 && NONJUMP_INSN_P (insn)
1053 && (set = single_set (insn)) != NULL_RTX
1054 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1055 && MEM_P (SET_SRC (set))
1056 && REG_P (SET_DEST (set))
1057 && REGNO (SET_DEST (set)) > 31
1058 && REGNO (SET_DEST (set)) % 2 != 0)
1059 {
1060 /* The wrong dependency is on the enclosing double register. */
1061 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1062 unsigned int src1, src2, dest;
1063 int code;
1064
1065 next = next_active_insn (insn);
1066 if (!next)
1067 break;
1068 /* If the insn is a branch, then it cannot be problematic. */
1069 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1070 continue;
1071
1072 extract_insn (next);
1073 code = INSN_CODE (next);
1074
1075 switch (code)
1076 {
1077 case CODE_FOR_adddf3:
1078 case CODE_FOR_subdf3:
1079 case CODE_FOR_muldf3:
1080 case CODE_FOR_divdf3:
1081 dest = REGNO (recog_data.operand[0]);
1082 src1 = REGNO (recog_data.operand[1]);
1083 src2 = REGNO (recog_data.operand[2]);
1084 if (src1 != src2)
1085 {
1086 /* Case [1-4]:
1087 ld [address], %fx+1
1088 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1089 if ((src1 == x || src2 == x)
1090 && (dest == src1 || dest == src2))
1091 insert_nop = true;
1092 }
1093 else
1094 {
1095 /* Case 5:
1096 ld [address], %fx+1
1097 FPOPd %fx, %fx, %fx */
1098 if (src1 == x
1099 && dest == src1
1100 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1101 insert_nop = true;
1102 }
1103 break;
1104
1105 case CODE_FOR_sqrtdf2:
1106 dest = REGNO (recog_data.operand[0]);
1107 src1 = REGNO (recog_data.operand[1]);
1108 /* Case 6:
1109 ld [address], %fx+1
1110 fsqrtd %fx, %fx */
1111 if (src1 == x && dest == src1)
1112 insert_nop = true;
1113 break;
1114
1115 default:
1116 break;
1117 }
1118 }
1119
1120 /* Look for a single-word load into an integer register. */
1121 else if (sparc_fix_ut699
1122 && NONJUMP_INSN_P (insn)
1123 && (set = single_set (insn)) != NULL_RTX
1124 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1125 && mem_ref (SET_SRC (set)) != NULL_RTX
1126 && REG_P (SET_DEST (set))
1127 && REGNO (SET_DEST (set)) < 32)
1128 {
1129 /* There is no problem if the second memory access has a data
1130 dependency on the first single-cycle load. */
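      /* Hypothetical example: after "ld [%o0], %g1", a following
	 "ld [%g1], %g2" depends on %g1 and needs no fix, whereas an
	 independent "ld [%o1], %g2" gets a nop inserted before it.  */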
1131 rtx x = SET_DEST (set);
1132
1133 next = next_active_insn (insn);
1134 if (!next)
1135 break;
1136 /* If the insn is a branch, then it cannot be problematic. */
1137 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1138 continue;
1139
1140 /* Look for a second memory access to/from an integer register. */
1141 if ((set = single_set (next)) != NULL_RTX)
1142 {
1143 rtx src = SET_SRC (set);
1144 rtx dest = SET_DEST (set);
1145 rtx mem;
1146
1147 /* LDD is affected. */
1148 if ((mem = mem_ref (src)) != NULL_RTX
1149 && REG_P (dest)
1150 && REGNO (dest) < 32
1151 && !reg_mentioned_p (x, XEXP (mem, 0)))
1152 insert_nop = true;
1153
1154 /* STD is *not* affected. */
1155 else if (MEM_P (dest)
1156 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1157 && (src == CONST0_RTX (GET_MODE (dest))
1158 || (REG_P (src)
1159 && REGNO (src) < 32
1160 && REGNO (src) != REGNO (x)))
1161 && !reg_mentioned_p (x, XEXP (dest, 0)))
1162 insert_nop = true;
1163 }
1164 }
1165
1166 /* Look for a single-word load/operation into an FP register. */
1167 else if (sparc_fix_ut699
1168 && NONJUMP_INSN_P (insn)
1169 && (set = single_set (insn)) != NULL_RTX
1170 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1171 && REG_P (SET_DEST (set))
1172 && REGNO (SET_DEST (set)) > 31)
1173 {
1174 /* Number of instructions in the problematic window. */
1175 const int n_insns = 4;
1176 /* The problematic combination is with the sibling FP register. */
1177 const unsigned int x = REGNO (SET_DEST (set));
1178 const unsigned int y = x ^ 1;
1179 rtx_insn *after;
1180 int i;
1181
1182 next = next_active_insn (insn);
1183 if (!next)
1184 break;
1185 /* If the insn is a branch, then it cannot be problematic. */
1186 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1187 continue;
1188
1189 /* Look for a second load/operation into the sibling FP register. */
1190 if (!((set = single_set (next)) != NULL_RTX
1191 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1192 && REG_P (SET_DEST (set))
1193 && REGNO (SET_DEST (set)) == y))
1194 continue;
1195
1196 /* Look for a (possible) store from the FP register in the next N
1197 instructions, but bail out if it is again modified or if there
1198 is a store from the sibling FP register before this store. */
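	  /* Hypothetical instance of the hazard handled here:

	       ld    [%o0], %f4        ! single-word FP load (x)
	       ld    [%o1], %f5        ! load into the sibling register (y)
	       fadds %f6, %f7, %f8     ! unrelated insn inside the window
	       st    %f4, [%o2]        ! single-word store from x, so a nop
					 is emitted after the first load  */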
1199 for (after = next, i = 0; i < n_insns; i++)
1200 {
1201 bool branch_p;
1202
1203 after = next_active_insn (after);
1204 if (!after)
1205 break;
1206
1207 /* This is a branch with an empty delay slot. */
1208 if (!NONJUMP_INSN_P (after))
1209 {
1210 if (++i == n_insns)
1211 break;
1212 branch_p = true;
1213 after = NULL;
1214 }
1215 /* This is a branch with a filled delay slot. */
1216 else if (rtx_sequence *seq =
1217 dyn_cast <rtx_sequence *> (PATTERN (after)))
1218 {
1219 if (++i == n_insns)
1220 break;
1221 branch_p = true;
1222 after = seq->insn (1);
1223 }
1224 /* This is a regular instruction. */
1225 else
1226 branch_p = false;
1227
1228 if (after && (set = single_set (after)) != NULL_RTX)
1229 {
1230 const rtx src = SET_SRC (set);
1231 const rtx dest = SET_DEST (set);
1232 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1233
1234 /* If the FP register is again modified before the store,
1235 then the store isn't affected. */
1236 if (REG_P (dest)
1237 && (REGNO (dest) == x
1238 || (REGNO (dest) == y && size == 8)))
1239 break;
1240
1241 if (MEM_P (dest) && REG_P (src))
1242 {
1243 /* If there is a store from the sibling FP register
1244 before the store, then the store is not affected. */
1245 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1246 break;
1247
1248 /* Otherwise, the store is affected. */
1249 if (REGNO (src) == x && size == 4)
1250 {
1251 insert_nop = true;
1252 break;
1253 }
1254 }
1255 }
1256
1257 /* If we have a branch in the first M instructions, then we
1258 cannot see the (M+2)th instruction so we play safe. */
1259 if (branch_p && i <= (n_insns - 2))
1260 {
1261 insert_nop = true;
1262 break;
1263 }
1264 }
1265 }
1266
1267 else
1268 next = NEXT_INSN (insn);
1269
1270 if (insert_nop)
1271 emit_insn_before (gen_nop (), next);
1272 }
1273
1274 return 0;
1275 }
1276
1277 namespace {
1278
1279 const pass_data pass_data_work_around_errata =
1280 {
1281 RTL_PASS, /* type */
1282 "errata", /* name */
1283 OPTGROUP_NONE, /* optinfo_flags */
1284 TV_MACH_DEP, /* tv_id */
1285 0, /* properties_required */
1286 0, /* properties_provided */
1287 0, /* properties_destroyed */
1288 0, /* todo_flags_start */
1289 0, /* todo_flags_finish */
1290 };
1291
1292 class pass_work_around_errata : public rtl_opt_pass
1293 {
1294 public:
1295 pass_work_around_errata(gcc::context *ctxt)
1296 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1297 {}
1298
1299 /* opt_pass methods: */
1300 virtual bool gate (function *)
1301 {
1302 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
1303 }
1304
1305 virtual unsigned int execute (function *)
1306 {
1307 return sparc_do_work_around_errata ();
1308 }
1309
1310 }; // class pass_work_around_errata
1311
1312 } // anon namespace
1313
1314 rtl_opt_pass *
1315 make_pass_work_around_errata (gcc::context *ctxt)
1316 {
1317 return new pass_work_around_errata (ctxt);
1318 }
1319
1320 /* Helpers for TARGET_DEBUG_OPTIONS. */
1321 static void
1322 dump_target_flag_bits (const int flags)
1323 {
1324 if (flags & MASK_64BIT)
1325 fprintf (stderr, "64BIT ");
1326 if (flags & MASK_APP_REGS)
1327 fprintf (stderr, "APP_REGS ");
1328 if (flags & MASK_FASTER_STRUCTS)
1329 fprintf (stderr, "FASTER_STRUCTS ");
1330 if (flags & MASK_FLAT)
1331 fprintf (stderr, "FLAT ");
1332 if (flags & MASK_FMAF)
1333 fprintf (stderr, "FMAF ");
1334 if (flags & MASK_FSMULD)
1335 fprintf (stderr, "FSMULD ");
1336 if (flags & MASK_FPU)
1337 fprintf (stderr, "FPU ");
1338 if (flags & MASK_HARD_QUAD)
1339 fprintf (stderr, "HARD_QUAD ");
1340 if (flags & MASK_POPC)
1341 fprintf (stderr, "POPC ");
1342 if (flags & MASK_PTR64)
1343 fprintf (stderr, "PTR64 ");
1344 if (flags & MASK_STACK_BIAS)
1345 fprintf (stderr, "STACK_BIAS ");
1346 if (flags & MASK_UNALIGNED_DOUBLES)
1347 fprintf (stderr, "UNALIGNED_DOUBLES ");
1348 if (flags & MASK_V8PLUS)
1349 fprintf (stderr, "V8PLUS ");
1350 if (flags & MASK_VIS)
1351 fprintf (stderr, "VIS ");
1352 if (flags & MASK_VIS2)
1353 fprintf (stderr, "VIS2 ");
1354 if (flags & MASK_VIS3)
1355 fprintf (stderr, "VIS3 ");
1356 if (flags & MASK_VIS4)
1357 fprintf (stderr, "VIS4 ");
1358 if (flags & MASK_VIS4B)
1359 fprintf (stderr, "VIS4B ");
1360 if (flags & MASK_CBCOND)
1361 fprintf (stderr, "CBCOND ");
1362 if (flags & MASK_DEPRECATED_V8_INSNS)
1363 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1364 if (flags & MASK_SPARCLET)
1365 fprintf (stderr, "SPARCLET ");
1366 if (flags & MASK_SPARCLITE)
1367 fprintf (stderr, "SPARCLITE ");
1368 if (flags & MASK_V8)
1369 fprintf (stderr, "V8 ");
1370 if (flags & MASK_V9)
1371 fprintf (stderr, "V9 ");
1372 }
1373
1374 static void
1375 dump_target_flags (const char *prefix, const int flags)
1376 {
1377 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1378 dump_target_flag_bits (flags);
1379 fprintf (stderr, "]\n");
1380 }
1381
1382 /* Validate and override various options, and do some machine dependent
1383 initialization. */
1384
1385 static void
1386 sparc_option_override (void)
1387 {
1388 static struct code_model {
1389 const char *const name;
1390 const enum cmodel value;
1391 } const cmodels[] = {
1392 { "32", CM_32 },
1393 { "medlow", CM_MEDLOW },
1394 { "medmid", CM_MEDMID },
1395 { "medany", CM_MEDANY },
1396 { "embmedany", CM_EMBMEDANY },
1397 { NULL, (enum cmodel) 0 }
1398 };
1399 const struct code_model *cmodel;
1400 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1401 static struct cpu_default {
1402 const int cpu;
1403 const enum processor_type processor;
1404 } const cpu_default[] = {
1405 /* There must be one entry here for each TARGET_CPU value. */
1406 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1407 { TARGET_CPU_v8, PROCESSOR_V8 },
1408 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1409 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1410 { TARGET_CPU_leon, PROCESSOR_LEON },
1411 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1412 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1413 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1414 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1415 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1416 { TARGET_CPU_v9, PROCESSOR_V9 },
1417 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1418 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1419 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1420 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1421 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1422 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1423 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1424 { TARGET_CPU_m8, PROCESSOR_M8 },
1425 { -1, PROCESSOR_V7 }
1426 };
1427 const struct cpu_default *def;
1428 /* Table of values for -m{cpu,tune}=. This must match the order of
1429 the enum processor_type in sparc-opts.h. */
1430 static struct cpu_table {
1431 const char *const name;
1432 const int disable;
1433 const int enable;
1434 } const cpu_table[] = {
1435 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1436 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1437 { "v8", MASK_ISA, MASK_V8 },
1438 /* TI TMS390Z55 supersparc */
1439 { "supersparc", MASK_ISA, MASK_V8 },
1440 { "hypersparc", MASK_ISA, MASK_V8 },
1441 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1442 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1443 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1444 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1445 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1446 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1447 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1448 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1449 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1450 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1451 /* TEMIC sparclet */
1452 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1453 { "v9", MASK_ISA, MASK_V9 },
1454 /* UltraSPARC I, II, IIi */
1455 { "ultrasparc", MASK_ISA,
1456 /* Although insns using %y are deprecated, it is a clear win. */
1457 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1458 /* UltraSPARC III */
1459 /* ??? Check if %y issue still holds true. */
1460 { "ultrasparc3", MASK_ISA,
1461 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1462 /* UltraSPARC T1 */
1463 { "niagara", MASK_ISA,
1464 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1465 /* UltraSPARC T2 */
1466 { "niagara2", MASK_ISA,
1467 MASK_V9|MASK_POPC|MASK_VIS2 },
1468 /* UltraSPARC T3 */
1469 { "niagara3", MASK_ISA,
1470 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1471 /* UltraSPARC T4 */
1472 { "niagara4", MASK_ISA,
1473 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1474 /* UltraSPARC M7 */
1475 { "niagara7", MASK_ISA,
1476 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1477 /* UltraSPARC M8 */
1478 { "m8", MASK_ISA,
1479 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1480 };
1481 const struct cpu_table *cpu;
1482 unsigned int i;
1483
1484 if (sparc_debug_string != NULL)
1485 {
1486 const char *q;
1487 char *p;
1488
1489 p = ASTRDUP (sparc_debug_string);
1490 while ((q = strtok (p, ",")) != NULL)
1491 {
1492 bool invert;
1493 int mask;
1494
1495 p = NULL;
1496 if (*q == '!')
1497 {
1498 invert = true;
1499 q++;
1500 }
1501 else
1502 invert = false;
1503
1504 if (! strcmp (q, "all"))
1505 mask = MASK_DEBUG_ALL;
1506 else if (! strcmp (q, "options"))
1507 mask = MASK_DEBUG_OPTIONS;
1508 else
1509 error ("unknown -mdebug-%s switch", q);
1510
1511 if (invert)
1512 sparc_debug &= ~mask;
1513 else
1514 sparc_debug |= mask;
1515 }
1516 }
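  /* For example, if sparc_debug_string is "all,!options" (a hypothetical
     setting), the loop above first sets MASK_DEBUG_ALL in sparc_debug and
     then clears MASK_DEBUG_OPTIONS.  */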
1517
1518 /* Enable the FsMULd instruction by default if not explicitly specified by
1519 the user. It may be later disabled by the CPU (explicitly or not). */
1520 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1521 target_flags |= MASK_FSMULD;
1522
1523 if (TARGET_DEBUG_OPTIONS)
1524 {
1525 dump_target_flags ("Initial target_flags", target_flags);
1526 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1527 }
1528
1529 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1530 SUBTARGET_OVERRIDE_OPTIONS;
1531 #endif
1532
1533 #ifndef SPARC_BI_ARCH
1534 /* Check for unsupported architecture size. */
1535 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1536 error ("%s is not supported by this configuration",
1537 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1538 #endif
1539
1540 /* We force all 64-bit archs to use 128-bit long double. */
1541 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1542 {
1543 error ("-mlong-double-64 not allowed with -m64");
1544 target_flags |= MASK_LONG_DOUBLE_128;
1545 }
1546
1547 /* Code model selection. */
1548 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1549
1550 #ifdef SPARC_BI_ARCH
1551 if (TARGET_ARCH32)
1552 sparc_cmodel = CM_32;
1553 #endif
1554
1555 if (sparc_cmodel_string != NULL)
1556 {
1557 if (TARGET_ARCH64)
1558 {
1559 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1560 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1561 break;
1562 if (cmodel->name == NULL)
1563 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1564 else
1565 sparc_cmodel = cmodel->value;
1566 }
1567 else
1568 error ("-mcmodel= is not supported on 32-bit systems");
1569 }
1570
1571 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1572 for (i = 8; i < 16; i++)
1573 if (!call_used_regs [i])
1574 {
1575 error ("-fcall-saved-REG is not supported for out registers");
1576 call_used_regs [i] = 1;
1577 }
1578
1579 /* Set the default CPU if no -mcpu option was specified. */
1580 if (!global_options_set.x_sparc_cpu_and_features)
1581 {
1582 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1583 if (def->cpu == TARGET_CPU_DEFAULT)
1584 break;
1585 gcc_assert (def->cpu != -1);
1586 sparc_cpu_and_features = def->processor;
1587 }
1588
1589 /* Set the default CPU if no -mtune option was specified. */
1590 if (!global_options_set.x_sparc_cpu)
1591 sparc_cpu = sparc_cpu_and_features;
1592
1593 cpu = &cpu_table[(int) sparc_cpu_and_features];
1594
1595 if (TARGET_DEBUG_OPTIONS)
1596 {
1597 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1598 dump_target_flags ("cpu->disable", cpu->disable);
1599 dump_target_flags ("cpu->enable", cpu->enable);
1600 }
1601
1602 target_flags &= ~cpu->disable;
1603 target_flags |= (cpu->enable
1604 #ifndef HAVE_AS_FMAF_HPC_VIS3
1605 & ~(MASK_FMAF | MASK_VIS3)
1606 #endif
1607 #ifndef HAVE_AS_SPARC4
1608 & ~MASK_CBCOND
1609 #endif
1610 #ifndef HAVE_AS_SPARC5_VIS4
1611 & ~(MASK_VIS4 | MASK_SUBXC)
1612 #endif
1613 #ifndef HAVE_AS_SPARC6
1614 & ~(MASK_VIS4B)
1615 #endif
1616 #ifndef HAVE_AS_LEON
1617 & ~(MASK_LEON | MASK_LEON3)
1618 #endif
1619 & ~(target_flags_explicit & MASK_FEATURES)
1620 );
1621
1622 /* -mvis2 implies -mvis. */
1623 if (TARGET_VIS2)
1624 target_flags |= MASK_VIS;
1625
1626 /* -mvis3 implies -mvis2 and -mvis. */
1627 if (TARGET_VIS3)
1628 target_flags |= MASK_VIS2 | MASK_VIS;
1629
1630 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1631 if (TARGET_VIS4)
1632 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1633
1634 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1635 if (TARGET_VIS4B)
1636 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1637
1638 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1639 FPU is disabled. */
1640 if (!TARGET_FPU)
1641 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1642 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1643
1644 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1645 are available; -m64 also implies v9. */
1646 if (TARGET_VIS || TARGET_ARCH64)
1647 {
1648 target_flags |= MASK_V9;
1649 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1650 }
1651
1652 /* -mvis also implies -mv8plus on 32-bit. */
1653 if (TARGET_VIS && !TARGET_ARCH64)
1654 target_flags |= MASK_V8PLUS;
1655
1656 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1657 if (TARGET_V9 && TARGET_ARCH32)
1658 target_flags |= MASK_DEPRECATED_V8_INSNS;
1659
1660 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1661 if (!TARGET_V9 || TARGET_ARCH64)
1662 target_flags &= ~MASK_V8PLUS;
1663
1664 /* Don't use stack biasing in 32-bit mode. */
1665 if (TARGET_ARCH32)
1666 target_flags &= ~MASK_STACK_BIAS;
1667
1668 /* Use LRA instead of reload, unless otherwise instructed. */
1669 if (!(target_flags_explicit & MASK_LRA))
1670 target_flags |= MASK_LRA;
1671
1672 /* Enable the back-to-back store errata workaround for LEON3FT. */
1673 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1674 sparc_fix_b2bst = 1;
1675
1676 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1677 if (sparc_fix_ut699)
1678 target_flags &= ~MASK_FSMULD;
1679
1680 /* Supply a default value for align_functions. */
1681 if (align_functions == 0)
1682 {
1683 if (sparc_cpu == PROCESSOR_ULTRASPARC
1684 || sparc_cpu == PROCESSOR_ULTRASPARC3
1685 || sparc_cpu == PROCESSOR_NIAGARA
1686 || sparc_cpu == PROCESSOR_NIAGARA2
1687 || sparc_cpu == PROCESSOR_NIAGARA3
1688 || sparc_cpu == PROCESSOR_NIAGARA4)
1689 align_functions = 32;
1690 else if (sparc_cpu == PROCESSOR_NIAGARA7
1691 || sparc_cpu == PROCESSOR_M8)
1692 align_functions = 64;
1693 }
1694
1695 /* Validate PCC_STRUCT_RETURN. */
1696 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1697 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1698
1699 /* Only use .uaxword when compiling for a 64-bit target. */
1700 if (!TARGET_ARCH64)
1701 targetm.asm_out.unaligned_op.di = NULL;
1702
1703 /* Do various machine dependent initializations. */
1704 sparc_init_modes ();
1705
1706 /* Set up function hooks. */
1707 init_machine_status = sparc_init_machine_status;
1708
1709 switch (sparc_cpu)
1710 {
1711 case PROCESSOR_V7:
1712 case PROCESSOR_CYPRESS:
1713 sparc_costs = &cypress_costs;
1714 break;
1715 case PROCESSOR_V8:
1716 case PROCESSOR_SPARCLITE:
1717 case PROCESSOR_SUPERSPARC:
1718 sparc_costs = &supersparc_costs;
1719 break;
1720 case PROCESSOR_F930:
1721 case PROCESSOR_F934:
1722 case PROCESSOR_HYPERSPARC:
1723 case PROCESSOR_SPARCLITE86X:
1724 sparc_costs = &hypersparc_costs;
1725 break;
1726 case PROCESSOR_LEON:
1727 sparc_costs = &leon_costs;
1728 break;
1729 case PROCESSOR_LEON3:
1730 case PROCESSOR_LEON3V7:
1731 sparc_costs = &leon3_costs;
1732 break;
1733 case PROCESSOR_SPARCLET:
1734 case PROCESSOR_TSC701:
1735 sparc_costs = &sparclet_costs;
1736 break;
1737 case PROCESSOR_V9:
1738 case PROCESSOR_ULTRASPARC:
1739 sparc_costs = &ultrasparc_costs;
1740 break;
1741 case PROCESSOR_ULTRASPARC3:
1742 sparc_costs = &ultrasparc3_costs;
1743 break;
1744 case PROCESSOR_NIAGARA:
1745 sparc_costs = &niagara_costs;
1746 break;
1747 case PROCESSOR_NIAGARA2:
1748 sparc_costs = &niagara2_costs;
1749 break;
1750 case PROCESSOR_NIAGARA3:
1751 sparc_costs = &niagara3_costs;
1752 break;
1753 case PROCESSOR_NIAGARA4:
1754 sparc_costs = &niagara4_costs;
1755 break;
1756 case PROCESSOR_NIAGARA7:
1757 sparc_costs = &niagara7_costs;
1758 break;
1759 case PROCESSOR_M8:
1760 sparc_costs = &m8_costs;
1761 break;
1762 case PROCESSOR_NATIVE:
1763 gcc_unreachable ();
1764 };
1765
1766 if (sparc_memory_model == SMM_DEFAULT)
1767 {
1768 /* Choose the memory model for the operating system. */
1769 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1770 if (os_default != SMM_DEFAULT)
1771 sparc_memory_model = os_default;
1772 /* Choose the most relaxed model for the processor. */
1773 else if (TARGET_V9)
1774 sparc_memory_model = SMM_RMO;
1775 else if (TARGET_LEON3)
1776 sparc_memory_model = SMM_TSO;
1777 else if (TARGET_LEON)
1778 sparc_memory_model = SMM_SC;
1779 else if (TARGET_V8)
1780 sparc_memory_model = SMM_PSO;
1781 else
1782 sparc_memory_model = SMM_SC;
1783 }
1784
1785 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1786 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1787 target_flags |= MASK_LONG_DOUBLE_128;
1788 #endif
1789
1790 if (TARGET_DEBUG_OPTIONS)
1791 dump_target_flags ("Final target_flags", target_flags);
1792
1793 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1794 can run at the same time. More importantly, it is the threshold
1795 defining when additional prefetches will be dropped by the
1796 hardware.
1797
1798 The UltraSPARC-III features a documented prefetch queue with a
1799 size of 8. Additional prefetches issued in the cpu are
1800 dropped.
1801
1802 Niagara processors are different. In these processors prefetches
1803 are handled much like regular loads. The L1 miss buffer is 32
1804 entries, but prefetches start getting affected when 30 entries
1805 become occupied. That occupation could be a mix of regular loads
1806 and prefetches though. And that buffer is shared by all threads.
1807 Once the threshold is reached, if the core is running a single
1808 thread the prefetch will retry. If more than one thread is
1809 running, the prefetch will be dropped.
1810
1811 All this makes it very difficult to determine how many
1812 prefetches can be issued simultaneously, even in a
1813 single-threaded program. Experimental results show that setting
1814 this parameter to 32 works well when the number of threads is not
1815 high. */
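  /* Illustrative summary of the selection made just below (a restatement
     of the expression passed to maybe_set_param_value, nothing more):
	UltraSPARC, Niagara-1/2/3/4:  2
	UltraSPARC-III:               8
	Niagara-7, M8:                32
	all other CPUs:               3  */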
1816 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1817 ((sparc_cpu == PROCESSOR_ULTRASPARC
1818 || sparc_cpu == PROCESSOR_NIAGARA
1819 || sparc_cpu == PROCESSOR_NIAGARA2
1820 || sparc_cpu == PROCESSOR_NIAGARA3
1821 || sparc_cpu == PROCESSOR_NIAGARA4)
1822 ? 2
1823 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1824 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1825 || sparc_cpu == PROCESSOR_M8)
1826 ? 32 : 3))),
1827 global_options.x_param_values,
1828 global_options_set.x_param_values);
1829
1830 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1831 bytes.
1832
1833 The Oracle SPARC Architecture (previously the UltraSPARC
1834 Architecture) specification states that when a PREFETCH[A]
1835 instruction is executed an implementation-specific amount of data
1836 is prefetched, and that it is at least 64 bytes long (aligned to
1837 at least 64 bytes).
1838
1839 However, this is not correct. The M7 (and implementations prior
1840 to that) does not guarantee a 64B prefetch into a cache if the
1841 line size is smaller. A single cache line is all that is ever
1842 prefetched. So for the M7, where the L1D$ has 32B lines and the
1843 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1844 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1845 is a read_n prefetch, which is the only type which allocates to
1846 the L1.) */
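  /* Hence the value chosen just below (a restatement of the call that
     follows): 64 bytes for the M8 and 32 bytes for every other CPU.  */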
1847 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1848 (sparc_cpu == PROCESSOR_M8
1849 ? 64 : 32),
1850 global_options.x_param_values,
1851 global_options_set.x_param_values);
1852
1853 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1854 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1855 Niagara processors feature a L1D$ of 16KB. */
1856 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1857 ((sparc_cpu == PROCESSOR_ULTRASPARC
1858 || sparc_cpu == PROCESSOR_ULTRASPARC3
1859 || sparc_cpu == PROCESSOR_NIAGARA
1860 || sparc_cpu == PROCESSOR_NIAGARA2
1861 || sparc_cpu == PROCESSOR_NIAGARA3
1862 || sparc_cpu == PROCESSOR_NIAGARA4
1863 || sparc_cpu == PROCESSOR_NIAGARA7
1864 || sparc_cpu == PROCESSOR_M8)
1865 ? 16 : 64),
1866 global_options.x_param_values,
1867 global_options_set.x_param_values);
1868
1869
1870 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1871 that 512 is the default in params.def. */
1872 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1873 ((sparc_cpu == PROCESSOR_NIAGARA4
1874 || sparc_cpu == PROCESSOR_M8)
1875 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1876 ? 256 : 512)),
1877 global_options.x_param_values,
1878 global_options_set.x_param_values);
1879
1880
1881 /* Disable save slot sharing for call-clobbered registers by default.
1882 The IRA sharing algorithm works on single registers only and this
1883 pessimizes for double floating-point registers. */
1884 if (!global_options_set.x_flag_ira_share_save_slots)
1885 flag_ira_share_save_slots = 0;
1886
1887 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1888 redundant 32-to-64-bit extensions. */
1889 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1890 flag_ree = 0;
1891 }
1892 \f
1893 /* Miscellaneous utilities. */
1894
1895 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1896 or branch on register contents instructions. */
1897
1898 int
1899 v9_regcmp_p (enum rtx_code code)
1900 {
1901 return (code == EQ || code == NE || code == GE || code == LT
1902 || code == LE || code == GT);
1903 }
1904
1905 /* Nonzero if OP is a floating point constant which can
1906 be loaded into an integer register using a single
1907 sethi instruction. */
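/* For instance (illustrative only): the SFmode constant 1.0f has the bit
   pattern 0x3f800000, whose low 10 bits are all zero, so it satisfies
   SPARC_SETHI_P but not SPARC_SIMM13_P and can be loaded with a single
   sethi; 0.0f (0x00000000) fits in a simm13 and is handled by fp_mov_p
   below instead.  */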
1908
1909 int
1910 fp_sethi_p (rtx op)
1911 {
1912 if (GET_CODE (op) == CONST_DOUBLE)
1913 {
1914 long i;
1915
1916 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1917 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1918 }
1919
1920 return 0;
1921 }
1922
1923 /* Nonzero if OP is a floating point constant which can
1924 be loaded into an integer register using a single
1925 mov instruction. */
1926
1927 int
1928 fp_mov_p (rtx op)
1929 {
1930 if (GET_CODE (op) == CONST_DOUBLE)
1931 {
1932 long i;
1933
1934 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1935 return SPARC_SIMM13_P (i);
1936 }
1937
1938 return 0;
1939 }
1940
1941 /* Nonzero if OP is a floating point constant which can
1942 be loaded into an integer register using a high/losum
1943 instruction sequence. */
1944
1945 int
1946 fp_high_losum_p (rtx op)
1947 {
1948 /* The constraints calling this should only be in
1949 SFmode move insns, so any constant which cannot
1950 be moved using a single insn will do. */
1951 if (GET_CODE (op) == CONST_DOUBLE)
1952 {
1953 long i;
1954
1955 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1956 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1957 }
1958
1959 return 0;
1960 }
1961
1962 /* Return true if the address of LABEL can be loaded by means of the
1963 mov{si,di}_pic_label_ref patterns in PIC mode. */
1964
1965 static bool
1966 can_use_mov_pic_label_ref (rtx label)
1967 {
1968 /* VxWorks does not impose a fixed gap between segments; the run-time
1969 gap can be different from the object-file gap. We therefore can't
1970 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1971 are absolutely sure that X is in the same segment as the GOT.
1972 Unfortunately, the flexibility of linker scripts means that we
1973 can't be sure of that in general, so assume that GOT-relative
1974 accesses are never valid on VxWorks. */
1975 if (TARGET_VXWORKS_RTP)
1976 return false;
1977
1978 /* Similarly, if the label is non-local, it might end up being placed
1979 in a different section from the current one, whereas mov_pic_label_ref currently
1980 requires the label and the code to be in the same section. */
1981 if (LABEL_REF_NONLOCAL_P (label))
1982 return false;
1983
1984 /* Finally, if we are reordering basic blocks and partitioning into hot
1985 and cold sections, this might happen for any label. */
1986 if (flag_reorder_blocks_and_partition)
1987 return false;
1988
1989 return true;
1990 }
1991
1992 /* Expand a move instruction. Return true if all work is done. */
1993
1994 bool
1995 sparc_expand_move (machine_mode mode, rtx *operands)
1996 {
1997 /* Handle sets of MEM first. */
1998 if (GET_CODE (operands[0]) == MEM)
1999 {
2000 /* 0 is a register (or a pair of registers) on SPARC. */
2001 if (register_or_zero_operand (operands[1], mode))
2002 return false;
2003
2004 if (!reload_in_progress)
2005 {
2006 operands[0] = validize_mem (operands[0]);
2007 operands[1] = force_reg (mode, operands[1]);
2008 }
2009 }
2010
2011 /* Fixup TLS cases. */
2012 if (TARGET_HAVE_TLS
2013 && CONSTANT_P (operands[1])
2014 && sparc_tls_referenced_p (operands [1]))
2015 {
2016 operands[1] = sparc_legitimize_tls_address (operands[1]);
2017 return false;
2018 }
2019
2020 /* Fixup PIC cases. */
2021 if (flag_pic && CONSTANT_P (operands[1]))
2022 {
2023 if (pic_address_needs_scratch (operands[1]))
2024 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2025
2026 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2027 if (GET_CODE (operands[1]) == LABEL_REF
2028 && can_use_mov_pic_label_ref (operands[1]))
2029 {
2030 if (mode == SImode)
2031 {
2032 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2033 return true;
2034 }
2035
2036 if (mode == DImode)
2037 {
2038 gcc_assert (TARGET_ARCH64);
2039 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2040 return true;
2041 }
2042 }
2043
2044 if (symbolic_operand (operands[1], mode))
2045 {
2046 operands[1]
2047 = sparc_legitimize_pic_address (operands[1],
2048 reload_in_progress
2049 ? operands[0] : NULL_RTX);
2050 return false;
2051 }
2052 }
2053
2054 /* If we are trying to toss an integer constant into FP registers,
2055 or loading a FP or vector constant, force it into memory. */
2056 if (CONSTANT_P (operands[1])
2057 && REG_P (operands[0])
2058 && (SPARC_FP_REG_P (REGNO (operands[0]))
2059 || SCALAR_FLOAT_MODE_P (mode)
2060 || VECTOR_MODE_P (mode)))
2061 {
2062 /* emit_group_store will send such bogosity to us when it is
2063 not storing directly into memory. So fix this up to avoid
2064 crashes in output_constant_pool. */
2065 if (operands [1] == const0_rtx)
2066 operands[1] = CONST0_RTX (mode);
2067
2068 /* We can clear or set FP registers to all-ones if TARGET_VIS, and
2069 can always do so for the other registers. */
2070 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2071 && (const_zero_operand (operands[1], mode)
2072 || const_all_ones_operand (operands[1], mode)))
2073 return false;
2074
2075 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2076 /* We are able to build any SF constant in integer registers
2077 with at most 2 instructions. */
2078 && (mode == SFmode
2079 /* And any DF constant in integer registers if needed. */
2080 || (mode == DFmode && !can_create_pseudo_p ())))
2081 return false;
2082
2083 operands[1] = force_const_mem (mode, operands[1]);
2084 if (!reload_in_progress)
2085 operands[1] = validize_mem (operands[1]);
2086 return false;
2087 }
2088
2089 /* Accept non-constants and valid constants unmodified. */
2090 if (!CONSTANT_P (operands[1])
2091 || GET_CODE (operands[1]) == HIGH
2092 || input_operand (operands[1], mode))
2093 return false;
2094
2095 switch (mode)
2096 {
2097 case E_QImode:
2098 /* All QImode constants require only one insn, so proceed. */
2099 break;
2100
2101 case E_HImode:
2102 case E_SImode:
2103 sparc_emit_set_const32 (operands[0], operands[1]);
2104 return true;
2105
2106 case E_DImode:
2107 /* input_operand should have filtered out 32-bit mode. */
2108 sparc_emit_set_const64 (operands[0], operands[1]);
2109 return true;
2110
2111 case E_TImode:
2112 {
2113 rtx high, low;
2114 /* TImode isn't available in 32-bit mode. */
2115 split_double (operands[1], &high, &low);
2116 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2117 high));
2118 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2119 low));
2120 }
2121 return true;
2122
2123 default:
2124 gcc_unreachable ();
2125 }
2126
2127 return false;
2128 }
2129
2130 /* Load OP1, a 32-bit constant, into OP0, a register.
2131 We know it can't be done in one insn when we get
2132 here, the move expander guarantees this. */
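/* Illustrative example (not an additional code path): for
   op1 == 0x12345678 this emits the equivalent of

	sethi	%hi(0x12345678), %temp		! temp = 0x12345400
	or	%temp, 0x278, %op0		! op0  = 0x12345678

   except that the first insn is emitted as a plain move of the masked
   constant so that CSE can see and reuse the intermediate value.  */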
2133
2134 static void
2135 sparc_emit_set_const32 (rtx op0, rtx op1)
2136 {
2137 machine_mode mode = GET_MODE (op0);
2138 rtx temp = op0;
2139
2140 if (can_create_pseudo_p ())
2141 temp = gen_reg_rtx (mode);
2142
2143 if (GET_CODE (op1) == CONST_INT)
2144 {
2145 gcc_assert (!small_int_operand (op1, mode)
2146 && !const_high_operand (op1, mode));
2147
2148 /* Emit them as real moves instead of a HIGH/LO_SUM, so that
2149 CSE can see everything and reuse intermediate
2150 values if it wants. */
2151 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2152 & ~(HOST_WIDE_INT) 0x3ff)));
2153
2154 emit_insn (gen_rtx_SET (op0,
2155 gen_rtx_IOR (mode, temp,
2156 GEN_INT (INTVAL (op1) & 0x3ff))));
2157 }
2158 else
2159 {
2160 /* A symbol, emit in the traditional way. */
2161 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2162 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2163 }
2164 }
2165
2166 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2167 If TEMP is nonzero, we are forbidden to use any other scratch
2168 registers. Otherwise, we are allowed to generate them as needed.
2169
2170 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2171 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
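/* Descriptive note: if OP1 has the form (const (plus symbol offset)) with
   an offset that does not fit in 32 bits, the code below first builds the
   offset and the symbol in separate registers and then adds them, rather
   than relying on the relocation operators.  */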
2172
2173 void
2174 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2175 {
2176 rtx cst, temp1, temp2, temp3, temp4, temp5;
2177 rtx ti_temp = 0;
2178
2179 /* Deal with too large offsets. */
2180 if (GET_CODE (op1) == CONST
2181 && GET_CODE (XEXP (op1, 0)) == PLUS
2182 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2183 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2184 {
2185 gcc_assert (!temp);
2186 temp1 = gen_reg_rtx (DImode);
2187 temp2 = gen_reg_rtx (DImode);
2188 sparc_emit_set_const64 (temp2, cst);
2189 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2190 NULL_RTX);
2191 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2192 return;
2193 }
2194
2195 if (temp && GET_MODE (temp) == TImode)
2196 {
2197 ti_temp = temp;
2198 temp = gen_rtx_REG (DImode, REGNO (temp));
2199 }
2200
2201 /* SPARC-V9 code-model support. */
2202 switch (sparc_cmodel)
2203 {
2204 case CM_MEDLOW:
2205 /* The range spanned by all instructions in the object is less
2206 than 2^31 bytes (2GB) and the distance from any instruction
2207 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2208 than 2^31 bytes (2GB).
2209
2210 The executable must be in the low 4TB of the virtual address
2211 space.
2212
2213 sethi %hi(symbol), %temp1
2214 or %temp1, %lo(symbol), %reg */
2215 if (temp)
2216 temp1 = temp; /* op0 is allowed. */
2217 else
2218 temp1 = gen_reg_rtx (DImode);
2219
2220 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2221 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2222 break;
2223
2224 case CM_MEDMID:
2225 /* The range spanned by all instructions in the object is less
2226 than 2^31 bytes (2GB) and the distance from any instruction
2227 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2228 than 2^31 bytes (2GB).
2229
2230 The executable must be in the low 16TB of the virtual address
2231 space.
2232
2233 sethi %h44(symbol), %temp1
2234 or %temp1, %m44(symbol), %temp2
2235 sllx %temp2, 12, %temp3
2236 or %temp3, %l44(symbol), %reg */
2237 if (temp)
2238 {
2239 temp1 = op0;
2240 temp2 = op0;
2241 temp3 = temp; /* op0 is allowed. */
2242 }
2243 else
2244 {
2245 temp1 = gen_reg_rtx (DImode);
2246 temp2 = gen_reg_rtx (DImode);
2247 temp3 = gen_reg_rtx (DImode);
2248 }
2249
2250 emit_insn (gen_seth44 (temp1, op1));
2251 emit_insn (gen_setm44 (temp2, temp1, op1));
2252 emit_insn (gen_rtx_SET (temp3,
2253 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2254 emit_insn (gen_setl44 (op0, temp3, op1));
2255 break;
2256
2257 case CM_MEDANY:
2258 /* The range spanned by all instructions in the object is less
2259 than 2^31 bytes (2GB) and the distance from any instruction
2260 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2261 than 2^31 bytes (2GB).
2262
2263 The executable can be placed anywhere in the virtual address
2264 space.
2265
2266 sethi %hh(symbol), %temp1
2267 sethi %lm(symbol), %temp2
2268 or %temp1, %hm(symbol), %temp3
2269 sllx %temp3, 32, %temp4
2270 or %temp4, %temp2, %temp5
2271 or %temp5, %lo(symbol), %reg */
2272 if (temp)
2273 {
2274 /* It is possible that one of the registers we got for operands[2]
2275 might coincide with that of operands[0] (which is why we made
2276 it TImode). Pick the other one to use as our scratch. */
2277 if (rtx_equal_p (temp, op0))
2278 {
2279 gcc_assert (ti_temp);
2280 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2281 }
2282 temp1 = op0;
2283 temp2 = temp; /* op0 is _not_ allowed, see above. */
2284 temp3 = op0;
2285 temp4 = op0;
2286 temp5 = op0;
2287 }
2288 else
2289 {
2290 temp1 = gen_reg_rtx (DImode);
2291 temp2 = gen_reg_rtx (DImode);
2292 temp3 = gen_reg_rtx (DImode);
2293 temp4 = gen_reg_rtx (DImode);
2294 temp5 = gen_reg_rtx (DImode);
2295 }
2296
2297 emit_insn (gen_sethh (temp1, op1));
2298 emit_insn (gen_setlm (temp2, op1));
2299 emit_insn (gen_sethm (temp3, temp1, op1));
2300 emit_insn (gen_rtx_SET (temp4,
2301 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2302 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2303 emit_insn (gen_setlo (op0, temp5, op1));
2304 break;
2305
2306 case CM_EMBMEDANY:
2307 /* Old old old backwards-compatibility cruft here.
2308 Essentially it is MEDLOW with a fixed 64-bit
2309 virtual base added to all data segment addresses.
2310 Text-segment stuff is computed like MEDANY, we can't
2311 reuse the code above because the relocation knobs
2312 look different.
2313
2314 Data segment: sethi %hi(symbol), %temp1
2315 add %temp1, EMBMEDANY_BASE_REG, %temp2
2316 or %temp2, %lo(symbol), %reg */
2317 if (data_segment_operand (op1, GET_MODE (op1)))
2318 {
2319 if (temp)
2320 {
2321 temp1 = temp; /* op0 is allowed. */
2322 temp2 = op0;
2323 }
2324 else
2325 {
2326 temp1 = gen_reg_rtx (DImode);
2327 temp2 = gen_reg_rtx (DImode);
2328 }
2329
2330 emit_insn (gen_embmedany_sethi (temp1, op1));
2331 emit_insn (gen_embmedany_brsum (temp2, temp1));
2332 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2333 }
2334
2335 /* Text segment: sethi %uhi(symbol), %temp1
2336 sethi %hi(symbol), %temp2
2337 or %temp1, %ulo(symbol), %temp3
2338 sllx %temp3, 32, %temp4
2339 or %temp4, %temp2, %temp5
2340 or %temp5, %lo(symbol), %reg */
2341 else
2342 {
2343 if (temp)
2344 {
2345 /* It is possible that one of the registers we got for operands[2]
2346 might coincide with that of operands[0] (which is why we made
2347 it TImode). Pick the other one to use as our scratch. */
2348 if (rtx_equal_p (temp, op0))
2349 {
2350 gcc_assert (ti_temp);
2351 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2352 }
2353 temp1 = op0;
2354 temp2 = temp; /* op0 is _not_ allowed, see above. */
2355 temp3 = op0;
2356 temp4 = op0;
2357 temp5 = op0;
2358 }
2359 else
2360 {
2361 temp1 = gen_reg_rtx (DImode);
2362 temp2 = gen_reg_rtx (DImode);
2363 temp3 = gen_reg_rtx (DImode);
2364 temp4 = gen_reg_rtx (DImode);
2365 temp5 = gen_reg_rtx (DImode);
2366 }
2367
2368 emit_insn (gen_embmedany_textuhi (temp1, op1));
2369 emit_insn (gen_embmedany_texthi (temp2, op1));
2370 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2371 emit_insn (gen_rtx_SET (temp4,
2372 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2373 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2374 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2375 }
2376 break;
2377
2378 default:
2379 gcc_unreachable ();
2380 }
2381 }
2382
2383 /* These avoid problems when cross compiling. If we do not
2384 go through all this hair then the optimizer will see
2385 invalid REG_EQUAL notes or in some cases none at all. */
2386 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2387 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2388 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2389 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2390
2391 /* The optimizer must not assume anything about exactly
2392 which bits are set for a HIGH; they are unspecified.
2393 Unfortunately this leads to many missed optimizations
2394 during CSE. We mask out the non-HIGH bits and match
2395 a plain movdi, to alleviate this problem. */
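/* For example (illustrative): gen_safe_HIGH64 (reg, 0x12345678) yields
   (set reg (const_int 0x12345400)), i.e. the value that a
   sethi %hi(0x12345678) would produce, but expressed as an ordinary move
   so that CSE can reason about it.  */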
2396 static rtx
2397 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2398 {
2399 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2400 }
2401
2402 static rtx
2403 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2404 {
2405 return gen_rtx_SET (dest, GEN_INT (val));
2406 }
2407
2408 static rtx
2409 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2410 {
2411 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2412 }
2413
2414 static rtx
2415 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2416 {
2417 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2418 }
2419
2420 /* Worker routines for 64-bit constant formation on arch64.
2421 One of the key things to do in these emissions is
2422 to create as many temp REGs as possible. This makes it
2423 possible for half-built constants to be used later when
2424 such values are similar to something required later on.
2425 Without doing this, the optimizer cannot see such
2426 opportunities. */
2427
2428 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2429 unsigned HOST_WIDE_INT, int);
2430
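/* Load into OP0 a 64-bit constant whose upper 32 bits are all zero
   (IS_NEG == 0) or all one (IS_NEG != 0), using TEMP as a scratch.
   Illustrative example: for the constant 0x12345678 (IS_NEG == 0) this
   emits the equivalent of sethi %hi(0x12345678), %temp followed by
   or %temp, 0x278, %op0; for an all-ones upper word, TEMP is loaded with
   the complemented low word (via gen_safe_HIGH64) and the result is
   formed with an xor (or a plain not) instead.  */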
2431 static void
2432 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2433 unsigned HOST_WIDE_INT low_bits, int is_neg)
2434 {
2435 unsigned HOST_WIDE_INT high_bits;
2436
2437 if (is_neg)
2438 high_bits = (~low_bits) & 0xffffffff;
2439 else
2440 high_bits = low_bits;
2441
2442 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2443 if (!is_neg)
2444 {
2445 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2446 }
2447 else
2448 {
2449 /* If we are XOR'ing with -1, then we should emit a one's complement
2450 instead. This way the combiner will notice logical operations
2451 such as ANDN later on and substitute. */
2452 if ((low_bits & 0x3ff) == 0x3ff)
2453 {
2454 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2455 }
2456 else
2457 {
2458 emit_insn (gen_rtx_SET (op0,
2459 gen_safe_XOR64 (temp,
2460 (-(HOST_WIDE_INT)0x400
2461 | (low_bits & 0x3ff)))));
2462 }
2463 }
2464 }
2465
2466 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2467 unsigned HOST_WIDE_INT, int);
2468
2469 static void
2470 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2471 unsigned HOST_WIDE_INT high_bits,
2472 unsigned HOST_WIDE_INT low_immediate,
2473 int shift_count)
2474 {
2475 rtx temp2 = op0;
2476
2477 if ((high_bits & 0xfffffc00) != 0)
2478 {
2479 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2480 if ((high_bits & ~0xfffffc00) != 0)
2481 emit_insn (gen_rtx_SET (op0,
2482 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2483 else
2484 temp2 = temp;
2485 }
2486 else
2487 {
2488 emit_insn (gen_safe_SET64 (temp, high_bits));
2489 temp2 = temp;
2490 }
2491
2492 /* Now shift it up into place. */
2493 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2494 GEN_INT (shift_count))));
2495
2496 /* If there is a low immediate part piece, finish up by
2497 putting that in as well. */
2498 if (low_immediate != 0)
2499 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2500 }
2501
2502 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2503 unsigned HOST_WIDE_INT);
2504
2505 /* Full 64-bit constant decomposition. Even though this is the
2506 'worst' case, we still optimize a few things away. */
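/* Illustrative sketch of the general shape emitted below when pseudos are
   available (the exact insns depend on which pieces happen to be zero):

	sethi	%hi(high_bits), %temp
	or	%temp, %lo(high_bits), %sub_temp
	sllx	%sub_temp, 32, %temp4
	sethi	%hi(low_bits), %temp2
	or	%temp2, %lo(low_bits), %temp3
	add	%temp4, %temp3, %op0  */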
2507 static void
2508 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2509 unsigned HOST_WIDE_INT high_bits,
2510 unsigned HOST_WIDE_INT low_bits)
2511 {
2512 rtx sub_temp = op0;
2513
2514 if (can_create_pseudo_p ())
2515 sub_temp = gen_reg_rtx (DImode);
2516
2517 if ((high_bits & 0xfffffc00) != 0)
2518 {
2519 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2520 if ((high_bits & ~0xfffffc00) != 0)
2521 emit_insn (gen_rtx_SET (sub_temp,
2522 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2523 else
2524 sub_temp = temp;
2525 }
2526 else
2527 {
2528 emit_insn (gen_safe_SET64 (temp, high_bits));
2529 sub_temp = temp;
2530 }
2531
2532 if (can_create_pseudo_p ())
2533 {
2534 rtx temp2 = gen_reg_rtx (DImode);
2535 rtx temp3 = gen_reg_rtx (DImode);
2536 rtx temp4 = gen_reg_rtx (DImode);
2537
2538 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2539 GEN_INT (32))));
2540
2541 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2542 if ((low_bits & ~0xfffffc00) != 0)
2543 {
2544 emit_insn (gen_rtx_SET (temp3,
2545 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2546 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2547 }
2548 else
2549 {
2550 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2551 }
2552 }
2553 else
2554 {
2555 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2556 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2557 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2558 int to_shift = 12;
2559
2560 /* We are in the middle of reload, so this is really
2561 painful. However, we still make an attempt to
2562 avoid emitting truly stupid code. */
2563 if (low1 != const0_rtx)
2564 {
2565 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2566 GEN_INT (to_shift))));
2567 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2568 sub_temp = op0;
2569 to_shift = 12;
2570 }
2571 else
2572 {
2573 to_shift += 12;
2574 }
2575 if (low2 != const0_rtx)
2576 {
2577 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2578 GEN_INT (to_shift))));
2579 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2580 sub_temp = op0;
2581 to_shift = 8;
2582 }
2583 else
2584 {
2585 to_shift += 8;
2586 }
2587 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2588 GEN_INT (to_shift))));
2589 if (low3 != const0_rtx)
2590 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2591 /* phew... */
2592 }
2593 }
2594
2595 /* Analyze a 64-bit constant: find its highest and lowest set bits
   and whether all the bits in between are set.  */
2596 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2597 unsigned HOST_WIDE_INT,
2598 int *, int *, int *);
2599
2600 static void
2601 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2602 unsigned HOST_WIDE_INT low_bits,
2603 int *hbsp, int *lbsp, int *abbasp)
2604 {
2605 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2606 int i;
2607
2608 lowest_bit_set = highest_bit_set = -1;
2609 i = 0;
2610 do
2611 {
2612 if ((lowest_bit_set == -1)
2613 && ((low_bits >> i) & 1))
2614 lowest_bit_set = i;
2615 if ((highest_bit_set == -1)
2616 && ((high_bits >> (32 - i - 1)) & 1))
2617 highest_bit_set = (64 - i - 1);
2618 }
2619 while (++i < 32
2620 && ((highest_bit_set == -1)
2621 || (lowest_bit_set == -1)));
2622 if (i == 32)
2623 {
2624 i = 0;
2625 do
2626 {
2627 if ((lowest_bit_set == -1)
2628 && ((high_bits >> i) & 1))
2629 lowest_bit_set = i + 32;
2630 if ((highest_bit_set == -1)
2631 && ((low_bits >> (32 - i - 1)) & 1))
2632 highest_bit_set = 32 - i - 1;
2633 }
2634 while (++i < 32
2635 && ((highest_bit_set == -1)
2636 || (lowest_bit_set == -1)));
2637 }
2638 /* If there are no bits set this should have gone out
2639 as one instruction! */
2640 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2641 all_bits_between_are_set = 1;
2642 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2643 {
2644 if (i < 32)
2645 {
2646 if ((low_bits & (1 << i)) != 0)
2647 continue;
2648 }
2649 else
2650 {
2651 if ((high_bits & (1 << (i - 32))) != 0)
2652 continue;
2653 }
2654 all_bits_between_are_set = 0;
2655 break;
2656 }
2657 *hbsp = highest_bit_set;
2658 *lbsp = lowest_bit_set;
2659 *abbasp = all_bits_between_are_set;
2660 }
2661
2662 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2663
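/* Return nonzero if the 64-bit constant HIGH_BITS:LOW_BITS can be formed
   in at most two instructions: the upper word is all zeros or all ones,
   or the set bits form a contiguous run touching bit 0 or bit 63, or the
   set bits span a range small enough for a single sethi plus one shift.
   (Descriptive summary of the tests below.)  */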
2664 static int
2665 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2666 unsigned HOST_WIDE_INT low_bits)
2667 {
2668 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2669
2670 if (high_bits == 0
2671 || high_bits == 0xffffffff)
2672 return 1;
2673
2674 analyze_64bit_constant (high_bits, low_bits,
2675 &highest_bit_set, &lowest_bit_set,
2676 &all_bits_between_are_set);
2677
2678 if ((highest_bit_set == 63
2679 || lowest_bit_set == 0)
2680 && all_bits_between_are_set != 0)
2681 return 1;
2682
2683 if ((highest_bit_set - lowest_bit_set) < 21)
2684 return 1;
2685
2686 return 0;
2687 }
2688
2689 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2690 unsigned HOST_WIDE_INT,
2691 int, int);
2692
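/* Collapse the 64-bit constant HIGH_BITS:LOW_BITS around its interesting
   bits: shift the value right so that LOWEST_BIT_SET becomes bit 0, then
   shift the result left by SHIFT.  (Descriptive comment; the behavior is
   defined by the code below.)  */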
2693 static unsigned HOST_WIDE_INT
2694 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2695 unsigned HOST_WIDE_INT low_bits,
2696 int lowest_bit_set, int shift)
2697 {
2698 HOST_WIDE_INT hi, lo;
2699
2700 if (lowest_bit_set < 32)
2701 {
2702 lo = (low_bits >> lowest_bit_set) << shift;
2703 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2704 }
2705 else
2706 {
2707 lo = 0;
2708 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2709 }
2710 gcc_assert (! (hi & lo));
2711 return (hi | lo);
2712 }
2713
2714 /* Here we are sure to be arch64 and this is an integer constant
2715 being loaded into a register. Emit the most efficient
2716 insn sequence possible. Detection of all the 1-insn cases
2717 has been done already. */
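/* Rough outline of the cases tried below, in order (a summary of this
   function, nothing beyond it): a simm13 constant plus one shift; a
   sethi-reachable group of bits plus one shift; an upper word that is all
   zeros or all ones (quick1); a high word only, shifted into place
   (quick2); a constant whose negation is cheap to build; a group of bits
   straddling the middle of the word (quick2 again); a high word plus a
   positive simm13 low word; and finally the full decomposition
   (longway).  */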
2718 static void
2719 sparc_emit_set_const64 (rtx op0, rtx op1)
2720 {
2721 unsigned HOST_WIDE_INT high_bits, low_bits;
2722 int lowest_bit_set, highest_bit_set;
2723 int all_bits_between_are_set;
2724 rtx temp = 0;
2725
2726 /* Sanity check that we know what we are working with. */
2727 gcc_assert (TARGET_ARCH64
2728 && (GET_CODE (op0) == SUBREG
2729 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2730
2731 if (! can_create_pseudo_p ())
2732 temp = op0;
2733
2734 if (GET_CODE (op1) != CONST_INT)
2735 {
2736 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2737 return;
2738 }
2739
2740 if (! temp)
2741 temp = gen_reg_rtx (DImode);
2742
2743 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2744 low_bits = (INTVAL (op1) & 0xffffffff);
2745
2746 /* low_bits bits 0 --> 31
2747 high_bits bits 32 --> 63 */
2748
2749 analyze_64bit_constant (high_bits, low_bits,
2750 &highest_bit_set, &lowest_bit_set,
2751 &all_bits_between_are_set);
2752
2753 /* First try for a 2-insn sequence. */
2754
2755 /* These situations are preferred because the optimizer can
2756 * do more things with them:
2757 * 1) mov -1, %reg
2758 * sllx %reg, shift, %reg
2759 * 2) mov -1, %reg
2760 * srlx %reg, shift, %reg
2761 * 3) mov some_small_const, %reg
2762 * sllx %reg, shift, %reg
2763 */
2764 if (((highest_bit_set == 63
2765 || lowest_bit_set == 0)
2766 && all_bits_between_are_set != 0)
2767 || ((highest_bit_set - lowest_bit_set) < 12))
2768 {
2769 HOST_WIDE_INT the_const = -1;
2770 int shift = lowest_bit_set;
2771
2772 if ((highest_bit_set != 63
2773 && lowest_bit_set != 0)
2774 || all_bits_between_are_set == 0)
2775 {
2776 the_const =
2777 create_simple_focus_bits (high_bits, low_bits,
2778 lowest_bit_set, 0);
2779 }
2780 else if (lowest_bit_set == 0)
2781 shift = -(63 - highest_bit_set);
2782
2783 gcc_assert (SPARC_SIMM13_P (the_const));
2784 gcc_assert (shift != 0);
2785
2786 emit_insn (gen_safe_SET64 (temp, the_const));
2787 if (shift > 0)
2788 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2789 GEN_INT (shift))));
2790 else if (shift < 0)
2791 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2792 GEN_INT (-shift))));
2793 return;
2794 }
2795
2796 /* Now a range of 22 or fewer bits set somewhere.
2797 * 1) sethi %hi(focus_bits), %reg
2798 * sllx %reg, shift, %reg
2799 * 2) sethi %hi(focus_bits), %reg
2800 * srlx %reg, shift, %reg
2801 */
2802 if ((highest_bit_set - lowest_bit_set) < 21)
2803 {
2804 unsigned HOST_WIDE_INT focus_bits =
2805 create_simple_focus_bits (high_bits, low_bits,
2806 lowest_bit_set, 10);
2807
2808 gcc_assert (SPARC_SETHI_P (focus_bits));
2809 gcc_assert (lowest_bit_set != 10);
2810
2811 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2812
2813 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2814 if (lowest_bit_set < 10)
2815 emit_insn (gen_rtx_SET (op0,
2816 gen_rtx_LSHIFTRT (DImode, temp,
2817 GEN_INT (10 - lowest_bit_set))));
2818 else if (lowest_bit_set > 10)
2819 emit_insn (gen_rtx_SET (op0,
2820 gen_rtx_ASHIFT (DImode, temp,
2821 GEN_INT (lowest_bit_set - 10))));
2822 return;
2823 }
2824
2825 /* 1) sethi %hi(low_bits), %reg
2826 * or %reg, %lo(low_bits), %reg
2827 * 2) sethi %hi(~low_bits), %reg
2828 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2829 */
2830 if (high_bits == 0
2831 || high_bits == 0xffffffff)
2832 {
2833 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2834 (high_bits == 0xffffffff));
2835 return;
2836 }
2837
2838 /* Now, try 3-insn sequences. */
2839
2840 /* 1) sethi %hi(high_bits), %reg
2841 * or %reg, %lo(high_bits), %reg
2842 * sllx %reg, 32, %reg
2843 */
2844 if (low_bits == 0)
2845 {
2846 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2847 return;
2848 }
2849
2850 /* We may be able to do something quick
2851 when the constant is negated, so try that. */
2852 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2853 (~low_bits) & 0xfffffc00))
2854 {
2855 /* NOTE: The trailing bits get XOR'd so we need the
2856 non-negated bits, not the negated ones. */
2857 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2858
2859 if ((((~high_bits) & 0xffffffff) == 0
2860 && ((~low_bits) & 0x80000000) == 0)
2861 || (((~high_bits) & 0xffffffff) == 0xffffffff
2862 && ((~low_bits) & 0x80000000) != 0))
2863 {
2864 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2865
2866 if ((SPARC_SETHI_P (fast_int)
2867 && (~high_bits & 0xffffffff) == 0)
2868 || SPARC_SIMM13_P (fast_int))
2869 emit_insn (gen_safe_SET64 (temp, fast_int));
2870 else
2871 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2872 }
2873 else
2874 {
2875 rtx negated_const;
2876 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2877 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2878 sparc_emit_set_const64 (temp, negated_const);
2879 }
2880
2881 /* If we are XOR'ing with -1, then we should emit a one's complement
2882 instead. This way the combiner will notice logical operations
2883 such as ANDN later on and substitute. */
2884 if (trailing_bits == 0x3ff)
2885 {
2886 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2887 }
2888 else
2889 {
2890 emit_insn (gen_rtx_SET (op0,
2891 gen_safe_XOR64 (temp,
2892 (-0x400 | trailing_bits))));
2893 }
2894 return;
2895 }
2896
2897 /* 1) sethi %hi(xxx), %reg
2898 * or %reg, %lo(xxx), %reg
2899 * sllx %reg, yyy, %reg
2900 *
2901 * ??? This is just a generalized version of the low_bits==0
2902 * thing above, FIXME...
2903 */
2904 if ((highest_bit_set - lowest_bit_set) < 32)
2905 {
2906 unsigned HOST_WIDE_INT focus_bits =
2907 create_simple_focus_bits (high_bits, low_bits,
2908 lowest_bit_set, 0);
2909
2910 /* We can't get here in this state. */
2911 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2912
2913 /* So what we know is that the set bits straddle the
2914 middle of the 64-bit word. */
2915 sparc_emit_set_const64_quick2 (op0, temp,
2916 focus_bits, 0,
2917 lowest_bit_set);
2918 return;
2919 }
2920
2921 /* 1) sethi %hi(high_bits), %reg
2922 * or %reg, %lo(high_bits), %reg
2923 * sllx %reg, 32, %reg
2924 * or %reg, low_bits, %reg
2925 */
2926 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2927 {
2928 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2929 return;
2930 }
2931
2932 /* The easiest way when all else fails, is full decomposition. */
2933 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2934 }
2935
2936 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2937
2938 static bool
2939 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2940 {
2941 *p1 = SPARC_ICC_REG;
2942 *p2 = SPARC_FCC_REG;
2943 return true;
2944 }
2945
2946 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2947
2948 static unsigned int
2949 sparc_min_arithmetic_precision (void)
2950 {
2951 return 32;
2952 }
2953
2954 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2955 return the mode to be used for the comparison. For floating-point,
2956 CCFP[E]mode is used. CCNZmode should be used when the first operand
2957 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2958 processing is needed. */
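/* For instance (illustrative): a DImode comparison (compare (plus x y) 0)
   on TARGET_ARCH64 selects CCXNZmode, the same comparison in SImode
   selects CCNZmode, and an ordered float comparison such as LT selects
   CCFPEmode.  */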
2959
2960 machine_mode
2961 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2962 {
2963 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2964 {
2965 switch (op)
2966 {
2967 case EQ:
2968 case NE:
2969 case UNORDERED:
2970 case ORDERED:
2971 case UNLT:
2972 case UNLE:
2973 case UNGT:
2974 case UNGE:
2975 case UNEQ:
2976 case LTGT:
2977 return CCFPmode;
2978
2979 case LT:
2980 case LE:
2981 case GT:
2982 case GE:
2983 return CCFPEmode;
2984
2985 default:
2986 gcc_unreachable ();
2987 }
2988 }
2989 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2990 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2991 && y == const0_rtx)
2992 {
2993 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2994 return CCXNZmode;
2995 else
2996 return CCNZmode;
2997 }
2998 else
2999 {
3000 /* This is for the cmp<mode>_sne pattern. */
3001 if (GET_CODE (x) == NOT && y == constm1_rtx)
3002 {
3003 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3004 return CCXCmode;
3005 else
3006 return CCCmode;
3007 }
3008
3009 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3010 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3011 {
3012 if (GET_CODE (y) == UNSPEC
3013 && (XINT (y, 1) == UNSPEC_ADDV
3014 || XINT (y, 1) == UNSPEC_SUBV
3015 || XINT (y, 1) == UNSPEC_NEGV))
3016 return CCVmode;
3017 else
3018 return CCCmode;
3019 }
3020
3021 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3022 return CCXmode;
3023 else
3024 return CCmode;
3025 }
3026 }
3027
3028 /* Emit the compare insn and return the CC reg for a CODE comparison
3029 with operands X and Y. */
3030
3031 static rtx
3032 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3033 {
3034 machine_mode mode;
3035 rtx cc_reg;
3036
3037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3038 return x;
3039
3040 mode = SELECT_CC_MODE (code, x, y);
3041
3042 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3043 fcc regs (cse can't tell they're really call clobbered regs and will
3044 remove a duplicate comparison even if there is an intervening function
3045 call - it will then try to reload the cc reg via an int reg which is why
3046 we need the movcc patterns). It is possible to provide the movcc
3047 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3048 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3049 to tell cse that CCFPE mode registers (even pseudos) are call
3050 clobbered. */
3051
3052 /* ??? This is an experiment. Rather than making changes to cse which may
3053 or may not be easy/clean, we do our own cse. This is possible because
3054 we will generate hard registers. Cse knows they're call clobbered (it
3055 doesn't know the same thing about pseudos). If we guess wrong, no big
3056 deal, but if we win, great! */
3057
3058 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3059 #if 1 /* experiment */
3060 {
3061 int reg;
3062 /* We cycle through the registers to ensure they're all exercised. */
3063 static int next_fcc_reg = 0;
3064 /* Previous x,y for each fcc reg. */
3065 static rtx prev_args[4][2];
3066
3067 /* Scan prev_args for x,y. */
3068 for (reg = 0; reg < 4; reg++)
3069 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3070 break;
3071 if (reg == 4)
3072 {
3073 reg = next_fcc_reg;
3074 prev_args[reg][0] = x;
3075 prev_args[reg][1] = y;
3076 next_fcc_reg = (next_fcc_reg + 1) & 3;
3077 }
3078 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3079 }
3080 #else
3081 cc_reg = gen_reg_rtx (mode);
3082 #endif /* ! experiment */
3083 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3084 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3085 else
3086 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3087
3088 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3089 will only result in an unrecognizable insn so no point in asserting. */
3090 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3091
3092 return cc_reg;
3093 }
3094
3095
3096 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3097
3098 rtx
3099 gen_compare_reg (rtx cmp)
3100 {
3101 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3102 }
3103
3104 /* This function is used for v9 only.
3105 DEST is the target of the Scc insn.
3106 CODE is the code for an Scc's comparison.
3107 X and Y are the values we compare.
3108
3109 This function is needed to turn
3110
3111 (set (reg:SI 110)
3112 (gt (reg:CCX 100 %icc)
3113 (const_int 0)))
3114 into
3115 (set (reg:SI 110)
3116 (gt:DI (reg:CCX 100 %icc)
3117 (const_int 0)))
3118
3119 I.e., the instruction recognizer needs to see the mode of the comparison to
3120 find the right instruction. We could use "gt:DI" right in the
3121 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3122
3123 static int
3124 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3125 {
3126 if (! TARGET_ARCH64
3127 && (GET_MODE (x) == DImode
3128 || GET_MODE (dest) == DImode))
3129 return 0;
3130
3131 /* Try to use the movrCC insns. */
3132 if (TARGET_ARCH64
3133 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3134 && y == const0_rtx
3135 && v9_regcmp_p (compare_code))
3136 {
3137 rtx op0 = x;
3138 rtx temp;
3139
3140 /* Special case for op0 != 0. This can be done with one instruction if
3141 dest == x. */
3142
3143 if (compare_code == NE
3144 && GET_MODE (dest) == DImode
3145 && rtx_equal_p (op0, dest))
3146 {
3147 emit_insn (gen_rtx_SET (dest,
3148 gen_rtx_IF_THEN_ELSE (DImode,
3149 gen_rtx_fmt_ee (compare_code, DImode,
3150 op0, const0_rtx),
3151 const1_rtx,
3152 dest)));
3153 return 1;
3154 }
3155
3156 if (reg_overlap_mentioned_p (dest, op0))
3157 {
3158 /* Handle the case where dest == x.
3159 We "early clobber" the result. */
3160 op0 = gen_reg_rtx (GET_MODE (x));
3161 emit_move_insn (op0, x);
3162 }
3163
3164 emit_insn (gen_rtx_SET (dest, const0_rtx));
3165 if (GET_MODE (op0) != DImode)
3166 {
3167 temp = gen_reg_rtx (DImode);
3168 convert_move (temp, op0, 0);
3169 }
3170 else
3171 temp = op0;
3172 emit_insn (gen_rtx_SET (dest,
3173 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3174 gen_rtx_fmt_ee (compare_code, DImode,
3175 temp, const0_rtx),
3176 const1_rtx,
3177 dest)));
3178 return 1;
3179 }
3180 else
3181 {
3182 x = gen_compare_reg_1 (compare_code, x, y);
3183 y = const0_rtx;
3184
3185 emit_insn (gen_rtx_SET (dest, const0_rtx));
3186 emit_insn (gen_rtx_SET (dest,
3187 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3188 gen_rtx_fmt_ee (compare_code,
3189 GET_MODE (x), x, y),
3190 const1_rtx, dest)));
3191 return 1;
3192 }
3193 }
3194
3195
3196 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3197 without jumps using the addx/subx instructions. */
3198
3199 bool
3200 emit_scc_insn (rtx operands[])
3201 {
3202 rtx tem, x, y;
3203 enum rtx_code code;
3204 machine_mode mode;
3205
3206 /* The quad-word fp compare library routines all return nonzero to indicate
3207 true, which is different from the equivalent libgcc routines, so we must
3208 handle them specially here. */
3209 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3210 {
3211 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3212 GET_CODE (operands[1]));
3213 operands[2] = XEXP (operands[1], 0);
3214 operands[3] = XEXP (operands[1], 1);
3215 }
3216
3217 code = GET_CODE (operands[1]);
3218 x = operands[2];
3219 y = operands[3];
3220 mode = GET_MODE (x);
3221
3222 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3223 more applications). The exception to this is "reg != 0" which can
3224 be done in one instruction on v9 (so we do it). */
3225 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3226 {
3227 if (y != const0_rtx)
3228 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3229
3230 rtx pat = gen_rtx_SET (operands[0],
3231 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3232 x, const0_rtx));
3233
3234 /* If we can use addx/subx or addxc, add a clobber for CC. */
3235 if (mode == SImode || (code == NE && TARGET_VIS3))
3236 {
3237 rtx clobber
3238 = gen_rtx_CLOBBER (VOIDmode,
3239 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3240 SPARC_ICC_REG));
3241 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3242 }
3243
3244 emit_insn (pat);
3245 return true;
3246 }
3247
3248 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3249 if (TARGET_ARCH64
3250 && mode == DImode
3251 && !((code == LTU || code == GTU) && TARGET_VIS3)
3252 && gen_v9_scc (operands[0], code, x, y))
3253 return true;
3254
3255 /* We can do LTU and GEU using the addx/subx instructions too. And
3256 for GTU/LEU, if both operands are registers, swap them and fall
3257 back to the easy case. */
3258 if (code == GTU || code == LEU)
3259 {
3260 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3261 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3262 {
3263 tem = x;
3264 x = y;
3265 y = tem;
3266 code = swap_condition (code);
3267 }
3268 }
3269
3270 if (code == LTU || code == GEU)
3271 {
3272 emit_insn (gen_rtx_SET (operands[0],
3273 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3274 gen_compare_reg_1 (code, x, y),
3275 const0_rtx)));
3276 return true;
3277 }
3278
3279 /* All the possibilities to use addx/subx-based sequences have been
3280 exhausted; try for a 3-instruction sequence using v9 conditional
3281 moves. */
3282 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3283 return true;
3284
3285 /* Nope, do branches. */
3286 return false;
3287 }
3288
3289 /* Emit a conditional jump insn for the v9 architecture using comparison code
3290 CODE and jump target LABEL.
3291 This function exists to take advantage of the v9 brxx insns. */
3292
3293 static void
3294 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3295 {
3296 emit_jump_insn (gen_rtx_SET (pc_rtx,
3297 gen_rtx_IF_THEN_ELSE (VOIDmode,
3298 gen_rtx_fmt_ee (code, GET_MODE (op0),
3299 op0, const0_rtx),
3300 gen_rtx_LABEL_REF (VOIDmode, label),
3301 pc_rtx)));
3302 }
3303
3304 /* Emit a conditional jump insn for the UA2011 architecture using
3305 comparison code CODE and jump target LABEL. This function exists
3306 to take advantage of the UA2011 Compare and Branch insns. */
3307
3308 static void
3309 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3310 {
3311 rtx if_then_else;
3312
3313 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3314 gen_rtx_fmt_ee(code, GET_MODE(op0),
3315 op0, op1),
3316 gen_rtx_LABEL_REF (VOIDmode, label),
3317 pc_rtx);
3318
3319 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3320 }
3321
3322 void
3323 emit_conditional_branch_insn (rtx operands[])
3324 {
3325 /* The quad-word fp compare library routines all return nonzero to indicate
3326 true, which is different from the equivalent libgcc routines, so we must
3327 handle them specially here. */
3328 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3329 {
3330 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3331 GET_CODE (operands[0]));
3332 operands[1] = XEXP (operands[0], 0);
3333 operands[2] = XEXP (operands[0], 1);
3334 }
3335
3336 /* If we can tell early on that the comparison is against a constant
3337 that won't fit in the 5-bit signed immediate field of a cbcond,
3338 use one of the other v9 conditional branch sequences. */
3339 if (TARGET_CBCOND
3340 && GET_CODE (operands[1]) == REG
3341 && (GET_MODE (operands[1]) == SImode
3342 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3343 && (GET_CODE (operands[2]) != CONST_INT
3344 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3345 {
3346 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3347 return;
3348 }
3349
3350 if (TARGET_ARCH64 && operands[2] == const0_rtx
3351 && GET_CODE (operands[1]) == REG
3352 && GET_MODE (operands[1]) == DImode)
3353 {
3354 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3355 return;
3356 }
3357
3358 operands[1] = gen_compare_reg (operands[0]);
3359 operands[2] = const0_rtx;
3360 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3361 operands[1], operands[2]);
3362 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3363 operands[3]));
3364 }
3365
3366
3367 /* Generate a DFmode part of a hard TFmode register.
3368 REG is the TFmode hard register, LOW is 1 for the
3369 low 64 bits of the register and 0 otherwise.
3370 */
3371 rtx
3372 gen_df_reg (rtx reg, int low)
3373 {
3374 int regno = REGNO (reg);
3375
3376 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3377 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3378 return gen_rtx_REG (DFmode, regno);
3379 }
3380 \f
3381 /* Generate a call to FUNC_NAME with OPERANDS. Operand 0 is the return value.
3382 Unlike normal calls, TFmode operands are passed by reference. It is
3383 assumed that no more than 3 operands are required. */
3384
3385 static void
3386 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3387 {
3388 rtx ret_slot = NULL, arg[3], func_sym;
3389 int i;
3390
3391 /* We only expect to be called for conversions, unary, and binary ops. */
3392 gcc_assert (nargs == 2 || nargs == 3);
3393
3394 for (i = 0; i < nargs; ++i)
3395 {
3396 rtx this_arg = operands[i];
3397 rtx this_slot;
3398
3399 /* TFmode arguments and return values are passed by reference. */
3400 if (GET_MODE (this_arg) == TFmode)
3401 {
3402 int force_stack_temp;
3403
3404 force_stack_temp = 0;
3405 if (TARGET_BUGGY_QP_LIB && i == 0)
3406 force_stack_temp = 1;
3407
3408 if (GET_CODE (this_arg) == MEM
3409 && ! force_stack_temp)
3410 {
3411 tree expr = MEM_EXPR (this_arg);
3412 if (expr)
3413 mark_addressable (expr);
3414 this_arg = XEXP (this_arg, 0);
3415 }
3416 else if (CONSTANT_P (this_arg)
3417 && ! force_stack_temp)
3418 {
3419 this_slot = force_const_mem (TFmode, this_arg);
3420 this_arg = XEXP (this_slot, 0);
3421 }
3422 else
3423 {
3424 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3425
3426 /* Operand 0 is the return value. We'll copy it out later. */
3427 if (i > 0)
3428 emit_move_insn (this_slot, this_arg);
3429 else
3430 ret_slot = this_slot;
3431
3432 this_arg = XEXP (this_slot, 0);
3433 }
3434 }
3435
3436 arg[i] = this_arg;
3437 }
3438
3439 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3440
3441 if (GET_MODE (operands[0]) == TFmode)
3442 {
3443 if (nargs == 2)
3444 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3445 arg[0], GET_MODE (arg[0]),
3446 arg[1], GET_MODE (arg[1]));
3447 else
3448 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3449 arg[0], GET_MODE (arg[0]),
3450 arg[1], GET_MODE (arg[1]),
3451 arg[2], GET_MODE (arg[2]));
3452
3453 if (ret_slot)
3454 emit_move_insn (operands[0], ret_slot);
3455 }
3456 else
3457 {
3458 rtx ret;
3459
3460 gcc_assert (nargs == 2);
3461
3462 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3463 GET_MODE (operands[0]),
3464 arg[1], GET_MODE (arg[1]));
3465
3466 if (ret != operands[0])
3467 emit_move_insn (operands[0], ret);
3468 }
3469 }
3470
3471 /* Expand soft-float TFmode calls to sparc abi routines. */
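/* For example (illustrative, assuming the usual SPARC quad-emulation ABI
   in which TFmode values are passed and returned by reference): a TFmode
   addition c = a + b is expanded into a call essentially equivalent to
   _Qp_add (&c, &a, &b), with emit_soft_tfmode_libcall spilling any
   non-memory operand to the stack first.  */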
3472
3473 static void
3474 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3475 {
3476 const char *func;
3477
3478 switch (code)
3479 {
3480 case PLUS:
3481 func = "_Qp_add";
3482 break;
3483 case MINUS:
3484 func = "_Qp_sub";
3485 break;
3486 case MULT:
3487 func = "_Qp_mul";
3488 break;
3489 case DIV:
3490 func = "_Qp_div";
3491 break;
3492 default:
3493 gcc_unreachable ();
3494 }
3495
3496 emit_soft_tfmode_libcall (func, 3, operands);
3497 }
3498
3499 static void
3500 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3501 {
3502 const char *func;
3503
3504 gcc_assert (code == SQRT);
3505 func = "_Qp_sqrt";
3506
3507 emit_soft_tfmode_libcall (func, 2, operands);
3508 }
3509
3510 static void
3511 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3512 {
3513 const char *func;
3514
3515 switch (code)
3516 {
3517 case FLOAT_EXTEND:
3518 switch (GET_MODE (operands[1]))
3519 {
3520 case E_SFmode:
3521 func = "_Qp_stoq";
3522 break;
3523 case E_DFmode:
3524 func = "_Qp_dtoq";
3525 break;
3526 default:
3527 gcc_unreachable ();
3528 }
3529 break;
3530
3531 case FLOAT_TRUNCATE:
3532 switch (GET_MODE (operands[0]))
3533 {
3534 case E_SFmode:
3535 func = "_Qp_qtos";
3536 break;
3537 case E_DFmode:
3538 func = "_Qp_qtod";
3539 break;
3540 default:
3541 gcc_unreachable ();
3542 }
3543 break;
3544
3545 case FLOAT:
3546 switch (GET_MODE (operands[1]))
3547 {
3548 case E_SImode:
3549 func = "_Qp_itoq";
3550 if (TARGET_ARCH64)
3551 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3552 break;
3553 case E_DImode:
3554 func = "_Qp_xtoq";
3555 break;
3556 default:
3557 gcc_unreachable ();
3558 }
3559 break;
3560
3561 case UNSIGNED_FLOAT:
3562 switch (GET_MODE (operands[1]))
3563 {
3564 case E_SImode:
3565 func = "_Qp_uitoq";
3566 if (TARGET_ARCH64)
3567 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3568 break;
3569 case E_DImode:
3570 func = "_Qp_uxtoq";
3571 break;
3572 default:
3573 gcc_unreachable ();
3574 }
3575 break;
3576
3577 case FIX:
3578 switch (GET_MODE (operands[0]))
3579 {
3580 case E_SImode:
3581 func = "_Qp_qtoi";
3582 break;
3583 case E_DImode:
3584 func = "_Qp_qtox";
3585 break;
3586 default:
3587 gcc_unreachable ();
3588 }
3589 break;
3590
3591 case UNSIGNED_FIX:
3592 switch (GET_MODE (operands[0]))
3593 {
3594 case E_SImode:
3595 func = "_Qp_qtoui";
3596 break;
3597 case E_DImode:
3598 func = "_Qp_qtoux";
3599 break;
3600 default:
3601 gcc_unreachable ();
3602 }
3603 break;
3604
3605 default:
3606 gcc_unreachable ();
3607 }
3608
3609 emit_soft_tfmode_libcall (func, 2, operands);
3610 }
3611
3612 /* Expand a hard-float TFmode operation. All arguments must be in
3613 registers. */
3614
3615 static void
3616 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3617 {
3618 rtx op, dest;
3619
3620 if (GET_RTX_CLASS (code) == RTX_UNARY)
3621 {
3622 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3623 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3624 }
3625 else
3626 {
3627 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3628 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3629 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3630 operands[1], operands[2]);
3631 }
3632
3633 if (register_operand (operands[0], VOIDmode))
3634 dest = operands[0];
3635 else
3636 dest = gen_reg_rtx (GET_MODE (operands[0]));
3637
3638 emit_insn (gen_rtx_SET (dest, op));
3639
3640 if (dest != operands[0])
3641 emit_move_insn (operands[0], dest);
3642 }
3643
3644 void
3645 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3646 {
3647 if (TARGET_HARD_QUAD)
3648 emit_hard_tfmode_operation (code, operands);
3649 else
3650 emit_soft_tfmode_binop (code, operands);
3651 }
3652
3653 void
3654 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3655 {
3656 if (TARGET_HARD_QUAD)
3657 emit_hard_tfmode_operation (code, operands);
3658 else
3659 emit_soft_tfmode_unop (code, operands);
3660 }
3661
3662 void
3663 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3664 {
3665 if (TARGET_HARD_QUAD)
3666 emit_hard_tfmode_operation (code, operands);
3667 else
3668 emit_soft_tfmode_cvt (code, operands);
3669 }
3670 \f
3671 /* Return nonzero if a branch/jump/call instruction will be emitting
3672 a nop into its delay slot.
3673
3674 int
3675 empty_delay_slot (rtx_insn *insn)
3676 {
3677 rtx seq;
3678
3679 /* If no previous instruction (should not happen), return true. */
3680 if (PREV_INSN (insn) == NULL)
3681 return 1;
3682
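/* When the delay slot of the previous branch has been filled, the branch
   and its slot insn are wrapped into a SEQUENCE, so the insn following
   our predecessor is that SEQUENCE rather than INSN itself.  */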
3683 seq = NEXT_INSN (PREV_INSN (insn));
3684 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3685 return 0;
3686
3687 return 1;
3688 }
3689
3690 /* Return nonzero if we should emit a nop after a cbcond instruction.
3691 The cbcond instruction does not have a delay slot; however, there is
3692 a severe performance penalty if a control transfer appears right
3693 after a cbcond. Therefore we emit a nop when we detect this
3694 situation. */
3695
3696 int
3697 emit_cbcond_nop (rtx_insn *insn)
3698 {
3699 rtx next = next_active_insn (insn);
3700
3701 if (!next)
3702 return 1;
3703
3704 if (NONJUMP_INSN_P (next)
3705 && GET_CODE (PATTERN (next)) == SEQUENCE)
3706 next = XVECEXP (PATTERN (next), 0, 0);
3707 else if (CALL_P (next)
3708 && GET_CODE (PATTERN (next)) == PARALLEL)
3709 {
3710 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3711
3712 if (GET_CODE (delay) == RETURN)
3713 {
3714 /* It's a sibling call. Do not emit the nop if we're going
3715 to emit something other than the jump itself as the first
3716 instruction of the sibcall sequence. */
3717 if (sparc_leaf_function_p || TARGET_FLAT)
3718 return 0;
3719 }
3720 }
3721
3722 if (NONJUMP_INSN_P (next))
3723 return 0;
3724
3725 return 1;
3726 }
3727
3728 /* Return nonzero if TRIAL can go into the call delay slot. */
3729
3730 int
3731 eligible_for_call_delay (rtx_insn *trial)
3732 {
3733 rtx pat;
3734
3735 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3736 return 0;
3737
3738 /* Binutils allows
3739 call __tls_get_addr, %tgd_call (foo)
3740 add %l7, %o0, %o0, %tgd_add (foo)
3741 while Sun as/ld does not. */
3742 if (TARGET_GNU_TLS || !TARGET_TLS)
3743 return 1;
3744
3745 pat = PATTERN (trial);
3746
3747 /* We must reject tgd_add{32|64}, i.e.
3748 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3749 and tldm_add{32|64}, i.e.
3750 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3751 for Sun as/ld. */
3752 if (GET_CODE (pat) == SET
3753 && GET_CODE (SET_SRC (pat)) == PLUS)
3754 {
3755 rtx unspec = XEXP (SET_SRC (pat), 1);
3756
3757 if (GET_CODE (unspec) == UNSPEC
3758 && (XINT (unspec, 1) == UNSPEC_TLSGD
3759 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3760 return 0;
3761 }
3762
3763 return 1;
3764 }
3765
3766 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3767 instruction. RETURN_P is true if the v9 variant 'return' is to be
3768 considered in the test too.
3769
3770 TRIAL must be a SET whose destination is a REG appropriate for the
3771 'restore' instruction or, if RETURN_P is true, for the 'return'
3772 instruction. */
3773
3774 static int
3775 eligible_for_restore_insn (rtx trial, bool return_p)
3776 {
3777 rtx pat = PATTERN (trial);
3778 rtx src = SET_SRC (pat);
3779 bool src_is_freg = false;
3780 rtx src_reg;
3781
3782 /* Since we now can do moves between float and integer registers when
3783 VIS3 is enabled, we have to catch this case. We can allow such
3784 moves when doing a 'return' however. */
3785 src_reg = src;
3786 if (GET_CODE (src_reg) == SUBREG)
3787 src_reg = SUBREG_REG (src_reg);
3788 if (GET_CODE (src_reg) == REG
3789 && SPARC_FP_REG_P (REGNO (src_reg)))
3790 src_is_freg = true;
3791
3792 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3793 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3794 && arith_operand (src, GET_MODE (src))
3795 && ! src_is_freg)
3796 {
3797 if (TARGET_ARCH64)
3798 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3799 else
3800 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3801 }
3802
3803 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3804 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3805 && arith_double_operand (src, GET_MODE (src))
3806 && ! src_is_freg)
3807 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3808
3809 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3810 else if (! TARGET_FPU && register_operand (src, SFmode))
3811 return 1;
3812
3813 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3814 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3815 return 1;
3816
3817 /* If we have the 'return' instruction, anything that does not use
3818 local or output registers and can go into a delay slot wins. */
3819 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3820 return 1;
3821
3822 /* The 'restore src1,src2,dest' pattern for SImode. */
3823 else if (GET_CODE (src) == PLUS
3824 && register_operand (XEXP (src, 0), SImode)
3825 && arith_operand (XEXP (src, 1), SImode))
3826 return 1;
3827
3828 /* The 'restore src1,src2,dest' pattern for DImode. */
3829 else if (GET_CODE (src) == PLUS
3830 && register_operand (XEXP (src, 0), DImode)
3831 && arith_double_operand (XEXP (src, 1), DImode))
3832 return 1;
3833
3834 /* The 'restore src1,%lo(src2),dest' pattern. */
3835 else if (GET_CODE (src) == LO_SUM
3836 && ! TARGET_CM_MEDMID
3837 && ((register_operand (XEXP (src, 0), SImode)
3838 && immediate_operand (XEXP (src, 1), SImode))
3839 || (TARGET_ARCH64
3840 && register_operand (XEXP (src, 0), DImode)
3841 && immediate_operand (XEXP (src, 1), DImode))))
3842 return 1;
3843
3844 /* The 'restore src,src,dest' pattern. */
3845 else if (GET_CODE (src) == ASHIFT
3846 && (register_operand (XEXP (src, 0), SImode)
3847 || register_operand (XEXP (src, 0), DImode))
3848 && XEXP (src, 1) == const1_rtx)
3849 return 1;
3850
3851 return 0;
3852 }
3853
3854 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3855
3856 int
3857 eligible_for_return_delay (rtx_insn *trial)
3858 {
3859 int regno;
3860 rtx pat;
3861
3862 /* If the function uses __builtin_eh_return, the eh_return machinery
3863 occupies the delay slot. */
3864 if (crtl->calls_eh_return)
3865 return 0;
3866
3867 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3868 return 0;
3869
3870 /* In the case of a leaf or flat function, anything can go into the slot. */
3871 if (sparc_leaf_function_p || TARGET_FLAT)
3872 return 1;
3873
3874 if (!NONJUMP_INSN_P (trial))
3875 return 0;
3876
3877 pat = PATTERN (trial);
3878 if (GET_CODE (pat) == PARALLEL)
3879 {
3880 int i;
3881
3882 if (! TARGET_V9)
3883 return 0;
3884 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3885 {
3886 rtx expr = XVECEXP (pat, 0, i);
3887 if (GET_CODE (expr) != SET)
3888 return 0;
3889 if (GET_CODE (SET_DEST (expr)) != REG)
3890 return 0;
3891 regno = REGNO (SET_DEST (expr));
3892 if (regno >= 8 && regno < 24)
3893 return 0;
3894 }
3895 return !epilogue_renumber (&pat, 1);
3896 }
3897
3898 if (GET_CODE (pat) != SET)
3899 return 0;
3900
3901 if (GET_CODE (SET_DEST (pat)) != REG)
3902 return 0;
3903
3904 regno = REGNO (SET_DEST (pat));
3905
3906 /* Otherwise, only operations which can be done in tandem with
3907 a `restore' or `return' insn can go into the delay slot. */
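/* Hard registers 8 to 23 are the %o and %l registers, the ones replaced
   by the register window shift of a 'restore', so a plain set of one of
   them cannot survive in the delay slot (same check as in the PARALLEL
   loop above).  */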
3908 if (regno >= 8 && regno < 24)
3909 return 0;
3910
3911 /* If this instruction sets up a floating point register and we have a return
3912 instruction, it can probably go in. But restore will not work
3913 with FP_REGS. */
3914 if (! SPARC_INT_REG_P (regno))
3915 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3916
3917 return eligible_for_restore_insn (trial, true);
3918 }
3919
3920 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3921
3922 int
3923 eligible_for_sibcall_delay (rtx_insn *trial)
3924 {
3925 rtx pat;
3926
3927 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3928 return 0;
3929
3930 if (!NONJUMP_INSN_P (trial))
3931 return 0;
3932
3933 pat = PATTERN (trial);
3934
3935 if (sparc_leaf_function_p || TARGET_FLAT)
3936 {
3937 /* If the tail call is done using the call instruction,
3938 we have to restore %o7 in the delay slot. */
3939 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3940 return 0;
3941
3942 /* %g1 is used to build the function address. */
3943 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3944 return 0;
3945
3946 return 1;
3947 }
3948
3949 if (GET_CODE (pat) != SET)
3950 return 0;
3951
3952 /* Otherwise, only operations which can be done in tandem with
3953 a `restore' insn can go into the delay slot. */
3954 if (GET_CODE (SET_DEST (pat)) != REG
3955 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3956 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3957 return 0;
3958
3959 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3960 in most cases. */
3961 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3962 return 0;
3963
3964 return eligible_for_restore_insn (trial, false);
3965 }
3966 \f
3967 /* Determine if it's legal to put X into the constant pool. This
3968 is not possible if X contains the address of a symbol that is
3969 not constant (TLS) or not known at final link time (PIC). */
3970
3971 static bool
3972 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3973 {
3974 switch (GET_CODE (x))
3975 {
3976 case CONST_INT:
3977 case CONST_WIDE_INT:
3978 case CONST_DOUBLE:
3979 case CONST_VECTOR:
3980 /* Accept all non-symbolic constants. */
3981 return false;
3982
3983 case LABEL_REF:
3984 /* Labels are OK iff we are non-PIC. */
3985 return flag_pic != 0;
3986
3987 case SYMBOL_REF:
3988 /* 'Naked' TLS symbol references are never OK,
3989 non-TLS symbols are OK iff we are non-PIC. */
3990 if (SYMBOL_REF_TLS_MODEL (x))
3991 return true;
3992 else
3993 return flag_pic != 0;
3994
3995 case CONST:
3996 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3997 case PLUS:
3998 case MINUS:
3999 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4000 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4001 case UNSPEC:
4002 return true;
4003 default:
4004 gcc_unreachable ();
4005 }
4006 }
4007 \f
4008 /* Global Offset Table support. */
4009 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4010 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4011
4012 /* Return the SYMBOL_REF for the Global Offset Table. */
4013
4014 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4015
4016 static rtx
4017 sparc_got (void)
4018 {
4019 if (!sparc_got_symbol)
4020 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4021
4022 return sparc_got_symbol;
4023 }
4024
4025 /* Ensure that we are not using patterns that are not OK with PIC. */
4026
4027 int
4028 check_pic (int i)
4029 {
4030 rtx op;
4031
4032 switch (flag_pic)
4033 {
4034 case 1:
4035 op = recog_data.operand[i];
4036 gcc_assert (GET_CODE (op) != SYMBOL_REF
4037 && (GET_CODE (op) != CONST
4038 || (GET_CODE (XEXP (op, 0)) == MINUS
4039 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4040 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4041 /* fallthrough */
4042 case 2:
4043 default:
4044 return 1;
4045 }
4046 }
4047
4048 /* Return true if X is an address which needs a temporary register when
4049 reloaded while generating PIC code. */
4050
4051 int
4052 pic_address_needs_scratch (rtx x)
4053 {
4054 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4055 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4056 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4058 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4059 return 1;
4060
4061 return 0;
4062 }
4063
4064 /* Determine if a given RTX is a valid constant. We already know this
4065 satisfies CONSTANT_P. */
4066
4067 static bool
4068 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4069 {
4070 switch (GET_CODE (x))
4071 {
4072 case CONST:
4073 case SYMBOL_REF:
4074 if (sparc_tls_referenced_p (x))
4075 return false;
4076 break;
4077
4078 case CONST_DOUBLE:
4079 /* Floating point constants are generally not ok.
4080 The only exceptions are 0.0 and all-ones in VIS. */
4081 if (TARGET_VIS
4082 && SCALAR_FLOAT_MODE_P (mode)
4083 && (const_zero_operand (x, mode)
4084 || const_all_ones_operand (x, mode)))
4085 return true;
4086
4087 return false;
4088
4089 case CONST_VECTOR:
4090 /* Vector constants are generally not ok.
4091 The only exceptions are 0 and -1 in VIS. */
4092 if (TARGET_VIS
4093 && (const_zero_operand (x, mode)
4094 || const_all_ones_operand (x, mode)))
4095 return true;
4096
4097 return false;
4098
4099 default:
4100 break;
4101 }
4102
4103 return true;
4104 }
4105
4106 /* Determine if a given RTX is a valid constant address. */
4107
4108 bool
4109 constant_address_p (rtx x)
4110 {
4111 switch (GET_CODE (x))
4112 {
4113 case LABEL_REF:
4114 case CONST_INT:
4115 case HIGH:
4116 return true;
4117
4118 case CONST:
4119 if (flag_pic && pic_address_needs_scratch (x))
4120 return false;
4121 return sparc_legitimate_constant_p (Pmode, x);
4122
4123 case SYMBOL_REF:
4124 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4125
4126 default:
4127 return false;
4128 }
4129 }
4130
4131 /* Nonzero if the constant value X is a legitimate general operand
4132 when generating PIC code. It is given that flag_pic is on and
4133 that X satisfies CONSTANT_P. */
4134
4135 bool
4136 legitimate_pic_operand_p (rtx x)
4137 {
4138 if (pic_address_needs_scratch (x))
4139 return false;
4140 if (sparc_tls_referenced_p (x))
4141 return false;
4142 return true;
4143 }
4144
4145 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4146 (CONST_INT_P (X) \
4147 && INTVAL (X) >= -0x1000 \
4148 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4149
4150 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4151 (CONST_INT_P (X) \
4152 && INTVAL (X) >= -0x1000 \
4153 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
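/* A sketch of the reasoning behind the bounds above: SPARC load/store
   immediates are 13-bit signed (simm13), i.e. -0x1000 to 0xfff, and the
   GET_MODE_SIZE adjustment keeps the last byte of the access in range.
   For the OLO10 case the upper bound is lowered to 0xc00 so that adding
   the 10-bit %lo() part (at most 0x3ff) cannot push the final offset out
   of the simm13 range.  */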
4154
4155 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4156
4157 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4158 ordinarily. This changes a bit when generating PIC. */
4159
4160 static bool
4161 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4162 {
4163 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4164
4165 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4166 rs1 = addr;
4167 else if (GET_CODE (addr) == PLUS)
4168 {
4169 rs1 = XEXP (addr, 0);
4170 rs2 = XEXP (addr, 1);
4171
4172 /* Canonicalize. REG comes first; if there are no regs,
4173 LO_SUM comes first. */
4174 if (!REG_P (rs1)
4175 && GET_CODE (rs1) != SUBREG
4176 && (REG_P (rs2)
4177 || GET_CODE (rs2) == SUBREG
4178 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4179 {
4180 rs1 = XEXP (addr, 1);
4181 rs2 = XEXP (addr, 0);
4182 }
4183
4184 if ((flag_pic == 1
4185 && rs1 == pic_offset_table_rtx
4186 && !REG_P (rs2)
4187 && GET_CODE (rs2) != SUBREG
4188 && GET_CODE (rs2) != LO_SUM
4189 && GET_CODE (rs2) != MEM
4190 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4191 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4192 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4193 || ((REG_P (rs1)
4194 || GET_CODE (rs1) == SUBREG)
4195 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4196 {
4197 imm1 = rs2;
4198 rs2 = NULL;
4199 }
4200 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4201 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4202 {
4203 /* We prohibit REG + REG for TFmode when there are no quad move insns
4204 and we consequently need to split. We do this because REG+REG
4205 is not an offsettable address. If we get the situation in reload
4206 where source and destination of a movtf pattern are both MEMs with
4207 REG+REG address, then only one of them gets converted to an
4208 offsettable address. */
4209 if (mode == TFmode
4210 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4211 return 0;
4212
4213 /* Likewise for TImode, but in all cases. */
4214 if (mode == TImode)
4215 return 0;
4216
4217 /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not optimizing,
4218 because then mem_min_alignment is likely to be zero
4219 after reload and the forced split would lack a matching splitter
4220 pattern. */
4221 if (TARGET_ARCH32 && !optimize
4222 && (mode == DFmode || mode == DImode))
4223 return 0;
4224 }
4225 else if (USE_AS_OFFSETABLE_LO10
4226 && GET_CODE (rs1) == LO_SUM
4227 && TARGET_ARCH64
4228 && ! TARGET_CM_MEDMID
4229 && RTX_OK_FOR_OLO10_P (rs2, mode))
4230 {
4231 rs2 = NULL;
4232 imm1 = XEXP (rs1, 1);
4233 rs1 = XEXP (rs1, 0);
4234 if (!CONSTANT_P (imm1)
4235 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4236 return 0;
4237 }
4238 }
4239 else if (GET_CODE (addr) == LO_SUM)
4240 {
4241 rs1 = XEXP (addr, 0);
4242 imm1 = XEXP (addr, 1);
4243
4244 if (!CONSTANT_P (imm1)
4245 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4246 return 0;
4247
4248 /* We can't allow TFmode in 32-bit mode, because an offset greater
4249 than the alignment (8) may cause the LO_SUM to overflow. */
4250 if (mode == TFmode && TARGET_ARCH32)
4251 return 0;
4252
4253 /* During reload, accept the HIGH+LO_SUM construct generated by
4254 sparc_legitimize_reload_address. */
4255 if (reload_in_progress
4256 && GET_CODE (rs1) == HIGH
4257 && XEXP (rs1, 0) == imm1)
4258 return 1;
4259 }
4260 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4261 return 1;
4262 else
4263 return 0;
4264
4265 if (GET_CODE (rs1) == SUBREG)
4266 rs1 = SUBREG_REG (rs1);
4267 if (!REG_P (rs1))
4268 return 0;
4269
4270 if (rs2)
4271 {
4272 if (GET_CODE (rs2) == SUBREG)
4273 rs2 = SUBREG_REG (rs2);
4274 if (!REG_P (rs2))
4275 return 0;
4276 }
4277
4278 if (strict)
4279 {
4280 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4281 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4282 return 0;
4283 }
4284 else
4285 {
4286 if ((! SPARC_INT_REG_P (REGNO (rs1))
4287 && REGNO (rs1) != FRAME_POINTER_REGNUM
4288 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4289 || (rs2
4290 && (! SPARC_INT_REG_P (REGNO (rs2))
4291 && REGNO (rs2) != FRAME_POINTER_REGNUM
4292 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4293 return 0;
4294 }
4295 return 1;
4296 }
4297
4298 /* Return the SYMBOL_REF for the tls_get_addr function. */
4299
4300 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4301
4302 static rtx
4303 sparc_tls_get_addr (void)
4304 {
4305 if (!sparc_tls_symbol)
4306 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4307
4308 return sparc_tls_symbol;
4309 }
4310
4311 /* Return the Global Offset Table to be used in TLS mode. */
4312
4313 static rtx
4314 sparc_tls_got (void)
4315 {
4316 /* In PIC mode, this is just the PIC offset table. */
4317 if (flag_pic)
4318 {
4319 crtl->uses_pic_offset_table = 1;
4320 return pic_offset_table_rtx;
4321 }
4322
4323 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4324 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4325 if (TARGET_SUN_TLS && TARGET_ARCH32)
4326 {
4327 load_got_register ();
4328 return global_offset_table_rtx;
4329 }
4330
4331 /* In all other cases, we load a new pseudo with the GOT symbol. */
4332 return copy_to_reg (sparc_got ());
4333 }
4334
4335 /* Return true if X contains a thread-local symbol. */
4336
4337 static bool
4338 sparc_tls_referenced_p (rtx x)
4339 {
4340 if (!TARGET_HAVE_TLS)
4341 return false;
4342
4343 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4344 x = XEXP (XEXP (x, 0), 0);
4345
4346 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4347 return true;
4348
4349 /* That's all we handle in sparc_legitimize_tls_address for now. */
4350 return false;
4351 }
4352
4353 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4354 this (thread-local) address. */
4355
4356 static rtx
4357 sparc_legitimize_tls_address (rtx addr)
4358 {
4359 rtx temp1, temp2, temp3, ret, o0, got;
4360 rtx_insn *insn;
4361
4362 gcc_assert (can_create_pseudo_p ());
4363
4364 if (GET_CODE (addr) == SYMBOL_REF)
4365 switch (SYMBOL_REF_TLS_MODEL (addr))
4366 {
4367 case TLS_MODEL_GLOBAL_DYNAMIC:
4368 start_sequence ();
4369 temp1 = gen_reg_rtx (SImode);
4370 temp2 = gen_reg_rtx (SImode);
4371 ret = gen_reg_rtx (Pmode);
4372 o0 = gen_rtx_REG (Pmode, 8);
4373 got = sparc_tls_got ();
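/* The sequence built below corresponds roughly to
     sethi  %tgd_hi22(addr), temp1
     add    temp1, %tgd_lo10(addr), temp2
     add    got, temp2, %o0, %tgd_add(addr)
     call   __tls_get_addr, %tgd_call(addr)
   with the result in %o0 (a sketch inferred from the pattern names and
   the binutils example quoted in eligible_for_call_delay; the exact
   assembly is defined in sparc.md).  */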
4374 emit_insn (gen_tgd_hi22 (temp1, addr));
4375 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4376 if (TARGET_ARCH32)
4377 {
4378 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4379 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4380 addr, const1_rtx));
4381 }
4382 else
4383 {
4384 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4385 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4386 addr, const1_rtx));
4387 }
4388 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4389 insn = get_insns ();
4390 end_sequence ();
4391 emit_libcall_block (insn, ret, o0, addr);
4392 break;
4393
4394 case TLS_MODEL_LOCAL_DYNAMIC:
4395 start_sequence ();
4396 temp1 = gen_reg_rtx (SImode);
4397 temp2 = gen_reg_rtx (SImode);
4398 temp3 = gen_reg_rtx (Pmode);
4399 ret = gen_reg_rtx (Pmode);
4400 o0 = gen_rtx_REG (Pmode, 8);
4401 got = sparc_tls_got ();
4402 emit_insn (gen_tldm_hi22 (temp1));
4403 emit_insn (gen_tldm_lo10 (temp2, temp1));
4404 if (TARGET_ARCH32)
4405 {
4406 emit_insn (gen_tldm_add32 (o0, got, temp2));
4407 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4408 const1_rtx));
4409 }
4410 else
4411 {
4412 emit_insn (gen_tldm_add64 (o0, got, temp2));
4413 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4414 const1_rtx));
4415 }
4416 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4417 insn = get_insns ();
4418 end_sequence ();
4419 emit_libcall_block (insn, temp3, o0,
4420 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4421 UNSPEC_TLSLD_BASE));
4422 temp1 = gen_reg_rtx (SImode);
4423 temp2 = gen_reg_rtx (SImode);
4424 emit_insn (gen_tldo_hix22 (temp1, addr));
4425 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4426 if (TARGET_ARCH32)
4427 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4428 else
4429 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4430 break;
4431
4432 case TLS_MODEL_INITIAL_EXEC:
4433 temp1 = gen_reg_rtx (SImode);
4434 temp2 = gen_reg_rtx (SImode);
4435 temp3 = gen_reg_rtx (Pmode);
4436 got = sparc_tls_got ();
4437 emit_insn (gen_tie_hi22 (temp1, addr));
4438 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4439 if (TARGET_ARCH32)
4440 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4441 else
4442 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
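/* Hard register 7 used below is %g7, the TLS base register (see also the
   #ignore handling in sparc_output_scratch_registers); the IE and LE
   sequences add the computed offset to it.  */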
4443 if (TARGET_SUN_TLS)
4444 {
4445 ret = gen_reg_rtx (Pmode);
4446 if (TARGET_ARCH32)
4447 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4448 temp3, addr));
4449 else
4450 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4451 temp3, addr));
4452 }
4453 else
4454 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4455 break;
4456
4457 case TLS_MODEL_LOCAL_EXEC:
4458 temp1 = gen_reg_rtx (Pmode);
4459 temp2 = gen_reg_rtx (Pmode);
4460 if (TARGET_ARCH32)
4461 {
4462 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4463 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4464 }
4465 else
4466 {
4467 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4468 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4469 }
4470 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4471 break;
4472
4473 default:
4474 gcc_unreachable ();
4475 }
4476
4477 else if (GET_CODE (addr) == CONST)
4478 {
4479 rtx base, offset;
4480
4481 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4482
4483 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4484 offset = XEXP (XEXP (addr, 0), 1);
4485
4486 base = force_operand (base, NULL_RTX);
4487 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4488 offset = force_reg (Pmode, offset);
4489 ret = gen_rtx_PLUS (Pmode, base, offset);
4490 }
4491
4492 else
4493 gcc_unreachable (); /* for now ... */
4494
4495 return ret;
4496 }
4497
4498 /* Legitimize PIC addresses. If the address is already position-independent,
4499 we return ORIG. Newly generated position-independent addresses go into a
4500 reg. This is REG if nonzero, otherwise we allocate register(s) as
4501 necessary. */
4502
4503 static rtx
4504 sparc_legitimize_pic_address (rtx orig, rtx reg)
4505 {
4506 bool gotdata_op = false;
4507
4508 if (GET_CODE (orig) == SYMBOL_REF
4509 /* See the comment in sparc_expand_move. */
4510 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4511 {
4512 rtx pic_ref, address;
4513 rtx_insn *insn;
4514
4515 if (reg == 0)
4516 {
4517 gcc_assert (can_create_pseudo_p ());
4518 reg = gen_reg_rtx (Pmode);
4519 }
4520
4521 if (flag_pic == 2)
4522 {
4523 /* If not during reload, allocate another temp reg here for loading
4524 in the address, so that these instructions can be optimized
4525 properly. */
4526 rtx temp_reg = (! can_create_pseudo_p ()
4527 ? reg : gen_reg_rtx (Pmode));
4528
4529 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4530 won't get confused into thinking that these two instructions
4531 are loading in the true address of the symbol. If in the
4532 future a PIC rtx exists, that should be used instead. */
4533 if (TARGET_ARCH64)
4534 {
4535 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4536 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4537 }
4538 else
4539 {
4540 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4541 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4542 }
4543 address = temp_reg;
4544 gotdata_op = true;
4545 }
4546 else
4547 address = orig;
4548
4549 crtl->uses_pic_offset_table = 1;
4550 if (gotdata_op)
4551 {
4552 if (TARGET_ARCH64)
4553 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4554 pic_offset_table_rtx,
4555 address, orig));
4556 else
4557 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4558 pic_offset_table_rtx,
4559 address, orig));
4560 }
4561 else
4562 {
4563 pic_ref
4564 = gen_const_mem (Pmode,
4565 gen_rtx_PLUS (Pmode,
4566 pic_offset_table_rtx, address));
4567 insn = emit_move_insn (reg, pic_ref);
4568 }
4569
4570 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4571 by the loop optimizer. */
4572 set_unique_reg_note (insn, REG_EQUAL, orig);
4573 return reg;
4574 }
4575 else if (GET_CODE (orig) == CONST)
4576 {
4577 rtx base, offset;
4578
4579 if (GET_CODE (XEXP (orig, 0)) == PLUS
4580 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4581 return orig;
4582
4583 if (reg == 0)
4584 {
4585 gcc_assert (can_create_pseudo_p ());
4586 reg = gen_reg_rtx (Pmode);
4587 }
4588
4589 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4590 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4591 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4592 base == reg ? NULL_RTX : reg);
4593
4594 if (GET_CODE (offset) == CONST_INT)
4595 {
4596 if (SMALL_INT (offset))
4597 return plus_constant (Pmode, base, INTVAL (offset));
4598 else if (can_create_pseudo_p ())
4599 offset = force_reg (Pmode, offset);
4600 else
4601 /* If we reach here, then something is seriously wrong. */
4602 gcc_unreachable ();
4603 }
4604 return gen_rtx_PLUS (Pmode, base, offset);
4605 }
4606 else if (GET_CODE (orig) == LABEL_REF)
4607 /* ??? We ought to be checking that the register is live instead, in case
4608 it is eliminated. */
4609 crtl->uses_pic_offset_table = 1;
4610
4611 return orig;
4612 }
4613
4614 /* Try machine-dependent ways of modifying an illegitimate address X
4615 to be legitimate. If we find one, return the new, valid address.
4616
4617 OLDX is the address as it was before break_out_memory_refs was called.
4618 In some cases it is useful to look at this to decide what needs to be done.
4619
4620 MODE is the mode of the operand pointed to by X.
4621
4622 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4623
4624 static rtx
4625 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4626 machine_mode mode)
4627 {
4628 rtx orig_x = x;
4629
4630 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4631 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4632 force_operand (XEXP (x, 0), NULL_RTX));
4633 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4634 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4635 force_operand (XEXP (x, 1), NULL_RTX));
4636 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4637 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4638 XEXP (x, 1));
4639 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4640 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4641 force_operand (XEXP (x, 1), NULL_RTX));
4642
4643 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4644 return x;
4645
4646 if (sparc_tls_referenced_p (x))
4647 x = sparc_legitimize_tls_address (x);
4648 else if (flag_pic)
4649 x = sparc_legitimize_pic_address (x, NULL_RTX);
4650 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4651 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4652 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4653 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4654 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4655 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4656 else if (GET_CODE (x) == SYMBOL_REF
4657 || GET_CODE (x) == CONST
4658 || GET_CODE (x) == LABEL_REF)
4659 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4660
4661 return x;
4662 }
4663
4664 /* Delegitimize an address that was legitimized by the above function. */
4665
4666 static rtx
4667 sparc_delegitimize_address (rtx x)
4668 {
4669 x = delegitimize_mem_from_attrs (x);
4670
4671 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4672 switch (XINT (XEXP (x, 1), 1))
4673 {
4674 case UNSPEC_MOVE_PIC:
4675 case UNSPEC_TLSLE:
4676 x = XVECEXP (XEXP (x, 1), 0, 0);
4677 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4678 break;
4679 default:
4680 break;
4681 }
4682
4683 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4684 if (GET_CODE (x) == MINUS
4685 && REG_P (XEXP (x, 0))
4686 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4687 && GET_CODE (XEXP (x, 1)) == LO_SUM
4688 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4689 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4690 {
4691 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4692 gcc_assert (GET_CODE (x) == LABEL_REF);
4693 }
4694
4695 return x;
4696 }
4697
4698 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4699 replace the input X, or the original X if no replacement is called for.
4700 The output parameter *WIN is 1 if the calling macro should goto WIN,
4701 0 if it should not.
4702
4703 For SPARC, we wish to handle addresses by splitting them into
4704 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4705 This cuts the number of extra insns by one.
4706
4707 Do nothing when generating PIC code and the address is a symbolic
4708 operand or requires a scratch register. */
4709
4710 rtx
4711 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4712 int opnum, int type,
4713 int ind_levels ATTRIBUTE_UNUSED, int *win)
4714 {
4715 /* Decompose SImode constants into HIGH+LO_SUM. */
4716 if (CONSTANT_P (x)
4717 && (mode != TFmode || TARGET_ARCH64)
4718 && GET_MODE (x) == SImode
4719 && GET_CODE (x) != LO_SUM
4720 && GET_CODE (x) != HIGH
4721 && sparc_cmodel <= CM_MEDLOW
4722 && !(flag_pic
4723 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4724 {
4725 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4726 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4727 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4728 opnum, (enum reload_type)type);
4729 *win = 1;
4730 return x;
4731 }
4732
4733 /* We have to recognize what we have already generated above. */
4734 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4735 {
4736 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4737 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4738 opnum, (enum reload_type)type);
4739 *win = 1;
4740 return x;
4741 }
4742
4743 *win = 0;
4744 return x;
4745 }
4746
4747 /* Return true if ADDR (a legitimate address expression)
4748 has an effect that depends on the machine mode it is used for.
4749
4750 In PIC mode,
4751
4752 (mem:HI [%l7+a])
4753
4754 is not equivalent to
4755
4756 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4757
4758 because [%l7+a+1] is interpreted as the address of (a+1). */
4759
4760
4761 static bool
4762 sparc_mode_dependent_address_p (const_rtx addr,
4763 addr_space_t as ATTRIBUTE_UNUSED)
4764 {
4765 if (flag_pic && GET_CODE (addr) == PLUS)
4766 {
4767 rtx op0 = XEXP (addr, 0);
4768 rtx op1 = XEXP (addr, 1);
4769 if (op0 == pic_offset_table_rtx
4770 && symbolic_operand (op1, VOIDmode))
4771 return true;
4772 }
4773
4774 return false;
4775 }
4776
4777 #ifdef HAVE_GAS_HIDDEN
4778 # define USE_HIDDEN_LINKONCE 1
4779 #else
4780 # define USE_HIDDEN_LINKONCE 0
4781 #endif
4782
4783 static void
4784 get_pc_thunk_name (char name[32], unsigned int regno)
4785 {
4786 const char *reg_name = reg_names[regno];
4787
4788 /* Skip the leading '%' as that cannot be used in a
4789 symbol name. */
4790 reg_name += 1;
4791
4792 if (USE_HIDDEN_LINKONCE)
4793 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4794 else
4795 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4796 }
4797
4798 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4799
4800 static rtx
4801 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4802 {
4803 int orig_flag_pic = flag_pic;
4804 rtx insn;
4805
4806 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4807 flag_pic = 0;
4808 if (TARGET_ARCH64)
4809 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4810 else
4811 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4812 flag_pic = orig_flag_pic;
4813
4814 return insn;
4815 }
4816
4817 /* Emit code to load the GOT register. */
4818
4819 void
4820 load_got_register (void)
4821 {
4822 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4823 if (!global_offset_table_rtx)
4824 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4825
4826 if (TARGET_VXWORKS_RTP)
4827 emit_insn (gen_vxworks_load_got ());
4828 else
4829 {
4830 /* The GOT symbol is subject to a PC-relative relocation so we need a
4831 helper function to add the PC value and thus get the final value. */
4832 if (!got_helper_rtx)
4833 {
4834 char name[32];
4835 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4836 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4837 }
4838
4839 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4840 got_helper_rtx,
4841 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4842 }
4843
4844 /* Need to emit this whether or not we obey regdecls,
4845 since setjmp/longjmp can cause life info to screw up.
4846 ??? In the case where we don't obey regdecls, this is not sufficient
4847 since we may not fall out the bottom. */
4848 emit_use (global_offset_table_rtx);
4849 }
4850
4851 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4852 address of the call target. */
4853
4854 void
4855 sparc_emit_call_insn (rtx pat, rtx addr)
4856 {
4857 rtx_insn *insn;
4858
4859 insn = emit_call_insn (pat);
4860
4861 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4862 if (TARGET_VXWORKS_RTP
4863 && flag_pic
4864 && GET_CODE (addr) == SYMBOL_REF
4865 && (SYMBOL_REF_DECL (addr)
4866 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4867 : !SYMBOL_REF_LOCAL_P (addr)))
4868 {
4869 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4870 crtl->uses_pic_offset_table = 1;
4871 }
4872 }
4873 \f
4874 /* Return 1 if RTX is a MEM which is known to be aligned to at
4875 least a DESIRED byte boundary. */
4876
4877 int
4878 mem_min_alignment (rtx mem, int desired)
4879 {
4880 rtx addr, base, offset;
4881
4882 /* If it's not a MEM we can't accept it. */
4883 if (GET_CODE (mem) != MEM)
4884 return 0;
4885
4886 /* Obviously... */
4887 if (!TARGET_UNALIGNED_DOUBLES
4888 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4889 return 1;
4890
4891 /* ??? The rest of the function predates MEM_ALIGN so
4892 there is probably a bit of redundancy. */
4893 addr = XEXP (mem, 0);
4894 base = offset = NULL_RTX;
4895 if (GET_CODE (addr) == PLUS)
4896 {
4897 if (GET_CODE (XEXP (addr, 0)) == REG)
4898 {
4899 base = XEXP (addr, 0);
4900
4901 /* What we are saying here is that if the base
4902 REG is aligned properly, the compiler will make
4903 sure any REG based index upon it will be so
4904 as well. */
4905 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4906 offset = XEXP (addr, 1);
4907 else
4908 offset = const0_rtx;
4909 }
4910 }
4911 else if (GET_CODE (addr) == REG)
4912 {
4913 base = addr;
4914 offset = const0_rtx;
4915 }
4916
4917 if (base != NULL_RTX)
4918 {
4919 int regno = REGNO (base);
4920
4921 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4922 {
4923 /* Check if the compiler has recorded some information
4924 about the alignment of the base REG. If reload has
4925 completed, we already matched with proper alignments.
4926 If not running global_alloc, reload might give us
4927 an unaligned pointer to the local stack, though. */
4928 if (((cfun != 0
4929 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4930 || (optimize && reload_completed))
4931 && (INTVAL (offset) & (desired - 1)) == 0)
4932 return 1;
4933 }
4934 else
4935 {
4936 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4937 return 1;
4938 }
4939 }
4940 else if (! TARGET_UNALIGNED_DOUBLES
4941 || CONSTANT_P (addr)
4942 || GET_CODE (addr) == LO_SUM)
4943 {
4944 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4945 is true, in which case we can only assume that an access is aligned if
4946 it is to a constant address, or the address involves a LO_SUM. */
4947 return 1;
4948 }
4949
4950 /* An obviously unaligned address. */
4951 return 0;
4952 }
4953
4954 \f
4955 /* Vectors to keep interesting information about registers where it can easily
4956 be got. We used to use the actual mode value as the bit number, but there
4957 are more than 32 modes now. Instead we use two tables: one indexed by
4958 hard register number, and one indexed by mode. */
4959
4960 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4961 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4962 mapped into one sparc_mode_class mode. */
4963
4964 enum sparc_mode_class {
4965 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4966 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4967 CC_MODE, CCFP_MODE
4968 };
4969
4970 /* Modes for single-word and smaller quantities. */
4971 #define S_MODES \
4972 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4973
4974 /* Modes for double-word and smaller quantities. */
4975 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4976
4977 /* Modes for quad-word and smaller quantities. */
4978 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4979
4980 /* Modes for 8-word and smaller quantities. */
4981 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4982
4983 /* Modes for single-float quantities. */
4984 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4985
4986 /* Modes for double-float and smaller quantities. */
4987 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4988
4989 /* Modes for quad-float and smaller quantities. */
4990 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4991
4992 /* Modes for quad-float pairs and smaller quantities. */
4993 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4994
4995 /* Modes for double-float only quantities. */
4996 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4997
4998 /* Modes for quad-float and double-float only quantities. */
4999 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5000
5001 /* Modes for quad-float pairs and double-float only quantities. */
5002 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5003
5004 /* Modes for condition codes. */
5005 #define CC_MODES (1 << (int) CC_MODE)
5006 #define CCFP_MODES (1 << (int) CCFP_MODE)
5007
5008 /* Value is 1 if register/mode pair is acceptable on sparc.
5009
5010 The funny mixture of D and T modes is because integer operations
5011 do not specially operate on tetra quantities, so non-quad-aligned
5012 registers can hold quadword quantities (except %o4 and %i4 because
5013 they cross fixed registers).
5014
5015 ??? Note that, despite the settings, non-double-aligned parameter
5016 registers can hold double-word quantities in 32-bit mode. */
5017
5018 /* This points to either the 32-bit or the 64-bit version. */
5019 static const int *hard_regno_mode_classes;
5020
5021 static const int hard_32bit_mode_classes[] = {
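/* Integer regs 0-31: %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7. */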
5022 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5023 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5024 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5025 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5026
5027 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5028 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5029 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5030 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5031
5032 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5033 and none can hold SFmode/SImode values. */
5034 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5035 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5036 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5037 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5038
5039 /* %fcc[0123] */
5040 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5041
5042 /* %icc, %sfp, %gsr */
5043 CC_MODES, 0, D_MODES
5044 };
5045
5046 static const int hard_64bit_mode_classes[] = {
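/* Integer regs 0-31: %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7. */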
5047 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5048 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5049 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5050 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5051
5052 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5053 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5054 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5055 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5056
5057 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5058 and none can hold SFmode/SImode values. */
5059 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5060 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5061 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5062 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5063
5064 /* %fcc[0123] */
5065 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5066
5067 /* %icc, %sfp, %gsr */
5068 CC_MODES, 0, D_MODES
5069 };
5070
5071 static int sparc_mode_class [NUM_MACHINE_MODES];
5072
5073 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5074
5075 static void
5076 sparc_init_modes (void)
5077 {
5078 int i;
5079
5080 for (i = 0; i < NUM_MACHINE_MODES; i++)
5081 {
5082 machine_mode m = (machine_mode) i;
5083 unsigned int size = GET_MODE_SIZE (m);
5084
5085 switch (GET_MODE_CLASS (m))
5086 {
5087 case MODE_INT:
5088 case MODE_PARTIAL_INT:
5089 case MODE_COMPLEX_INT:
5090 if (size < 4)
5091 sparc_mode_class[i] = 1 << (int) H_MODE;
5092 else if (size == 4)
5093 sparc_mode_class[i] = 1 << (int) S_MODE;
5094 else if (size == 8)
5095 sparc_mode_class[i] = 1 << (int) D_MODE;
5096 else if (size == 16)
5097 sparc_mode_class[i] = 1 << (int) T_MODE;
5098 else if (size == 32)
5099 sparc_mode_class[i] = 1 << (int) O_MODE;
5100 else
5101 sparc_mode_class[i] = 0;
5102 break;
5103 case MODE_VECTOR_INT:
5104 if (size == 4)
5105 sparc_mode_class[i] = 1 << (int) SF_MODE;
5106 else if (size == 8)
5107 sparc_mode_class[i] = 1 << (int) DF_MODE;
5108 else
5109 sparc_mode_class[i] = 0;
5110 break;
5111 case MODE_FLOAT:
5112 case MODE_COMPLEX_FLOAT:
5113 if (size == 4)
5114 sparc_mode_class[i] = 1 << (int) SF_MODE;
5115 else if (size == 8)
5116 sparc_mode_class[i] = 1 << (int) DF_MODE;
5117 else if (size == 16)
5118 sparc_mode_class[i] = 1 << (int) TF_MODE;
5119 else if (size == 32)
5120 sparc_mode_class[i] = 1 << (int) OF_MODE;
5121 else
5122 sparc_mode_class[i] = 0;
5123 break;
5124 case MODE_CC:
5125 if (m == CCFPmode || m == CCFPEmode)
5126 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5127 else
5128 sparc_mode_class[i] = 1 << (int) CC_MODE;
5129 break;
5130 default:
5131 sparc_mode_class[i] = 0;
5132 break;
5133 }
5134 }
5135
5136 if (TARGET_ARCH64)
5137 hard_regno_mode_classes = hard_64bit_mode_classes;
5138 else
5139 hard_regno_mode_classes = hard_32bit_mode_classes;
5140
5141 /* Initialize the array used by REGNO_REG_CLASS. */
5142 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5143 {
5144 if (i < 16 && TARGET_V8PLUS)
5145 sparc_regno_reg_class[i] = I64_REGS;
5146 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5147 sparc_regno_reg_class[i] = GENERAL_REGS;
5148 else if (i < 64)
5149 sparc_regno_reg_class[i] = FP_REGS;
5150 else if (i < 96)
5151 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5152 else if (i < 100)
5153 sparc_regno_reg_class[i] = FPCC_REGS;
5154 else
5155 sparc_regno_reg_class[i] = NO_REGS;
5156 }
5157 }
5158 \f
5159 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5160
5161 static inline bool
5162 save_global_or_fp_reg_p (unsigned int regno,
5163 int leaf_function ATTRIBUTE_UNUSED)
5164 {
5165 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5166 }
5167
5168 /* Return whether the return address register (%i7) is needed. */
5169
5170 static inline bool
5171 return_addr_reg_needed_p (int leaf_function)
5172 {
5173 /* If it is live, for example because of __builtin_return_address (0). */
5174 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5175 return true;
5176
5177 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5178 if (!leaf_function
5179 /* Loading the GOT register clobbers %o7. */
5180 || crtl->uses_pic_offset_table
5181 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5182 return true;
5183
5184 return false;
5185 }
5186
5187 /* Return whether REGNO, a local or in register, must be saved/restored. */
5188
5189 static bool
5190 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5191 {
5192 /* General case: call-saved registers live at some point. */
5193 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5194 return true;
5195
5196 /* Frame pointer register (%fp) if needed. */
5197 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5198 return true;
5199
5200 /* Return address register (%i7) if needed. */
5201 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5202 return true;
5203
5204 /* GOT register (%l7) if needed. */
5205 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5206 return true;
5207
5208 /* If the function accesses prior frames, the frame pointer and the return
5209 address of the previous frame must be saved on the stack. */
5210 if (crtl->accesses_prior_frames
5211 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5212 return true;
5213
5214 return false;
5215 }
5216
5217 /* Compute the frame size required by the function. This function is called
5218 during the reload pass and also by sparc_expand_prologue. */
5219
5220 HOST_WIDE_INT
5221 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5222 {
5223 HOST_WIDE_INT frame_size, apparent_frame_size;
5224 int args_size, n_global_fp_regs = 0;
5225 bool save_local_in_regs_p = false;
5226 unsigned int i;
5227
5228 /* If the function allocates dynamic stack space, the dynamic offset is
5229 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5230 if (leaf_function && !cfun->calls_alloca)
5231 args_size = 0;
5232 else
5233 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5234
5235 /* Calculate space needed for global registers. */
5236 if (TARGET_ARCH64)
5237 {
5238 for (i = 0; i < 8; i++)
5239 if (save_global_or_fp_reg_p (i, 0))
5240 n_global_fp_regs += 2;
5241 }
5242 else
5243 {
5244 for (i = 0; i < 8; i += 2)
5245 if (save_global_or_fp_reg_p (i, 0)
5246 || save_global_or_fp_reg_p (i + 1, 0))
5247 n_global_fp_regs += 2;
5248 }
5249
5250 /* In the flat window model, find out which local and in registers need to
5251 be saved. We don't reserve space in the current frame for them as they
5252 will be spilled into the register window save area of the caller's frame.
5253 However, as soon as we use this register window save area, we must create
5254 that of the current frame to make it the live one. */
5255 if (TARGET_FLAT)
5256 for (i = 16; i < 32; i++)
5257 if (save_local_or_in_reg_p (i, leaf_function))
5258 {
5259 save_local_in_regs_p = true;
5260 break;
5261 }
5262
5263 /* Calculate space needed for FP registers. */
5264 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5265 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5266 n_global_fp_regs += 2;
5267
5268 if (size == 0
5269 && n_global_fp_regs == 0
5270 && args_size == 0
5271 && !save_local_in_regs_p)
5272 frame_size = apparent_frame_size = 0;
5273 else
5274 {
5275 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5276 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5277 apparent_frame_size += n_global_fp_regs * 4;
5278
5279 /* We need to add the size of the outgoing argument area. */
5280 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5281
5282 /* And that of the register window save area. */
5283 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5284
5285 /* Finally, bump to the appropriate alignment. */
5286 frame_size = SPARC_STACK_ALIGN (frame_size);
5287 }
5288
5289 /* Set up values for use in prologue and epilogue. */
5290 sparc_frame_size = frame_size;
5291 sparc_apparent_frame_size = apparent_frame_size;
5292 sparc_n_global_fp_regs = n_global_fp_regs;
5293 sparc_save_local_in_regs_p = save_local_in_regs_p;
5294
5295 return frame_size;
5296 }
5297
5298 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5299
5300 int
5301 sparc_initial_elimination_offset (int to)
5302 {
5303 int offset;
5304
5305 if (to == STACK_POINTER_REGNUM)
5306 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5307 else
5308 offset = 0;
5309
5310 offset += SPARC_STACK_BIAS;
5311 return offset;
5312 }
5313
5314 /* Output any necessary .register pseudo-ops. */
5315
5316 void
5317 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5318 {
5319 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5320 int i;
5321
5322 if (TARGET_ARCH32)
5323 return;
5324
5325 /* Check if %g[2367] were used without
5326 .register being printed for them already. */
5327 for (i = 2; i < 8; i++)
5328 {
5329 if (df_regs_ever_live_p (i)
5330 && ! sparc_hard_reg_printed [i])
5331 {
5332 sparc_hard_reg_printed [i] = 1;
5333 /* %g7 is used as TLS base register, use #ignore
5334 for it instead of #scratch. */
5335 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5336 i == 7 ? "ignore" : "scratch");
5337 }
5338 if (i == 3) i = 5;
5339 }
5340 #endif
5341 }
5342
5343 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5344
5345 #if PROBE_INTERVAL > 4096
5346 #error Cannot use indexed addressing mode for stack probing
5347 #endif
5348
5349 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5350 inclusive. These are offsets from the current stack pointer.
5351
5352 Note that we don't use the REG+REG addressing mode for the probes because
5353 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5354 so the advantages of having a single code path win here. */
5355
5356 static void
5357 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5358 {
5359 rtx g1 = gen_rtx_REG (Pmode, 1);
5360
5361 /* See if we have a constant small number of probes to generate. If so,
5362 that's the easy case. */
5363 if (size <= PROBE_INTERVAL)
5364 {
5365 emit_move_insn (g1, GEN_INT (first));
5366 emit_insn (gen_rtx_SET (g1,
5367 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5368 emit_stack_probe (plus_constant (Pmode, g1, -size));
5369 }
5370
5371 /* The run-time loop is made up of 9 insns in the generic case while the
5372 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
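/* For example, with two intervals the unrolled form is 4 insns and with
   four it is 4 + 2*2 = 8, still below the 9-insn run-time loop, which is
   why the unrolled variant is used up to 4 * PROBE_INTERVAL.  */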
5373 else if (size <= 4 * PROBE_INTERVAL)
5374 {
5375 HOST_WIDE_INT i;
5376
5377 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5378 emit_insn (gen_rtx_SET (g1,
5379 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5380 emit_stack_probe (g1);
5381
5382 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5383 it exceeds SIZE. If only two probes are needed, this will not
5384 generate any code. Then probe at FIRST + SIZE. */
5385 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5386 {
5387 emit_insn (gen_rtx_SET (g1,
5388 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5389 emit_stack_probe (g1);
5390 }
5391
5392 emit_stack_probe (plus_constant (Pmode, g1,
5393 (i - PROBE_INTERVAL) - size));
5394 }
5395
5396 /* Otherwise, do the same as above, but in a loop. Note that we must be
5397 extra careful with variables wrapping around because we might be at
5398 the very top (or the very bottom) of the address space and we have
5399 to be able to handle this case properly; in particular, we use an
5400 equality test for the loop condition. */
5401 else
5402 {
5403 HOST_WIDE_INT rounded_size;
5404 rtx g4 = gen_rtx_REG (Pmode, 4);
5405
5406 emit_move_insn (g1, GEN_INT (first));
5407
5408
5409 /* Step 1: round SIZE to the previous multiple of the interval. */
5410
5411 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5412 emit_move_insn (g4, GEN_INT (rounded_size));
5413
5414
5415 /* Step 2: compute initial and final value of the loop counter. */
5416
5417 /* TEST_ADDR = SP + FIRST. */
5418 emit_insn (gen_rtx_SET (g1,
5419 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5420
5421 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5422 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5423
5424
5425 /* Step 3: the loop
5426
5427 while (TEST_ADDR != LAST_ADDR)
5428 {
5429 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5430 probe at TEST_ADDR
5431 }
5432
5433 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5434 until it is equal to ROUNDED_SIZE. */
5435
5436 if (TARGET_ARCH64)
5437 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5438 else
5439 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5440
5441
5442 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5443 that SIZE is equal to ROUNDED_SIZE. */
5444
5445 if (size != rounded_size)
5446 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5447 }
5448
5449 /* Make sure nothing is scheduled before we are done. */
5450 emit_insn (gen_blockage ());
5451 }
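
/* As an illustration of the cases above (assuming PROBE_INTERVAL == 4096,
   the largest value allowed by the check at the top of this section): a call
   with FIRST == 4096 and SIZE == 8192 takes the second branch and probes
   exactly two addresses, %sp - 8192 (FIRST + PROBE_INTERVAL below the stack
   pointer) and %sp - 12288 (FIRST + SIZE below it), ignoring the stack bias;
   any SIZE larger than 4 * PROBE_INTERVAL instead falls through to the
   run-time loop generated by the probe_stack_range patterns. */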
5452
5453 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5454 absolute addresses. */
5455
5456 const char *
5457 output_probe_stack_range (rtx reg1, rtx reg2)
5458 {
5459 static int labelno = 0;
5460 char loop_lab[32];
5461 rtx xops[2];
5462
5463 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5464
5465 /* Loop. */
5466 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5467
5468 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5469 xops[0] = reg1;
5470 xops[1] = GEN_INT (-PROBE_INTERVAL);
5471 output_asm_insn ("add\t%0, %1, %0", xops);
5472
5473 /* Test if TEST_ADDR == LAST_ADDR. */
5474 xops[1] = reg2;
5475 output_asm_insn ("cmp\t%0, %1", xops);
5476
5477 /* Probe at TEST_ADDR and branch. */
5478 if (TARGET_ARCH64)
5479 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5480 else
5481 fputs ("\tbne\t", asm_out_file);
5482 assemble_name_raw (asm_out_file, loop_lab);
5483 fputc ('\n', asm_out_file);
5484 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5485 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5486
5487 return "";
5488 }
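
/* By way of illustration, with TEST_ADDR in %g1 and LAST_ADDR in %g4 as set
   up by sparc_emit_probe_stack_range above, PROBE_INTERVAL == 4096 and the
   64-bit stack bias of 2047, the loop emitted by this function looks roughly
   like:

        .LPSRL0:
                add     %g1, -4096, %g1
                cmp     %g1, %g4
                bne,pt  %xcc, .LPSRL0
                 st     %g0, [%g1+2047]

   i.e. the store that performs the probe sits in the delay slot of the
   branch. */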
5489
5490 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5491 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5492 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5493 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5494 the action to be performed if it returns false. Return the new offset. */
5495
5496 typedef bool (*sorr_pred_t) (unsigned int, int);
5497 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5498
5499 static int
5500 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5501 int offset, int leaf_function, sorr_pred_t save_p,
5502 sorr_act_t action_true, sorr_act_t action_false)
5503 {
5504 unsigned int i;
5505 rtx mem;
5506 rtx_insn *insn;
5507
5508 if (TARGET_ARCH64 && high <= 32)
5509 {
5510 int fp_offset = -1;
5511
5512 for (i = low; i < high; i++)
5513 {
5514 if (save_p (i, leaf_function))
5515 {
5516 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5517 base, offset));
5518 if (action_true == SORR_SAVE)
5519 {
5520 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5521 RTX_FRAME_RELATED_P (insn) = 1;
5522 }
5523 else /* action_true == SORR_RESTORE */
5524 {
5525 /* The frame pointer must be restored last since its old
5526 value may be used as the base address for the frame. This
5527 is problematic in 64-bit mode only because of the lack
5528 of a double-word load instruction. */
5529 if (i == HARD_FRAME_POINTER_REGNUM)
5530 fp_offset = offset;
5531 else
5532 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5533 }
5534 offset += 8;
5535 }
5536 else if (action_false == SORR_ADVANCE)
5537 offset += 8;
5538 }
5539
5540 if (fp_offset >= 0)
5541 {
5542 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5543 emit_move_insn (hard_frame_pointer_rtx, mem);
5544 }
5545 }
5546 else
5547 {
5548 for (i = low; i < high; i += 2)
5549 {
5550 bool reg0 = save_p (i, leaf_function);
5551 bool reg1 = save_p (i + 1, leaf_function);
5552 machine_mode mode;
5553 int regno;
5554
5555 if (reg0 && reg1)
5556 {
5557 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5558 regno = i;
5559 }
5560 else if (reg0)
5561 {
5562 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5563 regno = i;
5564 }
5565 else if (reg1)
5566 {
5567 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5568 regno = i + 1;
5569 offset += 4;
5570 }
5571 else
5572 {
5573 if (action_false == SORR_ADVANCE)
5574 offset += 8;
5575 continue;
5576 }
5577
5578 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5579 if (action_true == SORR_SAVE)
5580 {
5581 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5582 RTX_FRAME_RELATED_P (insn) = 1;
5583 if (mode == DImode)
5584 {
5585 rtx set1, set2;
5586 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5587 offset));
5588 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5589 RTX_FRAME_RELATED_P (set1) = 1;
5590 mem
5591 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5592 offset + 4));
5593 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5594 RTX_FRAME_RELATED_P (set2) = 1;
5595 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5596 gen_rtx_PARALLEL (VOIDmode,
5597 gen_rtvec (2, set1, set2)));
5598 }
5599 }
5600 else /* action_true == SORR_RESTORE */
5601 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5602
5603 /* Bump and round down to double word
5604 in case we already bumped by 4. */
5605 offset = ROUND_DOWN (offset + 8, 8);
5606 }
5607 }
5608
5609 return offset;
5610 }
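
/* To illustrate the pairing logic in the second branch above: if both %l0
   and %l1 must be saved, a single double-word (DImode) store is emitted; if
   only %l1 must be saved, a single-word store is used and OFFSET is first
   bumped by 4 so that the register still lands in its own half of the
   double-word slot; OFFSET is then bumped and rounded down so that the next
   pair starts on a double-word boundary. */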
5611
5612 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5613
5614 static rtx
5615 emit_adjust_base_to_offset (rtx base, int offset)
5616 {
5617 /* ??? This might be optimized a little as %g1 might already have a
5618 value close enough that a single add insn will do. */
5619 /* ??? Although, all of this is probably only a temporary fix because
5620 if %g1 can hold a function result, then sparc_expand_epilogue will
5621 lose (the result will be clobbered). */
5622 rtx new_base = gen_rtx_REG (Pmode, 1);
5623 emit_move_insn (new_base, GEN_INT (offset));
5624 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5625 return new_base;
5626 }
5627
5628 /* Emit code to save/restore call-saved global and FP registers. */
5629
5630 static void
5631 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5632 {
5633 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5634 {
5635 base = emit_adjust_base_to_offset (base, offset);
5636 offset = 0;
5637 }
5638
5639 offset
5640 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5641 save_global_or_fp_reg_p, action, SORR_NONE);
5642 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5643 save_global_or_fp_reg_p, action, SORR_NONE);
5644 }
5645
5646 /* Emit code to save/restore call-saved local and in registers. */
5647
5648 static void
5649 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5650 {
5651 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5652 {
5653 base = emit_adjust_base_to_offset (base, offset);
5654 offset = 0;
5655 }
5656
5657 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5658 save_local_or_in_reg_p, action, SORR_ADVANCE);
5659 }
5660
5661 /* Emit a window_save insn. */
5662
5663 static rtx_insn *
5664 emit_window_save (rtx increment)
5665 {
5666 rtx_insn *insn = emit_insn (gen_window_save (increment));
5667 RTX_FRAME_RELATED_P (insn) = 1;
5668
5669 /* The incoming return address (%o7) is saved in %i7. */
5670 add_reg_note (insn, REG_CFA_REGISTER,
5671 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5672 gen_rtx_REG (Pmode,
5673 INCOMING_RETURN_ADDR_REGNUM)));
5674
5675 /* The window save event. */
5676 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5677
5678 /* The CFA is %fp, the hard frame pointer. */
5679 add_reg_note (insn, REG_CFA_DEF_CFA,
5680 plus_constant (Pmode, hard_frame_pointer_rtx,
5681 INCOMING_FRAME_SP_OFFSET));
5682
5683 return insn;
5684 }
5685
5686 /* Generate an increment for the stack pointer. */
5687
5688 static rtx
5689 gen_stack_pointer_inc (rtx increment)
5690 {
5691 return gen_rtx_SET (stack_pointer_rtx,
5692 gen_rtx_PLUS (Pmode,
5693 stack_pointer_rtx,
5694 increment));
5695 }
5696
5697 /* Expand the function prologue. The prologue is responsible for reserving
5698 storage for the frame, saving the call-saved registers and loading the
5699 GOT register if needed. */
5700
5701 void
5702 sparc_expand_prologue (void)
5703 {
5704 HOST_WIDE_INT size;
5705 rtx_insn *insn;
5706
5707 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5708 on the final value of the flag means deferring the prologue/epilogue
5709 expansion until just before the second scheduling pass, which is too
5710 late to emit multiple epilogues or return insns.
5711
5712 Of course we are making the assumption that the value of the flag
5713 will not change between now and its final value. Of the three parts
5714 of the formula, only the last one can reasonably vary. Let's take a
5715 closer look, after assuming that the first two ones are set to true
5716 (otherwise the last value is effectively silenced).
5717
5718 If only_leaf_regs_used returns false, the global predicate will also
5719 be false so the actual frame size calculated below will be positive.
5720 As a consequence, the save_register_window insn will be emitted in
5721 the instruction stream; now this insn explicitly references %fp
5722 which is not a leaf register so only_leaf_regs_used will always
5723 return false subsequently.
5724
5725 If only_leaf_regs_used returns true, we hope that the subsequent
5726 optimization passes won't cause non-leaf registers to pop up. For
5727 example, the regrename pass has special provisions to not rename to
5728 non-leaf registers in a leaf function. */
5729 sparc_leaf_function_p
5730 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5731
5732 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5733
5734 if (flag_stack_usage_info)
5735 current_function_static_stack_size = size;
5736
5737 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5738 || flag_stack_clash_protection)
5739 {
5740 if (crtl->is_leaf && !cfun->calls_alloca)
5741 {
5742 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5743 sparc_emit_probe_stack_range (get_stack_check_protect (),
5744 size - get_stack_check_protect ());
5745 }
5746 else if (size > 0)
5747 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5748 }
5749
5750 if (size == 0)
5751 ; /* do nothing. */
5752 else if (sparc_leaf_function_p)
5753 {
5754 rtx size_int_rtx = GEN_INT (-size);
5755
5756 if (size <= 4096)
5757 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5758 else if (size <= 8192)
5759 {
5760 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5761 RTX_FRAME_RELATED_P (insn) = 1;
5762
5763 /* %sp is still the CFA register. */
5764 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5765 }
5766 else
5767 {
5768 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5769 emit_move_insn (size_rtx, size_int_rtx);
5770 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5771 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5772 gen_stack_pointer_inc (size_int_rtx));
5773 }
5774
5775 RTX_FRAME_RELATED_P (insn) = 1;
5776 }
5777 else
5778 {
5779 rtx size_int_rtx = GEN_INT (-size);
5780
5781 if (size <= 4096)
5782 emit_window_save (size_int_rtx);
5783 else if (size <= 8192)
5784 {
5785 emit_window_save (GEN_INT (-4096));
5786
5787 /* %sp is not the CFA register anymore. */
5788 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5789
5790 /* Make sure no %fp-based store is issued until after the frame is
5791 established. The offset between the frame pointer and the stack
5792 pointer is calculated relative to the value of the stack pointer
5793 at the end of the function prologue, and moving instructions that
5794 access the stack via the frame pointer between the instructions
5795 that decrement the stack pointer could result in accessing the
5796 register window save area, which is volatile. */
5797 emit_insn (gen_frame_blockage ());
5798 }
5799 else
5800 {
5801 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5802 emit_move_insn (size_rtx, size_int_rtx);
5803 emit_window_save (size_rtx);
5804 }
5805 }
5806
5807 if (sparc_leaf_function_p)
5808 {
5809 sparc_frame_base_reg = stack_pointer_rtx;
5810 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5811 }
5812 else
5813 {
5814 sparc_frame_base_reg = hard_frame_pointer_rtx;
5815 sparc_frame_base_offset = SPARC_STACK_BIAS;
5816 }
5817
5818 if (sparc_n_global_fp_regs > 0)
5819 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5820 sparc_frame_base_offset
5821 - sparc_apparent_frame_size,
5822 SORR_SAVE);
5823
5824 /* Load the GOT register if needed. */
5825 if (crtl->uses_pic_offset_table)
5826 load_got_register ();
5827
5828 /* Advertise that the data calculated just above are now valid. */
5829 sparc_prologue_data_valid_p = true;
5830 }
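
/* An illustrative sketch of the stack adjustments generated above for a
   non-leaf (register window) function with a frame of SIZE bytes:

     SIZE <= 4096:          save    %sp, -SIZE, %sp
     4096 < SIZE <= 8192:   save    %sp, -4096, %sp
                            add     %sp, 4096 - SIZE, %sp
     SIZE > 8192:           -SIZE is first loaded into %g1, then
                            save    %sp, %g1, %sp

   A leaf function performs the same adjustments with plain additions to %sp
   instead of save instructions, since no new register window is needed. */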
5831
5832 /* Expand the function prologue. The prologue is responsible for reserving
5833 storage for the frame, saving the call-saved registers and loading the
5834 GOT register if needed. */
5835
5836 void
5837 sparc_flat_expand_prologue (void)
5838 {
5839 HOST_WIDE_INT size;
5840 rtx_insn *insn;
5841
5842 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5843
5844 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5845
5846 if (flag_stack_usage_info)
5847 current_function_static_stack_size = size;
5848
5849 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5850 || flag_stack_clash_protection)
5851 {
5852 if (crtl->is_leaf && !cfun->calls_alloca)
5853 {
5854 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5855 sparc_emit_probe_stack_range (get_stack_check_protect (),
5856 size - get_stack_check_protect ());
5857 }
5858 else if (size > 0)
5859 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5860 }
5861
5862 if (sparc_save_local_in_regs_p)
5863 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5864 SORR_SAVE);
5865
5866 if (size == 0)
5867 ; /* do nothing. */
5868 else
5869 {
5870 rtx size_int_rtx, size_rtx;
5871
5872 size_rtx = size_int_rtx = GEN_INT (-size);
5873
5874 /* We establish the frame (i.e. decrement the stack pointer) first, even
5875 if we use a frame pointer, because we cannot clobber any call-saved
5876 registers, including the frame pointer, if we haven't created a new
5877 register save area, for the sake of compatibility with the ABI. */
5878 if (size <= 4096)
5879 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5880 else if (size <= 8192 && !frame_pointer_needed)
5881 {
5882 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5883 RTX_FRAME_RELATED_P (insn) = 1;
5884 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5885 }
5886 else
5887 {
5888 size_rtx = gen_rtx_REG (Pmode, 1);
5889 emit_move_insn (size_rtx, size_int_rtx);
5890 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5891 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5892 gen_stack_pointer_inc (size_int_rtx));
5893 }
5894 RTX_FRAME_RELATED_P (insn) = 1;
5895
5896 /* Ensure nothing is scheduled until after the frame is established. */
5897 emit_insn (gen_blockage ());
5898
5899 if (frame_pointer_needed)
5900 {
5901 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5902 gen_rtx_MINUS (Pmode,
5903 stack_pointer_rtx,
5904 size_rtx)));
5905 RTX_FRAME_RELATED_P (insn) = 1;
5906
5907 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5908 gen_rtx_SET (hard_frame_pointer_rtx,
5909 plus_constant (Pmode, stack_pointer_rtx,
5910 size)));
5911 }
5912
5913 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5914 {
5915 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5916 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5917
5918 insn = emit_move_insn (i7, o7);
5919 RTX_FRAME_RELATED_P (insn) = 1;
5920
5921 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5922
5923 /* Prevent this instruction from ever being considered dead,
5924 even if this function has no epilogue. */
5925 emit_use (i7);
5926 }
5927 }
5928
5929 if (frame_pointer_needed)
5930 {
5931 sparc_frame_base_reg = hard_frame_pointer_rtx;
5932 sparc_frame_base_offset = SPARC_STACK_BIAS;
5933 }
5934 else
5935 {
5936 sparc_frame_base_reg = stack_pointer_rtx;
5937 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5938 }
5939
5940 if (sparc_n_global_fp_regs > 0)
5941 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5942 sparc_frame_base_offset
5943 - sparc_apparent_frame_size,
5944 SORR_SAVE);
5945
5946 /* Load the GOT register if needed. */
5947 if (crtl->uses_pic_offset_table)
5948 load_got_register ();
5949
5950 /* Advertise that the data calculated just above are now valid. */
5951 sparc_prologue_data_valid_p = true;
5952 }
5953
5954 /* This function generates the assembly code for function entry, which boils
5955 down to emitting the necessary .register directives. */
5956
5957 static void
5958 sparc_asm_function_prologue (FILE *file)
5959 {
5960 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5961 if (!TARGET_FLAT)
5962 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5963
5964 sparc_output_scratch_registers (file);
5965 }
5966
5967 /* Expand the function epilogue, either normal or part of a sibcall.
5968 We emit all the instructions except the return or the call. */
5969
5970 void
5971 sparc_expand_epilogue (bool for_eh)
5972 {
5973 HOST_WIDE_INT size = sparc_frame_size;
5974
5975 if (cfun->calls_alloca)
5976 emit_insn (gen_frame_blockage ());
5977
5978 if (sparc_n_global_fp_regs > 0)
5979 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5980 sparc_frame_base_offset
5981 - sparc_apparent_frame_size,
5982 SORR_RESTORE);
5983
5984 if (size == 0 || for_eh)
5985 ; /* do nothing. */
5986 else if (sparc_leaf_function_p)
5987 {
5988 if (size <= 4096)
5989 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5990 else if (size <= 8192)
5991 {
5992 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5993 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5994 }
5995 else
5996 {
5997 rtx reg = gen_rtx_REG (Pmode, 1);
5998 emit_move_insn (reg, GEN_INT (size));
5999 emit_insn (gen_stack_pointer_inc (reg));
6000 }
6001 }
6002 }
6003
6004 /* Expand the function epilogue, either normal or part of a sibcall.
6005 We emit all the instructions except the return or the call. */
6006
6007 void
6008 sparc_flat_expand_epilogue (bool for_eh)
6009 {
6010 HOST_WIDE_INT size = sparc_frame_size;
6011
6012 if (sparc_n_global_fp_regs > 0)
6013 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6014 sparc_frame_base_offset
6015 - sparc_apparent_frame_size,
6016 SORR_RESTORE);
6017
6018 /* If we have a frame pointer, we'll need both to restore it before the
6019 frame is destroyed and to use its current value in destroying the frame.
6020 Since we don't have an atomic way to do that in the flat window model,
6021 we save the current value into a temporary register (%g1). */
6022 if (frame_pointer_needed && !for_eh)
6023 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6024
6025 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6026 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6027 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6028
6029 if (sparc_save_local_in_regs_p)
6030 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6031 sparc_frame_base_offset,
6032 SORR_RESTORE);
6033
6034 if (size == 0 || for_eh)
6035 ; /* do nothing. */
6036 else if (frame_pointer_needed)
6037 {
6038 /* Make sure the frame is destroyed after everything else is done. */
6039 emit_insn (gen_blockage ());
6040
6041 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6042 }
6043 else
6044 {
6045 /* Likewise. */
6046 emit_insn (gen_blockage ());
6047
6048 if (size <= 4096)
6049 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6050 else if (size <= 8192)
6051 {
6052 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6053 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6054 }
6055 else
6056 {
6057 rtx reg = gen_rtx_REG (Pmode, 1);
6058 emit_move_insn (reg, GEN_INT (size));
6059 emit_insn (gen_stack_pointer_inc (reg));
6060 }
6061 }
6062 }
6063
6064 /* Return true if it is appropriate to emit `return' instructions in the
6065 body of a function. */
6066
6067 bool
6068 sparc_can_use_return_insn_p (void)
6069 {
6070 return sparc_prologue_data_valid_p
6071 && sparc_n_global_fp_regs == 0
6072 && TARGET_FLAT
6073 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6074 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6075 }
6076
6077 /* This function generates the assembly code for function exit. */
6078
6079 static void
6080 sparc_asm_function_epilogue (FILE *file)
6081 {
6082 /* If the last two instructions of a function are "call foo; dslot;"
6083 the return address might point to the first instruction in the next
6084 function and we have to output a dummy nop for the sake of sane
6085 backtraces in such cases. This is pointless for sibling calls since
6086 the return address is explicitly adjusted. */
6087
6088 rtx_insn *insn = get_last_insn ();
6089
6090 rtx last_real_insn = prev_real_insn (insn);
6091 if (last_real_insn
6092 && NONJUMP_INSN_P (last_real_insn)
6093 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6094 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6095
6096 if (last_real_insn
6097 && CALL_P (last_real_insn)
6098 && !SIBLING_CALL_P (last_real_insn))
6099 fputs("\tnop\n", file);
6100
6101 sparc_output_deferred_case_vectors ();
6102 }
6103
6104 /* Output a 'restore' instruction. */
6105
6106 static void
6107 output_restore (rtx pat)
6108 {
6109 rtx operands[3];
6110
6111 if (! pat)
6112 {
6113 fputs ("\t restore\n", asm_out_file);
6114 return;
6115 }
6116
6117 gcc_assert (GET_CODE (pat) == SET);
6118
6119 operands[0] = SET_DEST (pat);
6120 pat = SET_SRC (pat);
6121
6122 switch (GET_CODE (pat))
6123 {
6124 case PLUS:
6125 operands[1] = XEXP (pat, 0);
6126 operands[2] = XEXP (pat, 1);
6127 output_asm_insn (" restore %r1, %2, %Y0", operands);
6128 break;
6129 case LO_SUM:
6130 operands[1] = XEXP (pat, 0);
6131 operands[2] = XEXP (pat, 1);
6132 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6133 break;
6134 case ASHIFT:
6135 operands[1] = XEXP (pat, 0);
6136 gcc_assert (XEXP (pat, 1) == const1_rtx);
6137 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6138 break;
6139 default:
6140 operands[1] = pat;
6141 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6142 break;
6143 }
6144 }
6145
6146 /* Output a return. */
6147
6148 const char *
6149 output_return (rtx_insn *insn)
6150 {
6151 if (crtl->calls_eh_return)
6152 {
6153 /* If the function uses __builtin_eh_return, the eh_return
6154 machinery occupies the delay slot. */
6155 gcc_assert (!final_sequence);
6156
6157 if (flag_delayed_branch)
6158 {
6159 if (!TARGET_FLAT && TARGET_V9)
6160 fputs ("\treturn\t%i7+8\n", asm_out_file);
6161 else
6162 {
6163 if (!TARGET_FLAT)
6164 fputs ("\trestore\n", asm_out_file);
6165
6166 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6167 }
6168
6169 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6170 }
6171 else
6172 {
6173 if (!TARGET_FLAT)
6174 fputs ("\trestore\n", asm_out_file);
6175
6176 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6177 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6178 }
6179 }
6180 else if (sparc_leaf_function_p || TARGET_FLAT)
6181 {
6182 /* This is a leaf or flat function so we don't have to bother restoring
6183 the register window, which frees us from dealing with the convoluted
6184 semantics of restore/return. We simply output the jump to the
6185 return address and the insn in the delay slot (if any). */
6186
6187 return "jmp\t%%o7+%)%#";
6188 }
6189 else
6190 {
6191 /* This is a regular function so we have to restore the register window.
6192 We may have a pending insn for the delay slot, which will be either
6193 combined with the 'restore' instruction or put in the delay slot of
6194 the 'return' instruction. */
6195
6196 if (final_sequence)
6197 {
6198 rtx_insn *delay;
6199 rtx pat;
6200 int seen;
6201
6202 delay = NEXT_INSN (insn);
6203 gcc_assert (delay);
6204
6205 pat = PATTERN (delay);
6206
6207 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6208 {
6209 epilogue_renumber (&pat, 0);
6210 return "return\t%%i7+%)%#";
6211 }
6212 else
6213 {
6214 output_asm_insn ("jmp\t%%i7+%)", NULL);
6215
6216 /* We're going to output the insn in the delay slot manually.
6217 Make sure to output its source location first. */
6218 PATTERN (delay) = gen_blockage ();
6219 INSN_CODE (delay) = -1;
6220 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6221 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6222
6223 output_restore (pat);
6224 }
6225 }
6226 else
6227 {
6228 /* The delay slot is empty. */
6229 if (TARGET_V9)
6230 return "return\t%%i7+%)\n\t nop";
6231 else if (flag_delayed_branch)
6232 return "jmp\t%%i7+%)\n\t restore";
6233 else
6234 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6235 }
6236 }
6237
6238 return "";
6239 }
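
/* By way of illustration, the return sequences produced above, with %)
   normally expanding to 8, are for an empty delay slot:

     leaf or flat function:               jmp     %o7+8
                                           nop
     regular function on V9:              return  %i7+8
                                           nop
     regular function, pre-V9, delayed
     branches enabled:                    jmp     %i7+8
                                           restore

   When the delay slot is filled, its insn is either emitted there or folded
   into the restore by output_restore above. */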
6240
6241 /* Output a sibling call. */
6242
6243 const char *
6244 output_sibcall (rtx_insn *insn, rtx call_operand)
6245 {
6246 rtx operands[1];
6247
6248 gcc_assert (flag_delayed_branch);
6249
6250 operands[0] = call_operand;
6251
6252 if (sparc_leaf_function_p || TARGET_FLAT)
6253 {
6254 /* This is a leaf or flat function so we don't have to bother restoring
6255 the register window. We simply output the jump to the function and
6256 the insn in the delay slot (if any). */
6257
6258 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6259
6260 if (final_sequence)
6261 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6262 operands);
6263 else
6264 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6265 it into a branch if possible. */
6266 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6267 operands);
6268 }
6269 else
6270 {
6271 /* This is a regular function so we have to restore the register window.
6272 We may have a pending insn for the delay slot, which will be combined
6273 with the 'restore' instruction. */
6274
6275 output_asm_insn ("call\t%a0, 0", operands);
6276
6277 if (final_sequence)
6278 {
6279 rtx_insn *delay;
6280 rtx pat;
6281 int seen;
6282
6283 delay = NEXT_INSN (insn);
6284 gcc_assert (delay);
6285
6286 pat = PATTERN (delay);
6287
6288 /* We're going to output the insn in the delay slot manually.
6289 Make sure to output its source location first. */
6290 PATTERN (delay) = gen_blockage ();
6291 INSN_CODE (delay) = -1;
6292 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6293 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6294
6295 output_restore (pat);
6296 }
6297 else
6298 output_restore (NULL_RTX);
6299 }
6300
6301 return "";
6302 }
6303 \f
6304 /* Functions for handling argument passing.
6305
6306 For 32-bit, the first 6 args are normally in registers and the rest are
6307 pushed. Any arg that starts within the first 6 words is at least
6308 partially passed in a register unless its data type forbids it.
6309
6310 For 64-bit, the argument registers are laid out as an array of 16 elements
6311 and arguments are added sequentially. The first 6 int args and up to the
6312 first 16 fp args (depending on size) are passed in regs.
6313
6314 Slot Stack Integral Float Float in structure Double Long Double
6315 ---- ----- -------- ----- ------------------ ------ -----------
6316 15 [SP+248] %f31 %f30,%f31 %d30
6317 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6318 13 [SP+232] %f27 %f26,%f27 %d26
6319 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6320 11 [SP+216] %f23 %f22,%f23 %d22
6321 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6322 9 [SP+200] %f19 %f18,%f19 %d18
6323 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6324 7 [SP+184] %f15 %f14,%f15 %d14
6325 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6326 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6327 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6328 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6329 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6330 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6331 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6332
6333 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6334
6335 Integral arguments are always passed as 64-bit quantities appropriately
6336 extended.
6337
6338 Passing of floating point values is handled as follows.
6339 If a prototype is in scope:
6340 If the value is in a named argument (i.e. not a stdarg function or a
6341 value not part of the `...') then the value is passed in the appropriate
6342 fp reg.
6343 If the value is part of the `...' and is passed in one of the first 6
6344 slots then the value is passed in the appropriate int reg.
6345 If the value is part of the `...' and is not passed in one of the first 6
6346 slots then the value is passed in memory.
6347 If a prototype is not in scope:
6348 If the value is one of the first 6 arguments the value is passed in the
6349 appropriate integer reg and the appropriate fp reg.
6350 If the value is not one of the first 6 arguments the value is passed in
6351 the appropriate fp reg and in memory.
6352
6353
6354 Summary of the calling conventions implemented by GCC on the SPARC:
6355
6356 32-bit ABI:
6357 size argument return value
6358
6359 small integer <4 int. reg. int. reg.
6360 word 4 int. reg. int. reg.
6361 double word 8 int. reg. int. reg.
6362
6363 _Complex small integer <8 int. reg. int. reg.
6364 _Complex word 8 int. reg. int. reg.
6365 _Complex double word 16 memory int. reg.
6366
6367 vector integer <=8 int. reg. FP reg.
6368 vector integer >8 memory memory
6369
6370 float 4 int. reg. FP reg.
6371 double 8 int. reg. FP reg.
6372 long double 16 memory memory
6373
6374 _Complex float 8 memory FP reg.
6375 _Complex double 16 memory FP reg.
6376 _Complex long double 32 memory FP reg.
6377
6378 vector float any memory memory
6379
6380 aggregate any memory memory
6381
6382
6383
6384 64-bit ABI:
6385 size argument return value
6386
6387 small integer <8 int. reg. int. reg.
6388 word 8 int. reg. int. reg.
6389 double word 16 int. reg. int. reg.
6390
6391 _Complex small integer <16 int. reg. int. reg.
6392 _Complex word 16 int. reg. int. reg.
6393 _Complex double word 32 memory int. reg.
6394
6395 vector integer <=16 FP reg. FP reg.
6396 vector integer 16<s<=32 memory FP reg.
6397 vector integer >32 memory memory
6398
6399 float 4 FP reg. FP reg.
6400 double 8 FP reg. FP reg.
6401 long double 16 FP reg. FP reg.
6402
6403 _Complex float 8 FP reg. FP reg.
6404 _Complex double 16 FP reg. FP reg.
6405 _Complex long double 32 memory FP reg.
6406
6407 vector float <=16 FP reg. FP reg.
6408 vector float 16<s<=32 memory FP reg.
6409 vector float >32 memory memory
6410
6411 aggregate <=16 reg. reg.
6412 aggregate 16<s<=32 memory reg.
6413 aggregate >32 memory memory
6414
6415
6416
6417 Note #1: complex floating-point types follow the extended SPARC ABIs as
6418 implemented by the Sun compiler.
6419
6420 Note #2: integral vector types follow the scalar floating-point types
6421 conventions to match what is implemented by the Sun VIS SDK.
6422
6423 Note #3: floating-point vector types follow the aggregate types
6424 conventions. */
6425
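/* To make the tables above concrete, consider a prototyped call to

     void f (int i, double d, float s);

   compiled with the FPU enabled. Under the 64-bit ABI, i goes in %o0
   (slot 0), d in %d2 (slot 1) and s in %f5 (slot 2; single-precision values
   are right-justified in their 8-byte slot, hence the odd register). Under
   the 32-bit ABI all three are passed in integer registers: i in %o0, d in
   the pair %o1/%o2 and s in %o3. */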
6426
6427 /* Maximum number of int regs for args. */
6428 #define SPARC_INT_ARG_MAX 6
6429 /* Maximum number of fp regs for args. */
6430 #define SPARC_FP_ARG_MAX 16
6431 /* Number of words (partially) occupied for a given size in units. */
6432 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6433
6434 /* Handle the INIT_CUMULATIVE_ARGS macro.
6435 Initialize a variable CUM of type CUMULATIVE_ARGS
6436 for a call to a function whose data type is FNTYPE.
6437 For a library call, FNTYPE is 0. */
6438
6439 void
6440 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6441 {
6442 cum->words = 0;
6443 cum->prototype_p = fntype && prototype_p (fntype);
6444 cum->libcall_p = !fntype;
6445 }
6446
6447 /* Handle promotion of pointer and integer arguments. */
6448
6449 static machine_mode
6450 sparc_promote_function_mode (const_tree type, machine_mode mode,
6451 int *punsignedp, const_tree, int)
6452 {
6453 if (type && POINTER_TYPE_P (type))
6454 {
6455 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6456 return Pmode;
6457 }
6458
6459 /* Integral arguments are passed as full words, as per the ABI. */
6460 if (GET_MODE_CLASS (mode) == MODE_INT
6461 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6462 return word_mode;
6463
6464 return mode;
6465 }
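
/* For instance, a 'short' or 'char' argument is promoted to word_mode
   (SImode under the 32-bit ABI, DImode under the 64-bit ABI), a pointer is
   promoted to Pmode with the extension dictated by POINTERS_EXTEND_UNSIGNED,
   and floating-point arguments are left untouched. */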
6466
6467 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6468
6469 static bool
6470 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6471 {
6472 return TARGET_ARCH64 ? true : false;
6473 }
6474
6475 /* Traverse the record TYPE recursively and call FUNC on its fields.
6476 NAMED is true if this is for a named parameter. DATA is passed
6477 to FUNC for each field. OFFSET is the starting position and
6478 PACKED is true if we are inside a packed record. */
6479
6480 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6481 static void
6482 traverse_record_type (const_tree type, bool named, T *data,
6483 HOST_WIDE_INT offset = 0, bool packed = false)
6484 {
6485 /* The ABI obviously doesn't specify how packed structures are passed.
6486 These are passed in integer regs if possible, otherwise memory. */
6487 if (!packed)
6488 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6489 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6490 {
6491 packed = true;
6492 break;
6493 }
6494
6495 /* Walk the real fields, but skip those with no size or a zero size.
6496 ??? Fields with variable offset are handled as having zero offset. */
6497 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6498 if (TREE_CODE (field) == FIELD_DECL)
6499 {
6500 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6501 continue;
6502
6503 HOST_WIDE_INT bitpos = offset;
6504 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6505 bitpos += int_bit_position (field);
6506
6507 tree field_type = TREE_TYPE (field);
6508 if (TREE_CODE (field_type) == RECORD_TYPE)
6509 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6510 packed);
6511 else
6512 {
6513 const bool fp_type
6514 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6515 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6516 data);
6517 }
6518 }
6519 }
6520
6521 /* Handle recursive register classifying for structure layout. */
6522
6523 typedef struct
6524 {
6525 bool fp_regs; /* true if field eligible to FP registers. */
6526 bool fp_regs_in_first_word; /* true if such field in first word. */
6527 } classify_data_t;
6528
6529 /* A subroutine of function_arg_slotno. Classify the field. */
6530
6531 inline void
6532 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6533 classify_data_t *data)
6534 {
6535 if (fp)
6536 {
6537 data->fp_regs = true;
6538 if (bitpos < BITS_PER_WORD)
6539 data->fp_regs_in_first_word = true;
6540 }
6541 }
6542
6543 /* Compute the slot number to pass an argument in.
6544 Return the slot number or -1 if passing on the stack.
6545
6546 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6547 the preceding args and about the function being called.
6548 MODE is the argument's machine mode.
6549 TYPE is the data type of the argument (as a tree).
6550 This is null for libcalls where that information may
6551 not be available.
6552 NAMED is nonzero if this argument is a named parameter
6553 (otherwise it is an extra parameter matching an ellipsis).
6554 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6555 *PREGNO records the register number to use if scalar type.
6556 *PPADDING records the amount of padding needed in words. */
6557
6558 static int
6559 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6560 const_tree type, bool named, bool incoming,
6561 int *pregno, int *ppadding)
6562 {
6563 int regbase = (incoming
6564 ? SPARC_INCOMING_INT_ARG_FIRST
6565 : SPARC_OUTGOING_INT_ARG_FIRST);
6566 int slotno = cum->words;
6567 enum mode_class mclass;
6568 int regno;
6569
6570 *ppadding = 0;
6571
6572 if (type && TREE_ADDRESSABLE (type))
6573 return -1;
6574
6575 if (TARGET_ARCH32
6576 && mode == BLKmode
6577 && type
6578 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6579 return -1;
6580
6581 /* For SPARC64, objects requiring 16-byte alignment get it. */
6582 if (TARGET_ARCH64
6583 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6584 && (slotno & 1) != 0)
6585 slotno++, *ppadding = 1;
6586
6587 mclass = GET_MODE_CLASS (mode);
6588 if (type && TREE_CODE (type) == VECTOR_TYPE)
6589 {
6590 /* Vector types deserve special treatment because they are
6591 polymorphic wrt their mode, depending upon whether VIS
6592 instructions are enabled. */
6593 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6594 {
6595 /* The SPARC port defines no floating-point vector modes. */
6596 gcc_assert (mode == BLKmode);
6597 }
6598 else
6599 {
6600 /* Integral vector types should either have a vector
6601 mode or an integral mode, because we are guaranteed
6602 by pass_by_reference that their size is not greater
6603 than 16 bytes and TImode is 16-byte wide. */
6604 gcc_assert (mode != BLKmode);
6605
6606 /* Vector integers are handled like floats according to
6607 the Sun VIS SDK. */
6608 mclass = MODE_FLOAT;
6609 }
6610 }
6611
6612 switch (mclass)
6613 {
6614 case MODE_FLOAT:
6615 case MODE_COMPLEX_FLOAT:
6616 case MODE_VECTOR_INT:
6617 if (TARGET_ARCH64 && TARGET_FPU && named)
6618 {
6619 /* If all arg slots are filled, then must pass on stack. */
6620 if (slotno >= SPARC_FP_ARG_MAX)
6621 return -1;
6622
6623 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6624 /* Arguments filling only one single FP register are
6625 right-justified in the outer double FP register. */
6626 if (GET_MODE_SIZE (mode) <= 4)
6627 regno++;
6628 break;
6629 }
6630 /* fallthrough */
6631
6632 case MODE_INT:
6633 case MODE_COMPLEX_INT:
6634 /* If all arg slots are filled, then must pass on stack. */
6635 if (slotno >= SPARC_INT_ARG_MAX)
6636 return -1;
6637
6638 regno = regbase + slotno;
6639 break;
6640
6641 case MODE_RANDOM:
6642 if (mode == VOIDmode)
6643 /* MODE is VOIDmode when generating the actual call. */
6644 return -1;
6645
6646 gcc_assert (mode == BLKmode);
6647
6648 if (TARGET_ARCH32
6649 || !type
6650 || (TREE_CODE (type) != RECORD_TYPE
6651 && TREE_CODE (type) != VECTOR_TYPE))
6652 {
6653 /* If all arg slots are filled, then must pass on stack. */
6654 if (slotno >= SPARC_INT_ARG_MAX)
6655 return -1;
6656
6657 regno = regbase + slotno;
6658 }
6659 else /* TARGET_ARCH64 && type */
6660 {
6661 /* If all arg slots are filled, then must pass on stack. */
6662 if (slotno >= SPARC_FP_ARG_MAX)
6663 return -1;
6664
6665 if (TREE_CODE (type) == RECORD_TYPE)
6666 {
6667 classify_data_t data = { false, false };
6668 traverse_record_type<classify_data_t, classify_registers>
6669 (type, named, &data);
6670
6671 if (data.fp_regs)
6672 {
6673 /* If all FP slots are filled except for the last one and
6674 there is no FP field in the first word, then must pass
6675 on stack. */
6676 if (slotno >= SPARC_FP_ARG_MAX - 1
6677 && !data.fp_regs_in_first_word)
6678 return -1;
6679 }
6680 else
6681 {
6682 /* If all int slots are filled, then must pass on stack. */
6683 if (slotno >= SPARC_INT_ARG_MAX)
6684 return -1;
6685 }
6686 }
6687
6688 /* PREGNO isn't set since both int and FP regs can be used. */
6689 return slotno;
6690 }
6691 break;
6692
6693 default :
6694 gcc_unreachable ();
6695 }
6696
6697 *pregno = regno;
6698 return slotno;
6699 }
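
/* A couple of concrete cases for the scalar paths above, under the 64-bit
   ABI with the FPU enabled and a named argument, and assuming
   SPARC_FP_ARG_FIRST and SPARC_OUTGOING_INT_ARG_FIRST denote %f0 and %o0: a
   double in slot 3 is assigned %d6, a float in slot 3 is assigned %f7 (the
   odd register, since single-precision values are right-justified in their
   slot), and an integer in slot 3 is assigned %o3, matching the table in the
   big comment above. */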
6700
6701 /* Handle recursive register counting/assigning for structure layout. */
6702
6703 typedef struct
6704 {
6705 int slotno; /* slot number of the argument. */
6706 int regbase; /* regno of the base register. */
6707 int intoffset; /* offset of the first pending integer field. */
6708 int nregs; /* number of words passed in registers. */
6709 bool stack; /* true if part of the argument is on the stack. */
6710 rtx ret; /* return expression being built. */
6711 } assign_data_t;
6712
6713 /* A subroutine of function_arg_record_value. Compute the number of integer
6714 registers to be assigned between PARMS->intoffset and BITPOS. Return
6715 true if at least one integer register is assigned or false otherwise. */
6716
6717 static bool
6718 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6719 {
6720 if (data->intoffset < 0)
6721 return false;
6722
6723 const int intoffset = data->intoffset;
6724 data->intoffset = -1;
6725
6726 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6727 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6728 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6729 int nregs = (endbit - startbit) / BITS_PER_WORD;
6730
6731 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6732 {
6733 nregs = SPARC_INT_ARG_MAX - this_slotno;
6734
6735 /* We need to pass this field (partly) on the stack. */
6736 data->stack = 1;
6737 }
6738
6739 if (nregs <= 0)
6740 return false;
6741
6742 *pnregs = nregs;
6743 return true;
6744 }
6745
6746 /* A subroutine of function_arg_record_value. Compute the number and the mode
6747 of the FP registers to be assigned for FIELD. Return true if at least one
6748 FP register is assigned or false otherwise. */
6749
6750 static bool
6751 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6752 assign_data_t *data,
6753 int *pnregs, machine_mode *pmode)
6754 {
6755 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6756 machine_mode mode = DECL_MODE (field);
6757 int nregs, nslots;
6758
6759 /* Slots are counted as words while regs are counted as having the size of
6760 the (inner) mode. */
6761 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6762 {
6763 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6764 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6765 }
6766 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6767 {
6768 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6769 nregs = 2;
6770 }
6771 else
6772 nregs = 1;
6773
6774 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6775
6776 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6777 {
6778 nslots = SPARC_FP_ARG_MAX - this_slotno;
6779 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6780
6781 /* We need to pass this field (partly) on the stack. */
6782 data->stack = 1;
6783
6784 if (nregs <= 0)
6785 return false;
6786 }
6787
6788 *pnregs = nregs;
6789 *pmode = mode;
6790 return true;
6791 }
6792
6793 /* A subroutine of function_arg_record_value. Count the number of registers
6794 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6795
6796 inline void
6797 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6798 assign_data_t *data)
6799 {
6800 if (fp)
6801 {
6802 int nregs;
6803 machine_mode mode;
6804
6805 if (compute_int_layout (bitpos, data, &nregs))
6806 data->nregs += nregs;
6807
6808 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6809 data->nregs += nregs;
6810 }
6811 else
6812 {
6813 if (data->intoffset < 0)
6814 data->intoffset = bitpos;
6815 }
6816 }
6817
6818 /* A subroutine of function_arg_record_value. Assign the bits of the
6819 structure between PARMS->intoffset and BITPOS to integer registers. */
6820
6821 static void
6822 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6823 {
6824 int intoffset = data->intoffset;
6825 machine_mode mode;
6826 int nregs;
6827
6828 if (!compute_int_layout (bitpos, data, &nregs))
6829 return;
6830
6831 /* If this is the trailing part of a word, only load that much into
6832 the register. Otherwise load the whole register. Note that in
6833 the latter case we may pick up unwanted bits. It's not a problem
6834 at the moment but we may wish to revisit this. */
6835 if (intoffset % BITS_PER_WORD != 0)
6836 mode = smallest_int_mode_for_size (BITS_PER_WORD
6837 - intoffset % BITS_PER_WORD);
6838 else
6839 mode = word_mode;
6840
6841 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6842 unsigned int regno = data->regbase + this_slotno;
6843 intoffset /= BITS_PER_UNIT;
6844
6845 do
6846 {
6847 rtx reg = gen_rtx_REG (mode, regno);
6848 XVECEXP (data->ret, 0, data->stack + data->nregs)
6849 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6850 data->nregs += 1;
6851 mode = word_mode;
6852 regno += 1;
6853 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6854 }
6855 while (--nregs > 0);
6856 }
6857
6858 /* A subroutine of function_arg_record_value. Assign FIELD at position
6859 BITPOS to FP registers. */
6860
6861 static void
6862 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6863 assign_data_t *data)
6864 {
6865 int nregs;
6866 machine_mode mode;
6867
6868 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6869 return;
6870
6871 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6872 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6873 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6874 regno++;
6875 int pos = bitpos / BITS_PER_UNIT;
6876
6877 do
6878 {
6879 rtx reg = gen_rtx_REG (mode, regno);
6880 XVECEXP (data->ret, 0, data->stack + data->nregs)
6881 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6882 data->nregs += 1;
6883 regno += GET_MODE_SIZE (mode) / 4;
6884 pos += GET_MODE_SIZE (mode);
6885 }
6886 while (--nregs > 0);
6887 }
6888
6889 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6890 the structure between PARMS->intoffset and BITPOS to registers. */
6891
6892 inline void
6893 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6894 assign_data_t *data)
6895 {
6896 if (fp)
6897 {
6898 assign_int_registers (bitpos, data);
6899
6900 assign_fp_registers (field, bitpos, data);
6901 }
6902 else
6903 {
6904 if (data->intoffset < 0)
6905 data->intoffset = bitpos;
6906 }
6907 }
6908
6909 /* Used by function_arg and sparc_function_value_1 to implement the complex
6910 conventions of the 64-bit ABI for passing and returning structures.
6911 Return an expression valid as a return value for the FUNCTION_ARG
6912 and TARGET_FUNCTION_VALUE.
6913
6914 TYPE is the data type of the argument (as a tree).
6915 This is null for libcalls where that information may
6916 not be available.
6917 MODE is the argument's machine mode.
6918 SLOTNO is the index number of the argument's slot in the parameter array.
6919 NAMED is true if this argument is a named parameter
6920 (otherwise it is an extra parameter matching an ellipsis).
6921 REGBASE is the regno of the base register for the parameter array. */
6922
6923 static rtx
6924 function_arg_record_value (const_tree type, machine_mode mode,
6925 int slotno, bool named, int regbase)
6926 {
6927 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6928 assign_data_t data;
6929 int nregs;
6930
6931 data.slotno = slotno;
6932 data.regbase = regbase;
6933
6934 /* Count how many registers we need. */
6935 data.nregs = 0;
6936 data.intoffset = 0;
6937 data.stack = false;
6938 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6939
6940 /* Take into account pending integer fields. */
6941 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6942 data.nregs += nregs;
6943
6944 /* Allocate the vector and handle some annoying special cases. */
6945 nregs = data.nregs;
6946
6947 if (nregs == 0)
6948 {
6949 /* ??? Empty structure has no value? Duh? */
6950 if (typesize <= 0)
6951 {
6952 /* Though there's nothing really to store, return a word register
6953 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6954 leads to breakage due to the fact that there are zero bytes to
6955 load. */
6956 return gen_rtx_REG (mode, regbase);
6957 }
6958
6959 /* ??? C++ has structures with no fields, and yet a size. Give up
6960 for now and pass everything back in integer registers. */
6961 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6962 if (nregs + slotno > SPARC_INT_ARG_MAX)
6963 nregs = SPARC_INT_ARG_MAX - slotno;
6964 }
6965
6966 gcc_assert (nregs > 0);
6967
6968 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6969
6970 /* If at least one field must be passed on the stack, generate
6971 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6972 also be passed on the stack. We can't do much better because the
6973 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6974 of structures for which the fields passed exclusively in registers
6975 are not at the beginning of the structure. */
6976 if (data.stack)
6977 XVECEXP (data.ret, 0, 0)
6978 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6979
6980 /* Assign the registers. */
6981 data.nregs = 0;
6982 data.intoffset = 0;
6983 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6984
6985 /* Assign pending integer fields. */
6986 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6987
6988 gcc_assert (data.nregs == nregs);
6989
6990 return data.ret;
6991 }
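
/* For example, the 8-byte record 'struct { float x; float y; }' passed as a
   named argument in slot 0 under the 64-bit ABI with the FPU enabled yields
   a PARALLEL of two SFmode registers, %f0 for x and %f1 for y, matching the
   "Float in structure" column of the table in the big comment above; a
   record of the same size with no floating-point field is instead assigned
   a single word-mode integer register. */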
6992
6993 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6994 of the 64-bit ABI for passing and returning unions.
6995 Return an expression valid as a return value for the FUNCTION_ARG
6996 and TARGET_FUNCTION_VALUE.
6997
6998 SIZE is the size in bytes of the union.
6999 MODE is the argument's machine mode.
7000 REGNO is the hard register the union will be passed in. */
7001
7002 static rtx
7003 function_arg_union_value (int size, machine_mode mode, int slotno,
7004 int regno)
7005 {
7006 int nwords = CEIL_NWORDS (size), i;
7007 rtx regs;
7008
7009 /* See comment in previous function for empty structures. */
7010 if (nwords == 0)
7011 return gen_rtx_REG (mode, regno);
7012
7013 if (slotno == SPARC_INT_ARG_MAX - 1)
7014 nwords = 1;
7015
7016 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7017
7018 for (i = 0; i < nwords; i++)
7019 {
7020 /* Unions are passed left-justified. */
7021 XVECEXP (regs, 0, i)
7022 = gen_rtx_EXPR_LIST (VOIDmode,
7023 gen_rtx_REG (word_mode, regno),
7024 GEN_INT (UNITS_PER_WORD * i));
7025 regno++;
7026 }
7027
7028 return regs;
7029 }
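
/* For instance, a 12-byte union passed in slot 0 under the 64-bit ABI spans
   CEIL_NWORDS (12) == 2 words and is therefore described by a PARALLEL of
   two consecutive word_mode registers at byte offsets 0 and 8, i.e. it is
   left-justified, unlike small scalar floats which are right-justified in
   their slot. */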
7030
7031 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7032 for passing and returning BLKmode vectors.
7033 Return an expression valid as a return value for the FUNCTION_ARG
7034 and TARGET_FUNCTION_VALUE.
7035
7036 SIZE is the size in bytes of the vector.
7037 REGNO is the FP hard register the vector will be passed in. */
7038
7039 static rtx
7040 function_arg_vector_value (int size, int regno)
7041 {
7042 const int nregs = MAX (1, size / 8);
7043 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7044
7045 if (size < 8)
7046 XVECEXP (regs, 0, 0)
7047 = gen_rtx_EXPR_LIST (VOIDmode,
7048 gen_rtx_REG (SImode, regno),
7049 const0_rtx);
7050 else
7051 for (int i = 0; i < nregs; i++)
7052 XVECEXP (regs, 0, i)
7053 = gen_rtx_EXPR_LIST (VOIDmode,
7054 gen_rtx_REG (DImode, regno + 2*i),
7055 GEN_INT (i*8));
7056
7057 return regs;
7058 }
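
/* Likewise for BLKmode vectors: a 4-byte vector is described by a single
   SImode FP register, an 8-byte vector by a single DImode FP register, and
   a 16-byte vector by two DImode FP registers at byte offsets 0 and 8. */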
7059
7060 /* Determine where to put an argument to a function.
7061 Value is zero to push the argument on the stack,
7062 or a hard register in which to store the argument.
7063
7064 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7065 the preceding args and about the function being called.
7066 MODE is the argument's machine mode.
7067 TYPE is the data type of the argument (as a tree).
7068 This is null for libcalls where that information may
7069 not be available.
7070 NAMED is true if this argument is a named parameter
7071 (otherwise it is an extra parameter matching an ellipsis).
7072 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7073 TARGET_FUNCTION_INCOMING_ARG. */
7074
7075 static rtx
7076 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7077 const_tree type, bool named, bool incoming)
7078 {
7079 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7080
7081 int regbase = (incoming
7082 ? SPARC_INCOMING_INT_ARG_FIRST
7083 : SPARC_OUTGOING_INT_ARG_FIRST);
7084 int slotno, regno, padding;
7085 enum mode_class mclass = GET_MODE_CLASS (mode);
7086
7087 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7088 &regno, &padding);
7089 if (slotno == -1)
7090 return 0;
7091
7092 /* Vector types deserve special treatment because they are polymorphic wrt
7093 their mode, depending upon whether VIS instructions are enabled. */
7094 if (type && TREE_CODE (type) == VECTOR_TYPE)
7095 {
7096 HOST_WIDE_INT size = int_size_in_bytes (type);
7097 gcc_assert ((TARGET_ARCH32 && size <= 8)
7098 || (TARGET_ARCH64 && size <= 16));
7099
7100 if (mode == BLKmode)
7101 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7102
7103 mclass = MODE_FLOAT;
7104 }
7105
7106 if (TARGET_ARCH32)
7107 return gen_rtx_REG (mode, regno);
7108
7109 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7110 and are promoted to registers if possible. */
7111 if (type && TREE_CODE (type) == RECORD_TYPE)
7112 {
7113 HOST_WIDE_INT size = int_size_in_bytes (type);
7114 gcc_assert (size <= 16);
7115
7116 return function_arg_record_value (type, mode, slotno, named, regbase);
7117 }
7118
7119 /* Unions up to 16 bytes in size are passed in integer registers. */
7120 else if (type && TREE_CODE (type) == UNION_TYPE)
7121 {
7122 HOST_WIDE_INT size = int_size_in_bytes (type);
7123 gcc_assert (size <= 16);
7124
7125 return function_arg_union_value (size, mode, slotno, regno);
7126 }
7127
7128 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7129 but also have the slot allocated for them.
7130 If no prototype is in scope fp values in register slots get passed
7131 in two places, either fp regs and int regs or fp regs and memory. */
7132 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7133 && SPARC_FP_REG_P (regno))
7134 {
7135 rtx reg = gen_rtx_REG (mode, regno);
7136 if (cum->prototype_p || cum->libcall_p)
7137 return reg;
7138 else
7139 {
7140 rtx v0, v1;
7141
7142 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7143 {
7144 int intreg;
7145
7146 /* On incoming, we don't need to know that the value
7147 is passed in %f0 and %i0, and it confuses other parts
7148 causing needless spillage even on the simplest cases. */
7149 if (incoming)
7150 return reg;
7151
7152 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7153 + (regno - SPARC_FP_ARG_FIRST) / 2);
7154
7155 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7156 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7157 const0_rtx);
7158 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7159 }
7160 else
7161 {
7162 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7163 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7164 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7165 }
7166 }
7167 }
7168
7169 /* All other aggregate types are passed in an integer register in a mode
7170 corresponding to the size of the type. */
7171 else if (type && AGGREGATE_TYPE_P (type))
7172 {
7173 HOST_WIDE_INT size = int_size_in_bytes (type);
7174 gcc_assert (size <= 16);
7175
7176 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7177 }
7178
7179 return gen_rtx_REG (mode, regno);
7180 }
7181
7182 /* Handle the TARGET_FUNCTION_ARG target hook. */
7183
7184 static rtx
7185 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7186 const_tree type, bool named)
7187 {
7188 return sparc_function_arg_1 (cum, mode, type, named, false);
7189 }
7190
7191 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7192
7193 static rtx
7194 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7195 const_tree type, bool named)
7196 {
7197 return sparc_function_arg_1 (cum, mode, type, named, true);
7198 }
7199
7200 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7201
7202 static unsigned int
7203 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7204 {
7205 return ((TARGET_ARCH64
7206 && (GET_MODE_ALIGNMENT (mode) == 128
7207 || (type && TYPE_ALIGN (type) == 128)))
7208 ? 128
7209 : PARM_BOUNDARY);
7210 }
7211
7212 /* For an arg passed partly in registers and partly in memory,
7213 this is the number of bytes of registers used.
7214 For args passed entirely in registers or entirely in memory, zero.
7215
7216 Any arg that starts in the first 6 regs but won't entirely fit in them
7217 needs partial registers on v8. On v9, structures with integer
7218 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7219 values that begin in the last fp reg [where "last fp reg" varies with the
7220 mode] will be split between that reg and memory. */
7221
7222 static int
7223 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7224 tree type, bool named)
7225 {
7226 int slotno, regno, padding;
7227
7228 /* We pass false for incoming here, it doesn't matter. */
7229 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7230 false, &regno, &padding);
7231
7232 if (slotno == -1)
7233 return 0;
7234
7235 if (TARGET_ARCH32)
7236 {
7237 if ((slotno + (mode == BLKmode
7238 ? CEIL_NWORDS (int_size_in_bytes (type))
7239 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7240 > SPARC_INT_ARG_MAX)
7241 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7242 }
7243 else
7244 {
7245 /* We are guaranteed by pass_by_reference that the size of the
7246 argument is not greater than 16 bytes, so we only need to return
7247 one word if the argument is partially passed in registers. */
7248
7249 if (type && AGGREGATE_TYPE_P (type))
7250 {
7251 int size = int_size_in_bytes (type);
7252
7253 if (size > UNITS_PER_WORD
7254 && (slotno == SPARC_INT_ARG_MAX - 1
7255 || slotno == SPARC_FP_ARG_MAX - 1))
7256 return UNITS_PER_WORD;
7257 }
7258 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7259 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7260 && ! (TARGET_FPU && named)))
7261 {
7262 /* The complex types are passed as packed types. */
7263 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7264 && slotno == SPARC_INT_ARG_MAX - 1)
7265 return UNITS_PER_WORD;
7266 }
7267 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7268 {
7269 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7270 > SPARC_FP_ARG_MAX)
7271 return UNITS_PER_WORD;
7272 }
7273 }
7274
7275 return 0;
7276 }
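
/* As an illustration of the computation above, consider the 32-bit ABI and

     void f (int a, int b, int c, int d, int e, long long x);

   A through E occupy argument slots 0 to 4, so X is assigned slot 5 and
   needs 2 words; the function then returns
   (SPARC_INT_ARG_MAX - 5) * UNITS_PER_WORD = 4, i.e. the high word of X
   goes in %o5 and the low word goes on the stack.  */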
7277
7278 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7279 Specify whether to pass the argument by reference. */
7280
7281 static bool
7282 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7283 machine_mode mode, const_tree type,
7284 bool named ATTRIBUTE_UNUSED)
7285 {
7286 if (TARGET_ARCH32)
7287 /* Original SPARC 32-bit ABI says that structures and unions,
7288 and quad-precision floats are passed by reference. For Pascal,
7289 also pass arrays by reference. All other base types are passed
7290 in registers.
7291
7292 Extended ABI (as implemented by the Sun compiler) says that all
7293 complex floats are passed by reference. Pass complex integers
7294 in registers up to 8 bytes. More generally, enforce the 2-word
7295 cap for passing arguments in registers.
7296
7297 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7298 integers are passed like floats of the same size, that is in
7299 registers up to 8 bytes. Pass all vector floats by reference
7300 like structures and unions. */
7301 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7302 || mode == SCmode
7303 /* Catch CDImode, TFmode, DCmode and TCmode. */
7304 || GET_MODE_SIZE (mode) > 8
7305 || (type
7306 && TREE_CODE (type) == VECTOR_TYPE
7307 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7308 else
7309 /* Original SPARC 64-bit ABI says that structures and unions
7310 smaller than 16 bytes are passed in registers, as well as
7311 all other base types.
7312
7313 Extended ABI (as implemented by the Sun compiler) says that
7314 complex floats are passed in registers up to 16 bytes. Pass
7315 all complex integers in registers up to 16 bytes. More generally,
7316 enforce the 2-word cap for passing arguments in registers.
7317
7318 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7319 integers are passed like floats of the same size, that is in
7320 registers (up to 16 bytes). Pass all vector floats like structures
7321 and unions. */
7322 return ((type
7323 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7324 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7325 /* Catch CTImode and TCmode. */
7326 || GET_MODE_SIZE (mode) > 16);
7327 }
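
/* To make the rules above concrete: under the 32-bit ABI, aggregates,
   vector floats, _Complex float (SCmode) and anything whose mode is wider
   than 8 bytes (e.g. long double or _Complex double) go by reference,
   while long long and _Complex int fit under the 8-byte cap and stay in
   registers.  Under the 64-bit ABI the cap is 16 bytes, so for instance
   long double (TFmode) is passed directly whereas _Complex long double
   (TCmode) and aggregates larger than 16 bytes go by reference.  */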
7328
7329 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7330 Update the data in CUM to advance over an argument
7331 of mode MODE and data type TYPE.
7332 TYPE is null for libcalls where that information may not be available. */
7333
7334 static void
7335 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7336 const_tree type, bool named)
7337 {
7338 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7339 int regno, padding;
7340
7341 /* We pass false for incoming here; it doesn't matter. */
7342 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7343
7344 /* If argument requires leading padding, add it. */
7345 cum->words += padding;
7346
7347 if (TARGET_ARCH32)
7348 cum->words += (mode == BLKmode
7349 ? CEIL_NWORDS (int_size_in_bytes (type))
7350 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7351 else
7352 {
7353 if (type && AGGREGATE_TYPE_P (type))
7354 {
7355 int size = int_size_in_bytes (type);
7356
7357 if (size <= 8)
7358 ++cum->words;
7359 else if (size <= 16)
7360 cum->words += 2;
7361 else /* passed by reference */
7362 ++cum->words;
7363 }
7364 else
7365 cum->words += (mode == BLKmode
7366 ? CEIL_NWORDS (int_size_in_bytes (type))
7367 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7368 }
7369 }
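
/* For example, under the 64-bit ABI (UNITS_PER_WORD == 8) a double
   advances CUM->words by 1, a 12-byte structure by 2, and a 24-byte
   structure by 1 only, since the latter is passed by reference and all
   that is handed over is a pointer.  Under the 32-bit ABI the same
   double advances CUM->words by 2.  */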
7370
7371 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7372 are always stored left shifted in their argument slot. */
7373
7374 static pad_direction
7375 sparc_function_arg_padding (machine_mode mode, const_tree type)
7376 {
7377 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7378 return PAD_UPWARD;
7379
7380 /* Fall back to the default. */
7381 return default_function_arg_padding (mode, type);
7382 }
7383
7384 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7385 Specify whether to return the return value in memory. */
7386
7387 static bool
7388 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7389 {
7390 if (TARGET_ARCH32)
7391 /* Original SPARC 32-bit ABI says that structures and unions,
7392 and quad-precision floats are returned in memory. All other
7393 base types are returned in registers.
7394
7395 Extended ABI (as implemented by the Sun compiler) says that
7396 all complex floats are returned in registers (8 FP registers
7397 at most for '_Complex long double'). Return all complex integers
7398 in registers (4 at most for '_Complex long long').
7399
7400 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7401 integers are returned like floats of the same size, that is in
7402 registers up to 8 bytes and in memory otherwise. Return all
7403 vector floats in memory like structures and unions; note that
7404 they always have BLKmode like the latter. */
7405 return (TYPE_MODE (type) == BLKmode
7406 || TYPE_MODE (type) == TFmode
7407 || (TREE_CODE (type) == VECTOR_TYPE
7408 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7409 else
7410 /* Original SPARC 64-bit ABI says that structures and unions
7411 smaller than 32 bytes are returned in registers, as well as
7412 all other base types.
7413
7414 Extended ABI (as implemented by the Sun compiler) says that all
7415 complex floats are returned in registers (8 FP registers at most
7416 for '_Complex long double'). Return all complex integers in
7417 registers (4 at most for '_Complex TItype').
7418
7419 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7420 integers are returned like floats of the same size, that is in
7421 registers. Return all vector floats like structures and unions;
7422 note that they always have BLKmode like the latter. */
7423 return (TYPE_MODE (type) == BLKmode
7424 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7425 }
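
/* Concretely: under the 32-bit ABI any structure (BLKmode) and long
   double (TFmode) are returned in memory, while under the 64-bit ABI a
   24-byte structure is still returned in registers and only aggregates
   larger than 32 bytes are forced into memory.  */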
7426
7427 /* Handle the TARGET_STRUCT_VALUE target hook.
7428 Return where to find the structure return value address. */
7429
7430 static rtx
7431 sparc_struct_value_rtx (tree fndecl, int incoming)
7432 {
7433 if (TARGET_ARCH64)
7434 return 0;
7435 else
7436 {
7437 rtx mem;
7438
7439 if (incoming)
7440 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7441 STRUCT_VALUE_OFFSET));
7442 else
7443 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7444 STRUCT_VALUE_OFFSET));
7445
7446 /* Only follow the SPARC ABI for fixed-size structure returns.
7447 Variable-size structure returns are handled per the normal
7448 procedures in GCC. This is enabled by -mstd-struct-return. */
7449 if (incoming == 2
7450 && sparc_std_struct_return
7451 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7452 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7453 {
7454 /* We must check and adjust the return address, since it is
7455 optional whether the return object is really provided. */
7456 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7457 rtx scratch = gen_reg_rtx (SImode);
7458 rtx_code_label *endlab = gen_label_rtx ();
7459
7460 /* Calculate the return object size. */
7461 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7462 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7463 /* Construct a temporary return value. */
7464 rtx temp_val
7465 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7466
7467 /* Implement SPARC 32-bit psABI callee return struct checking:
7468
7469 Fetch the instruction where we will return to and see if
7470 it's an unimp instruction (the most significant 10 bits
7471 will be zero). */
7472 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7473 plus_constant (Pmode,
7474 ret_reg, 8)));
7475 /* Assume the size is valid and pre-adjust. */
7476 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7477 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7478 0, endlab);
7479 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7480 /* Write the address of the memory pointed to by temp_val into
7481 the memory pointed to by mem. */
7482 emit_move_insn (mem, XEXP (temp_val, 0));
7483 emit_label (endlab);
7484 }
7485
7486 return mem;
7487 }
7488 }
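
/* The -mstd-struct-return check above emits a sequence along these lines
   (register choices and the label are illustrative):

     ld      [%i7+8], %g1    ! fetch the insn after the call's delay slot
     add     %i7, 4, %i7     ! assume it is an unimp and skip it on return
     cmp     %g1, size       ! the unimp immediate encodes the expected size
     be      1f
      nop
     sub     %i7, 4, %i7     ! not an unimp: undo the adjustment
     st      <tmp>, [%fp+STRUCT_VALUE_OFFSET] ! fill a local temporary instead
   1:                                                                       */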
7489
7490 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7491 For v9, function return values are subject to the same rules as arguments,
7492 except that up to 32 bytes may be returned in registers. */
7493
7494 static rtx
7495 sparc_function_value_1 (const_tree type, machine_mode mode,
7496 bool outgoing)
7497 {
7498 /* Beware that the two values are swapped here wrt function_arg. */
7499 int regbase = (outgoing
7500 ? SPARC_INCOMING_INT_ARG_FIRST
7501 : SPARC_OUTGOING_INT_ARG_FIRST);
7502 enum mode_class mclass = GET_MODE_CLASS (mode);
7503 int regno;
7504
7505 /* Vector types deserve special treatment because they are polymorphic wrt
7506 their mode, depending upon whether VIS instructions are enabled. */
7507 if (type && TREE_CODE (type) == VECTOR_TYPE)
7508 {
7509 HOST_WIDE_INT size = int_size_in_bytes (type);
7510 gcc_assert ((TARGET_ARCH32 && size <= 8)
7511 || (TARGET_ARCH64 && size <= 32));
7512
7513 if (mode == BLKmode)
7514 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7515
7516 mclass = MODE_FLOAT;
7517 }
7518
7519 if (TARGET_ARCH64 && type)
7520 {
7521 /* Structures up to 32 bytes in size are returned in registers. */
7522 if (TREE_CODE (type) == RECORD_TYPE)
7523 {
7524 HOST_WIDE_INT size = int_size_in_bytes (type);
7525 gcc_assert (size <= 32);
7526
7527 return function_arg_record_value (type, mode, 0, 1, regbase);
7528 }
7529
7530 /* Unions up to 32 bytes in size are returned in integer registers. */
7531 else if (TREE_CODE (type) == UNION_TYPE)
7532 {
7533 HOST_WIDE_INT size = int_size_in_bytes (type);
7534 gcc_assert (size <= 32);
7535
7536 return function_arg_union_value (size, mode, 0, regbase);
7537 }
7538
7539 /* Objects that require it are returned in FP registers. */
7540 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7541 ;
7542
7543 /* All other aggregate types are returned in an integer register in a
7544 mode corresponding to the size of the type. */
7545 else if (AGGREGATE_TYPE_P (type))
7546 {
7547 /* All other aggregate types are passed in an integer register
7548 in a mode corresponding to the size of the type. */
7549 HOST_WIDE_INT size = int_size_in_bytes (type);
7550 gcc_assert (size <= 32);
7551
7552 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7553
7554 /* ??? We probably should have made the same ABI change in
7555 3.4.0 as the one we made for unions. The latter was
7556 required by the SCD though, while the former is not
7557 specified, so we favored compatibility and efficiency.
7558
7559 Now we're stuck for aggregates larger than 16 bytes,
7560 because OImode vanished in the meantime. Let's not
7561 try to be unduly clever, and simply follow the ABI
7562 for unions in that case. */
7563 if (mode == BLKmode)
7564 return function_arg_union_value (size, mode, 0, regbase);
7565 else
7566 mclass = MODE_INT;
7567 }
7568
7569 /* We should only have pointer and integer types at this point. This
7570 must match sparc_promote_function_mode. */
7571 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7572 mode = word_mode;
7573 }
7574
7575 /* We should only have pointer and integer types at this point, except with
7576 -freg-struct-return. This must match sparc_promote_function_mode. */
7577 else if (TARGET_ARCH32
7578 && !(type && AGGREGATE_TYPE_P (type))
7579 && mclass == MODE_INT
7580 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7581 mode = word_mode;
7582
7583 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7584 regno = SPARC_FP_ARG_FIRST;
7585 else
7586 regno = regbase;
7587
7588 return gen_rtx_REG (mode, regno);
7589 }
7590
7591 /* Handle TARGET_FUNCTION_VALUE.
7592 On the SPARC, the value is found in the first "output" register, but the
7593 called function leaves it in the first "input" register. */
7594
7595 static rtx
7596 sparc_function_value (const_tree valtype,
7597 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7598 bool outgoing)
7599 {
7600 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7601 }
7602
7603 /* Handle TARGET_LIBCALL_VALUE. */
7604
7605 static rtx
7606 sparc_libcall_value (machine_mode mode,
7607 const_rtx fun ATTRIBUTE_UNUSED)
7608 {
7609 return sparc_function_value_1 (NULL_TREE, mode, false);
7610 }
7611
7612 /* Handle FUNCTION_VALUE_REGNO_P.
7613 On the SPARC, the first "output" reg is used for integer values, and the
7614 first floating point register is used for floating point values. */
7615
7616 static bool
7617 sparc_function_value_regno_p (const unsigned int regno)
7618 {
7619 return (regno == 8 || (TARGET_FPU && regno == 32));
7620 }
7621
7622 /* Do what is necessary for `va_start'. We look at the current function
7623 to determine if stdarg or varargs is used and return the address of
7624 the first unnamed parameter. */
7625
7626 static rtx
7627 sparc_builtin_saveregs (void)
7628 {
7629 int first_reg = crtl->args.info.words;
7630 rtx address;
7631 int regno;
7632
7633 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7634 emit_move_insn (gen_rtx_MEM (word_mode,
7635 gen_rtx_PLUS (Pmode,
7636 frame_pointer_rtx,
7637 GEN_INT (FIRST_PARM_OFFSET (0)
7638 + (UNITS_PER_WORD
7639 * regno)))),
7640 gen_rtx_REG (word_mode,
7641 SPARC_INCOMING_INT_ARG_FIRST + regno));
7642
7643 address = gen_rtx_PLUS (Pmode,
7644 frame_pointer_rtx,
7645 GEN_INT (FIRST_PARM_OFFSET (0)
7646 + UNITS_PER_WORD * first_reg));
7647
7648 return address;
7649 }
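
/* For instance, with the 32-bit ABI and

     int f (int a, int b, ...);

   crtl->args.info.words is 2, so the loop above dumps %i2 through %i5
   into their reserved slots at [%fp + FIRST_PARM_OFFSET + 2*UNITS_PER_WORD]
   onward, and the returned address points at the %i2 slot, which is where
   va_arg will start reading the unnamed arguments.  */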
7650
7651 /* Implement `va_start' for stdarg. */
7652
7653 static void
7654 sparc_va_start (tree valist, rtx nextarg)
7655 {
7656 nextarg = expand_builtin_saveregs ();
7657 std_expand_builtin_va_start (valist, nextarg);
7658 }
7659
7660 /* Implement `va_arg' for stdarg. */
7661
7662 static tree
7663 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7664 gimple_seq *post_p)
7665 {
7666 HOST_WIDE_INT size, rsize, align;
7667 tree addr, incr;
7668 bool indirect;
7669 tree ptrtype = build_pointer_type (type);
7670
7671 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7672 {
7673 indirect = true;
7674 size = rsize = UNITS_PER_WORD;
7675 align = 0;
7676 }
7677 else
7678 {
7679 indirect = false;
7680 size = int_size_in_bytes (type);
7681 rsize = ROUND_UP (size, UNITS_PER_WORD);
7682 align = 0;
7683
7684 if (TARGET_ARCH64)
7685 {
7686 /* For SPARC64, objects requiring 16-byte alignment get it. */
7687 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7688 align = 2 * UNITS_PER_WORD;
7689
7690 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7691 are left-justified in their slots. */
7692 if (AGGREGATE_TYPE_P (type))
7693 {
7694 if (size == 0)
7695 size = rsize = UNITS_PER_WORD;
7696 else
7697 size = rsize;
7698 }
7699 }
7700 }
7701
7702 incr = valist;
7703 if (align)
7704 {
7705 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7706 incr = fold_convert (sizetype, incr);
7707 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7708 size_int (-align));
7709 incr = fold_convert (ptr_type_node, incr);
7710 }
7711
7712 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7713 addr = incr;
7714
7715 if (BYTES_BIG_ENDIAN && size < rsize)
7716 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7717
7718 if (indirect)
7719 {
7720 addr = fold_convert (build_pointer_type (ptrtype), addr);
7721 addr = build_va_arg_indirect_ref (addr);
7722 }
7723
7724 /* If the address isn't aligned properly for the type, we need a temporary.
7725 FIXME: This is inefficient; usually we can do this in registers. */
7726 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7727 {
7728 tree tmp = create_tmp_var (type, "va_arg_tmp");
7729 tree dest_addr = build_fold_addr_expr (tmp);
7730 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7731 3, dest_addr, addr, size_int (rsize));
7732 TREE_ADDRESSABLE (tmp) = 1;
7733 gimplify_and_add (copy, pre_p);
7734 addr = dest_addr;
7735 }
7736
7737 else
7738 addr = fold_convert (ptrtype, addr);
7739
7740 incr = fold_build_pointer_plus_hwi (incr, rsize);
7741 gimplify_assign (valist, incr, post_p);
7742
7743 return build_va_arg_indirect_ref (addr);
7744 }
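
/* As a sketch of the GIMPLE built above for a 16-byte-aligned type on
   64-bit (say long double):

     addr = (valist + 15) & -16;     // re-align the argument cursor
     valist = addr + 16;             // step past the slot
     result = *(long double *) addr;

   For a by-reference argument only a pointer-sized slot is consumed and
   the result is obtained through an extra indirection.  */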
7745 \f
7746 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7747 Specify whether the vector mode is supported by the hardware. */
7748
7749 static bool
7750 sparc_vector_mode_supported_p (machine_mode mode)
7751 {
7752 return TARGET_VIS && VECTOR_MODE_P (mode);
7753 }
7754 \f
7755 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7756
7757 static machine_mode
7758 sparc_preferred_simd_mode (scalar_mode mode)
7759 {
7760 if (TARGET_VIS)
7761 switch (mode)
7762 {
7763 case E_SImode:
7764 return V2SImode;
7765 case E_HImode:
7766 return V4HImode;
7767 case E_QImode:
7768 return V8QImode;
7769
7770 default:;
7771 }
7772
7773 return word_mode;
7774 }
7775 \f
7776 /* Return the string to output an unconditional branch to LABEL, which is
7777 the operand number of the label.
7778
7779 DEST is the destination insn (i.e. the label), INSN is the source. */
7780
7781 const char *
7782 output_ubranch (rtx dest, rtx_insn *insn)
7783 {
7784 static char string[64];
7785 bool v9_form = false;
7786 int delta;
7787 char *p;
7788
7789 /* Even if we are trying to use cbcond for this, evaluate
7790 whether we can use V9 branches as our backup plan. */
7791
7792 delta = 5000000;
7793 if (INSN_ADDRESSES_SET_P ())
7794 delta = (INSN_ADDRESSES (INSN_UID (dest))
7795 - INSN_ADDRESSES (INSN_UID (insn)));
7796
7797 /* Leave some instructions for "slop". */
7798 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7799 v9_form = true;
7800
7801 if (TARGET_CBCOND)
7802 {
7803 bool emit_nop = emit_cbcond_nop (insn);
7804 bool far = false;
7805 const char *rval;
7806
7807 if (delta < -500 || delta > 500)
7808 far = true;
7809
7810 if (far)
7811 {
7812 if (v9_form)
7813 rval = "ba,a,pt\t%%xcc, %l0";
7814 else
7815 rval = "b,a\t%l0";
7816 }
7817 else
7818 {
7819 if (emit_nop)
7820 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7821 else
7822 rval = "cwbe\t%%g0, %%g0, %l0";
7823 }
7824 return rval;
7825 }
7826
7827 if (v9_form)
7828 strcpy (string, "ba%*,pt\t%%xcc, ");
7829 else
7830 strcpy (string, "b%*\t");
7831
7832 p = strchr (string, '\0');
7833 *p++ = '%';
7834 *p++ = 'l';
7835 *p++ = '0';
7836 *p++ = '%';
7837 *p++ = '(';
7838 *p = '\0';
7839
7840 return string;
7841 }
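
/* Typical final forms of the strings returned above are

     b       .LL4                  ! pre-V9
     ba,pt   %xcc, .LL4            ! V9, target within range
     cwbe    %g0, %g0, .LL4        ! cbcond used as an always-taken branch

   with the %( and %* sequences adding a trailing nop or an annul bit as
   required by the delay-slot scheduler.  */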
7842
7843 /* Return the string to output a conditional branch to LABEL, which is
7844 the operand number of the label. OP is the conditional expression.
7845 XEXP (OP, 0) is assumed to be a condition code register (integer or
7846 floating point) and its mode specifies what kind of comparison we made.
7847
7848 DEST is the destination insn (i.e. the label), INSN is the source.
7849
7850 REVERSED is nonzero if we should reverse the sense of the comparison.
7851
7852 ANNUL is nonzero if we should generate an annulling branch. */
7853
7854 const char *
7855 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7856 rtx_insn *insn)
7857 {
7858 static char string[64];
7859 enum rtx_code code = GET_CODE (op);
7860 rtx cc_reg = XEXP (op, 0);
7861 machine_mode mode = GET_MODE (cc_reg);
7862 const char *labelno, *branch;
7863 int spaces = 8, far;
7864 char *p;
7865
7866 /* v9 branches are limited to +-1MB. If it is too far away,
7867 change
7868
7869 bne,pt %xcc, .LC30
7870
7871 to
7872
7873 be,pn %xcc, .+12
7874 nop
7875 ba .LC30
7876
7877 and
7878
7879 fbne,a,pn %fcc2, .LC29
7880
7881 to
7882
7883 fbe,pt %fcc2, .+16
7884 nop
7885 ba .LC29 */
7886
7887 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7888 if (reversed ^ far)
7889 {
7890 /* Reversal of FP compares takes care -- an ordered compare
7891 becomes an unordered compare and vice versa. */
7892 if (mode == CCFPmode || mode == CCFPEmode)
7893 code = reverse_condition_maybe_unordered (code);
7894 else
7895 code = reverse_condition (code);
7896 }
7897
7898 /* Start by writing the branch condition. */
7899 if (mode == CCFPmode || mode == CCFPEmode)
7900 {
7901 switch (code)
7902 {
7903 case NE:
7904 branch = "fbne";
7905 break;
7906 case EQ:
7907 branch = "fbe";
7908 break;
7909 case GE:
7910 branch = "fbge";
7911 break;
7912 case GT:
7913 branch = "fbg";
7914 break;
7915 case LE:
7916 branch = "fble";
7917 break;
7918 case LT:
7919 branch = "fbl";
7920 break;
7921 case UNORDERED:
7922 branch = "fbu";
7923 break;
7924 case ORDERED:
7925 branch = "fbo";
7926 break;
7927 case UNGT:
7928 branch = "fbug";
7929 break;
7930 case UNLT:
7931 branch = "fbul";
7932 break;
7933 case UNEQ:
7934 branch = "fbue";
7935 break;
7936 case UNGE:
7937 branch = "fbuge";
7938 break;
7939 case UNLE:
7940 branch = "fbule";
7941 break;
7942 case LTGT:
7943 branch = "fblg";
7944 break;
7945 default:
7946 gcc_unreachable ();
7947 }
7948
7949 /* ??? !v9: FP branches cannot be preceded by another floating point
7950 insn. Because there is currently no concept of pre-delay slots,
7951 we can fix this only by always emitting a nop before a floating
7952 point branch. */
7953
7954 string[0] = '\0';
7955 if (! TARGET_V9)
7956 strcpy (string, "nop\n\t");
7957 strcat (string, branch);
7958 }
7959 else
7960 {
7961 switch (code)
7962 {
7963 case NE:
7964 if (mode == CCVmode || mode == CCXVmode)
7965 branch = "bvs";
7966 else
7967 branch = "bne";
7968 break;
7969 case EQ:
7970 if (mode == CCVmode || mode == CCXVmode)
7971 branch = "bvc";
7972 else
7973 branch = "be";
7974 break;
7975 case GE:
7976 if (mode == CCNZmode || mode == CCXNZmode)
7977 branch = "bpos";
7978 else
7979 branch = "bge";
7980 break;
7981 case GT:
7982 branch = "bg";
7983 break;
7984 case LE:
7985 branch = "ble";
7986 break;
7987 case LT:
7988 if (mode == CCNZmode || mode == CCXNZmode)
7989 branch = "bneg";
7990 else
7991 branch = "bl";
7992 break;
7993 case GEU:
7994 branch = "bgeu";
7995 break;
7996 case GTU:
7997 branch = "bgu";
7998 break;
7999 case LEU:
8000 branch = "bleu";
8001 break;
8002 case LTU:
8003 branch = "blu";
8004 break;
8005 default:
8006 gcc_unreachable ();
8007 }
8008 strcpy (string, branch);
8009 }
8010 spaces -= strlen (branch);
8011 p = strchr (string, '\0');
8012
8013 /* Now add the annulling, the label, and a possible noop. */
8014 if (annul && ! far)
8015 {
8016 strcpy (p, ",a");
8017 p += 2;
8018 spaces -= 2;
8019 }
8020
8021 if (TARGET_V9)
8022 {
8023 rtx note;
8024 int v8 = 0;
8025
8026 if (! far && insn && INSN_ADDRESSES_SET_P ())
8027 {
8028 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8029 - INSN_ADDRESSES (INSN_UID (insn)));
8030 /* Leave some instructions for "slop". */
8031 if (delta < -260000 || delta >= 260000)
8032 v8 = 1;
8033 }
8034
8035 switch (mode)
8036 {
8037 case E_CCmode:
8038 case E_CCNZmode:
8039 case E_CCCmode:
8040 case E_CCVmode:
8041 labelno = "%%icc, ";
8042 if (v8)
8043 labelno = "";
8044 break;
8045 case E_CCXmode:
8046 case E_CCXNZmode:
8047 case E_CCXCmode:
8048 case E_CCXVmode:
8049 labelno = "%%xcc, ";
8050 gcc_assert (!v8);
8051 break;
8052 case E_CCFPmode:
8053 case E_CCFPEmode:
8054 {
8055 static char v9_fcc_labelno[] = "%%fccX, ";
8056 /* Set the char indicating the number of the fcc reg to use. */
8057 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8058 labelno = v9_fcc_labelno;
8059 if (v8)
8060 {
8061 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8062 labelno = "";
8063 }
8064 }
8065 break;
8066 default:
8067 gcc_unreachable ();
8068 }
8069
8070 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8071 {
8072 strcpy (p,
8073 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8074 >= profile_probability::even ()) ^ far)
8075 ? ",pt" : ",pn");
8076 p += 3;
8077 spaces -= 3;
8078 }
8079 }
8080 else
8081 labelno = "";
8082
8083 if (spaces > 0)
8084 *p++ = '\t';
8085 else
8086 *p++ = ' ';
8087 strcpy (p, labelno);
8088 p = strchr (p, '\0');
8089 if (far)
8090 {
8091 strcpy (p, ".+12\n\t nop\n\tb\t");
8092 /* Skip the next insn if requested or
8093 if we know that it will be a nop. */
8094 if (annul || ! final_sequence)
8095 p[3] = '6';
8096 p += 14;
8097 }
8098 *p++ = '%';
8099 *p++ = 'l';
8100 *p++ = label + '0';
8101 *p++ = '%';
8102 *p++ = '#';
8103 *p = '\0';
8104
8105 return string;
8106 }
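
/* The REG_BR_PROB note drives the ,pt/,pn hint emitted above: a branch
   predicted taken comes out as e.g.

     bne,pt  %icc, .LL7

   whereas the same branch rewritten into the "far" form has its short
   conditional part reversed and therefore gets the opposite hint, ,pn.  */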
8107
8108 /* Emit a library call comparison between floating point X and Y.
8109 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8110 Return the new operator to be used in the comparison sequence.
8111
8112 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8113 values as arguments instead of the TFmode registers themselves,
8114 which is why we cannot call emit_float_lib_cmp. */
8115
8116 rtx
8117 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8118 {
8119 const char *qpfunc;
8120 rtx slot0, slot1, result, tem, tem2, libfunc;
8121 machine_mode mode;
8122 enum rtx_code new_comparison;
8123
8124 switch (comparison)
8125 {
8126 case EQ:
8127 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8128 break;
8129
8130 case NE:
8131 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8132 break;
8133
8134 case GT:
8135 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8136 break;
8137
8138 case GE:
8139 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8140 break;
8141
8142 case LT:
8143 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8144 break;
8145
8146 case LE:
8147 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8148 break;
8149
8150 case ORDERED:
8151 case UNORDERED:
8152 case UNGT:
8153 case UNLT:
8154 case UNEQ:
8155 case UNGE:
8156 case UNLE:
8157 case LTGT:
8158 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8159 break;
8160
8161 default:
8162 gcc_unreachable ();
8163 }
8164
8165 if (TARGET_ARCH64)
8166 {
8167 if (MEM_P (x))
8168 {
8169 tree expr = MEM_EXPR (x);
8170 if (expr)
8171 mark_addressable (expr);
8172 slot0 = x;
8173 }
8174 else
8175 {
8176 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8177 emit_move_insn (slot0, x);
8178 }
8179
8180 if (MEM_P (y))
8181 {
8182 tree expr = MEM_EXPR (y);
8183 if (expr)
8184 mark_addressable (expr);
8185 slot1 = y;
8186 }
8187 else
8188 {
8189 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8190 emit_move_insn (slot1, y);
8191 }
8192
8193 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8194 emit_library_call (libfunc, LCT_NORMAL,
8195 DImode,
8196 XEXP (slot0, 0), Pmode,
8197 XEXP (slot1, 0), Pmode);
8198 mode = DImode;
8199 }
8200 else
8201 {
8202 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8203 emit_library_call (libfunc, LCT_NORMAL,
8204 SImode,
8205 x, TFmode, y, TFmode);
8206 mode = SImode;
8207 }
8208
8209
8210 /* Immediately move the result of the libcall into a pseudo
8211 register so reload doesn't clobber the value if it needs
8212 the return register for a spill reg. */
8213 result = gen_reg_rtx (mode);
8214 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8215
8216 switch (comparison)
8217 {
8218 default:
8219 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8220 case ORDERED:
8221 case UNORDERED:
8222 new_comparison = (comparison == UNORDERED ? EQ : NE);
8223 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8224 case UNGT:
8225 case UNGE:
8226 new_comparison = (comparison == UNGT ? GT : NE);
8227 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8228 case UNLE:
8229 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8230 case UNLT:
8231 tem = gen_reg_rtx (mode);
8232 if (TARGET_ARCH32)
8233 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8234 else
8235 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8236 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8237 case UNEQ:
8238 case LTGT:
8239 tem = gen_reg_rtx (mode);
8240 if (TARGET_ARCH32)
8241 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8242 else
8243 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8244 tem2 = gen_reg_rtx (mode);
8245 if (TARGET_ARCH32)
8246 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8247 else
8248 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8249 new_comparison = (comparison == UNEQ ? EQ : NE);
8250 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8251 }
8252
8253 gcc_unreachable ();
8254 }
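
/* The sequences built above rely on the _Q_cmp/_Qp_cmp result encoding
   0 = equal, 1 = less, 2 = greater, 3 = unordered.  For example, UNGT is
   turned into "result > 1" (greater or unordered) and UNLT into
   "(result & 1) != 0" (less or unordered), while the simple comparisons
   such as LT call the dedicated _Q_flt/_Qp_flt routine and just test the
   returned value against zero.  */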
8255
8256 /* Generate an unsigned DImode to FP conversion. This is the same code
8257 optabs would emit if we didn't have TFmode patterns. */
8258
8259 void
8260 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8261 {
8262 rtx i0, i1, f0, in, out;
8263
8264 out = operands[0];
8265 in = force_reg (DImode, operands[1]);
8266 rtx_code_label *neglab = gen_label_rtx ();
8267 rtx_code_label *donelab = gen_label_rtx ();
8268 i0 = gen_reg_rtx (DImode);
8269 i1 = gen_reg_rtx (DImode);
8270 f0 = gen_reg_rtx (mode);
8271
8272 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8273
8274 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8275 emit_jump_insn (gen_jump (donelab));
8276 emit_barrier ();
8277
8278 emit_label (neglab);
8279
8280 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8281 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8282 emit_insn (gen_iordi3 (i0, i0, i1));
8283 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8284 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8285
8286 emit_label (donelab);
8287 }
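
/* In C terms, the negative path above does (shown for DFmode):

     i0  = (in >> 1) | (in & 1);   // halve, keeping a sticky low bit
     f0  = (double) i0;            // now in range for a signed convert
     out = f0 + f0;                // double back to the original value

   The OR-ed in low bit keeps the halved value rounding correctly.  */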
8288
8289 /* Generate an FP to unsigned DImode conversion. This is the same code
8290 optabs would emit if we didn't have TFmode patterns. */
8291
8292 void
8293 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8294 {
8295 rtx i0, i1, f0, in, out, limit;
8296
8297 out = operands[0];
8298 in = force_reg (mode, operands[1]);
8299 rtx_code_label *neglab = gen_label_rtx ();
8300 rtx_code_label *donelab = gen_label_rtx ();
8301 i0 = gen_reg_rtx (DImode);
8302 i1 = gen_reg_rtx (DImode);
8303 limit = gen_reg_rtx (mode);
8304 f0 = gen_reg_rtx (mode);
8305
8306 emit_move_insn (limit,
8307 const_double_from_real_value (
8308 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8309 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8310
8311 emit_insn (gen_rtx_SET (out,
8312 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8313 emit_jump_insn (gen_jump (donelab));
8314 emit_barrier ();
8315
8316 emit_label (neglab);
8317
8318 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8319 emit_insn (gen_rtx_SET (i0,
8320 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8321 emit_insn (gen_movdi (i1, const1_rtx));
8322 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8323 emit_insn (gen_xordi3 (out, i0, i1));
8324
8325 emit_label (donelab);
8326 }
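
/* The equivalent C for the conversion above (shown for DFmode) is:

     if (in >= 0x1p63)                  // limit = 2^63
       out = (long long) (in - 0x1p63) ^ (1ULL << 63);
     else
       out = (long long) in;

   i.e. inputs beyond the signed range are rebased below 2^63, converted,
   and have the high bit put back with the XOR.  */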
8327
8328 /* Return the string to output a compare and branch instruction to DEST.
8329 DEST is the destination insn (i.e. the label), INSN is the source,
8330 and OP is the conditional expression. */
8331
8332 const char *
8333 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8334 {
8335 machine_mode mode = GET_MODE (XEXP (op, 0));
8336 enum rtx_code code = GET_CODE (op);
8337 const char *cond_str, *tmpl;
8338 int far, emit_nop, len;
8339 static char string[64];
8340 char size_char;
8341
8342 /* Compare and Branch is limited to +-2KB. If it is too far away,
8343 change
8344
8345 cxbne X, Y, .LC30
8346
8347 to
8348
8349 cxbe X, Y, .+16
8350 nop
8351 ba,pt xcc, .LC30
8352 nop */
8353
8354 len = get_attr_length (insn);
8355
8356 far = len == 4;
8357 emit_nop = len == 2;
8358
8359 if (far)
8360 code = reverse_condition (code);
8361
8362 size_char = ((mode == SImode) ? 'w' : 'x');
8363
8364 switch (code)
8365 {
8366 case NE:
8367 cond_str = "ne";
8368 break;
8369
8370 case EQ:
8371 cond_str = "e";
8372 break;
8373
8374 case GE:
8375 cond_str = "ge";
8376 break;
8377
8378 case GT:
8379 cond_str = "g";
8380 break;
8381
8382 case LE:
8383 cond_str = "le";
8384 break;
8385
8386 case LT:
8387 cond_str = "l";
8388 break;
8389
8390 case GEU:
8391 cond_str = "cc";
8392 break;
8393
8394 case GTU:
8395 cond_str = "gu";
8396 break;
8397
8398 case LEU:
8399 cond_str = "leu";
8400 break;
8401
8402 case LTU:
8403 cond_str = "cs";
8404 break;
8405
8406 default:
8407 gcc_unreachable ();
8408 }
8409
8410 if (far)
8411 {
8412 int veryfar = 1, delta;
8413
8414 if (INSN_ADDRESSES_SET_P ())
8415 {
8416 delta = (INSN_ADDRESSES (INSN_UID (dest))
8417 - INSN_ADDRESSES (INSN_UID (insn)));
8418 /* Leave some instructions for "slop". */
8419 if (delta >= -260000 && delta < 260000)
8420 veryfar = 0;
8421 }
8422
8423 if (veryfar)
8424 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8425 else
8426 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8427 }
8428 else
8429 {
8430 if (emit_nop)
8431 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8432 else
8433 tmpl = "c%cb%s\t%%1, %%2, %%3";
8434 }
8435
8436 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8437
8438 return string;
8439 }
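
/* A typical final form of the template above, for an SImode NE
   comparison whose target is within the +-2KB range, is

     cwbne   %o0, %o1, .LL5

   (followed by a nop when emit_cbcond_nop requires one); a DImode EQ
   comparison uses the 'x' size letter instead, i.e. cxbe.  */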
8440
8441 /* Return the string to output a conditional branch to LABEL, testing
8442 register REG. LABEL is the operand number of the label; REG is the
8443 operand number of the reg. OP is the conditional expression. The mode
8444 of REG says what kind of comparison we made.
8445
8446 DEST is the destination insn (i.e. the label), INSN is the source.
8447
8448 REVERSED is nonzero if we should reverse the sense of the comparison.
8449
8450 ANNUL is nonzero if we should generate an annulling branch. */
8451
8452 const char *
8453 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8454 int annul, rtx_insn *insn)
8455 {
8456 static char string[64];
8457 enum rtx_code code = GET_CODE (op);
8458 machine_mode mode = GET_MODE (XEXP (op, 0));
8459 rtx note;
8460 int far;
8461 char *p;
8462
8463 /* Branch on register instructions are limited to +-128KB. If it is too far away,
8464 change
8465
8466 brnz,pt %g1, .LC30
8467
8468 to
8469
8470 brz,pn %g1, .+12
8471 nop
8472 ba,pt %xcc, .LC30
8473
8474 and
8475
8476 brgez,a,pn %o1, .LC29
8477
8478 to
8479
8480 brlz,pt %o1, .+16
8481 nop
8482 ba,pt %xcc, .LC29 */
8483
8484 far = get_attr_length (insn) >= 3;
8485
8486 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8487 if (reversed ^ far)
8488 code = reverse_condition (code);
8489
8490 /* Only 64-bit versions of these instructions exist. */
8491 gcc_assert (mode == DImode);
8492
8493 /* Start by writing the branch condition. */
8494
8495 switch (code)
8496 {
8497 case NE:
8498 strcpy (string, "brnz");
8499 break;
8500
8501 case EQ:
8502 strcpy (string, "brz");
8503 break;
8504
8505 case GE:
8506 strcpy (string, "brgez");
8507 break;
8508
8509 case LT:
8510 strcpy (string, "brlz");
8511 break;
8512
8513 case LE:
8514 strcpy (string, "brlez");
8515 break;
8516
8517 case GT:
8518 strcpy (string, "brgz");
8519 break;
8520
8521 default:
8522 gcc_unreachable ();
8523 }
8524
8525 p = strchr (string, '\0');
8526
8527 /* Now add the annulling, reg, label, and nop. */
8528 if (annul && ! far)
8529 {
8530 strcpy (p, ",a");
8531 p += 2;
8532 }
8533
8534 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8535 {
8536 strcpy (p,
8537 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8538 >= profile_probability::even ()) ^ far)
8539 ? ",pt" : ",pn");
8540 p += 3;
8541 }
8542
8543 *p = p < string + 8 ? '\t' : ' ';
8544 p++;
8545 *p++ = '%';
8546 *p++ = '0' + reg;
8547 *p++ = ',';
8548 *p++ = ' ';
8549 if (far)
8550 {
8551 int veryfar = 1, delta;
8552
8553 if (INSN_ADDRESSES_SET_P ())
8554 {
8555 delta = (INSN_ADDRESSES (INSN_UID (dest))
8556 - INSN_ADDRESSES (INSN_UID (insn)));
8557 /* Leave some instructions for "slop". */
8558 if (delta >= -260000 && delta < 260000)
8559 veryfar = 0;
8560 }
8561
8562 strcpy (p, ".+12\n\t nop\n\t");
8563 /* Skip the next insn if requested or
8564 if we know that it will be a nop. */
8565 if (annul || ! final_sequence)
8566 p[3] = '6';
8567 p += 12;
8568 if (veryfar)
8569 {
8570 strcpy (p, "b\t");
8571 p += 2;
8572 }
8573 else
8574 {
8575 strcpy (p, "ba,pt\t%%xcc, ");
8576 p += 13;
8577 }
8578 }
8579 *p++ = '%';
8580 *p++ = 'l';
8581 *p++ = '0' + label;
8582 *p++ = '%';
8583 *p++ = '#';
8584 *p = '\0';
8585
8586 return string;
8587 }
8588
8589 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8590 Such instructions cannot be used in the delay slot of a return insn on v9.
8591 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8592 */
8593
8594 static int
8595 epilogue_renumber (register rtx *where, int test)
8596 {
8597 register const char *fmt;
8598 register int i;
8599 register enum rtx_code code;
8600
8601 if (*where == 0)
8602 return 0;
8603
8604 code = GET_CODE (*where);
8605
8606 switch (code)
8607 {
8608 case REG:
8609 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8610 return 1;
8611 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8612 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8613 /* fallthrough */
8614 case SCRATCH:
8615 case CC0:
8616 case PC:
8617 case CONST_INT:
8618 case CONST_WIDE_INT:
8619 case CONST_DOUBLE:
8620 return 0;
8621
8622 /* Do not replace the frame pointer with the stack pointer because
8623 it can cause the delayed instruction to load below the stack.
8624 This occurs when instructions like:
8625
8626 (set (reg/i:SI 24 %i0)
8627 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8628 (const_int -20 [0xffffffec])) 0))
8629
8630 are in the return delayed slot. */
8631 case PLUS:
8632 if (GET_CODE (XEXP (*where, 0)) == REG
8633 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8634 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8635 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8636 return 1;
8637 break;
8638
8639 case MEM:
8640 if (SPARC_STACK_BIAS
8641 && GET_CODE (XEXP (*where, 0)) == REG
8642 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8643 return 1;
8644 break;
8645
8646 default:
8647 break;
8648 }
8649
8650 fmt = GET_RTX_FORMAT (code);
8651
8652 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8653 {
8654 if (fmt[i] == 'E')
8655 {
8656 register int j;
8657 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8658 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8659 return 1;
8660 }
8661 else if (fmt[i] == 'e'
8662 && epilogue_renumber (&(XEXP (*where, i)), test))
8663 return 1;
8664 }
8665 return 0;
8666 }
8667 \f
8668 /* Leaf functions and non-leaf functions have different needs. */
8669
8670 static const int
8671 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8672
8673 static const int
8674 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8675
8676 static const int *const reg_alloc_orders[] = {
8677 reg_leaf_alloc_order,
8678 reg_nonleaf_alloc_order};
8679
8680 void
8681 order_regs_for_local_alloc (void)
8682 {
8683 static int last_order_nonleaf = 1;
8684
8685 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8686 {
8687 last_order_nonleaf = !last_order_nonleaf;
8688 memcpy ((char *) reg_alloc_order,
8689 (const char *) reg_alloc_orders[last_order_nonleaf],
8690 FIRST_PSEUDO_REGISTER * sizeof (int));
8691 }
8692 }
8693 \f
8694 /* Return 1 if REG and MEM are legitimate enough to allow the various
8695 MEM<-->REG splits to be run. */
8696
8697 int
8698 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8699 {
8700 /* Punt if we are here by mistake. */
8701 gcc_assert (reload_completed);
8702
8703 /* We must have an offsettable memory reference. */
8704 if (!offsettable_memref_p (mem))
8705 return 0;
8706
8707 /* If we have legitimate args for ldd/std, we do not want
8708 the split to happen. */
8709 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8710 return 0;
8711
8712 /* Success. */
8713 return 1;
8714 }
8715
8716 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8717
8718 void
8719 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8720 {
8721 rtx high_part = gen_highpart (mode, dest);
8722 rtx low_part = gen_lowpart (mode, dest);
8723 rtx word0 = adjust_address (src, mode, 0);
8724 rtx word1 = adjust_address (src, mode, 4);
8725
8726 if (reg_overlap_mentioned_p (high_part, word1))
8727 {
8728 emit_move_insn_1 (low_part, word1);
8729 emit_move_insn_1 (high_part, word0);
8730 }
8731 else
8732 {
8733 emit_move_insn_1 (high_part, word0);
8734 emit_move_insn_1 (low_part, word1);
8735 }
8736 }
8737
8738 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8739
8740 void
8741 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8742 {
8743 rtx word0 = adjust_address (dest, mode, 0);
8744 rtx word1 = adjust_address (dest, mode, 4);
8745 rtx high_part = gen_highpart (mode, src);
8746 rtx low_part = gen_lowpart (mode, src);
8747
8748 emit_move_insn_1 (word0, high_part);
8749 emit_move_insn_1 (word1, low_part);
8750 }
8751
8752 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8753
8754 int
8755 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8756 {
8757 /* Punt if we are here by mistake. */
8758 gcc_assert (reload_completed);
8759
8760 if (GET_CODE (reg1) == SUBREG)
8761 reg1 = SUBREG_REG (reg1);
8762 if (GET_CODE (reg1) != REG)
8763 return 0;
8764 const int regno1 = REGNO (reg1);
8765
8766 if (GET_CODE (reg2) == SUBREG)
8767 reg2 = SUBREG_REG (reg2);
8768 if (GET_CODE (reg2) != REG)
8769 return 0;
8770 const int regno2 = REGNO (reg2);
8771
8772 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8773 return 1;
8774
8775 if (TARGET_VIS3)
8776 {
8777 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8778 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8779 return 1;
8780 }
8781
8782 return 0;
8783 }
8784
8785 /* Split a REG <--> REG move into a pair of moves in MODE. */
8786
8787 void
8788 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8789 {
8790 rtx dest1 = gen_highpart (mode, dest);
8791 rtx dest2 = gen_lowpart (mode, dest);
8792 rtx src1 = gen_highpart (mode, src);
8793 rtx src2 = gen_lowpart (mode, src);
8794
8795 /* Now emit using the real source and destination we found, swapping
8796 the order if we detect overlap. */
8797 if (reg_overlap_mentioned_p (dest1, src2))
8798 {
8799 emit_move_insn_1 (dest2, src2);
8800 emit_move_insn_1 (dest1, src1);
8801 }
8802 else
8803 {
8804 emit_move_insn_1 (dest1, src1);
8805 emit_move_insn_1 (dest2, src2);
8806 }
8807 }
8808
8809 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8810 This makes them candidates for using ldd and std insns.
8811
8812 Note reg1 and reg2 *must* be hard registers. */
8813
8814 int
8815 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8816 {
8817 /* We might have been passed a SUBREG. */
8818 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8819 return 0;
8820
8821 if (REGNO (reg1) % 2 != 0)
8822 return 0;
8823
8824 /* Integer ldd is deprecated in SPARC V9. */
8825 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8826 return 0;
8827
8828 return (REGNO (reg1) == REGNO (reg2) - 1);
8829 }
8830
8831 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8832 an ldd or std insn.
8833
8834 This can only happen when addr1 and addr2, the addresses in mem1
8835 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8836 addr1 must also be aligned on a 64-bit boundary.
8837
8838 Also, if dependent_reg_rtx is not null, it should not be used to
8839 compute the address for mem1, i.e. we cannot optimize a sequence
8840 like:
8841 ld [%o0], %o0
8842 ld [%o0 + 4], %o1
8843 to
8844 ldd [%o0], %o0
8845 nor:
8846 ld [%g3 + 4], %g3
8847 ld [%g3], %g2
8848 to
8849 ldd [%g3], %g2
8850
8851 But, note that the transformation from:
8852 ld [%g2 + 4], %g3
8853 ld [%g2], %g2
8854 to
8855 ldd [%g2], %g2
8856 is perfectly fine. Thus, the peephole2 patterns always pass us
8857 the destination register of the first load, never the second one.
8858
8859 For stores we don't have a similar problem, so dependent_reg_rtx is
8860 NULL_RTX. */
8861
8862 int
8863 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8864 {
8865 rtx addr1, addr2;
8866 unsigned int reg1;
8867 HOST_WIDE_INT offset1;
8868
8869 /* The mems cannot be volatile. */
8870 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8871 return 0;
8872
8873 /* MEM1 should be aligned on a 64-bit boundary. */
8874 if (MEM_ALIGN (mem1) < 64)
8875 return 0;
8876
8877 addr1 = XEXP (mem1, 0);
8878 addr2 = XEXP (mem2, 0);
8879
8880 /* Extract a register number and offset (if used) from the first addr. */
8881 if (GET_CODE (addr1) == PLUS)
8882 {
8883 /* If not a REG, return zero. */
8884 if (GET_CODE (XEXP (addr1, 0)) != REG)
8885 return 0;
8886 else
8887 {
8888 reg1 = REGNO (XEXP (addr1, 0));
8889 /* The offset must be constant! */
8890 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8891 return 0;
8892 offset1 = INTVAL (XEXP (addr1, 1));
8893 }
8894 }
8895 else if (GET_CODE (addr1) != REG)
8896 return 0;
8897 else
8898 {
8899 reg1 = REGNO (addr1);
8900 /* This was a simple (mem (reg)) expression. Offset is 0. */
8901 offset1 = 0;
8902 }
8903
8904 /* Make sure the second address has the form (plus (reg) (const_int)). */
8905 if (GET_CODE (addr2) != PLUS)
8906 return 0;
8907
8908 if (GET_CODE (XEXP (addr2, 0)) != REG
8909 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8910 return 0;
8911
8912 if (reg1 != REGNO (XEXP (addr2, 0)))
8913 return 0;
8914
8915 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8916 return 0;
8917
8918 /* The first offset must be evenly divisible by 8 to ensure the
8919 address is 64-bit aligned. */
8920 if (offset1 % 8 != 0)
8921 return 0;
8922
8923 /* The offset for the second addr must be 4 more than the first addr. */
8924 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8925 return 0;
8926
8927 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8928 instructions. */
8929 return 1;
8930 }
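
/* For example, the pair [%o0] / [%o0+4] qualifies provided MEM1 is known
   to be 64-bit aligned, allowing the two loads to become "ldd [%o0]",
   whereas [%o0+4] / [%o0+8] is rejected because the first offset is not a
   multiple of 8, and [%o0] / [%o0+8] because the slots are not
   consecutive.  */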
8931
8932 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8933
8934 rtx
8935 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8936 {
8937 rtx x = widen_memory_access (mem1, mode, 0);
8938 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8939 return x;
8940 }
8941
8942 /* Return 1 if reg is a pseudo, or is the first register in
8943 a hard register pair. This makes it suitable for use in
8944 ldd and std insns. */
8945
8946 int
8947 register_ok_for_ldd (rtx reg)
8948 {
8949 /* We might have been passed a SUBREG. */
8950 if (!REG_P (reg))
8951 return 0;
8952
8953 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8954 return (REGNO (reg) % 2 == 0);
8955
8956 return 1;
8957 }
8958
8959 /* Return 1 if OP, a MEM, has an address which is known to be
8960 aligned to an 8-byte boundary. */
8961
8962 int
8963 memory_ok_for_ldd (rtx op)
8964 {
8965 /* In 64-bit mode, we assume that the address is word-aligned. */
8966 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8967 return 0;
8968
8969 if (! can_create_pseudo_p ()
8970 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8971 return 0;
8972
8973 return 1;
8974 }
8975 \f
8976 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8977
8978 static bool
8979 sparc_print_operand_punct_valid_p (unsigned char code)
8980 {
8981 if (code == '#'
8982 || code == '*'
8983 || code == '('
8984 || code == ')'
8985 || code == '_'
8986 || code == '&')
8987 return true;
8988
8989 return false;
8990 }
8991
8992 /* Implement TARGET_PRINT_OPERAND.
8993 Print operand X (an rtx) in assembler syntax to file FILE.
8994 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8995 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8996
8997 static void
8998 sparc_print_operand (FILE *file, rtx x, int code)
8999 {
9000 const char *s;
9001
9002 switch (code)
9003 {
9004 case '#':
9005 /* Output an insn in a delay slot. */
9006 if (final_sequence)
9007 sparc_indent_opcode = 1;
9008 else
9009 fputs ("\n\t nop", file);
9010 return;
9011 case '*':
9012 /* Output an annul flag if there's nothing for the delay slot and we
9013 are optimizing. This is always used with '(' below.
9014 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9015 this is a dbx bug. So, we only do this when optimizing.
9016 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9017 Always emit a nop in case the next instruction is a branch. */
9018 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9019 fputs (",a", file);
9020 return;
9021 case '(':
9022 /* Output a 'nop' if there's nothing for the delay slot and we are
9023 not optimizing. This is always used with '*' above. */
9024 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9025 fputs ("\n\t nop", file);
9026 else if (final_sequence)
9027 sparc_indent_opcode = 1;
9028 return;
9029 case ')':
9030 /* Output the right displacement from the saved PC on function return.
9031 The caller may have placed an "unimp" insn immediately after the call
9032 so we have to account for it. This insn is used in the 32-bit ABI
9033 when calling a function that returns a non-zero-sized structure. The
9034 64-bit ABI doesn't have it. Be careful to have this test be the same
9035 as that for the call. The exception is when sparc_std_struct_return
9036 is enabled, the psABI is followed exactly and the adjustment is made
9037 by the code in sparc_struct_value_rtx. The call emitted is the same
9038 when sparc_std_struct_return is enabled. */
9039 if (!TARGET_ARCH64
9040 && cfun->returns_struct
9041 && !sparc_std_struct_return
9042 && DECL_SIZE (DECL_RESULT (current_function_decl))
9043 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9044 == INTEGER_CST
9045 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9046 fputs ("12", file);
9047 else
9048 fputc ('8', file);
9049 return;
9050 case '_':
9051 /* Output the Embedded Medium/Anywhere code model base register. */
9052 fputs (EMBMEDANY_BASE_REG, file);
9053 return;
9054 case '&':
9055 /* Print some local dynamic TLS name. */
9056 if (const char *name = get_some_local_dynamic_name ())
9057 assemble_name (file, name);
9058 else
9059 output_operand_lossage ("'%%&' used without any "
9060 "local dynamic TLS references");
9061 return;
9062
9063 case 'Y':
9064 /* Adjust the operand to take into account a RESTORE operation. */
9065 if (GET_CODE (x) == CONST_INT)
9066 break;
9067 else if (GET_CODE (x) != REG)
9068 output_operand_lossage ("invalid %%Y operand");
9069 else if (REGNO (x) < 8)
9070 fputs (reg_names[REGNO (x)], file);
9071 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9072 fputs (reg_names[REGNO (x)-16], file);
9073 else
9074 output_operand_lossage ("invalid %%Y operand");
9075 return;
9076 case 'L':
9077 /* Print out the low order register name of a register pair. */
9078 if (WORDS_BIG_ENDIAN)
9079 fputs (reg_names[REGNO (x)+1], file);
9080 else
9081 fputs (reg_names[REGNO (x)], file);
9082 return;
9083 case 'H':
9084 /* Print out the high order register name of a register pair. */
9085 if (WORDS_BIG_ENDIAN)
9086 fputs (reg_names[REGNO (x)], file);
9087 else
9088 fputs (reg_names[REGNO (x)+1], file);
9089 return;
9090 case 'R':
9091 /* Print out the second register name of a register pair or quad.
9092 I.e., R (%o0) => %o1. */
9093 fputs (reg_names[REGNO (x)+1], file);
9094 return;
9095 case 'S':
9096 /* Print out the third register name of a register quad.
9097 I.e., S (%o0) => %o2. */
9098 fputs (reg_names[REGNO (x)+2], file);
9099 return;
9100 case 'T':
9101 /* Print out the fourth register name of a register quad.
9102 I.e., T (%o0) => %o3. */
9103 fputs (reg_names[REGNO (x)+3], file);
9104 return;
9105 case 'x':
9106 /* Print a condition code register. */
9107 if (REGNO (x) == SPARC_ICC_REG)
9108 {
9109 switch (GET_MODE (x))
9110 {
9111 case E_CCmode:
9112 case E_CCNZmode:
9113 case E_CCCmode:
9114 case E_CCVmode:
9115 s = "%icc";
9116 break;
9117 case E_CCXmode:
9118 case E_CCXNZmode:
9119 case E_CCXCmode:
9120 case E_CCXVmode:
9121 s = "%xcc";
9122 break;
9123 default:
9124 gcc_unreachable ();
9125 }
9126 fputs (s, file);
9127 }
9128 else
9129 /* %fccN register */
9130 fputs (reg_names[REGNO (x)], file);
9131 return;
9132 case 'm':
9133 /* Print the operand's address only. */
9134 output_address (GET_MODE (x), XEXP (x, 0));
9135 return;
9136 case 'r':
9137 /* In this case we need a register. Use %g0 if the
9138 operand is const0_rtx. */
9139 if (x == const0_rtx
9140 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9141 {
9142 fputs ("%g0", file);
9143 return;
9144 }
9145 else
9146 break;
9147
9148 case 'A':
9149 switch (GET_CODE (x))
9150 {
9151 case IOR:
9152 s = "or";
9153 break;
9154 case AND:
9155 s = "and";
9156 break;
9157 case XOR:
9158 s = "xor";
9159 break;
9160 default:
9161 output_operand_lossage ("invalid %%A operand");
9162 s = "";
9163 break;
9164 }
9165 fputs (s, file);
9166 return;
9167
9168 case 'B':
9169 switch (GET_CODE (x))
9170 {
9171 case IOR:
9172 s = "orn";
9173 break;
9174 case AND:
9175 s = "andn";
9176 break;
9177 case XOR:
9178 s = "xnor";
9179 break;
9180 default:
9181 output_operand_lossage ("invalid %%B operand");
9182 s = "";
9183 break;
9184 }
9185 fputs (s, file);
9186 return;
9187
9188 /* This is used by the conditional move instructions. */
9189 case 'C':
9190 {
9191 machine_mode mode = GET_MODE (XEXP (x, 0));
9192 switch (GET_CODE (x))
9193 {
9194 case NE:
9195 if (mode == CCVmode || mode == CCXVmode)
9196 s = "vs";
9197 else
9198 s = "ne";
9199 break;
9200 case EQ:
9201 if (mode == CCVmode || mode == CCXVmode)
9202 s = "vc";
9203 else
9204 s = "e";
9205 break;
9206 case GE:
9207 if (mode == CCNZmode || mode == CCXNZmode)
9208 s = "pos";
9209 else
9210 s = "ge";
9211 break;
9212 case GT:
9213 s = "g";
9214 break;
9215 case LE:
9216 s = "le";
9217 break;
9218 case LT:
9219 if (mode == CCNZmode || mode == CCXNZmode)
9220 s = "neg";
9221 else
9222 s = "l";
9223 break;
9224 case GEU:
9225 s = "geu";
9226 break;
9227 case GTU:
9228 s = "gu";
9229 break;
9230 case LEU:
9231 s = "leu";
9232 break;
9233 case LTU:
9234 s = "lu";
9235 break;
9236 case LTGT:
9237 s = "lg";
9238 break;
9239 case UNORDERED:
9240 s = "u";
9241 break;
9242 case ORDERED:
9243 s = "o";
9244 break;
9245 case UNLT:
9246 s = "ul";
9247 break;
9248 case UNLE:
9249 s = "ule";
9250 break;
9251 case UNGT:
9252 s = "ug";
9253 break;
9254 case UNGE:
9255 s = "uge"
9256 ; break;
9257 case UNEQ:
9258 s = "ue";
9259 break;
9260 default:
9261 output_operand_lossage ("invalid %%C operand");
9262 s = "";
9263 break;
9264 }
9265 fputs (s, file);
9266 return;
9267 }
9268
9269 /* These are used by the movr instruction pattern. */
9270 case 'D':
9271 {
9272 switch (GET_CODE (x))
9273 {
9274 case NE:
9275 s = "ne";
9276 break;
9277 case EQ:
9278 s = "e";
9279 break;
9280 case GE:
9281 s = "gez";
9282 break;
9283 case LT:
9284 s = "lz";
9285 break;
9286 case LE:
9287 s = "lez";
9288 break;
9289 case GT:
9290 s = "gz";
9291 break;
9292 default:
9293 output_operand_lossage ("invalid %%D operand");
9294 s = "";
9295 break;
9296 }
9297 fputs (s, file);
9298 return;
9299 }
9300
9301 case 'b':
9302 {
9303 /* Print a sign-extended character. */
9304 int i = trunc_int_for_mode (INTVAL (x), QImode);
9305 fprintf (file, "%d", i);
9306 return;
9307 }
9308
9309 case 'f':
9310 /* Operand must be a MEM; write its address. */
9311 if (GET_CODE (x) != MEM)
9312 output_operand_lossage ("invalid %%f operand");
9313 output_address (GET_MODE (x), XEXP (x, 0));
9314 return;
9315
9316 case 's':
9317 {
9318 /* Print a sign-extended 32-bit value. */
9319 HOST_WIDE_INT i;
9320 if (GET_CODE(x) == CONST_INT)
9321 i = INTVAL (x);
9322 else
9323 {
9324 output_operand_lossage ("invalid %%s operand");
9325 return;
9326 }
9327 i = trunc_int_for_mode (i, SImode);
9328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9329 return;
9330 }
9331
9332 case 0:
9333 /* Do nothing special. */
9334 break;
9335
9336 default:
9337 /* Undocumented flag. */
9338 output_operand_lossage ("invalid operand output code");
9339 }
9340
9341 if (GET_CODE (x) == REG)
9342 fputs (reg_names[REGNO (x)], file);
9343 else if (GET_CODE (x) == MEM)
9344 {
9345 fputc ('[', file);
9346 /* Poor Sun assembler doesn't understand absolute addressing. */
9347 if (CONSTANT_P (XEXP (x, 0)))
9348 fputs ("%g0+", file);
9349 output_address (GET_MODE (x), XEXP (x, 0));
9350 fputc (']', file);
9351 }
9352 else if (GET_CODE (x) == HIGH)
9353 {
9354 fputs ("%hi(", file);
9355 output_addr_const (file, XEXP (x, 0));
9356 fputc (')', file);
9357 }
9358 else if (GET_CODE (x) == LO_SUM)
9359 {
9360 sparc_print_operand (file, XEXP (x, 0), 0);
9361 if (TARGET_CM_MEDMID)
9362 fputs ("+%l44(", file);
9363 else
9364 fputs ("+%lo(", file);
9365 output_addr_const (file, XEXP (x, 1));
9366 fputc (')', file);
9367 }
9368 else if (GET_CODE (x) == CONST_DOUBLE)
9369 output_operand_lossage ("floating-point constant not a valid immediate operand");
9370 else
9371 output_addr_const (file, x);
9372 }
9373
9374 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9375
9376 static void
9377 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9378 {
9379 register rtx base, index = 0;
9380 int offset = 0;
9381 register rtx addr = x;
9382
9383 if (REG_P (addr))
9384 fputs (reg_names[REGNO (addr)], file);
9385 else if (GET_CODE (addr) == PLUS)
9386 {
9387 if (CONST_INT_P (XEXP (addr, 0)))
9388 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9389 else if (CONST_INT_P (XEXP (addr, 1)))
9390 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9391 else
9392 base = XEXP (addr, 0), index = XEXP (addr, 1);
9393 if (GET_CODE (base) == LO_SUM)
9394 {
9395 gcc_assert (USE_AS_OFFSETABLE_LO10
9396 && TARGET_ARCH64
9397 && ! TARGET_CM_MEDMID);
9398 output_operand (XEXP (base, 0), 0);
9399 fputs ("+%lo(", file);
9400 output_address (VOIDmode, XEXP (base, 1));
9401 fprintf (file, ")+%d", offset);
9402 }
9403 else
9404 {
9405 fputs (reg_names[REGNO (base)], file);
9406 if (index == 0)
9407 fprintf (file, "%+d", offset);
9408 else if (REG_P (index))
9409 fprintf (file, "+%s", reg_names[REGNO (index)]);
9410 else if (GET_CODE (index) == SYMBOL_REF
9411 || GET_CODE (index) == LABEL_REF
9412 || GET_CODE (index) == CONST)
9413 fputc ('+', file), output_addr_const (file, index);
9414 else gcc_unreachable ();
9415 }
9416 }
9417 else if (GET_CODE (addr) == MINUS
9418 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9419 {
9420 output_addr_const (file, XEXP (addr, 0));
9421 fputs ("-(", file);
9422 output_addr_const (file, XEXP (addr, 1));
9423 fputs ("-.)", file);
9424 }
9425 else if (GET_CODE (addr) == LO_SUM)
9426 {
9427 output_operand (XEXP (addr, 0), 0);
9428 if (TARGET_CM_MEDMID)
9429 fputs ("+%l44(", file);
9430 else
9431 fputs ("+%lo(", file);
9432 output_address (VOIDmode, XEXP (addr, 1));
9433 fputc (')', file);
9434 }
9435 else if (flag_pic
9436 && GET_CODE (addr) == CONST
9437 && GET_CODE (XEXP (addr, 0)) == MINUS
9438 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9439 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9440 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9441 {
9442 addr = XEXP (addr, 0);
9443 output_addr_const (file, XEXP (addr, 0));
9444 /* Group the args of the second CONST in parentheses. */
9445 fputs ("-(", file);
9446 /* Skip past the second CONST--it does nothing for us. */
9447 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9448 /* Close the parenthesis. */
9449 fputc (')', file);
9450 }
9451 else
9452 {
9453 output_addr_const (file, addr);
9454 }
9455 }
9456 \f
9457 /* Target hook for assembling integer objects. The sparc version has
9458 special handling for aligned DI-mode objects. */
9459
9460 static bool
9461 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9462 {
9463 /* ??? We only output .xword's for symbols and only then in environments
9464 where the assembler can handle them. */
9465 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9466 {
9467 if (TARGET_V9)
9468 {
9469 assemble_integer_with_op ("\t.xword\t", x);
9470 return true;
9471 }
9472 else
9473 {
9474 assemble_aligned_integer (4, const0_rtx);
9475 assemble_aligned_integer (4, x);
9476 return true;
9477 }
9478 }
9479 return default_assemble_integer (x, size, aligned_p);
9480 }
9481 \f
9482 /* Return the value of a code used in the .proc pseudo-op that says
9483 what kind of result this function returns. For non-C types, we pick
9484 the closest C type. */
9485
9486 #ifndef SHORT_TYPE_SIZE
9487 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9488 #endif
9489
9490 #ifndef INT_TYPE_SIZE
9491 #define INT_TYPE_SIZE BITS_PER_WORD
9492 #endif
9493
9494 #ifndef LONG_TYPE_SIZE
9495 #define LONG_TYPE_SIZE BITS_PER_WORD
9496 #endif
9497
9498 #ifndef LONG_LONG_TYPE_SIZE
9499 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9500 #endif
9501
9502 #ifndef FLOAT_TYPE_SIZE
9503 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9504 #endif
9505
9506 #ifndef DOUBLE_TYPE_SIZE
9507 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9508 #endif
9509
9510 #ifndef LONG_DOUBLE_TYPE_SIZE
9511 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9512 #endif
9513
9514 unsigned long
9515 sparc_type_code (register tree type)
9516 {
9517 register unsigned long qualifiers = 0;
9518 register unsigned shift;
9519
9520 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9521 setting more, since some assemblers will give an error for this. Also,
9522 we must be careful to avoid shifts of 32 bits or more to avoid getting
9523 unpredictable results. */
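/* Illustrative note, not part of the original comment: for a parameter of
   type "int *", the first loop iteration below adds the pointer qualifier
   (1 << 6) and the second iteration returns with base code 4 for int, so
   the encoded value would be 0x44; "int **" picks up a second 2-bit
   pointer field and would encode as 0x144.  This assumes INT_TYPE_SIZE
   matches the precision of int, as it normally does.  */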
9524
9525 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9526 {
9527 switch (TREE_CODE (type))
9528 {
9529 case ERROR_MARK:
9530 return qualifiers;
9531
9532 case ARRAY_TYPE:
9533 qualifiers |= (3 << shift);
9534 break;
9535
9536 case FUNCTION_TYPE:
9537 case METHOD_TYPE:
9538 qualifiers |= (2 << shift);
9539 break;
9540
9541 case POINTER_TYPE:
9542 case REFERENCE_TYPE:
9543 case OFFSET_TYPE:
9544 qualifiers |= (1 << shift);
9545 break;
9546
9547 case RECORD_TYPE:
9548 return (qualifiers | 8);
9549
9550 case UNION_TYPE:
9551 case QUAL_UNION_TYPE:
9552 return (qualifiers | 9);
9553
9554 case ENUMERAL_TYPE:
9555 return (qualifiers | 10);
9556
9557 case VOID_TYPE:
9558 return (qualifiers | 16);
9559
9560 case INTEGER_TYPE:
9561 /* If this is a range type, consider it to be the underlying
9562 type. */
9563 if (TREE_TYPE (type) != 0)
9564 break;
9565
9566 /* Carefully distinguish all the standard types of C,
9567 without messing up if the language is not C. We do this by
9568 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9569 look at both the names and the above fields, but that's redundant.
9570 Any type whose size is between two C types will be considered
9571 to be the wider of the two types. Also, we do not have a
9572 special code to use for "long long", so anything wider than
9573 long is treated the same. Note that we can't distinguish
9574 between "int" and "long" in this code if they are the same
9575 size, but that's fine, since neither can the assembler. */
9576
9577 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9578 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9579
9580 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9581 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9582
9583 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9584 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9585
9586 else
9587 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9588
9589 case REAL_TYPE:
9590 /* If this is a range type, consider it to be the underlying
9591 type. */
9592 if (TREE_TYPE (type) != 0)
9593 break;
9594
9595 /* Carefully distinguish all the standard types of C,
9596 without messing up if the language is not C. */
9597
9598 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9599 return (qualifiers | 6);
9600
9601 else
9602 return (qualifiers | 7);
9603
9604 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9605 /* ??? We need to distinguish between double and float complex types,
9606 but I don't know how yet because I can't reach this code from
9607 existing front-ends. */
9608 return (qualifiers | 7); /* Who knows? */
9609
9610 case VECTOR_TYPE:
9611 case BOOLEAN_TYPE: /* Boolean truth value type. */
9612 case LANG_TYPE:
9613 case NULLPTR_TYPE:
9614 return qualifiers;
9615
9616 default:
9617 gcc_unreachable (); /* Not a type! */
9618 }
9619 }
9620
9621 return qualifiers;
9622 }
9623 \f
9624 /* Nested function support. */
9625
9626 /* Emit RTL insns to initialize the variable parts of a trampoline.
9627 FNADDR is an RTX for the address of the function's pure code.
9628 CXT is an RTX for the static chain value for the function.
9629
9630 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9631 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9632 (to store insns). This is a bit excessive. Perhaps a different
9633 mechanism would be better here.
9634
9635 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9636
9637 static void
9638 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9639 {
9640 /* SPARC 32-bit trampoline:
9641
9642 sethi %hi(fn), %g1
9643 sethi %hi(static), %g2
9644 jmp %g1+%lo(fn)
9645 or %g2, %lo(static), %g2
9646
9647 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9648 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9649 */
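/* Illustrative note (an explanatory sketch, not part of the original
   comment): the constants used below follow directly from the two
   instruction formats above.  For a function at address FN,

     (FN >> 10)   | 0x03000000   is  sethi %hi(FN), %g1   (rd = %g1)
     (FN & 0x3ff) | 0x81c06000   is  jmpl  %g1+%lo(FN), %g0

   so the upper 22 bits of the address land in the sethi immediate and the
   low 10 bits in the jmpl simm13 field.  The words at offsets 4 and 12 are
   built the same way for the static chain value, using 0x05000000 (sethi
   with rd = %g2) and 0x8410a000 (or %g2, simm13, %g2).  */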
9650
9651 emit_move_insn
9652 (adjust_address (m_tramp, SImode, 0),
9653 expand_binop (SImode, ior_optab,
9654 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9655 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9656 NULL_RTX, 1, OPTAB_DIRECT));
9657
9658 emit_move_insn
9659 (adjust_address (m_tramp, SImode, 4),
9660 expand_binop (SImode, ior_optab,
9661 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9662 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9663 NULL_RTX, 1, OPTAB_DIRECT));
9664
9665 emit_move_insn
9666 (adjust_address (m_tramp, SImode, 8),
9667 expand_binop (SImode, ior_optab,
9668 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9669 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9670 NULL_RTX, 1, OPTAB_DIRECT));
9671
9672 emit_move_insn
9673 (adjust_address (m_tramp, SImode, 12),
9674 expand_binop (SImode, ior_optab,
9675 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9676 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9677 NULL_RTX, 1, OPTAB_DIRECT));
9678
9679 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9680 aligned on a 16 byte boundary so one flush clears it all. */
9681 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9682 if (sparc_cpu != PROCESSOR_ULTRASPARC
9683 && sparc_cpu != PROCESSOR_ULTRASPARC3
9684 && sparc_cpu != PROCESSOR_NIAGARA
9685 && sparc_cpu != PROCESSOR_NIAGARA2
9686 && sparc_cpu != PROCESSOR_NIAGARA3
9687 && sparc_cpu != PROCESSOR_NIAGARA4
9688 && sparc_cpu != PROCESSOR_NIAGARA7
9689 && sparc_cpu != PROCESSOR_M8)
9690 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9691
9692 /* Call __enable_execute_stack after writing onto the stack to make sure
9693 the stack address is accessible. */
9694 #ifdef HAVE_ENABLE_EXECUTE_STACK
9695 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9696 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9697 #endif
9698
9699 }
9700
9701 /* The 64-bit version is simpler because it makes more sense to load the
9702 values as "immediate" data out of the trampoline. It's also easier since
9703 we can read the PC without clobbering a register. */
9704
9705 static void
9706 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9707 {
9708 /* SPARC 64-bit trampoline:
9709
9710 rd %pc, %g1
9711 ldx [%g1+24], %g5
9712 jmp %g5
9713 ldx [%g1+16], %g5
9714 +16 bytes data
9715 */
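/* Illustrative note: the four constants emitted below are the fixed
   encodings of the instructions listed above, with no address bits to
   patch in: 0x83414000 is "rd %pc, %g1", 0xca586018 is "ldx [%g1+24], %g5",
   0x81c14000 is "jmpl %g5, %g0" and 0xca586010 is "ldx [%g1+16], %g5".
   The +16/+24 offsets point at the static chain and function address
   doublewords stored just after the code.  */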
9716
9717 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9718 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9719 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9720 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9721 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9722 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9723 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9724 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9725 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9726 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9727 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9728
9729 if (sparc_cpu != PROCESSOR_ULTRASPARC
9730 && sparc_cpu != PROCESSOR_ULTRASPARC3
9731 && sparc_cpu != PROCESSOR_NIAGARA
9732 && sparc_cpu != PROCESSOR_NIAGARA2
9733 && sparc_cpu != PROCESSOR_NIAGARA3
9734 && sparc_cpu != PROCESSOR_NIAGARA4
9735 && sparc_cpu != PROCESSOR_NIAGARA7
9736 && sparc_cpu != PROCESSOR_M8)
9737 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9738
9739 /* Call __enable_execute_stack after writing onto the stack to make sure
9740 the stack address is accessible. */
9741 #ifdef HAVE_ENABLE_EXECUTE_STACK
9742 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9743 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9744 #endif
9745 }
9746
9747 /* Worker for TARGET_TRAMPOLINE_INIT. */
9748
9749 static void
9750 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9751 {
9752 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9753 cxt = force_reg (Pmode, cxt);
9754 if (TARGET_ARCH64)
9755 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9756 else
9757 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9758 }
9759 \f
9760 /* Adjust the cost of a scheduling dependency. Return the new cost of
9761 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9762
9763 static int
9764 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9765 int cost)
9766 {
9767 enum attr_type insn_type;
9768
9769 if (recog_memoized (insn) < 0)
9770 return cost;
9771
9772 insn_type = get_attr_type (insn);
9773
9774 if (dep_type == 0)
9775 {
9776 /* Data dependency; DEP_INSN writes a register that INSN reads some
9777 cycles later. */
9778
9779 /* If a load, then the dependence must be on the memory address;
9780 add an extra "cycle". Note that the cost could be two cycles
9781 if the reg was written late in an instruction group; we can't tell
9782 here. */
9783 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9784 return cost + 3;
9785
9786 /* Get the delay only if the address of the store is the dependence. */
9787 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9788 {
9789 rtx pat = PATTERN (insn);
9790 rtx dep_pat = PATTERN (dep_insn);
9791
9792 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9793 return cost; /* This should not happen! */
9794
9795 /* The dependency between the two instructions was on the data that
9796 is being stored. Assume that this implies that the address of the
9797 store is not dependent. */
9798 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9799 return cost;
9800
9801 return cost + 3; /* An approximation. */
9802 }
9803
9804 /* A shift instruction cannot receive its data from an instruction
9805 in the same cycle; add a one cycle penalty. */
9806 if (insn_type == TYPE_SHIFT)
9807 return cost + 3; /* Split before cascade into shift. */
9808 }
9809 else
9810 {
9811 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9812 INSN writes some cycles later. */
9813
9814 /* These are only significant for the fpu unit; writing a fp reg before
9815 the fpu has finished with it stalls the processor. */
9816
9817 /* Reusing an integer register causes no problems. */
9818 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9819 return 0;
9820 }
9821
9822 return cost;
9823 }
9824
9825 static int
9826 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9827 int cost)
9828 {
9829 enum attr_type insn_type, dep_type;
9830 rtx pat = PATTERN (insn);
9831 rtx dep_pat = PATTERN (dep_insn);
9832
9833 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9834 return cost;
9835
9836 insn_type = get_attr_type (insn);
9837 dep_type = get_attr_type (dep_insn);
9838
9839 switch (dtype)
9840 {
9841 case 0:
9842 /* Data dependency; DEP_INSN writes a register that INSN reads some
9843 cycles later. */
9844
9845 switch (insn_type)
9846 {
9847 case TYPE_STORE:
9848 case TYPE_FPSTORE:
9849 /* Get the delay iff the address of the store is the dependence. */
9850 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9851 return cost;
9852
9853 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9854 return cost;
9855 return cost + 3;
9856
9857 case TYPE_LOAD:
9858 case TYPE_SLOAD:
9859 case TYPE_FPLOAD:
9860 /* If a load, then the dependence must be on the memory address. If
9861 the addresses aren't equal, then it might be a false dependency */
9862 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9863 {
9864 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9865 || GET_CODE (SET_DEST (dep_pat)) != MEM
9866 || GET_CODE (SET_SRC (pat)) != MEM
9867 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9868 XEXP (SET_SRC (pat), 0)))
9869 return cost + 2;
9870
9871 return cost + 8;
9872 }
9873 break;
9874
9875 case TYPE_BRANCH:
9876 /* Compare to branch latency is 0. There is no benefit from
9877 separating compare and branch. */
9878 if (dep_type == TYPE_COMPARE)
9879 return 0;
9880 /* Floating point compare to branch latency is less than
9881 compare to conditional move. */
9882 if (dep_type == TYPE_FPCMP)
9883 return cost - 1;
9884 break;
9885 default:
9886 break;
9887 }
9888 break;
9889
9890 case REG_DEP_ANTI:
9891 /* Anti-dependencies only penalize the fpu unit. */
9892 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9893 return 0;
9894 break;
9895
9896 default:
9897 break;
9898 }
9899
9900 return cost;
9901 }
9902
9903 static int
9904 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9905 unsigned int)
9906 {
9907 switch (sparc_cpu)
9908 {
9909 case PROCESSOR_SUPERSPARC:
9910 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9911 break;
9912 case PROCESSOR_HYPERSPARC:
9913 case PROCESSOR_SPARCLITE86X:
9914 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9915 break;
9916 default:
9917 break;
9918 }
9919 return cost;
9920 }
9921
9922 static void
9923 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9924 int sched_verbose ATTRIBUTE_UNUSED,
9925 int max_ready ATTRIBUTE_UNUSED)
9926 {}
9927
9928 static int
9929 sparc_use_sched_lookahead (void)
9930 {
9931 if (sparc_cpu == PROCESSOR_NIAGARA
9932 || sparc_cpu == PROCESSOR_NIAGARA2
9933 || sparc_cpu == PROCESSOR_NIAGARA3)
9934 return 0;
9935 if (sparc_cpu == PROCESSOR_NIAGARA4
9936 || sparc_cpu == PROCESSOR_NIAGARA7
9937 || sparc_cpu == PROCESSOR_M8)
9938 return 2;
9939 if (sparc_cpu == PROCESSOR_ULTRASPARC
9940 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9941 return 4;
9942 if ((1 << sparc_cpu) &
9943 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9944 (1 << PROCESSOR_SPARCLITE86X)))
9945 return 3;
9946 return 0;
9947 }
9948
9949 static int
9950 sparc_issue_rate (void)
9951 {
9952 switch (sparc_cpu)
9953 {
9954 case PROCESSOR_NIAGARA:
9955 case PROCESSOR_NIAGARA2:
9956 case PROCESSOR_NIAGARA3:
9957 default:
9958 return 1;
9959 case PROCESSOR_NIAGARA4:
9960 case PROCESSOR_NIAGARA7:
9961 case PROCESSOR_V9:
9962 /* Assume V9 processors are capable of at least dual-issue. */
9963 return 2;
9964 case PROCESSOR_SUPERSPARC:
9965 return 3;
9966 case PROCESSOR_HYPERSPARC:
9967 case PROCESSOR_SPARCLITE86X:
9968 return 2;
9969 case PROCESSOR_ULTRASPARC:
9970 case PROCESSOR_ULTRASPARC3:
9971 case PROCESSOR_M8:
9972 return 4;
9973 }
9974 }
9975
9976 static int
9977 set_extends (rtx_insn *insn)
9978 {
9979 register rtx pat = PATTERN (insn);
9980
9981 switch (GET_CODE (SET_SRC (pat)))
9982 {
9983 /* Load and some shift instructions zero extend. */
9984 case MEM:
9985 case ZERO_EXTEND:
9986 /* sethi clears the high bits */
9987 case HIGH:
9988 /* LO_SUM is used with sethi. sethi cleared the high
9989 bits and the values used with lo_sum are positive */
9990 case LO_SUM:
9991 /* Store flag stores 0 or 1 */
9992 case LT: case LTU:
9993 case GT: case GTU:
9994 case LE: case LEU:
9995 case GE: case GEU:
9996 case EQ:
9997 case NE:
9998 return 1;
9999 case AND:
10000 {
10001 rtx op0 = XEXP (SET_SRC (pat), 0);
10002 rtx op1 = XEXP (SET_SRC (pat), 1);
10003 if (GET_CODE (op1) == CONST_INT)
10004 return INTVAL (op1) >= 0;
10005 if (GET_CODE (op0) != REG)
10006 return 0;
10007 if (sparc_check_64 (op0, insn) == 1)
10008 return 1;
10009 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10010 }
10011 case IOR:
10012 case XOR:
10013 {
10014 rtx op0 = XEXP (SET_SRC (pat), 0);
10015 rtx op1 = XEXP (SET_SRC (pat), 1);
10016 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10017 return 0;
10018 if (GET_CODE (op1) == CONST_INT)
10019 return INTVAL (op1) >= 0;
10020 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10021 }
10022 case LSHIFTRT:
10023 return GET_MODE (SET_SRC (pat)) == SImode;
10024 /* Positive integers leave the high bits zero. */
10025 case CONST_INT:
10026 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10027 case ASHIFTRT:
10028 case SIGN_EXTEND:
10029 return - (GET_MODE (SET_SRC (pat)) == SImode);
10030 case REG:
10031 return sparc_check_64 (SET_SRC (pat), insn);
10032 default:
10033 return 0;
10034 }
10035 }
10036
10037 /* We _ought_ to have only one kind per function, but... */
10038 static GTY(()) rtx sparc_addr_diff_list;
10039 static GTY(()) rtx sparc_addr_list;
10040
10041 void
10042 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10043 {
10044 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10045 if (diff)
10046 sparc_addr_diff_list
10047 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10048 else
10049 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10050 }
10051
10052 static void
10053 sparc_output_addr_vec (rtx vec)
10054 {
10055 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10056 int idx, vlen = XVECLEN (body, 0);
10057
10058 #ifdef ASM_OUTPUT_ADDR_VEC_START
10059 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10060 #endif
10061
10062 #ifdef ASM_OUTPUT_CASE_LABEL
10063 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10064 NEXT_INSN (lab));
10065 #else
10066 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10067 #endif
10068
10069 for (idx = 0; idx < vlen; idx++)
10070 {
10071 ASM_OUTPUT_ADDR_VEC_ELT
10072 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10073 }
10074
10075 #ifdef ASM_OUTPUT_ADDR_VEC_END
10076 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10077 #endif
10078 }
10079
10080 static void
10081 sparc_output_addr_diff_vec (rtx vec)
10082 {
10083 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10084 rtx base = XEXP (XEXP (body, 0), 0);
10085 int idx, vlen = XVECLEN (body, 1);
10086
10087 #ifdef ASM_OUTPUT_ADDR_VEC_START
10088 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10089 #endif
10090
10091 #ifdef ASM_OUTPUT_CASE_LABEL
10092 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10093 NEXT_INSN (lab));
10094 #else
10095 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10096 #endif
10097
10098 for (idx = 0; idx < vlen; idx++)
10099 {
10100 ASM_OUTPUT_ADDR_DIFF_ELT
10101 (asm_out_file,
10102 body,
10103 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10104 CODE_LABEL_NUMBER (base));
10105 }
10106
10107 #ifdef ASM_OUTPUT_ADDR_VEC_END
10108 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10109 #endif
10110 }
10111
10112 static void
10113 sparc_output_deferred_case_vectors (void)
10114 {
10115 rtx t;
10116 int align;
10117
10118 if (sparc_addr_list == NULL_RTX
10119 && sparc_addr_diff_list == NULL_RTX)
10120 return;
10121
10122 /* Align to cache line in the function's code section. */
10123 switch_to_section (current_function_section ());
10124
10125 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10126 if (align > 0)
10127 ASM_OUTPUT_ALIGN (asm_out_file, align);
10128
10129 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10130 sparc_output_addr_vec (XEXP (t, 0));
10131 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10132 sparc_output_addr_diff_vec (XEXP (t, 0));
10133
10134 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10135 }
10136
10137 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10138 unknown. Return 1 if the high bits are zero, -1 if the register is
10139 sign extended. */
10140 int
10141 sparc_check_64 (rtx x, rtx_insn *insn)
10142 {
10143 /* If a register is set only once it is safe to ignore insns this
10144 code does not know how to handle. The loop will either recognize
10145 the single set and return the correct value or fail to recognize
10146 it and return 0. */
10147 int set_once = 0;
10148 rtx y = x;
10149
10150 gcc_assert (GET_CODE (x) == REG);
10151
10152 if (GET_MODE (x) == DImode)
10153 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10154
10155 if (flag_expensive_optimizations
10156 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10157 set_once = 1;
10158
10159 if (insn == 0)
10160 {
10161 if (set_once)
10162 insn = get_last_insn_anywhere ();
10163 else
10164 return 0;
10165 }
10166
10167 while ((insn = PREV_INSN (insn)))
10168 {
10169 switch (GET_CODE (insn))
10170 {
10171 case JUMP_INSN:
10172 case NOTE:
10173 break;
10174 case CODE_LABEL:
10175 case CALL_INSN:
10176 default:
10177 if (! set_once)
10178 return 0;
10179 break;
10180 case INSN:
10181 {
10182 rtx pat = PATTERN (insn);
10183 if (GET_CODE (pat) != SET)
10184 return 0;
10185 if (rtx_equal_p (x, SET_DEST (pat)))
10186 return set_extends (insn);
10187 if (y && rtx_equal_p (y, SET_DEST (pat)))
10188 return set_extends (insn);
10189 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10190 return 0;
10191 }
10192 }
10193 }
10194 return 0;
10195 }
10196
10197 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10198 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10199
10200 const char *
10201 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10202 {
10203 static char asm_code[60];
10204
10205 /* The scratch register is only required when the destination
10206 register is not a 64-bit global or out register. */
10207 if (which_alternative != 2)
10208 operands[3] = operands[0];
10209
10210 /* We can only shift by constants <= 63. */
10211 if (GET_CODE (operands[2]) == CONST_INT)
10212 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10213
10214 if (GET_CODE (operands[1]) == CONST_INT)
10215 {
10216 output_asm_insn ("mov\t%1, %3", operands);
10217 }
10218 else
10219 {
10220 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10221 if (sparc_check_64 (operands[1], insn) <= 0)
10222 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10223 output_asm_insn ("or\t%L1, %3, %3", operands);
10224 }
10225
10226 strcpy (asm_code, opcode);
10227
10228 if (which_alternative != 2)
10229 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10230 else
10231 return
10232 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10233 }
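/* Illustrative sketch of the sequence emitted above when operand 1 is not
   a constant and the destination is not a 64-bit global/out register
   (which_alternative != 2, so %3 aliases the destination %0); OPCODE
   stands for the mnemonic passed in:

       sllx    %H1, 32, %3      ! move the high word into bits 63:32
       srl     %L1, 0, %L1      ! only if the low word is not known clean
       or      %L1, %3, %3      ! combine into one 64-bit value
       OPCODE  %0, %2, %L0      ! the actual 64-bit shift
       srlx    %L0, 32, %H0     ! split the result back into a pair  */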
10234 \f
10235 /* Output rtl to increment the profiler label LABELNO
10236 for profiling a function entry. */
10237
10238 void
10239 sparc_profile_hook (int labelno)
10240 {
10241 char buf[32];
10242 rtx lab, fun;
10243
10244 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10245 if (NO_PROFILE_COUNTERS)
10246 {
10247 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10248 }
10249 else
10250 {
10251 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10252 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10253 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10254 }
10255 }
10256 \f
10257 #ifdef TARGET_SOLARIS
10258 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10259
10260 static void
10261 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10262 tree decl ATTRIBUTE_UNUSED)
10263 {
10264 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10265 {
10266 solaris_elf_asm_comdat_section (name, flags, decl);
10267 return;
10268 }
10269
10270 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10271
10272 if (!(flags & SECTION_DEBUG))
10273 fputs (",#alloc", asm_out_file);
10274 if (flags & SECTION_WRITE)
10275 fputs (",#write", asm_out_file);
10276 if (flags & SECTION_TLS)
10277 fputs (",#tls", asm_out_file);
10278 if (flags & SECTION_CODE)
10279 fputs (",#execinstr", asm_out_file);
10280
10281 if (flags & SECTION_NOTYPE)
10282 ;
10283 else if (flags & SECTION_BSS)
10284 fputs (",#nobits", asm_out_file);
10285 else
10286 fputs (",#progbits", asm_out_file);
10287
10288 fputc ('\n', asm_out_file);
10289 }
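/* For illustration (not from the original sources): a writable TLS data
   section comes out as

       .section  ".tdata",#alloc,#write,#tls,#progbits

   while a code section gets #execinstr and a BSS-style section gets
   #nobits instead of #progbits, matching the Solaris assembler's
   section-attribute syntax.  */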
10290 #endif /* TARGET_SOLARIS */
10291
10292 /* We do not allow indirect calls to be optimized into sibling calls.
10293
10294 We cannot use sibling calls when delayed branches are disabled
10295 because they will likely require the call delay slot to be filled.
10296
10297 Also, on SPARC 32-bit we cannot emit a sibling call when the
10298 current function returns a structure. This is because the "unimp
10299 after call" convention would cause the callee to return to the
10300 wrong place. The generic code already disallows cases where the
10301 function being called returns a structure.
10302
10303 It may seem strange how this last case could occur. Usually there
10304 is code after the call which jumps to epilogue code which dumps the
10305 return value into the struct return area. That ought to invalidate
10306 the sibling call right? Well, in the C++ case we can end up passing
10307 the pointer to the struct return area to a constructor (which returns
10308 void) and then nothing else happens. Such a sibling call would look
10309 valid without the added check here.
10310
10311 VxWorks PIC PLT entries require the global pointer to be initialized
10312 on entry. We therefore can't emit sibling calls to them. */
10313 static bool
10314 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10315 {
10316 return (decl
10317 && flag_delayed_branch
10318 && (TARGET_ARCH64 || ! cfun->returns_struct)
10319 && !(TARGET_VXWORKS_RTP
10320 && flag_pic
10321 && !targetm.binds_local_p (decl)));
10322 }
10323 \f
10324 /* libfunc renaming. */
10325
10326 static void
10327 sparc_init_libfuncs (void)
10328 {
10329 if (TARGET_ARCH32)
10330 {
10331 /* Use the subroutines that Sun's library provides for integer
10332 multiply and divide. The `*' prevents an underscore from
10333 being prepended by the compiler. .umul is a little faster
10334 than .mul. */
10335 set_optab_libfunc (smul_optab, SImode, "*.umul");
10336 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10337 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10338 set_optab_libfunc (smod_optab, SImode, "*.rem");
10339 set_optab_libfunc (umod_optab, SImode, "*.urem");
10340
10341 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10342 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10343 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10344 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10345 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10346 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10347
10348 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10349 is because with soft-float, the SFmode and DFmode sqrt
10350 instructions will be absent, and the compiler will notice and
10351 try to use the TFmode sqrt instruction for calls to the
10352 builtin function sqrt, but this fails. */
10353 if (TARGET_FPU)
10354 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10355
10356 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10357 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10358 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10359 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10360 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10361 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10362
10363 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10364 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10365 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10366 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10367
10368 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10369 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10370 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10371 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10372
10373 if (DITF_CONVERSION_LIBFUNCS)
10374 {
10375 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10376 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10377 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10378 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10379 }
10380
10381 if (SUN_CONVERSION_LIBFUNCS)
10382 {
10383 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10384 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10385 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10386 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10387 }
10388 }
10389 if (TARGET_ARCH64)
10390 {
10391 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10392 do not exist in the library. Make sure the compiler does not
10393 emit calls to them by accident. (It should always use the
10394 hardware instructions.) */
10395 set_optab_libfunc (smul_optab, SImode, 0);
10396 set_optab_libfunc (sdiv_optab, SImode, 0);
10397 set_optab_libfunc (udiv_optab, SImode, 0);
10398 set_optab_libfunc (smod_optab, SImode, 0);
10399 set_optab_libfunc (umod_optab, SImode, 0);
10400
10401 if (SUN_INTEGER_MULTIPLY_64)
10402 {
10403 set_optab_libfunc (smul_optab, DImode, "__mul64");
10404 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10405 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10406 set_optab_libfunc (smod_optab, DImode, "__rem64");
10407 set_optab_libfunc (umod_optab, DImode, "__urem64");
10408 }
10409
10410 if (SUN_CONVERSION_LIBFUNCS)
10411 {
10412 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10413 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10414 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10415 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10416 }
10417 }
10418 }
10419 \f
10420 /* SPARC builtins. */
10421 enum sparc_builtins
10422 {
10423 /* FPU builtins. */
10424 SPARC_BUILTIN_LDFSR,
10425 SPARC_BUILTIN_STFSR,
10426
10427 /* VIS 1.0 builtins. */
10428 SPARC_BUILTIN_FPACK16,
10429 SPARC_BUILTIN_FPACK32,
10430 SPARC_BUILTIN_FPACKFIX,
10431 SPARC_BUILTIN_FEXPAND,
10432 SPARC_BUILTIN_FPMERGE,
10433 SPARC_BUILTIN_FMUL8X16,
10434 SPARC_BUILTIN_FMUL8X16AU,
10435 SPARC_BUILTIN_FMUL8X16AL,
10436 SPARC_BUILTIN_FMUL8SUX16,
10437 SPARC_BUILTIN_FMUL8ULX16,
10438 SPARC_BUILTIN_FMULD8SUX16,
10439 SPARC_BUILTIN_FMULD8ULX16,
10440 SPARC_BUILTIN_FALIGNDATAV4HI,
10441 SPARC_BUILTIN_FALIGNDATAV8QI,
10442 SPARC_BUILTIN_FALIGNDATAV2SI,
10443 SPARC_BUILTIN_FALIGNDATADI,
10444 SPARC_BUILTIN_WRGSR,
10445 SPARC_BUILTIN_RDGSR,
10446 SPARC_BUILTIN_ALIGNADDR,
10447 SPARC_BUILTIN_ALIGNADDRL,
10448 SPARC_BUILTIN_PDIST,
10449 SPARC_BUILTIN_EDGE8,
10450 SPARC_BUILTIN_EDGE8L,
10451 SPARC_BUILTIN_EDGE16,
10452 SPARC_BUILTIN_EDGE16L,
10453 SPARC_BUILTIN_EDGE32,
10454 SPARC_BUILTIN_EDGE32L,
10455 SPARC_BUILTIN_FCMPLE16,
10456 SPARC_BUILTIN_FCMPLE32,
10457 SPARC_BUILTIN_FCMPNE16,
10458 SPARC_BUILTIN_FCMPNE32,
10459 SPARC_BUILTIN_FCMPGT16,
10460 SPARC_BUILTIN_FCMPGT32,
10461 SPARC_BUILTIN_FCMPEQ16,
10462 SPARC_BUILTIN_FCMPEQ32,
10463 SPARC_BUILTIN_FPADD16,
10464 SPARC_BUILTIN_FPADD16S,
10465 SPARC_BUILTIN_FPADD32,
10466 SPARC_BUILTIN_FPADD32S,
10467 SPARC_BUILTIN_FPSUB16,
10468 SPARC_BUILTIN_FPSUB16S,
10469 SPARC_BUILTIN_FPSUB32,
10470 SPARC_BUILTIN_FPSUB32S,
10471 SPARC_BUILTIN_ARRAY8,
10472 SPARC_BUILTIN_ARRAY16,
10473 SPARC_BUILTIN_ARRAY32,
10474
10475 /* VIS 2.0 builtins. */
10476 SPARC_BUILTIN_EDGE8N,
10477 SPARC_BUILTIN_EDGE8LN,
10478 SPARC_BUILTIN_EDGE16N,
10479 SPARC_BUILTIN_EDGE16LN,
10480 SPARC_BUILTIN_EDGE32N,
10481 SPARC_BUILTIN_EDGE32LN,
10482 SPARC_BUILTIN_BMASK,
10483 SPARC_BUILTIN_BSHUFFLEV4HI,
10484 SPARC_BUILTIN_BSHUFFLEV8QI,
10485 SPARC_BUILTIN_BSHUFFLEV2SI,
10486 SPARC_BUILTIN_BSHUFFLEDI,
10487
10488 /* VIS 3.0 builtins. */
10489 SPARC_BUILTIN_CMASK8,
10490 SPARC_BUILTIN_CMASK16,
10491 SPARC_BUILTIN_CMASK32,
10492 SPARC_BUILTIN_FCHKSM16,
10493 SPARC_BUILTIN_FSLL16,
10494 SPARC_BUILTIN_FSLAS16,
10495 SPARC_BUILTIN_FSRL16,
10496 SPARC_BUILTIN_FSRA16,
10497 SPARC_BUILTIN_FSLL32,
10498 SPARC_BUILTIN_FSLAS32,
10499 SPARC_BUILTIN_FSRL32,
10500 SPARC_BUILTIN_FSRA32,
10501 SPARC_BUILTIN_PDISTN,
10502 SPARC_BUILTIN_FMEAN16,
10503 SPARC_BUILTIN_FPADD64,
10504 SPARC_BUILTIN_FPSUB64,
10505 SPARC_BUILTIN_FPADDS16,
10506 SPARC_BUILTIN_FPADDS16S,
10507 SPARC_BUILTIN_FPSUBS16,
10508 SPARC_BUILTIN_FPSUBS16S,
10509 SPARC_BUILTIN_FPADDS32,
10510 SPARC_BUILTIN_FPADDS32S,
10511 SPARC_BUILTIN_FPSUBS32,
10512 SPARC_BUILTIN_FPSUBS32S,
10513 SPARC_BUILTIN_FUCMPLE8,
10514 SPARC_BUILTIN_FUCMPNE8,
10515 SPARC_BUILTIN_FUCMPGT8,
10516 SPARC_BUILTIN_FUCMPEQ8,
10517 SPARC_BUILTIN_FHADDS,
10518 SPARC_BUILTIN_FHADDD,
10519 SPARC_BUILTIN_FHSUBS,
10520 SPARC_BUILTIN_FHSUBD,
10521 SPARC_BUILTIN_FNHADDS,
10522 SPARC_BUILTIN_FNHADDD,
10523 SPARC_BUILTIN_UMULXHI,
10524 SPARC_BUILTIN_XMULX,
10525 SPARC_BUILTIN_XMULXHI,
10526
10527 /* VIS 4.0 builtins. */
10528 SPARC_BUILTIN_FPADD8,
10529 SPARC_BUILTIN_FPADDS8,
10530 SPARC_BUILTIN_FPADDUS8,
10531 SPARC_BUILTIN_FPADDUS16,
10532 SPARC_BUILTIN_FPCMPLE8,
10533 SPARC_BUILTIN_FPCMPGT8,
10534 SPARC_BUILTIN_FPCMPULE16,
10535 SPARC_BUILTIN_FPCMPUGT16,
10536 SPARC_BUILTIN_FPCMPULE32,
10537 SPARC_BUILTIN_FPCMPUGT32,
10538 SPARC_BUILTIN_FPMAX8,
10539 SPARC_BUILTIN_FPMAX16,
10540 SPARC_BUILTIN_FPMAX32,
10541 SPARC_BUILTIN_FPMAXU8,
10542 SPARC_BUILTIN_FPMAXU16,
10543 SPARC_BUILTIN_FPMAXU32,
10544 SPARC_BUILTIN_FPMIN8,
10545 SPARC_BUILTIN_FPMIN16,
10546 SPARC_BUILTIN_FPMIN32,
10547 SPARC_BUILTIN_FPMINU8,
10548 SPARC_BUILTIN_FPMINU16,
10549 SPARC_BUILTIN_FPMINU32,
10550 SPARC_BUILTIN_FPSUB8,
10551 SPARC_BUILTIN_FPSUBS8,
10552 SPARC_BUILTIN_FPSUBUS8,
10553 SPARC_BUILTIN_FPSUBUS16,
10554
10555 /* VIS 4.0B builtins. */
10556
10557 /* Note that all the DICTUNPACK* entries should be kept
10558 contiguous. */
10559 SPARC_BUILTIN_FIRST_DICTUNPACK,
10560 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10561 SPARC_BUILTIN_DICTUNPACK16,
10562 SPARC_BUILTIN_DICTUNPACK32,
10563 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10564
10565 /* Note that all the FPCMP*SHL entries should be kept
10566 contiguous. */
10567 SPARC_BUILTIN_FIRST_FPCMPSHL,
10568 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10569 SPARC_BUILTIN_FPCMPGT8SHL,
10570 SPARC_BUILTIN_FPCMPEQ8SHL,
10571 SPARC_BUILTIN_FPCMPNE8SHL,
10572 SPARC_BUILTIN_FPCMPLE16SHL,
10573 SPARC_BUILTIN_FPCMPGT16SHL,
10574 SPARC_BUILTIN_FPCMPEQ16SHL,
10575 SPARC_BUILTIN_FPCMPNE16SHL,
10576 SPARC_BUILTIN_FPCMPLE32SHL,
10577 SPARC_BUILTIN_FPCMPGT32SHL,
10578 SPARC_BUILTIN_FPCMPEQ32SHL,
10579 SPARC_BUILTIN_FPCMPNE32SHL,
10580 SPARC_BUILTIN_FPCMPULE8SHL,
10581 SPARC_BUILTIN_FPCMPUGT8SHL,
10582 SPARC_BUILTIN_FPCMPULE16SHL,
10583 SPARC_BUILTIN_FPCMPUGT16SHL,
10584 SPARC_BUILTIN_FPCMPULE32SHL,
10585 SPARC_BUILTIN_FPCMPUGT32SHL,
10586 SPARC_BUILTIN_FPCMPDE8SHL,
10587 SPARC_BUILTIN_FPCMPDE16SHL,
10588 SPARC_BUILTIN_FPCMPDE32SHL,
10589 SPARC_BUILTIN_FPCMPUR8SHL,
10590 SPARC_BUILTIN_FPCMPUR16SHL,
10591 SPARC_BUILTIN_FPCMPUR32SHL,
10592 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10593
10594 SPARC_BUILTIN_MAX
10595 };
10596
10597 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10598 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10599
10600 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10601 The instruction should require a constant operand of some sort. The
10602 function prints an error if OPVAL is not valid. */
10603
10604 static int
10605 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10606 {
10607 if (GET_CODE (opval) != CONST_INT)
10608 {
10609 error ("%qs expects a constant argument", insn_data[icode].name);
10610 return false;
10611 }
10612
10613 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10614 {
10615 error ("constant argument out of range for %qs", insn_data[icode].name);
10616 return false;
10617 }
10618 return true;
10619 }
10620
10621 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10622 function decl or NULL_TREE if the builtin was not added. */
10623
10624 static tree
10625 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10626 tree type)
10627 {
10628 tree t
10629 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10630
10631 if (t)
10632 {
10633 sparc_builtins[code] = t;
10634 sparc_builtins_icode[code] = icode;
10635 }
10636
10637 return t;
10638 }
10639
10640 /* Likewise, but also marks the function as "const". */
10641
10642 static tree
10643 def_builtin_const (const char *name, enum insn_code icode,
10644 enum sparc_builtins code, tree type)
10645 {
10646 tree t = def_builtin (name, icode, code, type);
10647
10648 if (t)
10649 TREE_READONLY (t) = 1;
10650
10651 return t;
10652 }
10653
10654 /* Implement the TARGET_INIT_BUILTINS target hook.
10655 Create builtin functions for special SPARC instructions. */
10656
10657 static void
10658 sparc_init_builtins (void)
10659 {
10660 if (TARGET_FPU)
10661 sparc_fpu_init_builtins ();
10662
10663 if (TARGET_VIS)
10664 sparc_vis_init_builtins ();
10665 }
10666
10667 /* Create builtin functions for FPU instructions. */
10668
10669 static void
10670 sparc_fpu_init_builtins (void)
10671 {
10672 tree ftype
10673 = build_function_type_list (void_type_node,
10674 build_pointer_type (unsigned_type_node), 0);
10675 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10676 SPARC_BUILTIN_LDFSR, ftype);
10677 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10678 SPARC_BUILTIN_STFSR, ftype);
10679 }
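/* Illustrative user-level usage (a sketch; the read-modify-write of the
   trap-enable mask is only an example -- TEM is the field at bits 27:23
   of the FSR):

       unsigned int fsr;
       __builtin_store_fsr (&fsr);    st %fsr to memory, i.e. read it
       fsr &= ~0x0f800000;            clear the trap-enable mask
       __builtin_load_fsr (&fsr);     ld memory into %fsr, i.e. write it
*/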
10680
10681 /* Create builtin functions for VIS instructions. */
10682
10683 static void
10684 sparc_vis_init_builtins (void)
10685 {
10686 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10687 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10688 tree v4hi = build_vector_type (intHI_type_node, 4);
10689 tree v2hi = build_vector_type (intHI_type_node, 2);
10690 tree v2si = build_vector_type (intSI_type_node, 2);
10691 tree v1si = build_vector_type (intSI_type_node, 1);
10692
10693 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10694 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10695 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10696 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10697 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10698 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10699 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10700 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10701 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10702 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10703 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10704 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10705 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10706 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10707 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10708 v8qi, v8qi,
10709 intDI_type_node, 0);
10710 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10711 v8qi, v8qi, 0);
10712 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10713 v8qi, v8qi, 0);
10714 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10715 intSI_type_node, 0);
10716 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10717 intSI_type_node, 0);
10718 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10719 intDI_type_node, 0);
10720 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10721 intDI_type_node,
10722 intDI_type_node, 0);
10723 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10724 intSI_type_node,
10725 intSI_type_node, 0);
10726 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10727 ptr_type_node,
10728 intSI_type_node, 0);
10729 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10730 ptr_type_node,
10731 intDI_type_node, 0);
10732 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10733 ptr_type_node,
10734 ptr_type_node, 0);
10735 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10736 ptr_type_node,
10737 ptr_type_node, 0);
10738 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10739 v4hi, v4hi, 0);
10740 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10741 v2si, v2si, 0);
10742 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10743 v4hi, v4hi, 0);
10744 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10745 v2si, v2si, 0);
10746 tree void_ftype_di = build_function_type_list (void_type_node,
10747 intDI_type_node, 0);
10748 tree di_ftype_void = build_function_type_list (intDI_type_node,
10749 void_type_node, 0);
10750 tree void_ftype_si = build_function_type_list (void_type_node,
10751 intSI_type_node, 0);
10752 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10753 float_type_node,
10754 float_type_node, 0);
10755 tree df_ftype_df_df = build_function_type_list (double_type_node,
10756 double_type_node,
10757 double_type_node, 0);
10758
10759 /* Packing and expanding vectors. */
10760 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10761 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10762 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10763 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10764 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10765 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10766 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10767 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10768 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10769 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10770
10771 /* Multiplications. */
10772 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10773 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10774 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10775 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10776 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10777 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10778 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10779 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10780 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10781 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10782 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10783 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10784 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10785 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10786
10787 /* Data aligning. */
10788 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10789 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10790 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10791 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10792 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10793 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10794 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10795 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10796
10797 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10798 SPARC_BUILTIN_WRGSR, void_ftype_di);
10799 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10800 SPARC_BUILTIN_RDGSR, di_ftype_void);
10801
10802 if (TARGET_ARCH64)
10803 {
10804 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10805 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10806 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10807 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10808 }
10809 else
10810 {
10811 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10812 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10813 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10814 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10815 }
10816
10817 /* Pixel distance. */
10818 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10819 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10820
10821 /* Edge handling. */
10822 if (TARGET_ARCH64)
10823 {
10824 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10825 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10826 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10827 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10828 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10829 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10830 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10831 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10832 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10833 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10834 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10835 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10836 }
10837 else
10838 {
10839 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10840 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10841 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10842 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10843 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10844 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10845 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10846 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10847 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10848 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10849 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10850 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10851 }
10852
10853 /* Pixel compare. */
10854 if (TARGET_ARCH64)
10855 {
10856 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10857 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10858 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10859 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10860 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10861 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10862 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10863 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10864 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10865 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10866 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10867 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10868 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10869 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10870 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10871 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10872 }
10873 else
10874 {
10875 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10876 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10877 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10878 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10879 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10880 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10881 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10882 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10883 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10884 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10885 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10886 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10887 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10888 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10889 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10890 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10891 }
10892
10893 /* Addition and subtraction. */
10894 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10895 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10896 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10897 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10898 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10899 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10900 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10901 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10902 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10903 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10904 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10905 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10906 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10907 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10908 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10909 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
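/* Illustrative user-level usage of one of the partitioned-add builtins
   (a sketch; the typedef is an assumption matching the v4hi vector type
   built above):

       typedef short v4hi __attribute__ ((vector_size (8)));

       v4hi add4 (v4hi a, v4hi b)
       {
         return __builtin_vis_fpadd16 (a, b);
       }

   With -mvis this compiles to a single fpadd16 instruction performing
   four 16-bit additions at once.  */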
10910
10911 /* Three-dimensional array addressing. */
10912 if (TARGET_ARCH64)
10913 {
10914 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10915 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10916 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10917 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10918 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10919 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10920 }
10921 else
10922 {
10923 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10924 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10925 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10926 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10927 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10928 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10929 }
10930
10931 if (TARGET_VIS2)
10932 {
10933 /* Edge handling. */
10934 if (TARGET_ARCH64)
10935 {
10936 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10937 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10938 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10939 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10940 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10941 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10942 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10943 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10944 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10945 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10946 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10947 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10948 }
10949 else
10950 {
10951 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10952 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10953 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10954 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10955 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10956 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10957 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10958 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10959 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10960 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10961 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10962 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10963 }
10964
10965 /* Byte mask and shuffle. */
10966 if (TARGET_ARCH64)
10967 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10968 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10969 else
10970 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10971 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10972 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10973 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10974 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10975 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10976 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10977 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10978 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10979 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10980 }
10981
10982 if (TARGET_VIS3)
10983 {
10984 if (TARGET_ARCH64)
10985 {
10986 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10987 SPARC_BUILTIN_CMASK8, void_ftype_di);
10988 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10989 SPARC_BUILTIN_CMASK16, void_ftype_di);
10990 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10991 SPARC_BUILTIN_CMASK32, void_ftype_di);
10992 }
10993 else
10994 {
10995 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10996 SPARC_BUILTIN_CMASK8, void_ftype_si);
10997 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10998 SPARC_BUILTIN_CMASK16, void_ftype_si);
10999 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11000 SPARC_BUILTIN_CMASK32, void_ftype_si);
11001 }
11002
11003 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11004 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11005
11006 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11007 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11008 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11009 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11010 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11011 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11012 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11013 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11014 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11015 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11016 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11017 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11018 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11019 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11020 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11021 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11022
11023 if (TARGET_ARCH64)
11024 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11025 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11026 else
11027 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11028 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11029
11030 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11031 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11032 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11033 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11034 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11035 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11036
11037 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11038 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11039 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11040 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11041 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11042 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11043 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11044 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11045 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11046 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11047 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11048 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11049 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11050 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11051 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11052 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11053
11054 if (TARGET_ARCH64)
11055 {
11056 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11057 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11058 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11059 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11060 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11061 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11062 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11063 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11064 }
11065 else
11066 {
11067 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11068 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11069 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11070 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11071 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11072 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11073 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11074 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11075 }
11076
11077 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11078 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11079 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11080 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11081 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11082 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11083 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11084 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11085 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11086 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11087 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11088 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11089
11090 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11091 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11092 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11093 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11094 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11095 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11096 }
11097
11098 if (TARGET_VIS4)
11099 {
11100 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11101 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11102 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11103 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11104 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11105 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11106 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11107 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11108
11109
11110 if (TARGET_ARCH64)
11111 {
11112 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11113 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11114 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11115 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11116 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11117 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11118 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11119 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11120 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11121 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11122 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11123 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11124 }
11125 else
11126 {
11127 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11128 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11129 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11130 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11131 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11132 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11133 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11134 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11135 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11136 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11137 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11138 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11139 }
11140
11141 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11142 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11143 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11144 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11145 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11146 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11147 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11148 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11149 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11150 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11151 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11152 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11153 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11154 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11155 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11156 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11157 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11158 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11159 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11160 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11161 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11162 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11163 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11164 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11165 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11166 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11167 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11168 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11169 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11170 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11171 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11172 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11173 }
11174
11175 if (TARGET_VIS4B)
11176 {
11177 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11178 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11179 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11180 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11181 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11182 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11183
11184 if (TARGET_ARCH64)
11185 {
11186 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11187 v8qi, v8qi,
11188 intSI_type_node, 0);
11189 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11190 v4hi, v4hi,
11191 intSI_type_node, 0);
11192 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11193 v2si, v2si,
11194 intSI_type_node, 0);
11195
11196 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11197 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11198 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11199 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11200 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11201 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11202 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11203 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11204
11205 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11206 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11207 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11208 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11209 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11210 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11211 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11212 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11213
11214 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11215 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11216 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11217 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11218 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11219 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11220 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11221 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11222
11223
11224 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11225 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11226 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11227 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11228
11229 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11230 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11231 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11232 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11233
11234 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11235 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11236 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11237 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11238
11239 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11240 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11241 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11242 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11243 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11244 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11245
11246 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11247 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11248 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11249 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11250 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11251 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11252
11253 }
11254 else
11255 {
11256 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11257 v8qi, v8qi,
11258 intSI_type_node, 0);
11259 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11260 v4hi, v4hi,
11261 intSI_type_node, 0);
11262 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11263 v2si, v2si,
11264 intSI_type_node, 0);
11265
11266 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11267 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11268 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11269 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11270 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11271 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11272 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11273 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11274
11275 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11276 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11277 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11278 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11279 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11280 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11281 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11282 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11283
11284 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11285 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11286 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11287 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11288 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11289 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11290 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11291 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11292
11293
11294 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11295 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11296 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11297 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11298
11299 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11300 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11301 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11302 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11303
11304 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11305 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11306 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11307 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11308
11309 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11310 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11311 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11312 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11313 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11314 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11315
11316 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11317 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11318 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11319 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11320 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11321 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11322 }
11323 }
11324 }
11325
11326 /* Implement TARGET_BUILTIN_DECL hook. */
11327
11328 static tree
11329 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11330 {
11331 if (code >= SPARC_BUILTIN_MAX)
11332 return error_mark_node;
11333
11334 return sparc_builtins[code];
11335 }
11336
11337 /* Implement TARGET_EXPAND_BUILTIN hook. */
11338
11339 static rtx
11340 sparc_expand_builtin (tree exp, rtx target,
11341 rtx subtarget ATTRIBUTE_UNUSED,
11342 machine_mode tmode ATTRIBUTE_UNUSED,
11343 int ignore ATTRIBUTE_UNUSED)
11344 {
11345 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11346 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11347 enum insn_code icode = sparc_builtins_icode[code];
11348 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11349 call_expr_arg_iterator iter;
11350 int arg_count = 0;
11351 rtx pat, op[4];
11352 tree arg;
11353
11354 if (nonvoid)
11355 {
11356 machine_mode tmode = insn_data[icode].operand[0].mode;
11357 if (!target
11358 || GET_MODE (target) != tmode
11359 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11360 op[0] = gen_reg_rtx (tmode);
11361 else
11362 op[0] = target;
11363 }
11364
11365 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11366 {
11367 const struct insn_operand_data *insn_op;
11368 int idx;
11369
11370 if (arg == error_mark_node)
11371 return NULL_RTX;
11372
11373 arg_count++;
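/* Operand 0 of the insn is the destination only when the builtin returns
a value, so argument number N maps to operand N for non-void builtins and
to operand N - 1 for void ones. */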
11374 idx = arg_count - !nonvoid;
11375 insn_op = &insn_data[icode].operand[idx];
11376 op[arg_count] = expand_normal (arg);
11377
11378 /* Some of the builtins require constant arguments. We check
11379 for this here. */
11380 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11381 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11382 && arg_count == 3)
11383 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11384 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11385 && arg_count == 2))
11386 {
11387 if (!check_constant_argument (icode, idx, op[arg_count]))
11388 return const0_rtx;
11389 }
11390
11391 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11392 {
11393 if (!address_operand (op[arg_count], SImode))
11394 {
11395 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11396 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11397 }
11398 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11399 }
11400
11401 else if (insn_op->mode == V1DImode
11402 && GET_MODE (op[arg_count]) == DImode)
11403 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11404
11405 else if (insn_op->mode == V1SImode
11406 && GET_MODE (op[arg_count]) == SImode)
11407 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11408
11409 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11410 insn_op->mode))
11411 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11412 }
11413
11414 switch (arg_count)
11415 {
11416 case 0:
11417 pat = GEN_FCN (icode) (op[0]);
11418 break;
11419 case 1:
11420 if (nonvoid)
11421 pat = GEN_FCN (icode) (op[0], op[1]);
11422 else
11423 pat = GEN_FCN (icode) (op[1]);
11424 break;
11425 case 2:
11426 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11427 break;
11428 case 3:
11429 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11430 break;
11431 default:
11432 gcc_unreachable ();
11433 }
11434
11435 if (!pat)
11436 return NULL_RTX;
11437
11438 emit_insn (pat);
11439
11440 return (nonvoid ? op[0] : const0_rtx);
11441 }
11442
11443 /* Return the upper 16 bits of the 8x16 multiplication. */
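/* For instance, e8 = 1 and e16 = 384 give the product 384, whose rounded
upper bits are (384 + 128) / 256 = 2, i.e. 1.5 rounded to nearest, whereas
plain truncation of 384 / 256 would give 1. */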
11444
11445 static int
11446 sparc_vis_mul8x16 (int e8, int e16)
11447 {
11448 return (e8 * e16 + 128) / 256;
11449 }
11450
11451 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11452 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11453
11454 static void
11455 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11456 tree inner_type, tree cst0, tree cst1)
11457 {
11458 unsigned i, num = VECTOR_CST_NELTS (cst0);
11459 int scale;
11460
11461 switch (fncode)
11462 {
11463 case SPARC_BUILTIN_FMUL8X16:
11464 for (i = 0; i < num; ++i)
11465 {
11466 int val
11467 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11468 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11469 n_elts->quick_push (build_int_cst (inner_type, val));
11470 }
11471 break;
11472
11473 case SPARC_BUILTIN_FMUL8X16AU:
11474 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11475
11476 for (i = 0; i < num; ++i)
11477 {
11478 int val
11479 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11480 scale);
11481 n_elts->quick_push (build_int_cst (inner_type, val));
11482 }
11483 break;
11484
11485 case SPARC_BUILTIN_FMUL8X16AL:
11486 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11487
11488 for (i = 0; i < num; ++i)
11489 {
11490 int val
11491 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11492 scale);
11493 n_elts->quick_push (build_int_cst (inner_type, val));
11494 }
11495 break;
11496
11497 default:
11498 gcc_unreachable ();
11499 }
11500 }
11501
11502 /* Implement TARGET_FOLD_BUILTIN hook.
11503
11504 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11505 result of the function call is ignored. NULL_TREE is returned if the
11506 function could not be folded. */
11507
11508 static tree
11509 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11510 tree *args, bool ignore)
11511 {
11512 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11513 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11514 tree arg0, arg1, arg2;
11515
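/* A call whose result is unused can be folded to zero unless the builtin
has side effects beyond its return value; the builtins listed below read
or write the %fsr, the GSR or memory and therefore must be kept. */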
11516 if (ignore)
11517 switch (code)
11518 {
11519 case SPARC_BUILTIN_LDFSR:
11520 case SPARC_BUILTIN_STFSR:
11521 case SPARC_BUILTIN_ALIGNADDR:
11522 case SPARC_BUILTIN_WRGSR:
11523 case SPARC_BUILTIN_BMASK:
11524 case SPARC_BUILTIN_CMASK8:
11525 case SPARC_BUILTIN_CMASK16:
11526 case SPARC_BUILTIN_CMASK32:
11527 break;
11528
11529 default:
11530 return build_zero_cst (rtype);
11531 }
11532
11533 switch (code)
11534 {
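/* FEXPAND widens each unsigned 8-bit element to a 16-bit value shifted
left by 4 bits; the constant folding below mirrors that. */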
11535 case SPARC_BUILTIN_FEXPAND:
11536 arg0 = args[0];
11537 STRIP_NOPS (arg0);
11538
11539 if (TREE_CODE (arg0) == VECTOR_CST)
11540 {
11541 tree inner_type = TREE_TYPE (rtype);
11542 unsigned i;
11543
11544 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11545 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11546 {
11547 unsigned HOST_WIDE_INT val
11548 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11549 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11550 }
11551 return build_vector (rtype, n_elts);
11552 }
11553 break;
11554
11555 case SPARC_BUILTIN_FMUL8X16:
11556 case SPARC_BUILTIN_FMUL8X16AU:
11557 case SPARC_BUILTIN_FMUL8X16AL:
11558 arg0 = args[0];
11559 arg1 = args[1];
11560 STRIP_NOPS (arg0);
11561 STRIP_NOPS (arg1);
11562
11563 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11564 {
11565 tree inner_type = TREE_TYPE (rtype);
11566 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11567 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11568 return build_vector (rtype, n_elts);
11569 }
11570 break;
11571
11572 case SPARC_BUILTIN_FPMERGE:
11573 arg0 = args[0];
11574 arg1 = args[1];
11575 STRIP_NOPS (arg0);
11576 STRIP_NOPS (arg1);
11577
11578 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11579 {
11580 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11581 unsigned i;
11582 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11583 {
11584 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11585 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11586 }
11587
11588 return build_vector (rtype, n_elts);
11589 }
11590 break;
11591
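/* PDIST sums the absolute differences of the eight byte elements and adds
the result to its third (accumulator) argument; PDISTN does the same
starting from zero. E.g. for element pairs (1,4) and (5,2) with an
accumulator of 10, the folded value is 10 + |1-4| + |5-2| = 16. */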
11592 case SPARC_BUILTIN_PDIST:
11593 case SPARC_BUILTIN_PDISTN:
11594 arg0 = args[0];
11595 arg1 = args[1];
11596 STRIP_NOPS (arg0);
11597 STRIP_NOPS (arg1);
11598 if (code == SPARC_BUILTIN_PDIST)
11599 {
11600 arg2 = args[2];
11601 STRIP_NOPS (arg2);
11602 }
11603 else
11604 arg2 = integer_zero_node;
11605
11606 if (TREE_CODE (arg0) == VECTOR_CST
11607 && TREE_CODE (arg1) == VECTOR_CST
11608 && TREE_CODE (arg2) == INTEGER_CST)
11609 {
11610 bool overflow = false;
11611 widest_int result = wi::to_widest (arg2);
11612 widest_int tmp;
11613 unsigned i;
11614
11615 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11616 {
11617 tree e0 = VECTOR_CST_ELT (arg0, i);
11618 tree e1 = VECTOR_CST_ELT (arg1, i);
11619
11620 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11621
11622 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11623 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11624 if (wi::neg_p (tmp))
11625 tmp = wi::neg (tmp, &neg2_ovf);
11626 else
11627 neg2_ovf = false;
11628 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11629 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11630 }
11631
11632 gcc_assert (!overflow);
11633
11634 return wide_int_to_tree (rtype, result);
11635 }
11636
11637 default:
11638 break;
11639 }
11640
11641 return NULL_TREE;
11642 }
11643 \f
11644 /* ??? This duplicates information provided to the compiler by the
11645 ??? scheduler description. Some day, teach genautomata to output
11646 ??? the latencies and then CSE will just use that. */
11647
11648 static bool
11649 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11650 int opno ATTRIBUTE_UNUSED,
11651 int *total, bool speed ATTRIBUTE_UNUSED)
11652 {
11653 int code = GET_CODE (x);
11654 bool float_mode_p = FLOAT_MODE_P (mode);
11655
11656 switch (code)
11657 {
11658 case CONST_INT:
11659 if (SMALL_INT (x))
11660 *total = 0;
11661 else
11662 *total = 2;
11663 return true;
11664
11665 case CONST_WIDE_INT:
11666 *total = 0;
11667 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11668 *total += 2;
11669 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11670 *total += 2;
11671 return true;
11672
11673 case HIGH:
11674 *total = 2;
11675 return true;
11676
11677 case CONST:
11678 case LABEL_REF:
11679 case SYMBOL_REF:
11680 *total = 4;
11681 return true;
11682
11683 case CONST_DOUBLE:
11684 *total = 8;
11685 return true;
11686
11687 case MEM:
11688 /* If outer-code was a sign or zero extension, a cost
11689 of COSTS_N_INSNS (1) was already added in. This is
11690 why we are subtracting it back out. */
11691 if (outer_code == ZERO_EXTEND)
11692 {
11693 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11694 }
11695 else if (outer_code == SIGN_EXTEND)
11696 {
11697 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11698 }
11699 else if (float_mode_p)
11700 {
11701 *total = sparc_costs->float_load;
11702 }
11703 else
11704 {
11705 *total = sparc_costs->int_load;
11706 }
11707
11708 return true;
11709
11710 case PLUS:
11711 case MINUS:
11712 if (float_mode_p)
11713 *total = sparc_costs->float_plusminus;
11714 else
11715 *total = COSTS_N_INSNS (1);
11716 return false;
11717
11718 case FMA:
11719 {
11720 rtx sub;
11721
11722 gcc_assert (float_mode_p);
11723 *total = sparc_costs->float_mul;
11724
11725 sub = XEXP (x, 0);
11726 if (GET_CODE (sub) == NEG)
11727 sub = XEXP (sub, 0);
11728 *total += rtx_cost (sub, mode, FMA, 0, speed);
11729
11730 sub = XEXP (x, 2);
11731 if (GET_CODE (sub) == NEG)
11732 sub = XEXP (sub, 0);
11733 *total += rtx_cost (sub, mode, FMA, 2, speed);
11734 return true;
11735 }
11736
11737 case MULT:
11738 if (float_mode_p)
11739 *total = sparc_costs->float_mul;
11740 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11741 *total = COSTS_N_INSNS (25);
11742 else
11743 {
11744 int bit_cost;
11745
11746 bit_cost = 0;
11747 if (sparc_costs->int_mul_bit_factor)
11748 {
11749 int nbits;
11750
11751 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11752 {
11753 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
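/* Count the bits set in the constant: value &= value - 1 clears the
lowest set bit on each iteration, so the loop runs once per set bit. */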
11754 for (nbits = 0; value != 0; value &= value - 1)
11755 nbits++;
11756 }
11757 else
11758 nbits = 7;
11759
11760 if (nbits < 3)
11761 nbits = 3;
11762 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11763 bit_cost = COSTS_N_INSNS (bit_cost);
11764 }
11765
11766 if (mode == DImode || !TARGET_HARD_MUL)
11767 *total = sparc_costs->int_mulX + bit_cost;
11768 else
11769 *total = sparc_costs->int_mul + bit_cost;
11770 }
11771 return false;
11772
11773 case ASHIFT:
11774 case ASHIFTRT:
11775 case LSHIFTRT:
11776 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11777 return false;
11778
11779 case DIV:
11780 case UDIV:
11781 case MOD:
11782 case UMOD:
11783 if (float_mode_p)
11784 {
11785 if (mode == DFmode)
11786 *total = sparc_costs->float_div_df;
11787 else
11788 *total = sparc_costs->float_div_sf;
11789 }
11790 else
11791 {
11792 if (mode == DImode)
11793 *total = sparc_costs->int_divX;
11794 else
11795 *total = sparc_costs->int_div;
11796 }
11797 return false;
11798
11799 case NEG:
11800 if (! float_mode_p)
11801 {
11802 *total = COSTS_N_INSNS (1);
11803 return false;
11804 }
11805 /* FALLTHRU */
11806
11807 case ABS:
11808 case FLOAT:
11809 case UNSIGNED_FLOAT:
11810 case FIX:
11811 case UNSIGNED_FIX:
11812 case FLOAT_EXTEND:
11813 case FLOAT_TRUNCATE:
11814 *total = sparc_costs->float_move;
11815 return false;
11816
11817 case SQRT:
11818 if (mode == DFmode)
11819 *total = sparc_costs->float_sqrt_df;
11820 else
11821 *total = sparc_costs->float_sqrt_sf;
11822 return false;
11823
11824 case COMPARE:
11825 if (float_mode_p)
11826 *total = sparc_costs->float_cmp;
11827 else
11828 *total = COSTS_N_INSNS (1);
11829 return false;
11830
11831 case IF_THEN_ELSE:
11832 if (float_mode_p)
11833 *total = sparc_costs->float_cmove;
11834 else
11835 *total = sparc_costs->int_cmove;
11836 return false;
11837
11838 case IOR:
11839 /* Handle the NAND vector patterns. */
11840 if (sparc_vector_mode_supported_p (mode)
11841 && GET_CODE (XEXP (x, 0)) == NOT
11842 && GET_CODE (XEXP (x, 1)) == NOT)
11843 {
11844 *total = COSTS_N_INSNS (1);
11845 return true;
11846 }
11847 else
11848 return false;
11849
11850 default:
11851 return false;
11852 }
11853 }
11854
11855 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11856
11857 static inline bool
11858 general_or_i64_p (reg_class_t rclass)
11859 {
11860 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11861 }
11862
11863 /* Implement TARGET_REGISTER_MOVE_COST. */
11864
11865 static int
11866 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11867 reg_class_t from, reg_class_t to)
11868 {
11869 bool need_memory = false;
11870
11871 /* This helps postreload CSE to eliminate redundant comparisons. */
11872 if (from == NO_REGS || to == NO_REGS)
11873 return 100;
11874
11875 if (from == FPCC_REGS || to == FPCC_REGS)
11876 need_memory = true;
11877 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11878 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11879 {
11880 if (TARGET_VIS3)
11881 {
11882 int size = GET_MODE_SIZE (mode);
11883 if (size == 8 || size == 4)
11884 {
11885 if (! TARGET_ARCH32 || size == 4)
11886 return 4;
11887 else
11888 return 6;
11889 }
11890 }
11891 need_memory = true;
11892 }
11893
11894 if (need_memory)
11895 {
11896 if (sparc_cpu == PROCESSOR_ULTRASPARC
11897 || sparc_cpu == PROCESSOR_ULTRASPARC3
11898 || sparc_cpu == PROCESSOR_NIAGARA
11899 || sparc_cpu == PROCESSOR_NIAGARA2
11900 || sparc_cpu == PROCESSOR_NIAGARA3
11901 || sparc_cpu == PROCESSOR_NIAGARA4
11902 || sparc_cpu == PROCESSOR_NIAGARA7
11903 || sparc_cpu == PROCESSOR_M8)
11904 return 12;
11905
11906 return 6;
11907 }
11908
11909 return 2;
11910 }
11911
11912 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11913 This is achieved by means of a manual dynamic stack space allocation in
11914 the current frame. We make the assumption that SEQ doesn't contain any
11915 function calls, with the possible exception of calls to the GOT helper. */
11916
11917 static void
11918 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11919 {
11920 /* We must preserve the lowest 16 words for the register save area. */
11921 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11922 /* We really need only 2 words of fresh stack space. */
11923 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11924
11925 rtx slot
11926 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11927 SPARC_STACK_BIAS + offset));
11928
11929 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11930 emit_insn (gen_rtx_SET (slot, reg));
11931 if (reg2)
11932 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11933 reg2));
11934 emit_insn (seq);
11935 if (reg2)
11936 emit_insn (gen_rtx_SET (reg2,
11937 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11938 emit_insn (gen_rtx_SET (reg, slot));
11939 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11940 }
11941
11942 /* Output the assembler code for a thunk function. THUNK_DECL is the
11943 declaration for the thunk function itself, FUNCTION is the decl for
11944 the target function. DELTA is an immediate constant offset to be
11945 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11946 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
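/* Such thunks arise for instance from C++ virtual functions overridden in
a class with several bases, e.g. (purely illustrative):

struct A { virtual void f (); };
struct B { virtual void g (); };
struct C : A, B { void g (); };

Calling g through a B * that points to a C object goes via a thunk that
adjusts the incoming this pointer from the B subobject to the enclosing C
object (the DELTA) before tail-calling C::g; virtual bases additionally
need the VCALL_OFFSET adjustment loaded from the vtable. */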
11947
11948 static void
11949 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11950 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11951 tree function)
11952 {
11953 rtx this_rtx, funexp;
11954 rtx_insn *insn;
11955 unsigned int int_arg_first;
11956
11957 reload_completed = 1;
11958 epilogue_completed = 1;
11959
11960 emit_note (NOTE_INSN_PROLOGUE_END);
11961
11962 if (TARGET_FLAT)
11963 {
11964 sparc_leaf_function_p = 1;
11965
11966 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11967 }
11968 else if (flag_delayed_branch)
11969 {
11970 /* We will emit a regular sibcall below, so we need to instruct
11971 output_sibcall that we are in a leaf function. */
11972 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11973
11974 /* This will cause final.c to invoke leaf_renumber_regs so we
11975 must behave as if we were in a not-yet-leafified function. */
11976 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11977 }
11978 else
11979 {
11980 /* We will emit the sibcall manually below, so we will need to
11981 manually spill non-leaf registers. */
11982 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11983
11984 /* We really are in a leaf function. */
11985 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11986 }
11987
11988 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11989 returns a structure, the structure return pointer is there instead. */
11990 if (TARGET_ARCH64
11991 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11992 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11993 else
11994 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11995
11996 /* Add DELTA. When possible use a plain add, otherwise load it into
11997 a register first. */
11998 if (delta)
11999 {
12000 rtx delta_rtx = GEN_INT (delta);
12001
12002 if (! SPARC_SIMM13_P (delta))
12003 {
12004 rtx scratch = gen_rtx_REG (Pmode, 1);
12005 emit_move_insn (scratch, delta_rtx);
12006 delta_rtx = scratch;
12007 }
12008
12009 /* THIS_RTX += DELTA. */
12010 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12011 }
12012
12013 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12014 if (vcall_offset)
12015 {
12016 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12017 rtx scratch = gen_rtx_REG (Pmode, 1);
12018
12019 gcc_assert (vcall_offset < 0);
12020
12021 /* SCRATCH = *THIS_RTX. */
12022 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12023
12024 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12025 may not have any available scratch register at this point. */
12026 if (SPARC_SIMM13_P (vcall_offset))
12027 ;
12028 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12029 else if (! fixed_regs[5]
12030 /* The below sequence is made up of at least 2 insns,
12031 while the default method may need only one. */
12032 && vcall_offset < -8192)
12033 {
12034 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12035 emit_move_insn (scratch2, vcall_offset_rtx);
12036 vcall_offset_rtx = scratch2;
12037 }
12038 else
12039 {
12040 rtx increment = GEN_INT (-4096);
12041
12042 /* VCALL_OFFSET is a negative number whose typical range can be
12043 estimated as -32768..0 in 32-bit mode. In almost all cases
12044 it is therefore cheaper to emit multiple add insns than
12045 spilling and loading the constant into a register (at least
12046 6 insns). */
12047 while (! SPARC_SIMM13_P (vcall_offset))
12048 {
12049 emit_insn (gen_add2_insn (scratch, increment));
12050 vcall_offset += 4096;
12051 }
12052 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12053 }
12054
12055 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12056 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12057 gen_rtx_PLUS (Pmode,
12058 scratch,
12059 vcall_offset_rtx)));
12060
12061 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12062 emit_insn (gen_add2_insn (this_rtx, scratch));
12063 }
12064
12065 /* Generate a tail call to the target function. */
12066 if (! TREE_USED (function))
12067 {
12068 assemble_external (function);
12069 TREE_USED (function) = 1;
12070 }
12071 funexp = XEXP (DECL_RTL (function), 0);
12072
12073 if (flag_delayed_branch)
12074 {
12075 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12076 insn = emit_call_insn (gen_sibcall (funexp));
12077 SIBLING_CALL_P (insn) = 1;
12078 }
12079 else
12080 {
12081 /* The hoops we have to jump through in order to generate a sibcall
12082 without using delay slots... */
12083 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12084
12085 if (flag_pic)
12086 {
12087 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12088 start_sequence ();
12089 load_got_register (); /* clobbers %o7 */
12090 scratch = sparc_legitimize_pic_address (funexp, scratch);
12091 seq = get_insns ();
12092 end_sequence ();
12093 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12094 }
12095 else if (TARGET_ARCH32)
12096 {
12097 emit_insn (gen_rtx_SET (scratch,
12098 gen_rtx_HIGH (SImode, funexp)));
12099 emit_insn (gen_rtx_SET (scratch,
12100 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12101 }
12102 else /* TARGET_ARCH64 */
12103 {
12104 switch (sparc_cmodel)
12105 {
12106 case CM_MEDLOW:
12107 case CM_MEDMID:
12108 /* The destination can serve as a temporary. */
12109 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12110 break;
12111
12112 case CM_MEDANY:
12113 case CM_EMBMEDANY:
12114 /* The destination cannot serve as a temporary. */
12115 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12116 start_sequence ();
12117 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12118 seq = get_insns ();
12119 end_sequence ();
12120 emit_and_preserve (seq, spill_reg, 0);
12121 break;
12122
12123 default:
12124 gcc_unreachable ();
12125 }
12126 }
12127
12128 emit_jump_insn (gen_indirect_jump (scratch));
12129 }
12130
12131 emit_barrier ();
12132
12133 /* Run just enough of rest_of_compilation to get the insns emitted.
12134 There's not really enough bulk here to make other passes such as
12135 instruction scheduling worth while. Note that use_thunk calls
12136 assemble_start_function and assemble_end_function. */
12137 insn = get_insns ();
12138 shorten_branches (insn);
12139 final_start_function (insn, file, 1);
12140 final (insn, file, 1);
12141 final_end_function ();
12142
12143 reload_completed = 0;
12144 epilogue_completed = 0;
12145 }
12146
12147 /* Return true if sparc_output_mi_thunk would be able to output the
12148 assembler code for the thunk function specified by the arguments
12149 it is passed, and false otherwise. */
12150 static bool
12151 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12152 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12153 HOST_WIDE_INT vcall_offset,
12154 const_tree function ATTRIBUTE_UNUSED)
12155 {
12156 /* Bound the loop used in the default method above. */
12157 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12158 }
12159
12160 /* How to allocate a 'struct machine_function'. */
12161
12162 static struct machine_function *
12163 sparc_init_machine_status (void)
12164 {
12165 return ggc_cleared_alloc<machine_function> ();
12166 }
12167
12168 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12169 We need to emit DTP-relative relocations. */
12170
12171 static void
12172 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12173 {
12174 switch (size)
12175 {
12176 case 4:
12177 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12178 break;
12179 case 8:
12180 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12181 break;
12182 default:
12183 gcc_unreachable ();
12184 }
12185 output_addr_const (file, x);
12186 fputs (")", file);
12187 }
12188
12189 /* Do whatever processing is required at the end of a file. */
12190
12191 static void
12192 sparc_file_end (void)
12193 {
12194 /* If we need to emit the special GOT helper function, do so now. */
12195 if (got_helper_rtx)
12196 {
12197 const char *name = XSTR (got_helper_rtx, 0);
12198 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12199 #ifdef DWARF2_UNWIND_INFO
12200 bool do_cfi;
12201 #endif
12202
12203 if (USE_HIDDEN_LINKONCE)
12204 {
12205 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12206 get_identifier (name),
12207 build_function_type_list (void_type_node,
12208 NULL_TREE));
12209 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12210 NULL_TREE, void_type_node);
12211 TREE_PUBLIC (decl) = 1;
12212 TREE_STATIC (decl) = 1;
12213 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12214 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12215 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12216 resolve_unique_section (decl, 0, flag_function_sections);
12217 allocate_struct_function (decl, true);
12218 cfun->is_thunk = 1;
12219 current_function_decl = decl;
12220 init_varasm_status ();
12221 assemble_start_function (decl, name);
12222 }
12223 else
12224 {
12225 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12226 switch_to_section (text_section);
12227 if (align > 0)
12228 ASM_OUTPUT_ALIGN (asm_out_file, align);
12229 ASM_OUTPUT_LABEL (asm_out_file, name);
12230 }
12231
12232 #ifdef DWARF2_UNWIND_INFO
12233 do_cfi = dwarf2out_do_cfi_asm ();
12234 if (do_cfi)
12235 fprintf (asm_out_file, "\t.cfi_startproc\n");
12236 #endif
12237 if (flag_delayed_branch)
12238 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12239 reg_name, reg_name);
12240 else
12241 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12242 reg_name, reg_name);
12243 #ifdef DWARF2_UNWIND_INFO
12244 if (do_cfi)
12245 fprintf (asm_out_file, "\t.cfi_endproc\n");
12246 #endif
12247 }
12248
12249 if (NEED_INDICATE_EXEC_STACK)
12250 file_end_indicate_exec_stack ();
12251
12252 #ifdef TARGET_SOLARIS
12253 solaris_file_end ();
12254 #endif
12255 }
12256
12257 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12258 /* Implement TARGET_MANGLE_TYPE. */
12259
12260 static const char *
12261 sparc_mangle_type (const_tree type)
12262 {
12263 if (TARGET_ARCH32
12264 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12265 && TARGET_LONG_DOUBLE_128)
12266 return "g";
12267
12268 /* For all other types, use normal C++ mangling. */
12269 return NULL;
12270 }
12271 #endif
12272
12273 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12274 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12275 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
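/* For example, the compare-and-swap expander below emits
sparc_emit_membar_for_model (model, 3, 1) for the barrier that precedes an
operation with both load and store semantics, and (model, 3, 2) for the
barrier that follows it. */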
12276
12277 void
12278 sparc_emit_membar_for_model (enum memmodel model,
12279 int load_store, int before_after)
12280 {
12281 /* Bits for the MEMBAR mmask field. */
12282 const int LoadLoad = 1;
12283 const int StoreLoad = 2;
12284 const int LoadStore = 4;
12285 const int StoreStore = 8;
12286
12287 int mm = 0, implied = 0;
12288
12289 switch (sparc_memory_model)
12290 {
12291 case SMM_SC:
12292 /* Sequential Consistency. All memory transactions are immediately
12293 visible in sequential execution order. No barriers needed. */
12294 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12295 break;
12296
12297 case SMM_TSO:
12298 /* Total Store Ordering: all memory transactions with store semantics
12299 are followed by an implied StoreStore. */
12300 implied |= StoreStore;
12301
12302 /* If we're not looking for a raw barrier (before+after), then atomic
12303 operations get the benefit of being both load and store. */
12304 if (load_store == 3 && before_after == 1)
12305 implied |= StoreLoad;
12306 /* FALLTHRU */
12307
12308 case SMM_PSO:
12309 /* Partial Store Ordering: all memory transactions with load semantics
12310 are followed by an implied LoadLoad | LoadStore. */
12311 implied |= LoadLoad | LoadStore;
12312
12313 /* If we're not looking for a raw barrier (before+after), then atomic
12314 operations get the benefit of being both load and store. */
12315 if (load_store == 3 && before_after == 2)
12316 implied |= StoreLoad | StoreStore;
12317 /* FALLTHRU */
12318
12319 case SMM_RMO:
12320 /* Relaxed Memory Ordering: no implicit bits. */
12321 break;
12322
12323 default:
12324 gcc_unreachable ();
12325 }
12326
12327 if (before_after & 1)
12328 {
12329 if (is_mm_release (model) || is_mm_acq_rel (model)
12330 || is_mm_seq_cst (model))
12331 {
12332 if (load_store & 1)
12333 mm |= LoadLoad | StoreLoad;
12334 if (load_store & 2)
12335 mm |= LoadStore | StoreStore;
12336 }
12337 }
12338 if (before_after & 2)
12339 {
12340 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12341 || is_mm_seq_cst (model))
12342 {
12343 if (load_store & 1)
12344 mm |= LoadLoad | LoadStore;
12345 if (load_store & 2)
12346 mm |= StoreLoad | StoreStore;
12347 }
12348 }
12349
12350 /* Remove the bits implied by the system memory model. */
12351 mm &= ~implied;
12352
12353 /* For raw barriers (before+after), always emit a barrier.
12354 This will become a compile-time barrier if needed. */
12355 if (mm || before_after == 3)
12356 emit_insn (gen_membar (GEN_INT (mm)));
12357 }
12358
12359 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12360 compare and swap on the word containing the byte or half-word. */
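/* This is what e.g. __atomic_compare_exchange_n on a char or short object
expands to on SPARC: the narrow value is shifted and masked into the
enclosing aligned 32-bit word, and the word-sized compare-and-swap below is
retried if the bytes outside the targeted field changed in the meantime. */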
12361
12362 static void
12363 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12364 rtx oldval, rtx newval)
12365 {
12366 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12367 rtx addr = gen_reg_rtx (Pmode);
12368 rtx off = gen_reg_rtx (SImode);
12369 rtx oldv = gen_reg_rtx (SImode);
12370 rtx newv = gen_reg_rtx (SImode);
12371 rtx oldvalue = gen_reg_rtx (SImode);
12372 rtx newvalue = gen_reg_rtx (SImode);
12373 rtx res = gen_reg_rtx (SImode);
12374 rtx resv = gen_reg_rtx (SImode);
12375 rtx memsi, val, mask, cc;
12376
12377 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12378
12379 if (Pmode != SImode)
12380 addr1 = gen_lowpart (SImode, addr1);
12381 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12382
12383 memsi = gen_rtx_MEM (SImode, addr);
12384 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12385 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12386
12387 val = copy_to_reg (memsi);
12388
12389 emit_insn (gen_rtx_SET (off,
12390 gen_rtx_XOR (SImode, off,
12391 GEN_INT (GET_MODE (mem) == QImode
12392 ? 3 : 2))));
12393
12394 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12395
12396 if (GET_MODE (mem) == QImode)
12397 mask = force_reg (SImode, GEN_INT (0xff));
12398 else
12399 mask = force_reg (SImode, GEN_INT (0xffff));
12400
12401 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12402
12403 emit_insn (gen_rtx_SET (val,
12404 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12405 val)));
12406
12407 oldval = gen_lowpart (SImode, oldval);
12408 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12409
12410 newval = gen_lowpart_common (SImode, newval);
12411 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12412
12413 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12414
12415 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12416
12417 rtx_code_label *end_label = gen_label_rtx ();
12418 rtx_code_label *loop_label = gen_label_rtx ();
12419 emit_label (loop_label);
12420
12421 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12422
12423 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12424
12425 emit_move_insn (bool_result, const1_rtx);
12426
12427 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12428
12429 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12430
12431 emit_insn (gen_rtx_SET (resv,
12432 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12433 res)));
12434
12435 emit_move_insn (bool_result, const0_rtx);
12436
12437 cc = gen_compare_reg_1 (NE, resv, val);
12438 emit_insn (gen_rtx_SET (val, resv));
12439
12440 /* Use cbranchcc4 to separate the compare and branch! */
12441 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12442 cc, const0_rtx, loop_label));
12443
12444 emit_label (end_label);
12445
12446 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12447
12448 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12449
12450 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12451 }
12452
12453 /* Expand code to perform a compare-and-swap. */
12454
12455 void
12456 sparc_expand_compare_and_swap (rtx operands[])
12457 {
12458 rtx bval, retval, mem, oldval, newval;
12459 machine_mode mode;
12460 enum memmodel model;
12461
12462 bval = operands[0];
12463 retval = operands[1];
12464 mem = operands[2];
12465 oldval = operands[3];
12466 newval = operands[4];
12467 model = (enum memmodel) INTVAL (operands[6]);
12468 mode = GET_MODE (mem);
12469
12470 sparc_emit_membar_for_model (model, 3, 1);
12471
12472 if (reg_overlap_mentioned_p (retval, oldval))
12473 oldval = copy_to_reg (oldval);
12474
12475 if (mode == QImode || mode == HImode)
12476 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12477 else
12478 {
12479 rtx (*gen) (rtx, rtx, rtx, rtx);
12480 rtx x;
12481
12482 if (mode == SImode)
12483 gen = gen_atomic_compare_and_swapsi_1;
12484 else
12485 gen = gen_atomic_compare_and_swapdi_1;
12486 emit_insn (gen (retval, mem, oldval, newval));
12487
12488 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12489 if (x != bval)
12490 convert_move (bval, x, 1);
12491 }
12492
12493 sparc_emit_membar_for_model (model, 3, 2);
12494 }
12495
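/* Turn the vector permutation selector SEL for vector mode VMODE into the
byte-index mask expected by the VIS2 bshuffle instruction; the bmask insn
emitted at the end both adds the per-byte offsets and deposits the result
in the GSR mask field that bshuffle reads. */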
12496 void
12497 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12498 {
12499 rtx t_1, t_2, t_3;
12500
12501 sel = gen_lowpart (DImode, sel);
12502 switch (vmode)
12503 {
12504 case E_V2SImode:
12505 /* inp = xxxxxxxAxxxxxxxB */
12506 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12507 NULL_RTX, 1, OPTAB_DIRECT);
12508 /* t_1 = ....xxxxxxxAxxx. */
12509 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12510 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12511 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12512 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12513 /* sel = .......B */
12514 /* t_1 = ...A.... */
12515 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12516 /* sel = ...A...B */
12517 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12518 /* sel = AAAABBBB * 4 */
12519 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12520 /* sel = { A*4, A*4+1, A*4+2, ... } */
12521 break;
12522
12523 case E_V4HImode:
12524 /* inp = xxxAxxxBxxxCxxxD */
12525 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12526 NULL_RTX, 1, OPTAB_DIRECT);
12527 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12528 NULL_RTX, 1, OPTAB_DIRECT);
12529 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12530 NULL_RTX, 1, OPTAB_DIRECT);
12531 /* t_1 = ..xxxAxxxBxxxCxx */
12532 /* t_2 = ....xxxAxxxBxxxC */
12533 /* t_3 = ......xxxAxxxBxx */
12534 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12535 GEN_INT (0x07),
12536 NULL_RTX, 1, OPTAB_DIRECT);
12537 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12538 GEN_INT (0x0700),
12539 NULL_RTX, 1, OPTAB_DIRECT);
12540 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12541 GEN_INT (0x070000),
12542 NULL_RTX, 1, OPTAB_DIRECT);
12543 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12544 GEN_INT (0x07000000),
12545 NULL_RTX, 1, OPTAB_DIRECT);
12546 /* sel = .......D */
12547 /* t_1 = .....C.. */
12548 /* t_2 = ...B.... */
12549 /* t_3 = .A...... */
12550 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12551 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12552 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12553 /* sel = .A.B.C.D */
12554 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12555 /* sel = AABBCCDD * 2 */
12556 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12557 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12558 break;
12559
12560 case E_V8QImode:
12561 /* input = xAxBxCxDxExFxGxH */
12562 sel = expand_simple_binop (DImode, AND, sel,
12563 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12564 | 0x0f0f0f0f),
12565 NULL_RTX, 1, OPTAB_DIRECT);
12566 /* sel = .A.B.C.D.E.F.G.H */
12567 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12568 NULL_RTX, 1, OPTAB_DIRECT);
12569 /* t_1 = ..A.B.C.D.E.F.G. */
12570 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12571 NULL_RTX, 1, OPTAB_DIRECT);
12572 /* sel = .AABBCCDDEEFFGGH */
12573 sel = expand_simple_binop (DImode, AND, sel,
12574 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12575 | 0xff00ff),
12576 NULL_RTX, 1, OPTAB_DIRECT);
12577 /* sel = ..AB..CD..EF..GH */
12578 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12579 NULL_RTX, 1, OPTAB_DIRECT);
12580 /* t_1 = ....AB..CD..EF.. */
12581 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12582 NULL_RTX, 1, OPTAB_DIRECT);
12583 /* sel = ..ABABCDCDEFEFGH */
12584 sel = expand_simple_binop (DImode, AND, sel,
12585 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12586 NULL_RTX, 1, OPTAB_DIRECT);
12587 /* sel = ....ABCD....EFGH */
12588 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12589 NULL_RTX, 1, OPTAB_DIRECT);
12590 /* t_1 = ........ABCD.... */
12591 sel = gen_lowpart (SImode, sel);
12592 t_1 = gen_lowpart (SImode, t_1);
12593 break;
12594
12595 default:
12596 gcc_unreachable ();
12597 }
12598
12599 /* Always perform the final addition/merge within the bmask insn. */
12600 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12601 }
12602
12603 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12604
12605 static bool
12606 sparc_frame_pointer_required (void)
12607 {
12608 /* If the stack pointer is dynamically modified in the function, it cannot
12609 serve as the frame pointer. */
12610 if (cfun->calls_alloca)
12611 return true;
12612
12613 /* If the function receives nonlocal gotos, it needs to save the frame
12614 pointer in the nonlocal_goto_save_area object. */
12615 if (cfun->has_nonlocal_label)
12616 return true;
12617
12618 /* In flat mode, that's it. */
12619 if (TARGET_FLAT)
12620 return false;
12621
12622 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12623 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12624 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12625 }
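
/* A minimal source-level sketch of the two extreme cases (the
   identifiers are purely illustrative):

     extern void consume (char *);
     void f (int n) { consume (__builtin_alloca (n)); }
     int g (int x) { return x + 1; }

   f dynamically modifies the stack pointer and therefore always needs a
   frame pointer, whereas g can be compiled as a leaf function using only
   leaf registers and thus needs no frame pointer when optimizing.  */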
12626
12627 /* The way this is structured, we can't eliminate SFP in favor of SP
12628 if the frame pointer is required: we want to use the SFP->HFP elimination
12629 in that case.  But the test in update_eliminables doesn't know that we
12630 assume below that only the former elimination is performed. */
12631
12632 static bool
12633 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12634 {
12635 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12636 }
12637
12638 /* Return the hard frame pointer directly to bypass the stack bias. */
12639
12640 static rtx
12641 sparc_builtin_setjmp_frame_value (void)
12642 {
12643 return hard_frame_pointer_rtx;
12644 }
12645
12646 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  If !TARGET_FPU, make the FP
12647 registers and FP condition code registers fixed so that they won't be allocated. */
12648
12649 static void
12650 sparc_conditional_register_usage (void)
12651 {
12652 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12653 {
12654 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12655 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12656 }
12657 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12658 then honor it. */
12659 if (TARGET_ARCH32 && fixed_regs[5])
12660 fixed_regs[5] = 1;
12661 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12662 fixed_regs[5] = 0;
12663 if (! TARGET_V9)
12664 {
12665 int regno;
12666 for (regno = SPARC_FIRST_V9_FP_REG;
12667 regno <= SPARC_LAST_V9_FP_REG;
12668 regno++)
12669 fixed_regs[regno] = 1;
12670 /* %fcc0 is used by v8 and v9. */
12671 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12672 regno <= SPARC_LAST_V9_FCC_REG;
12673 regno++)
12674 fixed_regs[regno] = 1;
12675 }
12676 if (! TARGET_FPU)
12677 {
12678 int regno;
12679 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12680 fixed_regs[regno] = 1;
12681 }
12682 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12683 then honor it.  Likewise with g3 and g4. */
12684 if (fixed_regs[2] == 2)
12685 fixed_regs[2] = ! TARGET_APP_REGS;
12686 if (fixed_regs[3] == 2)
12687 fixed_regs[3] = ! TARGET_APP_REGS;
12688 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12689 fixed_regs[4] = ! TARGET_APP_REGS;
12690 else if (TARGET_CM_EMBMEDANY)
12691 fixed_regs[4] = 1;
12692 else if (fixed_regs[4] == 2)
12693 fixed_regs[4] = 0;
12694 if (TARGET_FLAT)
12695 {
12696 int regno;
12697 /* Disable leaf functions. */
12698 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12699 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12700 leaf_reg_remap [regno] = regno;
12701 }
12702 if (TARGET_VIS)
12703 global_regs[SPARC_GSR_REG] = 1;
12704 }
12705
12706 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12707
12708 - We can't load constants into FP registers.
12709 - We can't load FP constants into integer registers when soft-float,
12710 because there is no soft-float pattern with a r/F constraint.
12711 - We can't load FP constants into integer registers for TFmode unless
12712 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12713 - Try to reload integer constants (symbolic or otherwise) back into
12714 registers directly, rather than having them dumped to memory. */
12715
12716 static reg_class_t
12717 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12718 {
12719 machine_mode mode = GET_MODE (x);
12720 if (CONSTANT_P (x))
12721 {
12722 if (FP_REG_CLASS_P (rclass)
12723 || rclass == GENERAL_OR_FP_REGS
12724 || rclass == GENERAL_OR_EXTRA_FP_REGS
12725 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12726 || (mode == TFmode && ! const_zero_operand (x, mode)))
12727 return NO_REGS;
12728
12729 if (GET_MODE_CLASS (mode) == MODE_INT)
12730 return GENERAL_REGS;
12731
12732 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12733 {
12734 if (! FP_REG_CLASS_P (rclass)
12735 || !(const_zero_operand (x, mode)
12736 || const_all_ones_operand (x, mode)))
12737 return NO_REGS;
12738 }
12739 }
12740
12741 if (TARGET_VIS3
12742 && ! TARGET_ARCH64
12743 && (rclass == EXTRA_FP_REGS
12744 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12745 {
12746 int regno = true_regnum (x);
12747
12748 if (SPARC_INT_REG_P (regno))
12749 return (rclass == EXTRA_FP_REGS
12750 ? FP_REGS : GENERAL_OR_FP_REGS);
12751 }
12752
12753 return rclass;
12754 }
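
/* For instance, a CONST_INT being reloaded into GENERAL_REGS keeps
   GENERAL_REGS and is loaded back into an integer register directly,
   whereas any constant destined for one of the FP register classes, and
   any TFmode constant other than 0.0L, gets NO_REGS here and is
   typically forced to memory instead.  */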
12755
12756 /* Return true if we use LRA instead of the reload pass. */
12757
12758 static bool
12759 sparc_lra_p (void)
12760 {
12761 return TARGET_LRA;
12762 }
12763
12764 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12765 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12766
12767 const char *
12768 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12769 {
12770 char mulstr[32];
12771
12772 gcc_assert (! TARGET_ARCH64);
12773
12774 if (sparc_check_64 (operands[1], insn) <= 0)
12775 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12776 if (which_alternative == 1)
12777 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12778 if (GET_CODE (operands[2]) == CONST_INT)
12779 {
12780 if (which_alternative == 1)
12781 {
12782 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12783 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12784 output_asm_insn (mulstr, operands);
12785 return "srlx\t%L0, 32, %H0";
12786 }
12787 else
12788 {
12789 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12790 output_asm_insn ("or\t%L1, %3, %3", operands);
12791 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12792 output_asm_insn (mulstr, operands);
12793 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12794 return "mov\t%3, %L0";
12795 }
12796 }
12797 else if (rtx_equal_p (operands[1], operands[2]))
12798 {
12799 if (which_alternative == 1)
12800 {
12801 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12802 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12803 output_asm_insn (mulstr, operands);
12804 return "srlx\t%L0, 32, %H0";
12805 }
12806 else
12807 {
12808 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12809 output_asm_insn ("or\t%L1, %3, %3", operands);
12810 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12811 output_asm_insn (mulstr, operands);
12812 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12813 return "mov\t%3, %L0";
12814 }
12815 }
12816 if (sparc_check_64 (operands[2], insn) <= 0)
12817 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12818 if (which_alternative == 1)
12819 {
12820 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12821 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12822 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12823 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12824 output_asm_insn (mulstr, operands);
12825 return "srlx\t%L0, 32, %H0";
12826 }
12827 else
12828 {
12829 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12830 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12831 output_asm_insn ("or\t%L1, %3, %3", operands);
12832 output_asm_insn ("or\t%L2, %4, %4", operands);
12833 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12834 output_asm_insn (mulstr, operands);
12835 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12836 return "mov\t%3, %L0";
12837 }
12838 }
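
/* For the most general case above (register alternative 0, distinct
   non-constant operands, neither input known to be already
   zero-extended) and assuming OPCODE is "mulx", the emitted sequence is
   therefore roughly:

	srl	%L1, 0, %L1
	srl	%L2, 0, %L2
	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	mulx	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0

   i.e. the two 64-bit inputs are reassembled from their 32-bit halves in
   the temporaries %3 and %4, multiplied, and the product split back into
   the %H0:%L0 pair.  */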
12839
12840 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12841 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
12842 and INNER_MODE are the modes describing TARGET. */
12843
12844 static void
12845 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12846 machine_mode inner_mode)
12847 {
12848 rtx t1, final_insn, sel;
12849 int bmask;
12850
12851 t1 = gen_reg_rtx (mode);
12852
12853 elt = convert_modes (SImode, inner_mode, elt, true);
12854 emit_move_insn (gen_lowpart(SImode, t1), elt);
12855
12856 switch (mode)
12857 {
12858 case E_V2SImode:
12859 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12860 bmask = 0x45674567;
12861 break;
12862 case E_V4HImode:
12863 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12864 bmask = 0x67676767;
12865 break;
12866 case E_V8QImode:
12867 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12868 bmask = 0x77777777;
12869 break;
12870 default:
12871 gcc_unreachable ();
12872 }
12873
12874 sel = force_reg (SImode, GEN_INT (bmask));
12875 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12876 emit_insn (final_insn);
12877 }
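
/* To see why the bmask values above replicate ELT: the element is moved
   into the low 32 bits of T1, so for V8QImode it sits in byte 7 (the
   least significant byte) and the mask 0x77777777 makes every BSHUFFLE
   output byte select that byte; likewise 0x67676767 selects the low
   halfword (bytes 6 and 7) for every V4HImode slot, and 0x45674567
   selects the low word (bytes 4-7) for both V2SImode slots.  */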
12878
12879 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12880 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12881
12882 static void
12883 vector_init_fpmerge (rtx target, rtx elt)
12884 {
12885 rtx t1, t2, t2_low, t3, t3_low;
12886
12887 t1 = gen_reg_rtx (V4QImode);
12888 elt = convert_modes (SImode, QImode, elt, true);
12889 emit_move_insn (gen_lowpart (SImode, t1), elt);
12890
12891 t2 = gen_reg_rtx (V8QImode);
12892 t2_low = gen_lowpart (V4QImode, t2);
12893 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12894
12895 t3 = gen_reg_rtx (V8QImode);
12896 t3_low = gen_lowpart (V4QImode, t3);
12897 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12898
12899 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12900 }
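
/* Writing the replicated byte as E, the three FPMERGE steps above build
   up, roughly:

     t1     = { 0, 0, 0, E }
     t2     = fpmerge (t1, t1)         = { 0, 0, 0, 0, 0, 0, E, E }
     t3     = fpmerge (t2_low, t2_low) = { 0, 0, 0, 0, E, E, E, E }
     target = fpmerge (t3_low, t3_low) = { E, E, E, E, E, E, E, E }

   since FPMERGE interleaves the four bytes of its two 4-byte operands
   into an 8-byte result.  */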
12901
12902 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12903 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12904
12905 static void
12906 vector_init_faligndata (rtx target, rtx elt)
12907 {
12908 rtx t1 = gen_reg_rtx (V4HImode);
12909 int i;
12910
12911 elt = convert_modes (SImode, HImode, elt, true);
12912 emit_move_insn (gen_lowpart (SImode, t1), elt);
12913
12914 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12915 force_reg (SImode, GEN_INT (6)),
12916 const0_rtx));
12917
12918 for (i = 0; i < 4; i++)
12919 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12920 }
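
/* The ALIGNADDR of 6 sets GSR.align to 6, so each FALIGNDATA above
   extracts bytes 6..13 of the 16-byte concatenation T1:TARGET, i.e. it
   prepends the halfword held in the low 16 bits of T1 and shifts the
   previous contents of TARGET down by one halfword; after four
   iterations every halfword of TARGET holds a copy of ELT.  */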
12921
12922 /* Emit code to initialize TARGET to values for individual fields VALS. */
12923
12924 void
12925 sparc_expand_vector_init (rtx target, rtx vals)
12926 {
12927 const machine_mode mode = GET_MODE (target);
12928 const machine_mode inner_mode = GET_MODE_INNER (mode);
12929 const int n_elts = GET_MODE_NUNITS (mode);
12930 int i, n_var = 0;
12931 bool all_same = true;
12932 rtx mem;
12933
12934 for (i = 0; i < n_elts; i++)
12935 {
12936 rtx x = XVECEXP (vals, 0, i);
12937 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12938 n_var++;
12939
12940 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12941 all_same = false;
12942 }
12943
12944 if (n_var == 0)
12945 {
12946 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12947 return;
12948 }
12949
12950 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12951 {
12952 if (GET_MODE_SIZE (inner_mode) == 4)
12953 {
12954 emit_move_insn (gen_lowpart (SImode, target),
12955 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12956 return;
12957 }
12958 else if (GET_MODE_SIZE (inner_mode) == 8)
12959 {
12960 emit_move_insn (gen_lowpart (DImode, target),
12961 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12962 return;
12963 }
12964 }
12965 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12966 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12967 {
12968 emit_move_insn (gen_highpart (word_mode, target),
12969 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12970 emit_move_insn (gen_lowpart (word_mode, target),
12971 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12972 return;
12973 }
12974
12975 if (all_same && GET_MODE_SIZE (mode) == 8)
12976 {
12977 if (TARGET_VIS2)
12978 {
12979 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12980 return;
12981 }
12982 if (mode == V8QImode)
12983 {
12984 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12985 return;
12986 }
12987 if (mode == V4HImode)
12988 {
12989 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12990 return;
12991 }
12992 }
12993
12994 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12995 for (i = 0; i < n_elts; i++)
12996 emit_move_insn (adjust_address_nv (mem, inner_mode,
12997 i * GET_MODE_SIZE (inner_mode)),
12998 XVECEXP (vals, 0, i));
12999 emit_move_insn (target, mem);
13000 }
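
/* As a source-level sketch (GNU C vector types, arbitrary names):

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi splat (short x) { return (v4hi) { x, x, x, x }; }

   takes the all_same path above, using BSHUFFLE when VIS2 is available
   and FALIGNDATA otherwise; initializers with differing variable
   elements fall through to the element-by-element stores into a stack
   temporary at the end of the function.  */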
13001
13002 /* Implement TARGET_SECONDARY_RELOAD. */
13003
13004 static reg_class_t
13005 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13006 machine_mode mode, secondary_reload_info *sri)
13007 {
13008 enum reg_class rclass = (enum reg_class) rclass_i;
13009
13010 sri->icode = CODE_FOR_nothing;
13011 sri->extra_cost = 0;
13012
13013 /* We need a temporary when loading/storing a HImode/QImode value
13014 between memory and the FPU registers. This can happen when combine puts
13015 a paradoxical subreg in a float/fix conversion insn. */
13016 if (FP_REG_CLASS_P (rclass)
13017 && (mode == HImode || mode == QImode)
13018 && (GET_CODE (x) == MEM
13019 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13020 && true_regnum (x) == -1)))
13021 return GENERAL_REGS;
13022
13023 /* On 32-bit we need a temporary when loading/storing a DFmode value
13024 between unaligned memory and the upper FPU registers. */
13025 if (TARGET_ARCH32
13026 && rclass == EXTRA_FP_REGS
13027 && mode == DFmode
13028 && GET_CODE (x) == MEM
13029 && ! mem_min_alignment (x, 8))
13030 return FP_REGS;
13031
13032 if (((TARGET_CM_MEDANY
13033 && symbolic_operand (x, mode))
13034 || (TARGET_CM_EMBMEDANY
13035 && text_segment_operand (x, mode)))
13036 && ! flag_pic)
13037 {
13038 if (in_p)
13039 sri->icode = direct_optab_handler (reload_in_optab, mode);
13040 else
13041 sri->icode = direct_optab_handler (reload_out_optab, mode);
13042 return NO_REGS;
13043 }
13044
13045 if (TARGET_VIS3 && TARGET_ARCH32)
13046 {
13047 int regno = true_regnum (x);
13048
13049 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13050 to move 8-byte values in 4-byte pieces. This only works via
13051 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13052 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13053 an FP_REGS intermediate move. */
13054 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13055 || ((general_or_i64_p (rclass)
13056 || rclass == GENERAL_OR_FP_REGS)
13057 && SPARC_FP_REG_P (regno)))
13058 {
13059 sri->extra_cost = 2;
13060 return FP_REGS;
13061 }
13062 }
13063
13064 return NO_REGS;
13065 }
13066
13067 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13068
13069 On SPARC, when VIS3 is not available, it is not possible to move data
13070 directly between GENERAL_REGS and FP_REGS. */
13071
13072 static bool
13073 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13074 reg_class_t class2)
13075 {
13076 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13077 && (! TARGET_VIS3
13078 || GET_MODE_SIZE (mode) > 8
13079 || GET_MODE_SIZE (mode) < 4));
13080 }
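
/* Concretely: without VIS3 every copy between an integer register and an
   FP register has to bounce through a stack slot, while with VIS3 the
   4-byte and 8-byte cases can use the direct move instructions and only
   smaller or larger modes still need the memory intermediate.  */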
13081
13082 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13083
13084 get_secondary_mem widens its argument to BITS_PER_WORD, which does not
13085 work on v9 because the movsi and movsf patterns don't handle r/f moves.
13086 For v8 we copy the default definition. */
13087
13088 static machine_mode
13089 sparc_secondary_memory_needed_mode (machine_mode mode)
13090 {
13091 if (TARGET_ARCH64)
13092 {
13093 if (GET_MODE_BITSIZE (mode) < 32)
13094 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13095 return mode;
13096 }
13097 else
13098 {
13099 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13100 return mode_for_size (BITS_PER_WORD,
13101 GET_MODE_CLASS (mode), 0).require ();
13102 return mode;
13103 }
13104 }
13105
13106 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13107 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13108
13109 bool
13110 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13111 {
13112 enum rtx_code rc = GET_CODE (operands[1]);
13113 machine_mode cmp_mode;
13114 rtx cc_reg, dst, cmp;
13115
13116 cmp = operands[1];
13117 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13118 return false;
13119
13120 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13121 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13122
13123 cmp_mode = GET_MODE (XEXP (cmp, 0));
13124 rc = GET_CODE (cmp);
13125
13126 dst = operands[0];
13127 if (! rtx_equal_p (operands[2], dst)
13128 && ! rtx_equal_p (operands[3], dst))
13129 {
13130 if (reg_overlap_mentioned_p (dst, cmp))
13131 dst = gen_reg_rtx (mode);
13132
13133 emit_move_insn (dst, operands[3]);
13134 }
13135 else if (operands[2] == dst)
13136 {
13137 operands[2] = operands[3];
13138
13139 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13140 rc = reverse_condition_maybe_unordered (rc);
13141 else
13142 rc = reverse_condition (rc);
13143 }
13144
13145 if (XEXP (cmp, 1) == const0_rtx
13146 && GET_CODE (XEXP (cmp, 0)) == REG
13147 && cmp_mode == DImode
13148 && v9_regcmp_p (rc))
13149 cc_reg = XEXP (cmp, 0);
13150 else
13151 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13152
13153 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13154
13155 emit_insn (gen_rtx_SET (dst,
13156 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13157
13158 if (dst != operands[0])
13159 emit_move_insn (operands[0], dst);
13160
13161 return true;
13162 }
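
/* A source-level sketch of what this enables (arbitrary identifiers):

     long m (long a, long b) { return a < 0 ? b : a; }

   On 64-bit, the LT comparison of a DImode register against zero takes
   the v9_regcmp_p path above, so the register itself serves as the
   condition value and the selection can be done with a conditional move
   on register contents (MOVR) rather than with a branch.  */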
13163
13164 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13165 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13166 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13167 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13168 code to be used for the condition mask. */
13169
13170 void
13171 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13172 {
13173 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13174 enum rtx_code code = GET_CODE (operands[3]);
13175
13176 mask = gen_reg_rtx (Pmode);
13177 cop0 = operands[4];
13178 cop1 = operands[5];
13179 if (code == LT || code == GE)
13180 {
13181 rtx t;
13182
13183 code = swap_condition (code);
13184 t = cop0; cop0 = cop1; cop1 = t;
13185 }
13186
13187 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13188
13189 fcmp = gen_rtx_UNSPEC (Pmode,
13190 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13191 fcode);
13192
13193 cmask = gen_rtx_UNSPEC (DImode,
13194 gen_rtvec (2, mask, gsr),
13195 ccode);
13196
13197 bshuf = gen_rtx_UNSPEC (mode,
13198 gen_rtvec (3, operands[1], operands[2], gsr),
13199 UNSPEC_BSHUFFLE);
13200
13201 emit_insn (gen_rtx_SET (mask, fcmp));
13202 emit_insn (gen_rtx_SET (gsr, cmask));
13203
13204 emit_insn (gen_rtx_SET (operands[0], bshuf));
13205 }
13206
13207 /* On SPARC, return 4 for any mode that naturally allocates into the
13208 floating-point registers, and UNITS_PER_WORD otherwise. */
13209
13210 unsigned int
13211 sparc_regmode_natural_size (machine_mode mode)
13212 {
13213 int size = UNITS_PER_WORD;
13214
13215 if (TARGET_ARCH64)
13216 {
13217 enum mode_class mclass = GET_MODE_CLASS (mode);
13218
13219 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13220 size = 4;
13221 }
13222
13223 return size;
13224 }
13225
13226 /* Implement TARGET_HARD_REGNO_NREGS.
13227
13228 On SPARC, ordinary registers hold 32 bits worth; this means both
13229 integer and floating point registers. On v9, integer regs hold 64
13230 bits worth; floating point regs hold 32 bits worth (this includes the
13231 new fp regs as even the odd ones are included in the hard register
13232 count). */
13233
13234 static unsigned int
13235 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13236 {
13237 if (regno == SPARC_GSR_REG)
13238 return 1;
13239 if (TARGET_ARCH64)
13240 {
13241 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13242 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13243 return CEIL (GET_MODE_SIZE (mode), 4);
13244 }
13245 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13246 }
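
/* For example, on TARGET_ARCH64 a DFmode value occupies a single integer
   register (CEIL (8, 8) == 1) but two floating-point registers
   (CEIL (8, 4) == 2), whereas on 32-bit it takes two registers of either
   kind.  */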
13247
13248 /* Implement TARGET_HARD_REGNO_MODE_OK.
13249
13250 ??? Because of the funny way we pass parameters we should allow certain
13251 ??? types of float/complex values to be in integer registers during
13252 ??? RTL generation. This only matters on arch32. */
13253
13254 static bool
13255 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13256 {
13257 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13258 }
13259
13260 /* Implement TARGET_MODES_TIEABLE_P.
13261
13262 For V9 we have to deal with the fact that only the lower 32 floating
13263 point registers are 32-bit addressable. */
13264
13265 static bool
13266 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13267 {
13268 enum mode_class mclass1, mclass2;
13269 unsigned short size1, size2;
13270
13271 if (mode1 == mode2)
13272 return true;
13273
13274 mclass1 = GET_MODE_CLASS (mode1);
13275 mclass2 = GET_MODE_CLASS (mode2);
13276 if (mclass1 != mclass2)
13277 return false;
13278
13279 if (! TARGET_V9)
13280 return true;
13281
13282 /* Classes are the same and we are V9 so we have to deal with upper
13283 vs. lower floating point registers. If one of the modes is a
13284 4-byte mode, and the other is not, we have to mark them as not
13285 tieable because only the lower 32 floating point registers are
13286 addressable 32 bits at a time.
13287
13288 We can't just test explicitly for SFmode, otherwise we won't
13289 cover the vector mode cases properly. */
13290
13291 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13292 return true;
13293
13294 size1 = GET_MODE_SIZE (mode1);
13295 size2 = GET_MODE_SIZE (mode2);
13296 if ((size1 > 4 && size2 == 4)
13297 || (size2 > 4 && size1 == 4))
13298 return false;
13299
13300 return true;
13301 }
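
/* So, for instance, SFmode and DFmode are not tieable on V9 (4 bytes
   vs. 8 bytes), while DFmode and TFmode are (both wider than 4 bytes);
   on pre-V9 targets any two modes of the same class are tieable.  */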
13302
13303 /* Implement TARGET_CSTORE_MODE. */
13304
13305 static scalar_int_mode
13306 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13307 {
13308 return (TARGET_ARCH64 ? DImode : SImode);
13309 }
13310
13311 /* Return the compound expression made of T1 and T2. */
13312
13313 static inline tree
13314 compound_expr (tree t1, tree t2)
13315 {
13316 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13317 }
13318
13319 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13320
13321 static void
13322 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13323 {
13324 if (!TARGET_FPU)
13325 return;
13326
13327 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13328 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13329
13330 /* We generate the equivalent of feholdexcept (&fenv_var):
13331
13332 unsigned int fenv_var;
13333 __builtin_store_fsr (&fenv_var);
13334
13335 unsigned int tmp1_var;
13336 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13337
13338 __builtin_load_fsr (&tmp1_var); */
13339
13340 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13341 TREE_ADDRESSABLE (fenv_var) = 1;
13342 tree fenv_addr = build_fold_addr_expr (fenv_var);
13343 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13344 tree hold_stfsr
13345 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13346 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13347
13348 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13349 TREE_ADDRESSABLE (tmp1_var) = 1;
13350 tree masked_fenv_var
13351 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13352 build_int_cst (unsigned_type_node,
13353 ~(accrued_exception_mask | trap_enable_mask)));
13354 tree hold_mask
13355 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13356 NULL_TREE, NULL_TREE);
13357
13358 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13359 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13360 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13361
13362 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13363
13364 /* We reload the value of tmp1_var to clear the exceptions:
13365
13366 __builtin_load_fsr (&tmp1_var); */
13367
13368 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13369
13370 /* We generate the equivalent of feupdateenv (&fenv_var):
13371
13372 unsigned int tmp2_var;
13373 __builtin_store_fsr (&tmp2_var);
13374
13375 __builtin_load_fsr (&fenv_var);
13376
13377 if (SPARC_LOW_FE_EXCEPT_VALUES)
13378 tmp2_var >>= 5;
13379 __atomic_feraiseexcept ((int) tmp2_var); */
13380
13381 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13382 TREE_ADDRESSABLE (tmp2_var) = 1;
13383 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13384 tree update_stfsr
13385 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13386 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13387
13388 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13389
13390 tree atomic_feraiseexcept
13391 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13392 tree update_call
13393 = build_call_expr (atomic_feraiseexcept, 1,
13394 fold_convert (integer_type_node, tmp2_var));
13395
13396 if (SPARC_LOW_FE_EXCEPT_VALUES)
13397 {
13398 tree shifted_tmp2_var
13399 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13400 build_int_cst (unsigned_type_node, 5));
13401 tree update_shift
13402 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13403 update_call = compound_expr (update_shift, update_call);
13404 }
13405
13406 *update
13407 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13408 }
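
/* This hook comes into play when expanding a compound assignment to an
   atomic floating-point object, e.g. (a sketch with arbitrary names):

     _Atomic float f;
     void bump (float x) { f += x; }

   The compare-and-exchange loop must run with FP traps disabled and with
   the accrued exceptions of the successful iteration re-raised
   afterwards, which is what the hold/clear/update sequences built above
   provide.  */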
13409
13410 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13411
13412 SImode loads to floating-point registers are not zero-extended.
13413 The definition for LOAD_EXTEND_OP specifies that integer loads
13414 narrower than BITS_PER_WORD will be zero-extended. As a result,
13415 we inhibit changes from SImode unless they are to a mode that is
13416 identical in size.
13417
13418 Likewise for SFmode, since word-mode paradoxical subregs are
13419 problematic on big-endian architectures. */
13420
13421 static bool
13422 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13423 reg_class_t rclass)
13424 {
13425 if (TARGET_ARCH64
13426 && GET_MODE_SIZE (from) == 4
13427 && GET_MODE_SIZE (to) != 4)
13428 return !reg_classes_intersect_p (rclass, FP_REGS);
13429 return true;
13430 }
13431
13432 #include "gt-sparc.h"